Changeset 1457 for proto/SymbolTable


Ignore:
Timestamp:
Sep 16, 2011, 5:42:17 PM (8 years ago)
Author:
vla24
Message:

SymbolTable: updated implementation after Ken's refactoring

Location:
proto/SymbolTable
Files:
7 edited
1 moved

Legend:

Unmodified
Added
Removed
  • proto/SymbolTable/Makefile

    r1442 r1457  
    1313HASH_SYMBOLTABLE_TEMPLATE=symtab_hash_template.cpp
    1414SYMBOLTABLE_IDENTITY_TEMPLATE=symtab_identity_template.cpp
    15 PBGS_SYMBOLTABLE_TEMPLATE=symtab_pbgs_template.cpp
     15PBGS_SYMBOLTABLE_ID_TEMPLATE=symtab_pbgs_identity_template.cpp
    1616PBGS_SYMBOLTABLE_LOG_TEMPLATE=symtab_pbgs_log_template.cpp
    1717PBGS_SYMBOLTABLE_DIV_TEMPLATE=symtab_pbgs_div_template.cpp
     
    3232
    3333symtab_pbgs_id: $(PABLO_SYMTAB_PBS) # Parallel bitstream based group sorting
    34         python $(PABLO_COMPILER) $(PABLO_FLAGS) $(PABLO_SYMTAB_PBS) -t $(PBGS_SYMBOLTABLE_TEMPLATE) -o $(OUTFILE)
     34        python $(PABLO_COMPILER) $(PABLO_FLAGS) $(PABLO_SYMTAB_PBS) -t $(PBGS_SYMBOLTABLE_ID_TEMPLATE) -o $(OUTFILE)
    3535
    3636symtab_pbgs_id_adv:$(PABLO_SYMTAB_PBS_ADV) # Parallel bitstream based group sorting using Advance32 and Interpose32
    37         python $(PABLO_COMPILER) $(PABLO_FLAGS) $(PABLO_SYMTAB_PBS_ADV) -t $(PBGS_SYMBOLTABLE_TEMPLATE) -o $(OUTFILE)
     37        python $(PABLO_COMPILER) $(PABLO_FLAGS) $(PABLO_SYMTAB_PBS_ADV) -t $(PBGS_SYMBOLTABLE_ID_TEMPLATE) -o $(OUTFILE)
    3838
    3939symtab_pbgs_log:$(PABLO_SYMTAB_PBS_LOG)
  • proto/SymbolTable/symtab_hash_template.cpp

    r1428 r1457  
     1#define TEMPLATED_SIMD_LIB
     2
    13#include <stdio.h>
    24#include <stdlib.h>
     
    68#include <../lib_simd.h>
    79#include <hash_symbol_table.h>
    8 
    910#include <queue>
    1011#include <string>
     12
     13typedef long ScanBlock;
     14typedef SIMD_type BytePack;
     15typedef SIMD_type BitBlock;
     16
     17using namespace std;
    1118
    1219#define DEBUG 0
     
    1623#define OVERLAP_BUFSIZE 2*(sizeof(SIMD_type))
    1724
    18 typedef long ScanBlock;
    19 typedef SIMD_type BytePack;
    20 typedef SIMD_type BitBlock;
    21 
    22 using namespace std;
    23 
    2425#include <../carryQ.h>
    2526#include <xmldecl.h>
    26 #include <xml_error.c>
    27 #include <xmldecl.c>
    2827#include <namechars.h>
    29 
    3028#include <../perflib/perfsec.h>
    3129#include <../s2p.h>
     
    3331#include <TagMatcher.h>
    3432#include <LineColTracker.h>
     33#include <ErrorUtil.h>
     34#include <ErrorTracker.h>
     35#include <XMLTestSuiteError.h>
     36
     37#include <xml_error.c>
     38#include <ErrorUtil.cpp>
     39#include <ErrorTracker.cpp>
     40#include <XMLTestSuiteError.cpp>
    3541
    3642#ifdef BUFFER_PROFILING
     
    5359char * source;
    5460LineColTracker tracker;
    55 
    56 static inline void ReportError(const char * error_msg, int error_pos_in_block) {
    57   int error_line, error_column;
    58   tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
    59   fprintf(stderr, "%s at line %i, column %i\n", error_msg, error_line, error_column);
    60 }
    61 
    62 class ErrorTracker {
    63 public:
    64     ErrorTracker() { noted_pos_in_block = -1;}
    65 
    66     inline void NoteError(const char * error_msg, BitBlock err_strm) {
    67       int pos_in_block = count_forward_zeroes(err_strm);
    68       if ((noted_pos_in_block == -1) || (noted_pos_in_block > pos_in_block)) {
    69         noted_pos_in_block = pos_in_block;
    70         noted_error = error_msg;
    71       }
    72     }
    73 
    74     inline void If_Error_Report_First() {
    75       if (noted_pos_in_block > -1) {
    76               int error_line, error_column;
    77               ReportError(noted_error, noted_pos_in_block);
    78               exit(-1);
    79       }
    80     }
    81 
    82 private:
    83   const char * noted_error;
    84   int noted_pos_in_block;
    85 };
    86 
    87 
    8861TagMatcher matcher;
     62ErrorTracker error_tracker;
    8963BitBlock EOF_mask = simd_const_1(1);
    90 
    91 ErrorTracker error_tracker;
    9264
    9365queue <size_t> elem_starts_buf;
     
    9668HashSymbolTable symbol_table;
    9769
    98 static inline int StreamScan(ScanBlock * stream, int blk_count, int ProcessPos(int)) {
     70/* StreamScan & Post Process Declarations */
     71//      static inline int StreamScanToFirst(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block)
     72static inline int StreamScan(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block);
     73
     74static inline int NameStrt_check(int pos);
     75static inline int Name_check(int pos);
     76static inline int PIName_check(int pos);
     77static inline int CD_check(int pos);
     78static inline int GenRef_check(int pos);
     79static inline int HexRef_check(int pos);
     80static inline int DecRef_check(int pos);
     81static inline int AttRef_check(int pos);
     82
     83@global
     84
     85static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits);
     86static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask);
     87static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, Tag_Callouts & tag_Callouts, int chars_avail);
     88static inline void do_symbol_table_lookup();
     89
     90void do_process(FILE *infile, FILE *outfile);
     91
     92int main(int argc, char * argv[]) {
     93        char * infilename, * outfilename;
     94        FILE *infile, *outfile;
     95        struct stat fileinfo;
     96
     97        if (argc < 2) {
     98                printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
     99                exit(-1);
     100        }
     101
     102        infilename = argv[1];
     103        stat(infilename, &fileinfo);
     104        infile = fopen(infilename, "rb");
     105        if (!infile) {
     106                fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
     107                exit(-1);
     108        }
     109
     110        if (argc < 3) outfile = stdout;
     111        else {
     112                outfilename = argv[2];
     113                outfile = fopen(outfilename, "wb");
     114                if (!outfile) {
     115                        fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
     116                        exit(-1);
     117                }
     118        }
     119
     120//      PERF_SEC_BIND(1);
     121
     122        PERF_SEC_INIT(parser_timer);
     123
     124        do_process(infile, outfile);
     125
     126        PERF_SEC_DUMP(parser_timer);
     127
     128        PERF_SEC_DESTROY(parser_timer);
     129
     130        fclose(infile);
     131        fclose(outfile);
     132
     133        return(0);
     134}
     135
     136/* s2p Definitions */
     137static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
     138  s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
     139        basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
     140}
     141
     142static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
     143  s2p_do_block(U8, basis_bits);
     144  basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
     145  basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
     146  basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
     147  basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
     148  basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
     149  basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
     150  basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
     151  basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
     152}
     153
     154/* StreamScan & Post Process Definitions */
     155static inline int StreamScan(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block) {
     156
    99157        int blk;
    100         int block_pos = 0;
     158        int block_pos = 0;
     159        int pos;
     160
    101161        for (blk = 0; blk < blk_count; blk++) {
    102                 ScanBlock s = stream[blk];
    103                 while(s) {
    104                         int code = (ProcessPos(cfzl(s) + block_pos));
    105                         if (code) return code;
     162                ScanBlock s = stream[blk];
     163                while(s) {
     164                        pos = (cfzl(s) + block_pos);
     165                        int code = (ProcessPos(pos));
     166                        if (code) {
     167                                *error_pos_in_block = pos;
     168                                return code; // error code
     169                        }
    106170                        s = s & (s-1);  // clear rightmost bit.
    107171                }
     
    112176
    113177static inline int ElemStrt_check(int pos) {
    114         int block_pos = block_base + pos;
     178        int block_pos = block_base + pos;
    115179        elem_starts_buf.push(buffer_base + block_pos);
    116         return 0;
     180        return 0;
    117181}
    118182
    119183static inline int ElemEnd_check(int pos) {
    120         int block_pos = block_base + pos;
     184        int block_pos = block_base + pos;
    121185        elem_ends_buf.push(buffer_base + block_pos);
    122         return 0;
     186        return 0;
    123187}
    124188
    125189static inline int NameStrt_check(int pos) {
    126         int block_pos = block_base + pos;
     190        int block_pos = block_base + pos;
    127191        if(XML_10_UTF8_NameStrt_bytes((unsigned char*)&source[block_pos]) == 0){
    128               ReportError("name start error", pos);
    129               exit(-1);
     192              return XMLTestSuiteError::NAME_START;
    130193        }
    131194        return 0;
     
    135198        int block_pos = block_base + pos;
    136199        if(XML_10_UTF8_NameChar_bytes((unsigned char*)&source[block_pos]) == 0){
    137               ReportError("name error", pos);
    138               exit(-1);
     200                  return XMLTestSuiteError::NAME;
    139201        }
    140202        return 0;
     
    144206        int block_pos = block_base + pos;
    145207        int file_pos = block_pos+buffer_base;
    146         printf ("%s:%i\n",__FUNCTION__,pos);
    147208        if (at_XxMmLll<ASCII>((unsigned char*)&source[block_pos]) && (source[block_pos+3]=='?' || source[block_pos+3]<= ' ')) {
    148209              // "<?xml" legal at start of file.
    149               if ((file_pos == 2) && at_XmlDecl_start<ASCII>((unsigned char*)&source[0])) return 0;
    150               ReportError("[Xx][Mm][Ll] illegal as PI name", pos);
    151               exit(-1);
     210              if (!((file_pos == 2) && at_XmlDecl_start<ASCII>((unsigned char*)&source[0]))) {
     211                  return XMLTestSuiteError::XMLPINAME;
     212              }
    152213        }
    153214        return 0;
     
    157218        int block_pos = block_base + pos;
    158219        if (!at_CDATA1<ASCII>((unsigned char*)&source[block_pos])){
    159               ReportError("CDATA error", pos);
    160               exit(-1);
     220                  return XMLTestSuiteError::CDATA;
    161221        }
    162222        return 0;
     
    167227        unsigned char* s = (unsigned char*)&source[block_pos];
    168228        if (!(at_Ref_gt<ASCII>(s)||at_Ref_lt<ASCII>(s)||at_Ref_amp<ASCII>(s)||at_Ref_quot<ASCII>(s)||at_Ref_apos<ASCII>(s))){
    169               ReportError("Undefined reference", pos);
    170               exit(-1);
     229              return XMLTestSuiteError::UNDEFREF;
    171230        }
    172231        return 0;
     
    180239          ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4);
    181240          if (ch_val> 0x10FFFF ){
    182             ReportError("Illegal character reference", pos);
    183             exit(-1);
     241                return XMLTestSuiteError::CHARREF;
    184242          }
    185243          s++;
    186244        }
    187245        if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    188           ReportError("Illegal character reference", pos);
    189           exit(-1);
     246          return XMLTestSuiteError::CHARREF;
    190247        }
    191248        else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    192           ReportError("Illegal XML 1.0 character reference", pos);
    193           exit(-1);
     249          return XMLTestSuiteError::XML10CHARREF;
    194250        }
    195251        return 0;
     
    203259          ch_val = DigitVal<ASCII>(s[0]) + ch_val*10;
    204260          if (ch_val> 0x10FFFF ){
    205             ReportError("Illegal character reference", pos);
    206             exit(-1);
     261                        return XMLTestSuiteError::CHARREF;
    207262          }
    208263          s++;
    209264        }
    210265        if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    211           ReportError("Illegal character reference", pos);
    212           exit(-1);
     266                  return XMLTestSuiteError::CHARREF;
    213267        }
    214268        else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    215           ReportError("Illegal XML 1.0 character reference", pos);
    216           exit(-1);
     269                  return XMLTestSuiteError::XML10CHARREF;
    217270        }
    218271        return 0;
     
    239292          }
    240293          if (ch_val==60){
    241             ReportError("Attribute values contain '<' characters after reference expansion", pos);
    242             exit(-1);
     294            return XMLTestSuiteError::ATTREF;
    243295          }
    244296        }
    245297        else if(at_Ref_lt<ASCII>(s)){
    246           ReportError("Attribute values contain '<' characters after reference expansion", pos);
    247           exit(-1);
    248         }
    249         return 0;
    250 }
    251 
    252 
    253 
    254 @global
    255 
    256 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
    257   s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
    258         basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
    259 }
    260 
    261 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
    262   s2p_do_block(U8, basis_bits);
    263   basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
    264   basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
    265   basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
    266   basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
    267   basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
    268   basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
    269   basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
    270   basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
     298          return XMLTestSuiteError::ATTREF;
     299        }
     300        return 0;
    271301}
    272302
     
    281311        int length = end - start;
    282312
    283         int gid = -1;
    284 
    285 
    286313        //lookup or insert to symbol table
    287314#if DEBUG
     
    294321#endif
    295322
    296         gid = symbol_table.Lookup_or_Insert_Name(source + start - buffer_base, length);
     323        int gid = symbol_table.Lookup_or_Insert_Name(source + start - buffer_base, length);
    297324        gids.push_back(gid);
    298325    }
     
    301328static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, Tag_Callouts & tag_Callouts, int chars_avail){
    302329
    303     //NOTE: We are NOT handling the case for an extremely long symbol name (length > 1541).
    304     //      TagMatcher will crash if we feed a long symbol name.
    305     //      Sample file: test/long_sym_name.xml
    306 
    307             tracker.StoreNewlines(lex.LF);
    308 
    309             if ( bitblock_has_bit(tag_Callouts.ElemName_starts))
    310             {
    311                 StreamScan((ScanBlock *) &tag_Callouts.ElemName_starts, sizeof(BitBlock)/sizeof(ScanBlock), ElemStrt_check);
    312             }
    313 
    314             if ( bitblock_has_bit(tag_Callouts.ElemName_ends) )
    315             {
    316                 StreamScan((ScanBlock *) &tag_Callouts.ElemName_ends, sizeof(BitBlock)/sizeof(ScanBlock), ElemEnd_check);
    317             }
    318 
    319             do_symbol_table_lookup();
    320 
    321                 if (bitblock_has_bit(simd_or(check_streams.non_ascii_name_starts, check_streams.non_ascii_names))) {
    322                   StreamScan((ScanBlock *) &check_streams.non_ascii_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), NameStrt_check);
    323                   StreamScan((ScanBlock *) &check_streams.non_ascii_names, sizeof(BitBlock)/sizeof(ScanBlock), Name_check);
    324                 }
    325 
    326                 if (bitblock_has_bit(ctCDPI_Callouts.PI_name_starts)){
    327                   StreamScan((ScanBlock *) &ctCDPI_Callouts.PI_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), PIName_check);
    328                 }
    329 
    330                 if (bitblock_has_bit(ctCDPI_Callouts.CD_starts)){
    331                   StreamScan((ScanBlock *) &ctCDPI_Callouts.CD_starts, sizeof(BitBlock)/sizeof(ScanBlock), CD_check);
    332                 }
    333 
    334                 if (bitblock_has_bit(ref_Callouts.GenRef_starts)){
    335                   StreamScan((ScanBlock *) &ref_Callouts.GenRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), GenRef_check);
    336                 }
    337 
    338                 if (bitblock_has_bit(ref_Callouts.DecRef_starts)){
    339                   StreamScan((ScanBlock *) &ref_Callouts.DecRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), DecRef_check);
    340                 }
    341 
    342                 if (bitblock_has_bit(ref_Callouts.HexRef_starts)){
    343                   StreamScan((ScanBlock *) &ref_Callouts.HexRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), HexRef_check);
    344                 }
    345 
    346                 if (bitblock_has_bit(check_streams.att_refs)){
    347                   StreamScan((ScanBlock *) &check_streams.att_refs, sizeof(BitBlock)/sizeof(ScanBlock), AttRef_check);
    348                 }
    349 
    350                 error_tracker.If_Error_Report_First();
    351 
    352                 matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
    353                 tracker.AdvanceBlock();
     330    tracker.StoreNewlines(lex.LF);
     331    int rv, error_pos_in_block, error_line, error_column;
     332
     333    if ( bitblock_has_bit(tag_Callouts.ElemName_starts))
     334    {
     335        StreamScan((ScanBlock *) &tag_Callouts.ElemName_starts, sizeof(BitBlock)/sizeof(ScanBlock), ElemStrt_check, &error_pos_in_block);
     336    }
     337
     338    if ( bitblock_has_bit(tag_Callouts.ElemName_ends) )
     339    {
     340        StreamScan((ScanBlock *) &tag_Callouts.ElemName_ends, sizeof(BitBlock)/sizeof(ScanBlock), ElemEnd_check, &error_pos_in_block);
     341    }
     342
     343    do_symbol_table_lookup();
     344
     345
     346    if (bitblock_has_bit(simd_or(check_streams.non_ascii_name_starts, check_streams.non_ascii_names))) {
     347      rv = StreamScan((ScanBlock *) &check_streams.non_ascii_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), NameStrt_check, &error_pos_in_block);
     348      if (rv) {
     349              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     350              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     351              exit(-1);
     352      }
     353
     354      rv = StreamScan((ScanBlock *) &check_streams.non_ascii_names, sizeof(BitBlock)/sizeof(ScanBlock), Name_check, &error_pos_in_block);
     355      if (rv) {
     356              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     357              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     358              exit(-1);
     359      }
     360    }
     361
     362    if (bitblock_has_bit(ctCDPI_Callouts.PI_name_starts)){
     363      rv = StreamScan((ScanBlock *) &ctCDPI_Callouts.PI_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), PIName_check, &error_pos_in_block);
     364      if (rv) {
     365              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     366              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     367              exit(-1);
     368      }
     369    }
     370
     371    if (bitblock_has_bit(ctCDPI_Callouts.CD_starts)){
     372      rv = StreamScan((ScanBlock *) &ctCDPI_Callouts.CD_starts, sizeof(BitBlock)/sizeof(ScanBlock), CD_check, &error_pos_in_block);
     373      if (rv) {
     374              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     375              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     376              exit(-1);
     377      }
     378    }
     379
     380    if (bitblock_has_bit(ref_Callouts.GenRef_starts)){
     381      rv = StreamScan((ScanBlock *) &ref_Callouts.GenRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), GenRef_check, &error_pos_in_block);
     382      if (rv) {
     383              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     384              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     385              exit(-1);
     386      }
     387    }
     388
     389    if (bitblock_has_bit(ref_Callouts.DecRef_starts)){
     390      rv = StreamScan((ScanBlock *) &ref_Callouts.DecRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), DecRef_check, &error_pos_in_block);
     391      if (rv) {
     392              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     393              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     394              exit(-1);
     395      }
     396    }
     397
     398    if (bitblock_has_bit(ref_Callouts.HexRef_starts)){
     399      rv = StreamScan((ScanBlock *) &ref_Callouts.HexRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), HexRef_check, &error_pos_in_block);
     400      if (rv) {
     401              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     402              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     403              exit(-1);
     404      }
     405    }
     406
     407    if (bitblock_has_bit(check_streams.att_refs)){
     408      rv = StreamScan((ScanBlock *) &check_streams.att_refs, sizeof(BitBlock)/sizeof(ScanBlock), AttRef_check, &error_pos_in_block);
     409      if (rv) {
     410              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     411              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     412              exit(-1);
     413      }
     414    }
     415
     416    if(error_tracker.Has_Noted_Error()){
     417            tracker.get_Line_and_Column(error_tracker.Noted_Pos_In_Block(), error_line, error_column);
     418            ReportError(error_tracker.Noted_Error_Msg(), error_line, error_column);
     419            exit(-1);
     420    }
     421
     422    matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
     423    tracker.AdvanceBlock();
    354424}
    355425
     
    427497
    428498/* Full Buffers */
    429     int block_segment_num = 0;
    430499    while (chars_avail == BUFFER_SIZE) {
    431500      PERF_SEC_START(parser_timer);
     
    449518      buf_pos += chars_avail;
    450519      buffer_base = buf_pos;
    451       block_segment_num++;
    452520    }
    453521/* Final Partial Buffer */
     
    488556#endif
    489557}
    490 
    491 
    492 
    493 int
    494 main(int argc, char * argv[]) {
    495         char * infilename, * outfilename;
    496         FILE *infile, *outfile;
    497         struct stat fileinfo;
    498 
    499         if (argc < 2) {
    500                 printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
    501                 exit(-1);
    502         }
    503 
    504         infilename = argv[1];
    505         stat(infilename, &fileinfo);
    506         infile = fopen(infilename, "rb");
    507         if (!infile) {
    508                 fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
    509                 exit(-1);
    510         }
    511 
    512         if (argc < 3) outfile = stdout;
    513         else {
    514                 outfilename = argv[2];
    515                 outfile = fopen(outfilename, "wb");
    516                 if (!outfile) {
    517                         fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
    518                         exit(-1);
    519                 }
    520         }
    521 
    522 //      PERF_SEC_BIND(1);
    523 
    524         PERF_SEC_INIT(parser_timer);
    525 
    526         do_process(infile, outfile);
    527 
    528         PERF_SEC_DUMP(parser_timer);
    529 
    530         PERF_SEC_DESTROY(parser_timer);
    531 
    532         fclose(infile);
    533         fclose(outfile);
    534 
    535         printf ("Done procressing\n");
    536         return(0);
    537 }
  • proto/SymbolTable/symtab_identity_template.cpp

    r1442 r1457  
    1 #define USE_FUNCTION_TEMPLATES
    21#define TEMPLATED_SIMD_LIB
    3 
    4 #define DEBUG 1
    5 #define BLOCK_SIZE (sizeof(SIMD_type) * 8)
    6 #define SEGMENT_BLOCKS 12
    7 #define BUFFER_SIZE (BLOCK_SIZE * SEGMENT_BLOCKS)
    8 #define OVERLAP_BUFSIZE 2*(sizeof(SIMD_type))
    92
    103#include <stdio.h>
     
    158#include <../lib_simd.h>
    169#include <pbgs_identity_symbol_table.h>
    17 
    1810#include <queue>
    1911#include <string>
     
    2517using namespace std;
    2618
     19#define DEBUG 0
     20#define BLOCK_SIZE (sizeof(SIMD_type) * 8)
     21#define SEGMENT_BLOCKS 12
     22#define BUFFER_SIZE (BLOCK_SIZE * SEGMENT_BLOCKS)
     23#define OVERLAP_BUFSIZE 2*(sizeof(SIMD_type))
     24
    2725#include <../carryQ.h>
    2826#include <xmldecl.h>
    29 #include <xml_error.c>
    30 #include <xmldecl.c>
    3127#include <namechars.h>
    32 
    3328#include <../perflib/perfsec.h>
    3429#include <../s2p.h>
     
    3631#include <TagMatcher.h>
    3732#include <LineColTracker.h>
     33#include <ErrorUtil.h>
     34#include <ErrorTracker.h>
     35#include <XMLTestSuiteError.h>
     36
     37#include <xml_error.c>
     38#include <ErrorUtil.cpp>
     39#include <ErrorTracker.cpp>
     40#include <XMLTestSuiteError.cpp>
    3841
    3942#ifdef BUFFER_PROFILING
     
    5659char * source;
    5760LineColTracker tracker;
    58 
    59 static inline void ReportError(const char * error_msg, int error_pos_in_block) {
    60   int error_line, error_column;
    61   tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
    62   fprintf(stderr, "%s at line %i, column %i\n", error_msg, error_line, error_column);
    63 }
    64 
    65 class ErrorTracker {
    66 public:
    67     ErrorTracker() { noted_pos_in_block = -1;}
    68 
    69     inline void NoteError(const char * error_msg, BitBlock err_strm) {
    70       int pos_in_block = count_forward_zeroes(err_strm);
    71       if ((noted_pos_in_block == -1) || (noted_pos_in_block > pos_in_block)) {
    72         noted_pos_in_block = pos_in_block;
    73         noted_error = error_msg;
    74       }
    75     }
    76 
    77     inline void If_Error_Report_First() {
    78       if (noted_pos_in_block > -1) {
    79               int error_line, error_column;
    80               ReportError(noted_error, noted_pos_in_block);
    81               exit(-1);
    82       }
    83     }
    84 
    85 private:
    86   const char * noted_error;
    87   int noted_pos_in_block;
    88 };
    89 
    90 
    9161TagMatcher matcher;
     62ErrorTracker error_tracker;
    9263BitBlock EOF_mask = simd_const_1(1);
    93 
    94 ErrorTracker error_tracker;
    9564
    9665BitBlock elem_ends;
     
    10271PBGSIdentitySymbolTable pbgs_symbol_table;
    10372
     73/* StreamScan & Post Process Declarations */
     74//      static inline int StreamScanToFirst(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block)
     75static inline int StreamScan(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block);
     76
     77static inline int NameStrt_check(int pos);
     78static inline int Name_check(int pos);
     79static inline int PIName_check(int pos);
     80static inline int CD_check(int pos);
     81static inline int GenRef_check(int pos);
     82static inline int HexRef_check(int pos);
     83static inline int DecRef_check(int pos);
     84static inline int AttRef_check(int pos);
     85
     86@global
     87
     88static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits);
     89static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask);
     90static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, Tag_Callouts & tag_Callouts, int chars_avail);
     91void do_process(FILE *infile, FILE *outfile);
    10492static inline int ElemStart_grouping(int start_pos, int L) ;
    10593static inline int StreamScanLengthGrouping(ScanBlock * stream, int blk_count);
    10694static inline int ScanForwardPos(BitBlock * block, int pos);
    10795static inline int compute_hash_value (int lgth, int start);
     96
     /*
      * Program entry point.
      *
      * Usage: <program> <filename> [<outputfile>]
      *   argv[1]  input file, opened in binary read mode ("rb").
      *   argv[2]  optional output file, opened in binary write mode ("wb");
      *            when absent, output goes to stdout.
      *
      * Exits with status -1 when no input filename is given or when either
      * file cannot be opened; otherwise runs do_process(infile, outfile),
      * closes both streams and returns 0.
      */
     97int main(int argc, char * argv[]) {
     98        char * infilename, * outfilename;
     99        FILE *infile, *outfile;
     100        struct stat fileinfo;
     101
     102        if (argc < 2) {
     103                printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
     104                exit(-1);
     105        }
     106
     107        infilename = argv[1];
     // NOTE(review): the return value of stat() is not checked and fileinfo
     // is not read anywhere else in this function.
     108        stat(infilename, &fileinfo);
     109        infile = fopen(infilename, "rb");
     110        if (!infile) {
     111                fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
     112                exit(-1);
     113        }
     114
     115        if (argc < 3) outfile = stdout;
     116        else {
     117                outfilename = argv[2];
     118                outfile = fopen(outfilename, "wb");
     119                if (!outfile) {
     120                        fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
     121                        exit(-1);
     122                }
     123        }
     124
     125//      PERF_SEC_BIND(1);
     126
     // PERF_SEC_* are defined elsewhere; presumably they set up, dump and tear
     // down a performance timer around do_process() -- TODO confirm.
     127        PERF_SEC_INIT(parser_timer);
     128
     129        do_process(infile, outfile);
     130
     131        PERF_SEC_DUMP(parser_timer);
     132
     133        PERF_SEC_DESTROY(parser_timer);
     134
     135        fclose(infile);
     // outfile is stdout when no output file was given, so this call closes
     // stdout in that case.
     136        fclose(outfile);
     137
     138        return(0);
     139}
     140
     141/* s2p Definitions */
     /*
      * Forwards the eight packs U8[0]..U8[7] together with the eight fields
      * basis_bits.bit_0..bit_7 to s2p().
      * NOTE(review): presumably s2p() performs the serial-to-parallel
      * transposition of one block of input bytes into the basis bit streams
      * -- confirm against the s2p definition.
      */
     142static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
     143  s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
     144        basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
     145}
     146
     /*
      * Variant of s2p_do_block() that additionally masks the result: after
      * calling s2p_do_block(U8, basis_bits), each of basis_bits.bit_0..bit_7
      * is replaced by its bitwise AND with EOF_mask.
      * NOTE(review): presumably EOF_mask has 1-bits only at positions that
      * hold valid input in a partially filled last block -- confirm against
      * the caller.
      */
     147static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
     148  s2p_do_block(U8, basis_bits);
     // Apply the same mask to every basis bit stream.
     149  basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
     150  basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
     151  basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
     152  basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
     153  basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
     154  basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
     155  basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
     156  basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
     157}
    108158
    109159static inline int ScanForwardPos(BitBlock * block, int pos)
     
    125175}
    126176
    127 static inline int StreamScan(ScanBlock * stream, int blk_count, int ProcessPos(int)) {
     177/* StreamScan & Post Process Definitions */
     178static inline int StreamScan(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block) {
     179
    128180        int blk;
    129181        int block_pos = 0;
     182        int pos;
     183
    130184        for (blk = 0; blk < blk_count; blk++) {
    131185                ScanBlock s = stream[blk];
    132186                while(s) {
    133                         int code = (ProcessPos(cfzl(s) + block_pos));
    134                         if (code) return code;
     187                        pos = (cfzl(s) + block_pos);
     188                        int code = (ProcessPos(pos));
     189                        if (code) {
     190                                *error_pos_in_block = pos;
     191                                return code; // error code
     192                        }
    135193                        s = s & (s-1);  // clear rightmost bit.
    136194                }
     
    241299        int block_pos = block_base + pos;
    242300        if(XML_10_UTF8_NameStrt_bytes((unsigned char*)&source[block_pos]) == 0){
    243               ReportError("name start error", pos);
    244               exit(-1);
     301              return XMLTestSuiteError::NAME_START;
    245302        }
    246303        return 0;
     
    250307        int block_pos = block_base + pos;
    251308        if(XML_10_UTF8_NameChar_bytes((unsigned char*)&source[block_pos]) == 0){
    252               ReportError("name error", pos);
    253               exit(-1);
     309                  return XMLTestSuiteError::NAME;
    254310        }
    255311        return 0;
     
    259315        int block_pos = block_base + pos;
    260316        int file_pos = block_pos+buffer_base;
    261         printf ("%s:%i\n",__FUNCTION__,pos);
    262317        if (at_XxMmLll<ASCII>((unsigned char*)&source[block_pos]) && (source[block_pos+3]=='?' || source[block_pos+3]<= ' ')) {
    263318              // "<?xml" legal at start of file.
    264               if ((file_pos == 2) && at_XmlDecl_start<ASCII>((unsigned char*)&source[0])) return 0;
    265               ReportError("[Xx][Mm][Ll] illegal as PI name", pos);
    266               exit(-1);
     319              if (!((file_pos == 2) && at_XmlDecl_start<ASCII>((unsigned char*)&source[0]))) {
     320                  return XMLTestSuiteError::XMLPINAME;
     321              }
    267322        }
    268323        return 0;
     
    272327        int block_pos = block_base + pos;
    273328        if (!at_CDATA1<ASCII>((unsigned char*)&source[block_pos])){
    274               ReportError("CDATA error", pos);
    275               exit(-1);
     329                  return XMLTestSuiteError::CDATA;
    276330        }
    277331        return 0;
     
    282336        unsigned char* s = (unsigned char*)&source[block_pos];
    283337        if (!(at_Ref_gt<ASCII>(s)||at_Ref_lt<ASCII>(s)||at_Ref_amp<ASCII>(s)||at_Ref_quot<ASCII>(s)||at_Ref_apos<ASCII>(s))){
    284               ReportError("Undefined reference", pos);
    285               exit(-1);
     338              return XMLTestSuiteError::UNDEFREF;
    286339        }
    287340        return 0;
     
    295348          ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4);
    296349          if (ch_val> 0x10FFFF ){
    297             ReportError("Illegal character reference", pos);
    298             exit(-1);
     350                return XMLTestSuiteError::CHARREF;
    299351          }
    300352          s++;
    301353        }
    302354        if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    303           ReportError("Illegal character reference", pos);
    304           exit(-1);
     355          return XMLTestSuiteError::CHARREF;
    305356        }
    306357        else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    307           ReportError("Illegal XML 1.0 character reference", pos);
    308           exit(-1);
     358          return XMLTestSuiteError::XML10CHARREF;
    309359        }
    310360        return 0;
     
    318368          ch_val = DigitVal<ASCII>(s[0]) + ch_val*10;
    319369          if (ch_val> 0x10FFFF ){
    320             ReportError("Illegal character reference", pos);
    321             exit(-1);
     370                        return XMLTestSuiteError::CHARREF;
    322371          }
    323372          s++;
    324373        }
    325374        if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    326           ReportError("Illegal character reference", pos);
    327           exit(-1);
     375                  return XMLTestSuiteError::CHARREF;
    328376        }
    329377        else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    330           ReportError("Illegal XML 1.0 character reference", pos);
    331           exit(-1);
     378                  return XMLTestSuiteError::XML10CHARREF;
    332379        }
    333380        return 0;
     
    354401          }
    355402          if (ch_val==60){
    356             ReportError("Attribute values contain '<' characters after reference expansion", pos);
    357             exit(-1);
     403            return XMLTestSuiteError::ATTREF;
    358404          }
    359405        }
    360406        else if(at_Ref_lt<ASCII>(s)){
    361           ReportError("Attribute values contain '<' characters after reference expansion", pos);
    362           exit(-1);
    363         }
    364         return 0;
    365 }
    366 
    367 
    368 
    369 @global
    370 
    371 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
    372   s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
    373         basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
    374 }
    375 
    376 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
    377   s2p_do_block(U8, basis_bits);
    378   basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
    379   basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
    380   basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
    381   basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
    382   basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
    383   basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
    384   basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
    385   basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
     407          return XMLTestSuiteError::ATTREF;
     408        }
     409        return 0;
    386410}
    387411
     
    389413
    390414    tracker.StoreNewlines(lex.LF);
     415    int rv, error_pos_in_block, error_line, error_column;
    391416    elem_ends = tag_Callouts.ElemName_ends;
    392417    hashvalues[1] = hash_data.Hash_value;
     
    408433
    409434    if (bitblock_has_bit(simd_or(check_streams.non_ascii_name_starts, check_streams.non_ascii_names))) {
    410       StreamScan((ScanBlock *) &check_streams.non_ascii_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), NameStrt_check);
    411       StreamScan((ScanBlock *) &check_streams.non_ascii_names, sizeof(BitBlock)/sizeof(ScanBlock), Name_check);
     435      rv = StreamScan((ScanBlock *) &check_streams.non_ascii_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), NameStrt_check, &error_pos_in_block);
     436      if (rv) {
     437              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     438              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     439              exit(-1);
     440      }
     441
     442      rv = StreamScan((ScanBlock *) &check_streams.non_ascii_names, sizeof(BitBlock)/sizeof(ScanBlock), Name_check, &error_pos_in_block);
     443      if (rv) {
     444              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     445              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     446              exit(-1);
     447      }
    412448    }
    413449
    414450    if (bitblock_has_bit(ctCDPI_Callouts.PI_name_starts)){
    415       StreamScan((ScanBlock *) &ctCDPI_Callouts.PI_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), PIName_check);
     451      rv = StreamScan((ScanBlock *) &ctCDPI_Callouts.PI_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), PIName_check, &error_pos_in_block);
     452      if (rv) {
     453              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     454              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     455              exit(-1);
     456      }
    416457    }
    417458
    418459    if (bitblock_has_bit(ctCDPI_Callouts.CD_starts)){
    419       StreamScan((ScanBlock *) &ctCDPI_Callouts.CD_starts, sizeof(BitBlock)/sizeof(ScanBlock), CD_check);
     460      rv = StreamScan((ScanBlock *) &ctCDPI_Callouts.CD_starts, sizeof(BitBlock)/sizeof(ScanBlock), CD_check, &error_pos_in_block);
     461      if (rv) {
     462              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     463              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     464              exit(-1);
     465      }
    420466    }
    421467
    422468    if (bitblock_has_bit(ref_Callouts.GenRef_starts)){
    423       StreamScan((ScanBlock *) &ref_Callouts.GenRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), GenRef_check);
     469      rv = StreamScan((ScanBlock *) &ref_Callouts.GenRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), GenRef_check, &error_pos_in_block);
     470      if (rv) {
     471              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     472              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     473              exit(-1);
     474      }
    424475    }
    425476
    426477    if (bitblock_has_bit(ref_Callouts.DecRef_starts)){
    427       StreamScan((ScanBlock *) &ref_Callouts.DecRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), DecRef_check);
     478      rv = StreamScan((ScanBlock *) &ref_Callouts.DecRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), DecRef_check, &error_pos_in_block);
     479      if (rv) {
     480              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     481              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     482              exit(-1);
     483      }
    428484    }
    429485
    430486    if (bitblock_has_bit(ref_Callouts.HexRef_starts)){
    431       StreamScan((ScanBlock *) &ref_Callouts.HexRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), HexRef_check);
     487      rv = StreamScan((ScanBlock *) &ref_Callouts.HexRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), HexRef_check, &error_pos_in_block);
     488      if (rv) {
     489              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     490              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     491              exit(-1);
     492      }
    432493    }
    433494
    434495    if (bitblock_has_bit(check_streams.att_refs)){
    435       StreamScan((ScanBlock *) &check_streams.att_refs, sizeof(BitBlock)/sizeof(ScanBlock), AttRef_check);
    436     }
    437 
    438     error_tracker.If_Error_Report_First();
     496      rv = StreamScan((ScanBlock *) &check_streams.att_refs, sizeof(BitBlock)/sizeof(ScanBlock), AttRef_check, &error_pos_in_block);
     497      if (rv) {
     498              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     499              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     500              exit(-1);
     501      }
     502    }
     503
     504    if(error_tracker.Has_Noted_Error()){
     505            tracker.get_Line_and_Column(error_tracker.Noted_Pos_In_Block(), error_line, error_column);
     506            ReportError(error_tracker.Noted_Error_Msg(), error_line, error_column);
     507            exit(-1);
     508    }
    439509
    440510    matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
     
    450520    }
    451521    cout << endl;
    452 }
    453 
    454 static inline int test(int)
    455 {
    456     return 0;
    457522}
    458523
     
    519584
    520585/* Full Buffers */
    521     int block_segment_num = 0;
    522586    while (chars_avail == BUFFER_SIZE) {
    523587      PERF_SEC_START(parser_timer);
     
    541605      buf_pos += chars_avail;
    542606      buffer_base = buf_pos;
    543       block_segment_num++;
    544607
    545608  }
     
    580643//    pbgs_symbol_table.Print_Symbol_Table_Distribution();
    581644}
    582 
    583 
    584 
    585 int
    586 main(int argc, char * argv[]) {
    587         char * infilename, * outfilename;
    588         FILE *infile, *outfile;
    589         struct stat fileinfo;
    590 
    591         if (argc < 2) {
    592                 printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
    593                 exit(-1);
    594         }
    595 
    596         infilename = argv[1];
    597         stat(infilename, &fileinfo);
    598         infile = fopen(infilename, "rb");
    599         if (!infile) {
    600                 fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
    601                 exit(-1);
    602         }
    603 
    604         if (argc < 3) outfile = stdout;
    605         else {
    606                 outfilename = argv[2];
    607                 outfile = fopen(outfilename, "wb");
    608                 if (!outfile) {
    609                         fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
    610                         exit(-1);
    611                 }
    612         }
    613 
    614 //      PERF_SEC_BIND(1);
    615 
    616         PERF_SEC_INIT(parser_timer);
    617 
    618         do_process(infile, outfile);
    619 
    620         PERF_SEC_DUMP(parser_timer);
    621 
    622         PERF_SEC_DESTROY(parser_timer);
    623 
    624         fclose(infile);
    625         fclose(outfile);
    626         return(0);
    627 }
  • proto/SymbolTable/symtab_ls_template.cpp

    r1426 r1457  
    1 #define USE_FUNCTION_TEMPLATES
    21#define TEMPLATED_SIMD_LIB
    32
     
    98#include <../lib_simd.h>
    109#include <ls_symbol_table.h>
    11 
    1210#include <queue>
    1311#include <string>
    1412
    15 #define DEBUG 0
     13typedef long ScanBlock;
     14typedef SIMD_type BytePack;
     15typedef SIMD_type BitBlock;
     16
     17using namespace std;
     18
     19#define DEBUG 1
    1620#define BLOCK_SIZE (sizeof(SIMD_type) * 8)
    1721#define SEGMENT_BLOCKS 12
     
    1923#define OVERLAP_BUFSIZE 2*(sizeof(SIMD_type))
    2024
    21 typedef long ScanBlock;
    22 typedef SIMD_type BytePack;
    23 typedef SIMD_type BitBlock;
    24 
    25 using namespace std;
    26 
    2725#include <../carryQ.h>
    2826#include <xmldecl.h>
    29 #include <xml_error.c>
    30 #include <xmldecl.c>
    3127#include <namechars.h>
    32 
    3328#include <../perflib/perfsec.h>
    3429#include <../s2p.h>
     
    3631#include <TagMatcher.h>
    3732#include <LineColTracker.h>
     33#include <ErrorUtil.h>
     34#include <ErrorTracker.h>
     35#include <XMLTestSuiteError.h>
     36
     37#include <xml_error.c>
     38#include <ErrorUtil.cpp>
     39#include <ErrorTracker.cpp>
     40#include <XMLTestSuiteError.cpp>
    3841
    3942#ifdef BUFFER_PROFILING
     
    5659char * source;
    5760LineColTracker tracker;
    58 
    59 static inline void ReportError(const char * error_msg, int error_pos_in_block) {
    60   int error_line, error_column;
    61   tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
    62   fprintf(stderr, "%s at line %i, column %i\n", error_msg, error_line, error_column);
    63 }
    64 
    65 class ErrorTracker {
    66 public:
    67     ErrorTracker() { noted_pos_in_block = -1;}
    68 
    69     inline void NoteError(const char * error_msg, BitBlock err_strm) {
    70       int pos_in_block = count_forward_zeroes(err_strm);
    71       if ((noted_pos_in_block == -1) || (noted_pos_in_block > pos_in_block)) {
    72         noted_pos_in_block = pos_in_block;
    73         noted_error = error_msg;
    74       }
    75     }
    76 
    77     inline void If_Error_Report_First() {
    78       if (noted_pos_in_block > -1) {
    79               int error_line, error_column;
    80               ReportError(noted_error, noted_pos_in_block);
    81               exit(-1);
    82       }
    83     }
    84 
    85 private:
    86   const char * noted_error;
    87   int noted_pos_in_block;
    88 };
    89 
    90 
    9161TagMatcher matcher;
     62ErrorTracker error_tracker;
    9263BitBlock EOF_mask = simd_const_1(1);
    93 
    94 ErrorTracker error_tracker;
    9564
    9665queue <size_t> elem_starts_buf;
     
    9867LSSymbolTable ls_symbol_table;
    9968
    100 static inline int StreamScan(ScanBlock * stream, int blk_count, int ProcessPos(int)) {
     69/* StreamScan & Post Process Declarations */
     70//      static inline int StreamScanToFirst(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block)
     71static inline int StreamScan(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block);
     72
     73static inline int NameStrt_check(int pos);
     74static inline int Name_check(int pos);
     75static inline int PIName_check(int pos);
     76static inline int CD_check(int pos);
     77static inline int GenRef_check(int pos);
     78static inline int HexRef_check(int pos);
     79static inline int DecRef_check(int pos);
     80static inline int AttRef_check(int pos);
     81
     82@global
     83
     84static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits);
     85static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask);
     86static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, Tag_Callouts & tag_Callouts, int chars_avail);
     87static inline void do_symbol_table_lookup();
     88
     89void do_process(FILE *infile, FILE *outfile);
     90
     /*
      * Program entry point.
      *
      * Usage: <program> <filename> [<outputfile>]
      *   argv[1]  input file, opened in binary read mode ("rb").
      *   argv[2]  optional output file, opened in binary write mode ("wb");
      *            when absent, output goes to stdout.
      *
      * Exits with status -1 when no input filename is given or when either
      * file cannot be opened; otherwise runs do_process(infile, outfile),
      * closes both streams and returns 0.
      */
     91int main(int argc, char * argv[]) {
     92        char * infilename, * outfilename;
     93        FILE *infile, *outfile;
     94        struct stat fileinfo;
     95
     96        if (argc < 2) {
     97                printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
     98                exit(-1);
     99        }
     100
     101        infilename = argv[1];
     // NOTE(review): the return value of stat() is not checked and fileinfo
     // is not read anywhere else in this function.
     102        stat(infilename, &fileinfo);
     103        infile = fopen(infilename, "rb");
     104        if (!infile) {
     105                fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
     106                exit(-1);
     107        }
     108
     109        if (argc < 3) outfile = stdout;
     110        else {
     111                outfilename = argv[2];
     112                outfile = fopen(outfilename, "wb");
     113                if (!outfile) {
     114                        fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
     115                        exit(-1);
     116                }
     117        }
     118
     119//      PERF_SEC_BIND(1);
     120
     // PERF_SEC_* are defined elsewhere; presumably they set up, dump and tear
     // down a performance timer around do_process() -- TODO confirm.
     121        PERF_SEC_INIT(parser_timer);
     122
     123        do_process(infile, outfile);
     124
     125        PERF_SEC_DUMP(parser_timer);
     126
     127        PERF_SEC_DESTROY(parser_timer);
     128
     129        fclose(infile);
     // outfile is stdout when no output file was given, so this call closes
     // stdout in that case.
     130        fclose(outfile);
     131
     132        return(0);
     133}
     134
     135/* s2p Definitions */
     /*
      * Forwards the eight packs U8[0]..U8[7] together with the eight fields
      * basis_bits.bit_0..bit_7 to s2p().
      * NOTE(review): presumably s2p() performs the serial-to-parallel
      * transposition of one block of input bytes into the basis bit streams
      * -- confirm against the s2p definition.
      */
     136static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
     137  s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
     138        basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
     139}
     140
     /*
      * Variant of s2p_do_block() that additionally masks the result: after
      * calling s2p_do_block(U8, basis_bits), each of basis_bits.bit_0..bit_7
      * is replaced by its bitwise AND with EOF_mask.
      * The EOF_mask parameter shadows the file-scope variable of the same
      * name; only the argument passed by the caller is used here.
      * NOTE(review): presumably EOF_mask has 1-bits only at positions that
      * hold valid input in a partially filled last block -- confirm against
      * the caller.
      */
     141static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
     142  s2p_do_block(U8, basis_bits);
     // Apply the same mask to every basis bit stream.
     143  basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
     144  basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
     145  basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
     146  basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
     147  basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
     148  basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
     149  basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
     150  basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
     151}
     152
     153/* StreamScan & Post Process Definitions */
     154static inline int StreamScan(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block) {
     155
    101156        int blk;
    102157        int block_pos = 0;
     158        int pos;
     159
    103160        for (blk = 0; blk < blk_count; blk++) {
    104161                ScanBlock s = stream[blk];
    105162                while(s) {
    106                         int code = (ProcessPos(cfzl(s) + block_pos));
    107                         if (code) return code;
     163                        pos = (cfzl(s) + block_pos);
     164                        int code = (ProcessPos(pos));
     165                        if (code) {
     166                                *error_pos_in_block = pos;
     167                                return code; // error code
     168                        }
    108169                        s = s & (s-1);  // clear rightmost bit.
    109170                }
     
    128189        int block_pos = block_base + pos;
    129190        if(XML_10_UTF8_NameStrt_bytes((unsigned char*)&source[block_pos]) == 0){
    130               ReportError("name start error", pos);
    131               exit(-1);
     191              return XMLTestSuiteError::NAME_START;
    132192        }
    133193        return 0;
     
    137197        int block_pos = block_base + pos;
    138198        if(XML_10_UTF8_NameChar_bytes((unsigned char*)&source[block_pos]) == 0){
    139               ReportError("name error", pos);
    140               exit(-1);
     199                  return XMLTestSuiteError::NAME;
    141200        }
    142201        return 0;
     
    146205        int block_pos = block_base + pos;
    147206        int file_pos = block_pos+buffer_base;
    148         printf ("%s:%i\n",__FUNCTION__,pos);
    149207        if (at_XxMmLll<ASCII>((unsigned char*)&source[block_pos]) && (source[block_pos+3]=='?' || source[block_pos+3]<= ' ')) {
    150208              // "<?xml" legal at start of file.
    151               if ((file_pos == 2) && at_XmlDecl_start<ASCII>((unsigned char*)&source[0])) return 0;
    152               ReportError("[Xx][Mm][Ll] illegal as PI name", pos);
    153               exit(-1);
     209              if (!((file_pos == 2) && at_XmlDecl_start<ASCII>((unsigned char*)&source[0]))) {
     210                  return XMLTestSuiteError::XMLPINAME;
     211              }
    154212        }
    155213        return 0;
     
    159217        int block_pos = block_base + pos;
    160218        if (!at_CDATA1<ASCII>((unsigned char*)&source[block_pos])){
    161               ReportError("CDATA error", pos);
    162               exit(-1);
     219                  return XMLTestSuiteError::CDATA;
    163220        }
    164221        return 0;
     
    169226        unsigned char* s = (unsigned char*)&source[block_pos];
    170227        if (!(at_Ref_gt<ASCII>(s)||at_Ref_lt<ASCII>(s)||at_Ref_amp<ASCII>(s)||at_Ref_quot<ASCII>(s)||at_Ref_apos<ASCII>(s))){
    171               ReportError("Undefined reference", pos);
    172               exit(-1);
     228              return XMLTestSuiteError::UNDEFREF;
    173229        }
    174230        return 0;
     
    182238          ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4);
    183239          if (ch_val> 0x10FFFF ){
    184             ReportError("Illegal character reference", pos);
    185             exit(-1);
     240                return XMLTestSuiteError::CHARREF;
    186241          }
    187242          s++;
    188243        }
    189244        if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    190           ReportError("Illegal character reference", pos);
    191           exit(-1);
     245          return XMLTestSuiteError::CHARREF;
    192246        }
    193247        else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    194           ReportError("Illegal XML 1.0 character reference", pos);
    195           exit(-1);
     248          return XMLTestSuiteError::XML10CHARREF;
    196249        }
    197250        return 0;
     
    205258          ch_val = DigitVal<ASCII>(s[0]) + ch_val*10;
    206259          if (ch_val> 0x10FFFF ){
    207             ReportError("Illegal character reference", pos);
    208             exit(-1);
     260                        return XMLTestSuiteError::CHARREF;
    209261          }
    210262          s++;
    211263        }
    212264        if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    213           ReportError("Illegal character reference", pos);
    214           exit(-1);
     265                  return XMLTestSuiteError::CHARREF;
    215266        }
    216267        else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    217           ReportError("Illegal XML 1.0 character reference", pos);
    218           exit(-1);
     268                  return XMLTestSuiteError::XML10CHARREF;
    219269        }
    220270        return 0;
     
    241291          }
    242292          if (ch_val==60){
    243             ReportError("Attribute values contain '<' characters after reference expansion", pos);
    244             exit(-1);
     293            return XMLTestSuiteError::ATTREF;
    245294          }
    246295        }
    247296        else if(at_Ref_lt<ASCII>(s)){
    248           ReportError("Attribute values contain '<' characters after reference expansion", pos);
    249           exit(-1);
    250         }
    251         return 0;
    252 }
    253 
    254 
    255 
    256 @global
    257 
    258 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
    259   s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
    260         basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
    261 }
    262 
    263 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
    264   s2p_do_block(U8, basis_bits);
    265   basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
    266   basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
    267   basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
    268   basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
    269   basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
    270   basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
    271   basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
    272   basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
     297          return XMLTestSuiteError::ATTREF;
     298        }
     299        return 0;
    273300}
    274301
     
    300327static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, Tag_Callouts & tag_Callouts, int chars_avail){
    301328
    302     //NOTE: We are NOT handling the case for an extremely long symbol name (length > 1541).
    303     //      TagMatcher will crash if we feed a long symbol name.
    304     //      Sample file: test/long_sym_name.xml
    305 
    306             tracker.StoreNewlines(lex.LF);
    307 
    308             if ( bitblock_has_bit(tag_Callouts.ElemName_starts))
    309             {
    310                 StreamScan((ScanBlock *) &tag_Callouts.ElemName_starts, sizeof(BitBlock)/sizeof(ScanBlock), ElemStrt_check);
    311             }
    312 
    313             if ( bitblock_has_bit(tag_Callouts.ElemName_ends) )
    314             {
    315                 StreamScan((ScanBlock *) &tag_Callouts.ElemName_ends, sizeof(BitBlock)/sizeof(ScanBlock), ElemEnd_check);
    316             }
    317 
    318             do_symbol_table_lookup();
    319 
    320                 if (bitblock_has_bit(simd_or(check_streams.non_ascii_name_starts, check_streams.non_ascii_names))) {
    321                   StreamScan((ScanBlock *) &check_streams.non_ascii_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), NameStrt_check);
    322                   StreamScan((ScanBlock *) &check_streams.non_ascii_names, sizeof(BitBlock)/sizeof(ScanBlock), Name_check);
    323                 }
    324 
    325                 if (bitblock_has_bit(ctCDPI_Callouts.PI_name_starts)){
    326                   StreamScan((ScanBlock *) &ctCDPI_Callouts.PI_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), PIName_check);
    327                 }
    328 
    329                 if (bitblock_has_bit(ctCDPI_Callouts.CD_starts)){
    330                   StreamScan((ScanBlock *) &ctCDPI_Callouts.CD_starts, sizeof(BitBlock)/sizeof(ScanBlock), CD_check);
    331                 }
    332 
    333                 if (bitblock_has_bit(ref_Callouts.GenRef_starts)){
    334                   StreamScan((ScanBlock *) &ref_Callouts.GenRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), GenRef_check);
    335                 }
    336 
    337                 if (bitblock_has_bit(ref_Callouts.DecRef_starts)){
    338                   StreamScan((ScanBlock *) &ref_Callouts.DecRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), DecRef_check);
    339                 }
    340 
    341                 if (bitblock_has_bit(ref_Callouts.HexRef_starts)){
    342                   StreamScan((ScanBlock *) &ref_Callouts.HexRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), HexRef_check);
    343                 }
    344 
    345                 if (bitblock_has_bit(check_streams.att_refs)){
    346                   StreamScan((ScanBlock *) &check_streams.att_refs, sizeof(BitBlock)/sizeof(ScanBlock), AttRef_check);
    347                 }
    348 
    349                 error_tracker.If_Error_Report_First();
    350 
    351                 matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
    352                 tracker.AdvanceBlock();
     329    tracker.StoreNewlines(lex.LF);
     330    int rv, error_pos_in_block, error_line, error_column;
     331
     332    if ( bitblock_has_bit(tag_Callouts.ElemName_starts))
     333    {
     334        StreamScan((ScanBlock *) &tag_Callouts.ElemName_starts, sizeof(BitBlock)/sizeof(ScanBlock), ElemStrt_check, &error_pos_in_block);
     335    }
     336
     337    if ( bitblock_has_bit(tag_Callouts.ElemName_ends) )
     338    {
     339        StreamScan((ScanBlock *) &tag_Callouts.ElemName_ends, sizeof(BitBlock)/sizeof(ScanBlock), ElemEnd_check, &error_pos_in_block);
     340    }
     341
     342    do_symbol_table_lookup();
     343
     344    if (bitblock_has_bit(simd_or(check_streams.non_ascii_name_starts, check_streams.non_ascii_names))) {
     345      rv = StreamScan((ScanBlock *) &check_streams.non_ascii_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), NameStrt_check, &error_pos_in_block);
     346      if (rv) {
     347              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     348              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     349              exit(-1);
     350      }
     351
     352      rv = StreamScan((ScanBlock *) &check_streams.non_ascii_names, sizeof(BitBlock)/sizeof(ScanBlock), Name_check, &error_pos_in_block);
     353      if (rv) {
     354              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     355              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     356              exit(-1);
     357      }
     358    }
     359
     360    if (bitblock_has_bit(ctCDPI_Callouts.PI_name_starts)){
     361      rv = StreamScan((ScanBlock *) &ctCDPI_Callouts.PI_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), PIName_check, &error_pos_in_block);
     362      if (rv) {
     363              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     364              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     365              exit(-1);
     366      }
     367    }
     368
     369    if (bitblock_has_bit(ctCDPI_Callouts.CD_starts)){
     370      rv = StreamScan((ScanBlock *) &ctCDPI_Callouts.CD_starts, sizeof(BitBlock)/sizeof(ScanBlock), CD_check, &error_pos_in_block);
     371      if (rv) {
     372              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     373              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     374              exit(-1);
     375      }
     376    }
     377
     378    if (bitblock_has_bit(ref_Callouts.GenRef_starts)){
     379      rv = StreamScan((ScanBlock *) &ref_Callouts.GenRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), GenRef_check, &error_pos_in_block);
     380      if (rv) {
     381              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     382              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     383              exit(-1);
     384      }
     385    }
     386
     387    if (bitblock_has_bit(ref_Callouts.DecRef_starts)){
     388      rv = StreamScan((ScanBlock *) &ref_Callouts.DecRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), DecRef_check, &error_pos_in_block);
     389      if (rv) {
     390              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     391              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     392              exit(-1);
     393      }
     394    }
     395
     396    if (bitblock_has_bit(ref_Callouts.HexRef_starts)){
     397      rv = StreamScan((ScanBlock *) &ref_Callouts.HexRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), HexRef_check, &error_pos_in_block);
     398      if (rv) {
     399              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     400              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     401              exit(-1);
     402      }
     403    }
     404
     405    if (bitblock_has_bit(check_streams.att_refs)){
     406      rv = StreamScan((ScanBlock *) &check_streams.att_refs, sizeof(BitBlock)/sizeof(ScanBlock), AttRef_check, &error_pos_in_block);
     407      if (rv) {
     408              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     409              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     410              exit(-1);
     411      }
     412    }
     413
     414    if(error_tracker.Has_Noted_Error()){
     415            tracker.get_Line_and_Column(error_tracker.Noted_Pos_In_Block(), error_line, error_column);
     416            ReportError(error_tracker.Noted_Error_Msg(), error_line, error_column);
     417            exit(-1);
     418    }
     419
     420    matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
     421    tracker.AdvanceBlock();
    353422}
    354423
     
    423492
    424493/* Full Buffers */
    425     int block_segment_num = 0;
    426494    while (chars_avail == BUFFER_SIZE) {
    427495      PERF_SEC_START(parser_timer);
     
    444512      buf_pos += chars_avail;
    445513      buffer_base = buf_pos;
    446       block_segment_num++;
    447514    }
    448515/* Final Partial Buffer */
     
    488555    ls_symbol_table.clear();
    489556}
    490 
    491 
    492 
    /*
     * Program entry point.
     *
     * argv[1] is the input file name (required); argv[2] is an optional
     * output file name. The input is opened for binary reading; the output
     * is stdout when no second argument is given, otherwise the named file
     * opened for binary writing. do_process(infile, outfile) is run between
     * PERF_SEC_INIT and PERF_SEC_DUMP/PERF_SEC_DESTROY on parser_timer
     * (presumably a perflib timing section; parser_timer is not defined in
     * this function -- confirm against the rest of the file).
     *
     * Exits with -1 on a missing argument or when either file cannot be
     * opened; returns 0 otherwise.
     */
    493 int
    494 main(int argc, char * argv[]) {
    495         char * infilename, * outfilename;
    496         FILE *infile, *outfile;
    497         struct stat fileinfo;
    498 
    499         if (argc < 2) {
    500                 printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
    501                 exit(-1);
    502         }
    503 
    504         infilename = argv[1];
                // NOTE(review): the return value of stat() is not checked and
                // fileinfo is not read anywhere else in this function.
    505         stat(infilename, &fileinfo);
    506         infile = fopen(infilename, "rb");
    507         if (!infile) {
    508                 fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
    509                 exit(-1);
    510         }
    511 
                // With no second argument the results go to stdout.
    512         if (argc < 3) outfile = stdout;
    513         else {
    514                 outfilename = argv[2];
    515                 outfile = fopen(outfilename, "wb");
    516                 if (!outfile) {
    517                         fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
    518                         exit(-1);
    519                 }
    520         }
    521 
    522 //      PERF_SEC_BIND(1);
    523 
    524         PERF_SEC_INIT(parser_timer);
    525 
    526         do_process(infile, outfile);
    527 
    528         PERF_SEC_DUMP(parser_timer);
    529 
    530         PERF_SEC_DESTROY(parser_timer);
    531 
    532         fclose(infile);
    533         fclose(outfile);
    534 
                // NOTE(review): when argc < 3, outfile is stdout and has just
                // been closed above, yet this printf still writes to stdout.
    535         printf ("Done procressing\n");
    536         return(0);
    537 }
  • proto/SymbolTable/symtab_pbgs_div_template.cpp

    r1442 r1457  
    1 #define USE_FUNCTION_TEMPLATES
    21#define TEMPLATED_SIMD_LIB
    3 
    4 #define DEBUG 0
    5 #define BLOCK_SIZE (sizeof(SIMD_type) * 8)
    6 #define SEGMENT_BLOCKS 12
    7 #define BUFFER_SIZE (BLOCK_SIZE * SEGMENT_BLOCKS)
    8 #define OVERLAP_BUFSIZE 2*(sizeof(SIMD_type))
    92
    103#include <stdio.h>
     
    2417using namespace std;
    2518
     19#define DEBUG 0
     20#define BLOCK_SIZE (sizeof(SIMD_type) * 8)
     21#define SEGMENT_BLOCKS 12
     22#define BUFFER_SIZE (BLOCK_SIZE * SEGMENT_BLOCKS)
     23#define OVERLAP_BUFSIZE 2*(sizeof(SIMD_type))
     24
    2625#include <../carryQ.h>
    2726#include <xmldecl.h>
    28 #include <xml_error.c>
    29 #include <xmldecl.c>
    3027#include <namechars.h>
    31 
    3228#include <../perflib/perfsec.h>
    3329#include <../s2p.h>
     
    3531#include <TagMatcher.h>
    3632#include <LineColTracker.h>
     33#include <ErrorUtil.h>
     34#include <ErrorTracker.h>
     35#include <XMLTestSuiteError.h>
     36
     37#include <xml_error.c>
     38#include <ErrorUtil.cpp>
     39#include <ErrorTracker.cpp>
     40#include <XMLTestSuiteError.cpp>
    3741
    3842#ifdef BUFFER_PROFILING
     
    5054#endif
    5155
     56
    5257int block_base=0;
    5358int buffer_base=0;
     
    5560char * source;
    5661LineColTracker tracker;
    57 
    /*
     * Writes "<error_msg> at line L, column C" to stderr.
     *
     * error_pos_in_block is handed to the global tracker's
     * get_Line_and_Column together with error_line and error_column, which
     * are then printed (presumably filled in by reference -- confirm against
     * LineColTracker). This function does not terminate the program; callers
     * that want to stop must exit themselves.
     */
    58 static inline void ReportError(const char * error_msg, int error_pos_in_block) {
    59   int error_line, error_column;
    60   tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
    61   fprintf(stderr, "%s at line %i, column %i\n", error_msg, error_line, error_column);
    62 }
    63 
    /*
     * Remembers one pending error: the message whose in-block position is the
     * smallest among all errors noted so far. A position of -1 means that no
     * error has been noted.
     */
    64 class ErrorTracker {
    65 public:
           // Start with "no error noted".
    66     ErrorTracker() { noted_pos_in_block = -1;}
    67 
           // Record error_msg if it is the first error noted, or if its position
           // (count_forward_zeroes(err_strm), presumably the index of the first
           // marked bit in err_strm -- confirm) is smaller than the stored one.
    68     inline void NoteError(const char * error_msg, BitBlock err_strm) {
    69       int pos_in_block = count_forward_zeroes(err_strm);
    70       if ((noted_pos_in_block == -1) || (noted_pos_in_block > pos_in_block)) {
    71         noted_pos_in_block = pos_in_block;
    72         noted_error = error_msg;
    73       }
    74     }
    75 
           // If an error has been noted, report it through ReportError and
           // terminate the program with exit(-1); otherwise do nothing.
    76     inline void If_Error_Report_First() {
    77       if (noted_pos_in_block > -1) {
                   // NOTE(review): error_line and error_column are declared
                   // here but never used in this method.
    78               int error_line, error_column;
    79               ReportError(noted_error, noted_pos_in_block);
    80               exit(-1);
    81       }
    82     }
    83 
    84 private:
         // Message of the stored error; only assigned inside NoteError, and only
         // read after noted_pos_in_block has been set to a non-negative value.
    85   const char * noted_error;
         // In-block position of the stored error, or -1 when none is stored.
    86   int noted_pos_in_block;
    87 };
    88 
    89 
    9062TagMatcher matcher;
    9163BitBlock EOF_mask = simd_const_1(1);
    92 
    9364ErrorTracker error_tracker;
     65
    9466BitBlock elem_ends;
    9567int last_elem_start;
     
    10072PBGSDivSymbolTable pbgs_symbol_table;
    10173
    102 
     74/* StreamScan & Post Process Declarations */
     75//      static inline int StreamScanToFirst(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block)
     76static inline int StreamScan(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block);
     77
     78static inline int NameStrt_check(int pos);
     79static inline int Name_check(int pos);
     80static inline int PIName_check(int pos);
     81static inline int CD_check(int pos);
     82static inline int GenRef_check(int pos);
     83static inline int HexRef_check(int pos);
     84static inline int DecRef_check(int pos);
     85static inline int AttRef_check(int pos);
     86
     87@global
     88
     89static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits);
     90static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask);
     91static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, Tag_Callouts & tag_Callouts, int chars_avail);
     92void do_process(FILE *infile, FILE *outfile);
    10393static inline int ScanForwardPos(BitBlock * block, int pos);
    10494static inline int compute_hash_value (int lgth, int start);
     
    10797template <int L> static inline int StreamScanLengthGrouping(ScanBlock * stream, int blk_count);
    10898
     /*
      * Program entry point.
      *
      * argv[1] is the input file name (required); argv[2] is an optional
      * output file name. The input is opened for binary reading; the output
      * is stdout when no second argument is given, otherwise the named file
      * opened for binary writing. do_process(infile, outfile) is run between
      * PERF_SEC_INIT and PERF_SEC_DUMP/PERF_SEC_DESTROY on parser_timer
      * (presumably a timing section from perflib/perfsec.h; parser_timer is
      * not defined in this function -- confirm).
      *
      * Exits with -1 on a missing argument or when either file cannot be
      * opened; returns 0 otherwise.
      */
     99int main(int argc, char * argv[]) {
     100        char * infilename, * outfilename;
     101        FILE *infile, *outfile;
     102        struct stat fileinfo;
     103
     104        if (argc < 2) {
     105                printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
     106                exit(-1);
     107        }
     108
     109        infilename = argv[1];
                // NOTE(review): the return value of stat() is not checked and
                // fileinfo is not read anywhere else in this function.
     110        stat(infilename, &fileinfo);
     111        infile = fopen(infilename, "rb");
     112        if (!infile) {
     113                fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
     114                exit(-1);
     115        }
     116
                // With no second argument the results go to stdout.
     117        if (argc < 3) outfile = stdout;
     118        else {
     119                outfilename = argv[2];
     120                outfile = fopen(outfilename, "wb");
     121                if (!outfile) {
     122                        fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
     123                        exit(-1);
     124                }
     125        }
     126
     127//      PERF_SEC_BIND(1);
     128
     129        PERF_SEC_INIT(parser_timer);
     130
     131        do_process(infile, outfile);
     132
     133        PERF_SEC_DUMP(parser_timer);
     134
     135        PERF_SEC_DESTROY(parser_timer);
     136
     137        fclose(infile);
                // NOTE(review): outfile may be stdout here (argc < 3), in which
                // case this closes the process's standard output.
     138        fclose(outfile);
     139
     140        return(0);
     141}
     142
     143/* s2p Definitions */
     /*
      * Forwards the eight packs U8[0]..U8[7] and the eight members
      * basis_bits.bit_0..bit_7 to s2p (presumably the serial-to-parallel
      * transposition provided by s2p.h -- confirm). U8 must therefore hold at
      * least eight elements.
      */
     144static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
     145  s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
     146        basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
     147}
     148
     /*
      * Same as s2p_do_block, followed by AND-ing each of basis_bits.bit_0
      * through bit_7 with EOF_mask, so every position where EOF_mask is zero
      * is cleared in all eight streams.
      *
      * The EOF_mask parameter has the same name as the file-scope EOF_mask and
      * hides it inside this function.
      */
     149static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
     150  s2p_do_block(U8, basis_bits);
     151  basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
     152  basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
     153  basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
     154  basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
     155  basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
     156  basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
     157  basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
     158  basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
     159}
    109160
    110161static inline int ScanForwardPos(BitBlock * block, int pos)
     
    135186}
    136187
    137 static inline int StreamScan(ScanBlock * stream, int blk_count, int ProcessPos(int)) {
     188/* StreamScan & Post Process Definitions */
     189static inline int StreamScan(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block) {
     190
    138191        int blk;
    139192        int block_pos = 0;
     193        int pos;
     194
    140195        for (blk = 0; blk < blk_count; blk++) {
    141196                ScanBlock s = stream[blk];
    142197                while(s) {
    143                         int code = (ProcessPos(cfzl(s) + block_pos));
    144                         if (code) return code;
     198                        pos = (cfzl(s) + block_pos);
     199                        int code = (ProcessPos(pos));
     200                        if (code) {
     201                                *error_pos_in_block = pos;
     202                                return code; // error code
     203                        }
    145204                        s = s & (s-1);  // clear rightmost bit.
    146205                }
     
    238297        int block_pos = block_base + pos;
    239298        if(XML_10_UTF8_NameStrt_bytes((unsigned char*)&source[block_pos]) == 0){
    240               ReportError("name start error", pos);
    241               exit(-1);
     299              return XMLTestSuiteError::NAME_START;
    242300        }
    243301        return 0;
     
    247305        int block_pos = block_base + pos;
    248306        if(XML_10_UTF8_NameChar_bytes((unsigned char*)&source[block_pos]) == 0){
    249               ReportError("name error", pos);
    250               exit(-1);
     307                  return XMLTestSuiteError::NAME;
    251308        }
    252309        return 0;
     
    256313        int block_pos = block_base + pos;
    257314        int file_pos = block_pos+buffer_base;
    258         printf ("%s:%i\n",__FUNCTION__,pos);
    259315        if (at_XxMmLll<ASCII>((unsigned char*)&source[block_pos]) && (source[block_pos+3]=='?' || source[block_pos+3]<= ' ')) {
    260316              // "<?xml" legal at start of file.
    261               if ((file_pos == 2) && at_XmlDecl_start<ASCII>((unsigned char*)&source[0])) return 0;
    262               ReportError("[Xx][Mm][Ll] illegal as PI name", pos);
    263               exit(-1);
     317              if (!((file_pos == 2) && at_XmlDecl_start<ASCII>((unsigned char*)&source[0]))) {
     318                  return XMLTestSuiteError::XMLPINAME;
     319              }
    264320        }
    265321        return 0;
     
    269325        int block_pos = block_base + pos;
    270326        if (!at_CDATA1<ASCII>((unsigned char*)&source[block_pos])){
    271               ReportError("CDATA error", pos);
    272               exit(-1);
     327                  return XMLTestSuiteError::CDATA;
    273328        }
    274329        return 0;
     
    279334        unsigned char* s = (unsigned char*)&source[block_pos];
    280335        if (!(at_Ref_gt<ASCII>(s)||at_Ref_lt<ASCII>(s)||at_Ref_amp<ASCII>(s)||at_Ref_quot<ASCII>(s)||at_Ref_apos<ASCII>(s))){
    281               ReportError("Undefined reference", pos);
    282               exit(-1);
     336              return XMLTestSuiteError::UNDEFREF;
    283337        }
    284338        return 0;
     
    292346          ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4);
    293347          if (ch_val> 0x10FFFF ){
    294             ReportError("Illegal character reference", pos);
    295             exit(-1);
     348                return XMLTestSuiteError::CHARREF;
    296349          }
    297350          s++;
    298351        }
    299352        if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    300           ReportError("Illegal character reference", pos);
    301           exit(-1);
     353          return XMLTestSuiteError::CHARREF;
    302354        }
    303355        else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    304           ReportError("Illegal XML 1.0 character reference", pos);
    305           exit(-1);
     356          return XMLTestSuiteError::XML10CHARREF;
    306357        }
    307358        return 0;
     
    315366          ch_val = DigitVal<ASCII>(s[0]) + ch_val*10;
    316367          if (ch_val> 0x10FFFF ){
    317             ReportError("Illegal character reference", pos);
    318             exit(-1);
     368                        return XMLTestSuiteError::CHARREF;
    319369          }
    320370          s++;
    321371        }
    322372        if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    323           ReportError("Illegal character reference", pos);
    324           exit(-1);
     373                  return XMLTestSuiteError::CHARREF;
    325374        }
    326375        else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    327           ReportError("Illegal XML 1.0 character reference", pos);
    328           exit(-1);
     376                  return XMLTestSuiteError::XML10CHARREF;
    329377        }
    330378        return 0;
     
    351399          }
    352400          if (ch_val==60){
    353             ReportError("Attribute values contain '<' characters after reference expansion", pos);
    354             exit(-1);
     401            return XMLTestSuiteError::ATTREF;
    355402          }
    356403        }
    357404        else if(at_Ref_lt<ASCII>(s)){
    358           ReportError("Attribute values contain '<' characters after reference expansion", pos);
    359           exit(-1);
    360         }
    361         return 0;
    362 }
    363 
    364 
    365 
    366 @global
    367 
    /*
     * Forwards the eight packs U8[0]..U8[7] and the eight members
     * basis_bits.bit_0..bit_7 to s2p (presumably the serial-to-parallel
     * transposition provided by s2p.h -- confirm). U8 must therefore hold at
     * least eight elements.
     */
    368 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
    369   s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
    370         basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
    371 }
    372 
    373 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
    374   s2p_do_block(U8, basis_bits);
    375   basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
    376   basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
    377   basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
    378   basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
    379   basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
    380   basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
    381   basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
    382   basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
     405          return XMLTestSuiteError::ATTREF;
     406        }
     407        return 0;
    383408}
    384409
    385410static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, Tag_Callouts & tag_Callouts, Hash_data & hash_data, int chars_avail){
    386411
    387 
    388     //NOTE: We are NOT handling the case for an extremely long symbol name (length > 1541).
    389     //      TagMatcher will crash if we feed a long symbol name.
    390     //      Sample file: test/long_sym_name.xml
    391 
    392412    tracker.StoreNewlines(lex.LF);
     413    int rv, error_pos_in_block, error_line, error_column;
    393414    elem_ends = tag_Callouts.ElemName_ends;
    394415    hashvalues[1] = hash_data.Hash_value;
     
    459480    memmove (&hashvalues[0], &hashvalues[1], 16);
    460481
     482
    461483    if (bitblock_has_bit(simd_or(check_streams.non_ascii_name_starts, check_streams.non_ascii_names))) {
    462       StreamScan((ScanBlock *) &check_streams.non_ascii_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), NameStrt_check);
    463       StreamScan((ScanBlock *) &check_streams.non_ascii_names, sizeof(BitBlock)/sizeof(ScanBlock), Name_check);
     484      rv = StreamScan((ScanBlock *) &check_streams.non_ascii_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), NameStrt_check, &error_pos_in_block);
     485      if (rv) {
     486              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     487              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     488              exit(-1);
     489      }
     490
     491      rv = StreamScan((ScanBlock *) &check_streams.non_ascii_names, sizeof(BitBlock)/sizeof(ScanBlock), Name_check, &error_pos_in_block);
     492      if (rv) {
     493              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     494              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     495              exit(-1);
     496      }
    464497    }
    465498
    466499    if (bitblock_has_bit(ctCDPI_Callouts.PI_name_starts)){
    467       StreamScan((ScanBlock *) &ctCDPI_Callouts.PI_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), PIName_check);
     500      rv = StreamScan((ScanBlock *) &ctCDPI_Callouts.PI_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), PIName_check, &error_pos_in_block);
     501      if (rv) {
     502              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     503              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     504              exit(-1);
     505      }
    468506    }
    469507
    470508    if (bitblock_has_bit(ctCDPI_Callouts.CD_starts)){
    471       StreamScan((ScanBlock *) &ctCDPI_Callouts.CD_starts, sizeof(BitBlock)/sizeof(ScanBlock), CD_check);
     509      rv = StreamScan((ScanBlock *) &ctCDPI_Callouts.CD_starts, sizeof(BitBlock)/sizeof(ScanBlock), CD_check, &error_pos_in_block);
     510      if (rv) {
     511              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     512              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     513              exit(-1);
     514      }
    472515    }
    473516
    474517    if (bitblock_has_bit(ref_Callouts.GenRef_starts)){
    475       StreamScan((ScanBlock *) &ref_Callouts.GenRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), GenRef_check);
     518      rv = StreamScan((ScanBlock *) &ref_Callouts.GenRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), GenRef_check, &error_pos_in_block);
     519      if (rv) {
     520              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     521              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     522              exit(-1);
     523      }
    476524    }
    477525
    478526    if (bitblock_has_bit(ref_Callouts.DecRef_starts)){
    479       StreamScan((ScanBlock *) &ref_Callouts.DecRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), DecRef_check);
     527      rv = StreamScan((ScanBlock *) &ref_Callouts.DecRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), DecRef_check, &error_pos_in_block);
     528      if (rv) {
     529              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     530              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     531              exit(-1);
     532      }
    480533    }
    481534
    482535    if (bitblock_has_bit(ref_Callouts.HexRef_starts)){
    483       StreamScan((ScanBlock *) &ref_Callouts.HexRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), HexRef_check);
     536      rv = StreamScan((ScanBlock *) &ref_Callouts.HexRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), HexRef_check, &error_pos_in_block);
     537      if (rv) {
     538              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     539              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     540              exit(-1);
     541      }
    484542    }
    485543
    486544    if (bitblock_has_bit(check_streams.att_refs)){
    487       StreamScan((ScanBlock *) &check_streams.att_refs, sizeof(BitBlock)/sizeof(ScanBlock), AttRef_check);
    488     }
    489 
    490     error_tracker.If_Error_Report_First();
     545      rv = StreamScan((ScanBlock *) &check_streams.att_refs, sizeof(BitBlock)/sizeof(ScanBlock), AttRef_check, &error_pos_in_block);
     546      if (rv) {
     547              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     548              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     549              exit(-1);
     550      }
     551    }
     552
     553    if(error_tracker.Has_Noted_Error()){
     554            tracker.get_Line_and_Column(error_tracker.Noted_Pos_In_Block(), error_line, error_column);
     555            ReportError(error_tracker.Noted_Error_Msg(), error_line, error_column);
     556            exit(-1);
     557    }
    491558
    492559    matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
     
    571638
    572639/* Full Buffers */
    573     int block_segment_num = 0;
    574640    while (chars_avail == BUFFER_SIZE) {
    575641      PERF_SEC_START(parser_timer);
     
    594660      buf_pos += chars_avail;
    595661      buffer_base = buf_pos;
    596       block_segment_num++;
    597662
    598663  }
     
    634699//    pbgs_symbol_table.Print_Symbol_Table_Distribution();
    635700}
    636 
    637 
    638 
    /*
     * Program entry point.
     *
     * argv[1] is the input file name (required); argv[2] is an optional
     * output file name. The input is opened for binary reading; the output
     * is stdout when no second argument is given, otherwise the named file
     * opened for binary writing. do_process(infile, outfile) is run between
     * PERF_SEC_INIT and PERF_SEC_DUMP/PERF_SEC_DESTROY on parser_timer
     * (presumably a timing section from perflib/perfsec.h; parser_timer is
     * not defined in this function -- confirm).
     *
     * Exits with -1 on a missing argument or when either file cannot be
     * opened; returns 0 otherwise.
     */
    639 int
    640 main(int argc, char * argv[]) {
    641         char * infilename, * outfilename;
    642         FILE *infile, *outfile;
    643         struct stat fileinfo;
    644 
    645         if (argc < 2) {
    646                 printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
    647                 exit(-1);
    648         }
    649 
    650         infilename = argv[1];
                // NOTE(review): the return value of stat() is not checked and
                // fileinfo is not read anywhere else in this function.
    651         stat(infilename, &fileinfo);
    652         infile = fopen(infilename, "rb");
    653         if (!infile) {
    654                 fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
    655                 exit(-1);
    656         }
    657 
                // With no second argument the results go to stdout.
    658         if (argc < 3) outfile = stdout;
    659         else {
    660                 outfilename = argv[2];
    661                 outfile = fopen(outfilename, "wb");
    662                 if (!outfile) {
    663                         fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
    664                         exit(-1);
    665                 }
    666         }
    667 
    668 //      PERF_SEC_BIND(1);
    669 
    670         PERF_SEC_INIT(parser_timer);
    671 
    672         do_process(infile, outfile);
    673 
    674         PERF_SEC_DUMP(parser_timer);
    675 
    676         PERF_SEC_DESTROY(parser_timer);
    677 
    678         fclose(infile);
                // NOTE(review): outfile may be stdout here (argc < 3), in which
                // case this closes the process's standard output.
    679         fclose(outfile);
    680         return(0);
    681 }
  • proto/SymbolTable/symtab_pbgs_identity_template.cpp

    r1456 r1457  
    1 #define USE_FUNCTION_TEMPLATES
    21#define TEMPLATED_SIMD_LIB
    32
    4 #define DEBUG 0
    5 #define BLOCK_SIZE (sizeof(SIMD_type) * 8)
    6 #define SEGMENT_BLOCKS 12
    7 #define BUFFER_SIZE (BLOCK_SIZE * SEGMENT_BLOCKS)
    8 #define OVERLAP_BUFSIZE 2*(sizeof(SIMD_type))
     3#define USE_MASK_COMPARE    //Comparison using masking technique.
    94
    105#include <stdio.h>
     
    1510#include <../lib_simd.h>
    1611#include <pbgs_identity_symbol_table.h>
    17 
    1812#include <queue>
    1913#include <string>
     
    2519using namespace std;
    2620
     21#define DEBUG 0
     22#define BLOCK_SIZE (sizeof(SIMD_type) * 8)
     23#define SEGMENT_BLOCKS 12
     24#define BUFFER_SIZE (BLOCK_SIZE * SEGMENT_BLOCKS)
     25#define OVERLAP_BUFSIZE 2*(sizeof(SIMD_type))
     26
    2727#include <../carryQ.h>
    2828#include <xmldecl.h>
    29 #include <xml_error.c>
    30 #include <xmldecl.c>
    3129#include <namechars.h>
    32 
    3330#include <../perflib/perfsec.h>
    3431#include <../s2p.h>
     
    3633#include <TagMatcher.h>
    3734#include <LineColTracker.h>
     35#include <ErrorUtil.h>
     36#include <ErrorTracker.h>
     37#include <XMLTestSuiteError.h>
     38
     39#include <xml_error.c>
     40#include <ErrorUtil.cpp>
     41#include <ErrorTracker.cpp>
     42#include <XMLTestSuiteError.cpp>
    3843
    3944#ifdef BUFFER_PROFILING
     
    5661char * source;
    5762LineColTracker tracker;
    58 
    /*
     * Writes "<error_msg> at line L, column C" to stderr.
     *
     * error_pos_in_block is handed to the global tracker's
     * get_Line_and_Column together with error_line and error_column, which
     * are then printed (presumably filled in by reference -- confirm against
     * LineColTracker). This function does not terminate the program; callers
     * that want to stop must exit themselves.
     */
    59 static inline void ReportError(const char * error_msg, int error_pos_in_block) {
    60   int error_line, error_column;
    61   tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
    62   fprintf(stderr, "%s at line %i, column %i\n", error_msg, error_line, error_column);
    63 }
    64 
    /*
     * Remembers one pending error: the message whose in-block position is the
     * smallest among all errors noted so far. A position of -1 means that no
     * error has been noted.
     */
    65 class ErrorTracker {
    66 public:
           // Start with "no error noted".
    67     ErrorTracker() { noted_pos_in_block = -1;}
    68 
           // Record error_msg if it is the first error noted, or if its position
           // (count_forward_zeroes(err_strm), presumably the index of the first
           // marked bit in err_strm -- confirm) is smaller than the stored one.
    69     inline void NoteError(const char * error_msg, BitBlock err_strm) {
    70       int pos_in_block = count_forward_zeroes(err_strm);
    71       if ((noted_pos_in_block == -1) || (noted_pos_in_block > pos_in_block)) {
    72         noted_pos_in_block = pos_in_block;
    73         noted_error = error_msg;
    74       }
    75     }
    76 
           // If an error has been noted, report it through ReportError and
           // terminate the program with exit(-1); otherwise do nothing.
    77     inline void If_Error_Report_First() {
    78       if (noted_pos_in_block > -1) {
                   // NOTE(review): error_line and error_column are declared
                   // here but never used in this method.
    79               int error_line, error_column;
    80               ReportError(noted_error, noted_pos_in_block);
    81               exit(-1);
    82       }
    83     }
    84 
    85 private:
         // Message of the stored error; only assigned inside NoteError, and only
         // read after noted_pos_in_block has been set to a non-negative value.
    86   const char * noted_error;
         // In-block position of the stored error, or -1 when none is stored.
    87   int noted_pos_in_block;
    88 };
    89 
    90 
    9163TagMatcher matcher;
    9264BitBlock EOF_mask = simd_const_1(1);
    93 
    9465ErrorTracker error_tracker;
     66
    9567BitBlock elem_starts;
    9668int previous_block_last_elem_start;
     
    9971vector <int> gids;
    10072PBGSIdentitySymbolTable pbgs_symbol_table;
     73
     74/* StreamScan & Post Process Declarations */
     75//      static inline int StreamScanToFirst(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block)
     76static inline int StreamScan(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block);
     77
     78static inline int NameStrt_check(int pos);
     79static inline int Name_check(int pos);
     80static inline int PIName_check(int pos);
     81static inline int CD_check(int pos);
     82static inline int GenRef_check(int pos);
     83static inline int HexRef_check(int pos);
     84static inline int DecRef_check(int pos);
     85static inline int AttRef_check(int pos);
     86
     87@global
     88
     89static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits);
     90static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask);
     91static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, Tag_Callouts & tag_Callouts, int chars_avail);
     92void do_process(FILE *infile, FILE *outfile);
     93static inline int ScanBackwardPos(BitBlock * block, int pos);
     94static inline int compute_hash_value (int lgth, int start);
     95template <int L> static inline int ElemEnd_grouping(int pos);
     96template <int L> static inline int StreamScanLengthGrouping(ScanBlock * stream, int blk_count);
     97
     98int main(int argc, char * argv[]) {
     99        char * infilename, * outfilename;
     100        FILE *infile, *outfile;
     101        struct stat fileinfo;
     102
     103        if (argc < 2) {
     104                printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
     105                exit(-1);
     106        }
     107
     108        infilename = argv[1];
     109        stat(infilename, &fileinfo);
     110        infile = fopen(infilename, "rb");
     111        if (!infile) {
     112                fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
     113                exit(-1);
     114        }
     115
     116        if (argc < 3) outfile = stdout;
     117        else {
     118                outfilename = argv[2];
     119                outfile = fopen(outfilename, "wb");
     120                if (!outfile) {
     121                        fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
     122                        exit(-1);
     123                }
     124        }
     125
     126//      PERF_SEC_BIND(1);
     127
     128        PERF_SEC_INIT(parser_timer);
     129
     130        do_process(infile, outfile);
     131
     132        PERF_SEC_DUMP(parser_timer);
     133
     134        PERF_SEC_DESTROY(parser_timer);
     135
     136        fclose(infile);
     137        fclose(outfile);
     138
     139        return(0);
     140}
     141
     142/* s2p Definitions */
     143static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
     144  s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
     145        basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
     146}
     147
     148static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
     149  s2p_do_block(U8, basis_bits);
     150  basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
     151  basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
     152  basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
     153  basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
     154  basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
     155  basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
     156  basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
     157  basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
     158}
    101159
    102160static inline int ScanBackwardPos(BitBlock * block, int pos)
     
    117175}
    118176
    119 static inline int StreamScan(ScanBlock * stream, int blk_count, int ProcessPos(int)) {
     177/* StreamScan & Post Process Definitions */
     178static inline int StreamScan(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block) {
     179
    120180        int blk;
    121181        int block_pos = 0;
     182        int pos;
     183
    122184        for (blk = 0; blk < blk_count; blk++) {
    123185                ScanBlock s = stream[blk];
    124186                while(s) {
    125                         int code = (ProcessPos(cfzl(s) + block_pos));
    126                         if (code) return code;
     187                        pos = (cfzl(s) + block_pos);
     188                        int code = (ProcessPos(pos));
     189                        if (code) {
     190                                *error_pos_in_block = pos;
     191                                return code; // error code
     192                        }
    127193                        s = s & (s-1);  // clear rightmost bit.
    128194                }
     
    201267        int block_pos = block_base + pos;
    202268        if(XML_10_UTF8_NameStrt_bytes((unsigned char*)&source[block_pos]) == 0){
    203               ReportError("name start error", pos);
    204               exit(-1);
     269              return XMLTestSuiteError::NAME_START;
    205270        }
    206271        return 0;
     
    210275        int block_pos = block_base + pos;
    211276        if(XML_10_UTF8_NameChar_bytes((unsigned char*)&source[block_pos]) == 0){
    212               ReportError("name error", pos);
    213               exit(-1);
     277                  return XMLTestSuiteError::NAME;
    214278        }
    215279        return 0;
     
    219283        int block_pos = block_base + pos;
    220284        int file_pos = block_pos+buffer_base;
    221         printf ("%s:%i\n",__FUNCTION__,pos);
    222285        if (at_XxMmLll<ASCII>((unsigned char*)&source[block_pos]) && (source[block_pos+3]=='?' || source[block_pos+3]<= ' ')) {
    223286              // "<?xml" legal at start of file.
    224               if ((file_pos == 2) && at_XmlDecl_start<ASCII>((unsigned char*)&source[0])) return 0;
    225               ReportError("[Xx][Mm][Ll] illegal as PI name", pos);
    226               exit(-1);
     287              if (!((file_pos == 2) && at_XmlDecl_start<ASCII>((unsigned char*)&source[0]))) {
     288                  return XMLTestSuiteError::XMLPINAME;
     289              }
    227290        }
    228291        return 0;
     
    232295        int block_pos = block_base + pos;
    233296        if (!at_CDATA1<ASCII>((unsigned char*)&source[block_pos])){
    234               ReportError("CDATA error", pos);
    235               exit(-1);
     297                  return XMLTestSuiteError::CDATA;
    236298        }
    237299        return 0;
     
    242304        unsigned char* s = (unsigned char*)&source[block_pos];
    243305        if (!(at_Ref_gt<ASCII>(s)||at_Ref_lt<ASCII>(s)||at_Ref_amp<ASCII>(s)||at_Ref_quot<ASCII>(s)||at_Ref_apos<ASCII>(s))){
    244               ReportError("Undefined reference", pos);
    245               exit(-1);
     306              return XMLTestSuiteError::UNDEFREF;
    246307        }
    247308        return 0;
     
    255316          ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4);
    256317          if (ch_val> 0x10FFFF ){
    257             ReportError("Illegal character reference", pos);
    258             exit(-1);
     318                return XMLTestSuiteError::CHARREF;
    259319          }
    260320          s++;
    261321        }
    262322        if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    263           ReportError("Illegal character reference", pos);
    264           exit(-1);
     323          return XMLTestSuiteError::CHARREF;
    265324        }
    266325        else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    267           ReportError("Illegal XML 1.0 character reference", pos);
    268           exit(-1);
     326          return XMLTestSuiteError::XML10CHARREF;
    269327        }
    270328        return 0;
     
    278336          ch_val = DigitVal<ASCII>(s[0]) + ch_val*10;
    279337          if (ch_val> 0x10FFFF ){
    280             ReportError("Illegal character reference", pos);
    281             exit(-1);
     338                        return XMLTestSuiteError::CHARREF;
    282339          }
    283340          s++;
    284341        }
    285342        if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    286           ReportError("Illegal character reference", pos);
    287           exit(-1);
     343                  return XMLTestSuiteError::CHARREF;
    288344        }
    289345        else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    290           ReportError("Illegal XML 1.0 character reference", pos);
    291           exit(-1);
     346                  return XMLTestSuiteError::XML10CHARREF;
    292347        }
    293348        return 0;
     
    314369          }
    315370          if (ch_val==60){
    316             ReportError("Attribute values contain '<' characters after reference expansion", pos);
    317             exit(-1);
     371            return XMLTestSuiteError::ATTREF;
    318372          }
    319373        }
    320374        else if(at_Ref_lt<ASCII>(s)){
    321           ReportError("Attribute values contain '<' characters after reference expansion", pos);
    322           exit(-1);
    323         }
    324         return 0;
    325 }
    326 
    327 
    328 
    329 @global
    330 
    331 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
    332   s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
    333         basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
    334 }
    335 
    336 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
    337   s2p_do_block(U8, basis_bits);
    338   basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
    339   basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
    340   basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
    341   basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
    342   basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
    343   basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
    344   basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
    345   basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
     375          return XMLTestSuiteError::ATTREF;
     376        }
     377        return 0;
    346378}
    347379
    348380static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, Tag_Callouts & tag_Callouts, Hash_data & hash_data, int chars_avail){
    349381
    350 
    351     //NOTE: We are NOT handling the case for an extremely long symbol name (length > 1541).
    352     //      TagMatcher will crash if we feed a long symbol name.
    353     //      Sample file: test/long_sym_name.xml
    354 
    355382    tracker.StoreNewlines(lex.LF);
     383    int rv, error_pos_in_block, error_line, error_column;
    356384    elem_starts = tag_Callouts.ElemName_starts;
    357385    hashvalues[1] = hash_data.Hash_value;
     
    449477
    450478    if (bitblock_has_bit(simd_or(check_streams.non_ascii_name_starts, check_streams.non_ascii_names))) {
    451       StreamScan((ScanBlock *) &check_streams.non_ascii_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), NameStrt_check);
    452       StreamScan((ScanBlock *) &check_streams.non_ascii_names, sizeof(BitBlock)/sizeof(ScanBlock), Name_check);
     479      rv = StreamScan((ScanBlock *) &check_streams.non_ascii_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), NameStrt_check, &error_pos_in_block);
     480      if (rv) {
     481              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     482              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     483              exit(-1);
     484      }
     485
     486      rv = StreamScan((ScanBlock *) &check_streams.non_ascii_names, sizeof(BitBlock)/sizeof(ScanBlock), Name_check, &error_pos_in_block);
     487      if (rv) {
     488              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     489              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     490              exit(-1);
     491      }
    453492    }
    454493
    455494    if (bitblock_has_bit(ctCDPI_Callouts.PI_name_starts)){
    456       StreamScan((ScanBlock *) &ctCDPI_Callouts.PI_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), PIName_check);
     495      rv = StreamScan((ScanBlock *) &ctCDPI_Callouts.PI_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), PIName_check, &error_pos_in_block);
     496      if (rv) {
     497              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     498              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     499              exit(-1);
     500      }
    457501    }
    458502
    459503    if (bitblock_has_bit(ctCDPI_Callouts.CD_starts)){
    460       StreamScan((ScanBlock *) &ctCDPI_Callouts.CD_starts, sizeof(BitBlock)/sizeof(ScanBlock), CD_check);
     504      rv = StreamScan((ScanBlock *) &ctCDPI_Callouts.CD_starts, sizeof(BitBlock)/sizeof(ScanBlock), CD_check, &error_pos_in_block);
     505      if (rv) {
     506              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     507              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     508              exit(-1);
     509      }
    461510    }
    462511
    463512    if (bitblock_has_bit(ref_Callouts.GenRef_starts)){
    464       StreamScan((ScanBlock *) &ref_Callouts.GenRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), GenRef_check);
     513      rv = StreamScan((ScanBlock *) &ref_Callouts.GenRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), GenRef_check, &error_pos_in_block);
     514      if (rv) {
     515              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     516              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     517              exit(-1);
     518      }
    465519    }
    466520
    467521    if (bitblock_has_bit(ref_Callouts.DecRef_starts)){
    468       StreamScan((ScanBlock *) &ref_Callouts.DecRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), DecRef_check);
     522      rv = StreamScan((ScanBlock *) &ref_Callouts.DecRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), DecRef_check, &error_pos_in_block);
     523      if (rv) {
     524              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     525              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     526              exit(-1);
     527      }
    469528    }
    470529
    471530    if (bitblock_has_bit(ref_Callouts.HexRef_starts)){
    472       StreamScan((ScanBlock *) &ref_Callouts.HexRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), HexRef_check);
     531      rv = StreamScan((ScanBlock *) &ref_Callouts.HexRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), HexRef_check, &error_pos_in_block);
     532      if (rv) {
     533              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     534              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     535              exit(-1);
     536      }
    473537    }
    474538
    475539    if (bitblock_has_bit(check_streams.att_refs)){
    476       StreamScan((ScanBlock *) &check_streams.att_refs, sizeof(BitBlock)/sizeof(ScanBlock), AttRef_check);
    477     }
    478 
    479     error_tracker.If_Error_Report_First();
     540      rv = StreamScan((ScanBlock *) &check_streams.att_refs, sizeof(BitBlock)/sizeof(ScanBlock), AttRef_check, &error_pos_in_block);
     541      if (rv) {
     542              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     543              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     544              exit(-1);
     545      }
     546    }
     547
     548    if(error_tracker.Has_Noted_Error()){
     549            tracker.get_Line_and_Column(error_tracker.Noted_Pos_In_Block(), error_line, error_column);
     550            ReportError(error_tracker.Noted_Error_Msg(), error_line, error_column);
     551            exit(-1);
     552    }
    480553
    481554    matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
     
    560633
    561634/* Full Buffers */
    562     int block_segment_num = 0;
    563635    while (chars_avail == BUFFER_SIZE) {
    564636      PERF_SEC_START(parser_timer);
     
    582654      buf_pos += chars_avail;
    583655      buffer_base = buf_pos;
    584       block_segment_num++;
    585656
    586657  }
     
    621692//    pbgs_symbol_table.Print_Symbol_Table_Distribution();
    622693}
    623 
    624 
    625 
    626 int
    627 main(int argc, char * argv[]) {
    628         char * infilename, * outfilename;
    629         FILE *infile, *outfile;
    630         struct stat fileinfo;
    631 
    632         if (argc < 2) {
    633                 printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
    634                 exit(-1);
    635         }
    636 
    637         infilename = argv[1];
    638         stat(infilename, &fileinfo);
    639         infile = fopen(infilename, "rb");
    640         if (!infile) {
    641                 fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
    642                 exit(-1);
    643         }
    644 
    645         if (argc < 3) outfile = stdout;
    646         else {
    647                 outfilename = argv[2];
    648                 outfile = fopen(outfilename, "wb");
    649                 if (!outfile) {
    650                         fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
    651                         exit(-1);
    652                 }
    653         }
    654 
    655 //      PERF_SEC_BIND(1);
    656 
    657         PERF_SEC_INIT(parser_timer);
    658 
    659         do_process(infile, outfile);
    660 
    661         PERF_SEC_DUMP(parser_timer);
    662 
    663         PERF_SEC_DESTROY(parser_timer);
    664 
    665         fclose(infile);
    666         fclose(outfile);
    667 
    668         printf ("Done procressing\n");
    669         return(0);
    670 }
  • proto/SymbolTable/symtab_pbgs_log_template.cpp

    r1442 r1457  
    1 #define USE_FUNCTION_TEMPLATES
    21#define TEMPLATED_SIMD_LIB
    32
    43#define USE_MASK_COMPARE    //Comparison using masking technique.
     4
     5#include <stdio.h>
     6#include <stdlib.h>
     7#include <errno.h>
     8#include <sys/types.h>
     9#include <sys/stat.h>
     10#include <../lib_simd.h>
     11#include <pbgs_log_symbol_table.h>
     12#include <queue>
     13#include <string>
     14
     15typedef long ScanBlock;
     16typedef SIMD_type BytePack;
     17typedef SIMD_type BitBlock;
     18
     19using namespace std;
    520
    621#define DEBUG 0
     
    1025#define OVERLAP_BUFSIZE 2*(sizeof(SIMD_type))
    1126
    12 #include <stdio.h>
    13 #include <stdlib.h>
    14 #include <errno.h>
    15 #include <sys/types.h>
    16 #include <sys/stat.h>
    17 
    18 #include <../lib_simd.h>
    19 #include <pbgs_log_symbol_table.h>
    20 #include <queue>
    21 #include <string>
    22 
    23 typedef long ScanBlock;
    24 typedef SIMD_type BytePack;
    25 typedef SIMD_type BitBlock;
    26 
    27 using namespace std;
    28 
    2927#include <../carryQ.h>
    3028#include <xmldecl.h>
    31 #include <xml_error.c>
    32 #include <xmldecl.c>
    3329#include <namechars.h>
    34 
    3530#include <../perflib/perfsec.h>
    3631#include <../s2p.h>
     
    3833#include <TagMatcher.h>
    3934#include <LineColTracker.h>
     35#include <ErrorUtil.h>
     36#include <ErrorTracker.h>
     37#include <XMLTestSuiteError.h>
     38
     39#include <xml_error.c>
     40#include <ErrorUtil.cpp>
     41#include <ErrorTracker.cpp>
     42#include <XMLTestSuiteError.cpp>
    4043
    4144#ifdef BUFFER_PROFILING
     
    5861char * source;
    5962LineColTracker tracker;
    60 
    61 static inline void ReportError(const char * error_msg, int error_pos_in_block) {
    62   int error_line, error_column;
    63   tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
    64   fprintf(stderr, "%s at line %i, column %i\n", error_msg, error_line, error_column);
    65 }
    66 
    67 class ErrorTracker {
    68 public:
    69     ErrorTracker() { noted_pos_in_block = -1;}
    70 
    71     inline void NoteError(const char * error_msg, BitBlock err_strm) {
    72       int pos_in_block = count_forward_zeroes(err_strm);
    73       if ((noted_pos_in_block == -1) || (noted_pos_in_block > pos_in_block)) {
    74         noted_pos_in_block = pos_in_block;
    75         noted_error = error_msg;
    76       }
    77     }
    78 
    79     inline void If_Error_Report_First() {
    80       if (noted_pos_in_block > -1) {
    81               int error_line, error_column;
    82               ReportError(noted_error, noted_pos_in_block);
    83               exit(-1);
    84       }
    85     }
    86 
    87 private:
    88   const char * noted_error;
    89   int noted_pos_in_block;
    90 };
    91 
    92 
    9363TagMatcher matcher;
    9464BitBlock EOF_mask = simd_const_1(1);
    95 
    9665ErrorTracker error_tracker;
    9766
     
    10372PBGSLogSymbolTable pbgs_symbol_table;
    10473
     74/* StreamScan & Post Process Declarations */
     75//      static inline int StreamScanToFirst(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block)
     76static inline int StreamScan(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block);
     77
     78static inline int NameStrt_check(int pos);
     79static inline int Name_check(int pos);
     80static inline int PIName_check(int pos);
     81static inline int CD_check(int pos);
     82static inline int GenRef_check(int pos);
     83static inline int HexRef_check(int pos);
     84static inline int DecRef_check(int pos);
     85static inline int AttRef_check(int pos);
     86
     87@global
     88
     89static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits);
     90static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask);
     91static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, Tag_Callouts & tag_Callouts, int chars_avail);
     92void do_process(FILE *infile, FILE *outfile);
    10593static inline int ScanBackwardPos(BitBlock * block, int pos);
    10694static inline int compute_hash_value (int lgth, int start);
     
    10896template <int L> static inline int StreamScanLengthGrouping(ScanBlock * stream, int blk_count);
    10997
    110 static inline int StreamScan(ScanBlock * stream, int blk_count, int ProcessPos(int)) {
     98int main(int argc, char * argv[]) {
     99        char * infilename, * outfilename;
     100        FILE *infile, *outfile;
     101        struct stat fileinfo;
     102
     103        if (argc < 2) {
     104                printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
     105                exit(-1);
     106        }
     107
     108        infilename = argv[1];
     109        stat(infilename, &fileinfo);
     110        infile = fopen(infilename, "rb");
     111        if (!infile) {
     112                fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
     113                exit(-1);
     114        }
     115
     116        if (argc < 3) outfile = stdout;
     117        else {
     118                outfilename = argv[2];
     119                outfile = fopen(outfilename, "wb");
     120                if (!outfile) {
     121                        fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
     122                        exit(-1);
     123                }
     124        }
     125
     126//      PERF_SEC_BIND(1);
     127
     128        PERF_SEC_INIT(parser_timer);
     129
     130        do_process(infile, outfile);
     131
     132        PERF_SEC_DUMP(parser_timer);
     133
     134        PERF_SEC_DESTROY(parser_timer);
     135
     136        fclose(infile);
     137        fclose(outfile);
     138
     139        return(0);
     140}
     141
     142/* s2p Definitions */
     143static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
     144  s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
     145        basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
     146}
     147
     148static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
     149  s2p_do_block(U8, basis_bits);
     150  basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
     151  basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
     152  basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
     153  basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
     154  basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
     155  basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
     156  basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
     157  basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
     158}
     159
     160/* StreamScan & Post Process Definitions */
     161static inline int StreamScan(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block) {
     162
    111163        int blk;
    112164        int block_pos = 0;
     165        int pos;
     166
    113167        for (blk = 0; blk < blk_count; blk++) {
    114168                ScanBlock s = stream[blk];
    115169                while(s) {
    116                         int code = (ProcessPos(cfzl(s) + block_pos));
    117                         if (code) return code;
     170                        pos = (cfzl(s) + block_pos);
     171                        int code = (ProcessPos(pos));
     172                        if (code) {
     173                                *error_pos_in_block = pos;
     174                                return code; // error code
     175                        }
    118176                        s = s & (s-1);  // clear rightmost bit.
    119177                }
     
    332390        int block_pos = block_base + pos;
    333391        if(XML_10_UTF8_NameStrt_bytes((unsigned char*)&source[block_pos]) == 0){
    334               ReportError("name start error", pos);
    335               exit(-1);
     392              return XMLTestSuiteError::NAME_START;
    336393        }
    337394        return 0;
     
    341398        int block_pos = block_base + pos;
    342399        if(XML_10_UTF8_NameChar_bytes((unsigned char*)&source[block_pos]) == 0){
    343               ReportError("name error", pos);
    344               exit(-1);
     400                  return XMLTestSuiteError::NAME;
    345401        }
    346402        return 0;
     
    350406        int block_pos = block_base + pos;
    351407        int file_pos = block_pos+buffer_base;
    352         printf ("%s:%i\n",__FUNCTION__,pos);
    353408        if (at_XxMmLll<ASCII>((unsigned char*)&source[block_pos]) && (source[block_pos+3]=='?' || source[block_pos+3]<= ' ')) {
    354409              // "<?xml" legal at start of file.
    355               if ((file_pos == 2) && at_XmlDecl_start<ASCII>((unsigned char*)&source[0])) return 0;
    356               ReportError("[Xx][Mm][Ll] illegal as PI name", pos);
    357               exit(-1);
     410              if (!((file_pos == 2) && at_XmlDecl_start<ASCII>((unsigned char*)&source[0]))) {
     411                  return XMLTestSuiteError::XMLPINAME;
     412              }
    358413        }
    359414        return 0;
     
    363418        int block_pos = block_base + pos;
    364419        if (!at_CDATA1<ASCII>((unsigned char*)&source[block_pos])){
    365               ReportError("CDATA error", pos);
    366               exit(-1);
     420                  return XMLTestSuiteError::CDATA;
    367421        }
    368422        return 0;
     
    373427        unsigned char* s = (unsigned char*)&source[block_pos];
    374428        if (!(at_Ref_gt<ASCII>(s)||at_Ref_lt<ASCII>(s)||at_Ref_amp<ASCII>(s)||at_Ref_quot<ASCII>(s)||at_Ref_apos<ASCII>(s))){
    375               ReportError("Undefined reference", pos);
    376               exit(-1);
     429              return XMLTestSuiteError::UNDEFREF;
    377430        }
    378431        return 0;
     
    386439          ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4);
    387440          if (ch_val> 0x10FFFF ){
    388             ReportError("Illegal character reference", pos);
    389             exit(-1);
     441                return XMLTestSuiteError::CHARREF;
    390442          }
    391443          s++;
    392444        }
    393445        if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    394           ReportError("Illegal character reference", pos);
    395           exit(-1);
     446          return XMLTestSuiteError::CHARREF;
    396447        }
    397448        else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    398           ReportError("Illegal XML 1.0 character reference", pos);
    399           exit(-1);
     449          return XMLTestSuiteError::XML10CHARREF;
    400450        }
    401451        return 0;
     
    409459          ch_val = DigitVal<ASCII>(s[0]) + ch_val*10;
    410460          if (ch_val> 0x10FFFF ){
    411             ReportError("Illegal character reference", pos);
    412             exit(-1);
     461                        return XMLTestSuiteError::CHARREF;
    413462          }
    414463          s++;
    415464        }
    416465        if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    417           ReportError("Illegal character reference", pos);
    418           exit(-1);
     466                  return XMLTestSuiteError::CHARREF;
    419467        }
    420468        else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    421           ReportError("Illegal XML 1.0 character reference", pos);
    422           exit(-1);
     469                  return XMLTestSuiteError::XML10CHARREF;
    423470        }
    424471        return 0;
     
    445492          }
    446493          if (ch_val==60){
    447             ReportError("Attribute values contain '<' characters after reference expansion", pos);
    448             exit(-1);
     494            return XMLTestSuiteError::ATTREF;
    449495          }
    450496        }
    451497        else if(at_Ref_lt<ASCII>(s)){
    452           ReportError("Attribute values contain '<' characters after reference expansion", pos);
    453           exit(-1);
    454         }
    455         return 0;
    456 }
    457 
    458 
    459 
    460 @global
    461 
    462 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
    463   s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
    464         basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
    465 }
    466 
    467 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
    468   s2p_do_block(U8, basis_bits);
    469   basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
    470   basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
    471   basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
    472   basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
    473   basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
    474   basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
    475   basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
    476   basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
     498          return XMLTestSuiteError::ATTREF;
     499        }
     500        return 0;
    477501}
    478502
    479503static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, Tag_Callouts & tag_Callouts, Hash_data & hash_data, int chars_avail){
    480504
    481 
    482     //NOTE: We are NOT handling the case for an extremely long symbol name (length > 1541).
    483     //      TagMatcher will crash if we feed a long symbol name.
    484     //      Sample file: test/long_sym_name.xml
    485 
    486505    tracker.StoreNewlines(lex.LF);
     506    int rv, error_pos_in_block, error_line, error_column;
    487507    elem_starts = tag_Callouts.ElemName_starts;
    488508    hashvalues[1] = hash_data.Hash_value;
     
    525545
    526546    if (bitblock_has_bit(simd_or(check_streams.non_ascii_name_starts, check_streams.non_ascii_names))) {
    527       StreamScan((ScanBlock *) &check_streams.non_ascii_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), NameStrt_check);
    528       StreamScan((ScanBlock *) &check_streams.non_ascii_names, sizeof(BitBlock)/sizeof(ScanBlock), Name_check);
     547      rv = StreamScan((ScanBlock *) &check_streams.non_ascii_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), NameStrt_check, &error_pos_in_block);
     548      if (rv) {
     549              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     550              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     551              exit(-1);
     552      }
     553
     554      rv = StreamScan((ScanBlock *) &check_streams.non_ascii_names, sizeof(BitBlock)/sizeof(ScanBlock), Name_check, &error_pos_in_block);
     555      if (rv) {
     556              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     557              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     558              exit(-1);
     559      }
    529560    }
    530561
    531562    if (bitblock_has_bit(ctCDPI_Callouts.PI_name_starts)){
    532       StreamScan((ScanBlock *) &ctCDPI_Callouts.PI_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), PIName_check);
     563      rv = StreamScan((ScanBlock *) &ctCDPI_Callouts.PI_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), PIName_check, &error_pos_in_block);
     564      if (rv) {
     565              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     566              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     567              exit(-1);
     568      }
    533569    }
    534570
    535571    if (bitblock_has_bit(ctCDPI_Callouts.CD_starts)){
    536       StreamScan((ScanBlock *) &ctCDPI_Callouts.CD_starts, sizeof(BitBlock)/sizeof(ScanBlock), CD_check);
     572      rv = StreamScan((ScanBlock *) &ctCDPI_Callouts.CD_starts, sizeof(BitBlock)/sizeof(ScanBlock), CD_check, &error_pos_in_block);
     573      if (rv) {
     574              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     575              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     576              exit(-1);
     577      }
    537578    }
    538579
    539580    if (bitblock_has_bit(ref_Callouts.GenRef_starts)){
    540       StreamScan((ScanBlock *) &ref_Callouts.GenRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), GenRef_check);
     581      rv = StreamScan((ScanBlock *) &ref_Callouts.GenRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), GenRef_check, &error_pos_in_block);
     582      if (rv) {
     583              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     584              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     585              exit(-1);
     586      }
    541587    }
    542588
    543589    if (bitblock_has_bit(ref_Callouts.DecRef_starts)){
    544       StreamScan((ScanBlock *) &ref_Callouts.DecRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), DecRef_check);
     590      rv = StreamScan((ScanBlock *) &ref_Callouts.DecRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), DecRef_check, &error_pos_in_block);
     591      if (rv) {
     592              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     593              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     594              exit(-1);
     595      }
    545596    }
    546597
    547598    if (bitblock_has_bit(ref_Callouts.HexRef_starts)){
    548       StreamScan((ScanBlock *) &ref_Callouts.HexRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), HexRef_check);
     599      rv = StreamScan((ScanBlock *) &ref_Callouts.HexRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), HexRef_check, &error_pos_in_block);
     600      if (rv) {
     601              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     602              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     603              exit(-1);
     604      }
    549605    }
    550606
    551607    if (bitblock_has_bit(check_streams.att_refs)){
    552       StreamScan((ScanBlock *) &check_streams.att_refs, sizeof(BitBlock)/sizeof(ScanBlock), AttRef_check);
    553     }
    554 
    555     error_tracker.If_Error_Report_First();
     608      rv = StreamScan((ScanBlock *) &check_streams.att_refs, sizeof(BitBlock)/sizeof(ScanBlock), AttRef_check, &error_pos_in_block);
     609      if (rv) {
     610              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     611              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     612              exit(-1);
     613      }
     614    }
     615
     616    if(error_tracker.Has_Noted_Error()){
     617            tracker.get_Line_and_Column(error_tracker.Noted_Pos_In_Block(), error_line, error_column);
     618            ReportError(error_tracker.Noted_Error_Msg(), error_line, error_column);
     619            exit(-1);
     620    }
    556621
    557622    matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
     
    566631    }
    567632    cout << endl;
    568 }
    569 
    570 static inline int test(int)
    571 {
    572     return 0;
    573633}
    574634
     
    636696
    637697/* Full Buffers */
    638     int block_segment_num = 0;
    639698    while (chars_avail == BUFFER_SIZE) {
    640699      PERF_SEC_START(parser_timer);
     
    659718      buf_pos += chars_avail;
    660719      buffer_base = buf_pos;
    661       block_segment_num++;
    662720
    663721  }
     
    699757//  print_GIDS();
    700758}
    701 
    702 
    703 
    704 int
    705 main(int argc, char * argv[]) {
    706         char * infilename, * outfilename;
    707         FILE *infile, *outfile;
    708         struct stat fileinfo;
    709 
    710         if (argc < 2) {
    711                 printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
    712                 exit(-1);
    713         }
    714 
    715         infilename = argv[1];
    716         stat(infilename, &fileinfo);
    717         infile = fopen(infilename, "rb");
    718         if (!infile) {
    719                 fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
    720                 exit(-1);
    721         }
    722 
    723         if (argc < 3) outfile = stdout;
    724         else {
    725                 outfilename = argv[2];
    726                 outfile = fopen(outfilename, "wb");
    727                 if (!outfile) {
    728                         fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
    729                         exit(-1);
    730                 }
    731         }
    732 
    733 //      PERF_SEC_BIND(1);
    734 
    735         PERF_SEC_INIT(parser_timer);
    736 
    737         do_process(infile, outfile);
    738 
    739         PERF_SEC_DUMP(parser_timer);
    740 
    741         PERF_SEC_DESTROY(parser_timer);
    742 
    743         fclose(infile);
    744         fclose(outfile);
    745 
    746         printf ("Done procressing\n");
    747         return(0);
    748 }
  • proto/SymbolTable/symtab_stl_template.cpp

    r1442 r1457  
    88#include <../lib_simd.h>
    99#include <symtab.h>
    10 
    1110#include <queue>
    1211#include <string>
     12
     13typedef long ScanBlock;
     14typedef SIMD_type BytePack;
     15typedef SIMD_type BitBlock;
     16
     17using namespace std;
    1318
    1419#define DEBUG 0
     
    1823#define OVERLAP_BUFSIZE 2*(sizeof(SIMD_type))
    1924
    20 typedef long ScanBlock;
    21 typedef SIMD_type BytePack;
    22 typedef SIMD_type BitBlock;
    23 
    24 using namespace std;
    25 
    2625#include <../carryQ.h>
    2726#include <xmldecl.h>
    28 #include <xml_error.c>
    29 #include <xmldecl.c>
    3027#include <namechars.h>
    31 
    3228#include <../perflib/perfsec.h>
    3329#include <../s2p.h>
     
    3531#include <TagMatcher.h>
    3632#include <LineColTracker.h>
     33#include <ErrorUtil.h>
     34#include <ErrorTracker.h>
     35#include <XMLTestSuiteError.h>
     36
     37#include <xml_error.c>
     38#include <ErrorUtil.cpp>
     39#include <ErrorTracker.cpp>
     40#include <XMLTestSuiteError.cpp>
    3741
    3842#ifdef BUFFER_PROFILING
     
    5256int block_base=0;
    5357int buffer_base=0;
    54 int buffer_last;
    5558char * source;
    5659LineColTracker tracker;
    57 
    58 static inline void ReportError(const char * error_msg, int error_pos_in_block) {
    59   int error_line, error_column;
    60   tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
    61   fprintf(stderr, "%s at line %i, column %i\n", error_msg, error_line, error_column);
    62 }
    63 
    64 class ErrorTracker {
    65 public:
    66     ErrorTracker() { noted_pos_in_block = -1;}
    67 
    68     inline void NoteError(const char * error_msg, BitBlock err_strm) {
    69       int pos_in_block = count_forward_zeroes(err_strm);
    70       if ((noted_pos_in_block == -1) || (noted_pos_in_block > pos_in_block)) {
    71         noted_pos_in_block = pos_in_block;
    72         noted_error = error_msg;
    73       }
    74     }
    75 
    76     inline void If_Error_Report_First() {
    77       if (noted_pos_in_block > -1) {
    78               int error_line, error_column;
    79               ReportError(noted_error, noted_pos_in_block);
    80               exit(-1);
    81       }
    82     }
    83 
    84 private:
    85   const char * noted_error;
    86   int noted_pos_in_block;
    87 };
    88 
    89 
    9060TagMatcher matcher;
     61ErrorTracker error_tracker;
    9162BitBlock EOF_mask = simd_const_1(1);
    92 
    93 ErrorTracker error_tracker;
    9463
    9564queue <size_t> elem_starts_buf;
     
    9867SymbolTable symbol_table;
    9968
    100 static inline int StreamScan(ScanBlock * stream, int blk_count, int ProcessPos(int)) {
     69/* StreamScan & Post Process Declarations */
     70//      static inline int StreamScanToFirst(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block)
     71static inline int StreamScan(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block);
     72
     73static inline int NameStrt_check(int pos);
     74static inline int Name_check(int pos);
     75static inline int PIName_check(int pos);
     76static inline int CD_check(int pos);
     77static inline int GenRef_check(int pos);
     78static inline int HexRef_check(int pos);
     79static inline int DecRef_check(int pos);
     80static inline int AttRef_check(int pos);
     81
     82@global
     83
     84static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits);
     85static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask);
     86static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, Tag_Callouts & tag_Callouts, int chars_avail);
     87static inline void do_symbol_table_lookup();
     88
     89void do_process(FILE *infile, FILE *outfile);
     90
     91int main(int argc, char * argv[]) {
     92        char * infilename, * outfilename;
     93        FILE *infile, *outfile;
     94        struct stat fileinfo;
     95
     96        if (argc < 2) {
     97                printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
     98                exit(-1);
     99        }
     100
     101        infilename = argv[1];
     102        stat(infilename, &fileinfo);
     103        infile = fopen(infilename, "rb");
     104        if (!infile) {
     105                fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
     106                exit(-1);
     107        }
     108
     109        if (argc < 3) outfile = stdout;
     110        else {
     111                outfilename = argv[2];
     112                outfile = fopen(outfilename, "wb");
     113                if (!outfile) {
     114                        fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
     115                        exit(-1);
     116                }
     117        }
     118
     119//      PERF_SEC_BIND(1);
     120
     121        PERF_SEC_INIT(parser_timer);
     122
     123        do_process(infile, outfile);
     124
     125        PERF_SEC_DUMP(parser_timer);
     126
     127        PERF_SEC_DESTROY(parser_timer);
     128
     129        fclose(infile);
     130        fclose(outfile);
     131
     132        return(0);
     133}
     134
     135/* s2p Definitions */
     136static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
     137  s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
     138        basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
     139}
     140
     141static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
     142  s2p_do_block(U8, basis_bits);
     143  basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
     144  basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
     145  basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
     146  basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
     147  basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
     148  basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
     149  basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
     150  basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
     151}
     152
     153
     154/* StreamScan & Post Process Definitions */
     155static inline int StreamScan(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block) {
     156
    101157        int blk;
    102         int block_pos = 0;
     158        int block_pos = 0;
     159        int pos;
     160
    103161        for (blk = 0; blk < blk_count; blk++) {
    104                 ScanBlock s = stream[blk];
    105                 while(s) {
    106                         int code = (ProcessPos(cfzl(s) + block_pos));
    107                         if (code) return code;
     162                ScanBlock s = stream[blk];
     163                while(s) {
     164                        pos = (cfzl(s) + block_pos);
     165                        int code = (ProcessPos(pos));
     166                        if (code) {
     167                                *error_pos_in_block = pos;
     168                                return code; // error code
     169                        }
    108170                        s = s & (s-1);  // clear rightmost bit.
    109171                }
     
    126188
    127189static inline int NameStrt_check(int pos) {
    128         int block_pos = block_base + pos;
     190        int block_pos = block_base + pos;
    129191        if(XML_10_UTF8_NameStrt_bytes((unsigned char*)&source[block_pos]) == 0){
    130               ReportError("name start error", pos);
    131               exit(-1);
     192              return XMLTestSuiteError::NAME_START;
    132193        }
    133194        return 0;
     
    137198        int block_pos = block_base + pos;
    138199        if(XML_10_UTF8_NameChar_bytes((unsigned char*)&source[block_pos]) == 0){
    139               ReportError("name error", pos);
    140               exit(-1);
     200                  return XMLTestSuiteError::NAME;
    141201        }
    142202        return 0;
     
    146206        int block_pos = block_base + pos;
    147207        int file_pos = block_pos+buffer_base;
    148         printf ("%s:%i\n",__FUNCTION__,pos);
    149208        if (at_XxMmLll<ASCII>((unsigned char*)&source[block_pos]) && (source[block_pos+3]=='?' || source[block_pos+3]<= ' ')) {
    150209              // "<?xml" legal at start of file.
    151               if ((file_pos == 2) && at_XmlDecl_start<ASCII>((unsigned char*)&source[0])) return 0;
    152               ReportError("[Xx][Mm][Ll] illegal as PI name", pos);
    153               exit(-1);
     210              if (!((file_pos == 2) && at_XmlDecl_start<ASCII>((unsigned char*)&source[0]))) {
     211                  return XMLTestSuiteError::XMLPINAME;
     212              }
    154213        }
    155214        return 0;
     
    159218        int block_pos = block_base + pos;
    160219        if (!at_CDATA1<ASCII>((unsigned char*)&source[block_pos])){
    161               ReportError("CDATA error", pos);
    162               exit(-1);
     220                  return XMLTestSuiteError::CDATA;
    163221        }
    164222        return 0;
     
    169227        unsigned char* s = (unsigned char*)&source[block_pos];
    170228        if (!(at_Ref_gt<ASCII>(s)||at_Ref_lt<ASCII>(s)||at_Ref_amp<ASCII>(s)||at_Ref_quot<ASCII>(s)||at_Ref_apos<ASCII>(s))){
    171               ReportError("Undefined reference", pos);
    172               exit(-1);
     229              return XMLTestSuiteError::UNDEFREF;
    173230        }
    174231        return 0;
     
    182239          ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4);
    183240          if (ch_val> 0x10FFFF ){
    184             ReportError("Illegal character reference", pos);
    185             exit(-1);
     241                return XMLTestSuiteError::CHARREF;
    186242          }
    187243          s++;
    188244        }
    189245        if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    190           ReportError("Illegal character reference", pos);
    191           exit(-1);
     246          return XMLTestSuiteError::CHARREF;
    192247        }
    193248        else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    194           ReportError("Illegal XML 1.0 character reference", pos);
    195           exit(-1);
     249          return XMLTestSuiteError::XML10CHARREF;
    196250        }
    197251        return 0;
     
    205259          ch_val = DigitVal<ASCII>(s[0]) + ch_val*10;
    206260          if (ch_val> 0x10FFFF ){
    207             ReportError("Illegal character reference", pos);
    208             exit(-1);
     261                        return XMLTestSuiteError::CHARREF;
    209262          }
    210263          s++;
    211264        }
    212265        if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    213           ReportError("Illegal character reference", pos);
    214           exit(-1);
     266                  return XMLTestSuiteError::CHARREF;
    215267        }
    216268        else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    217           ReportError("Illegal XML 1.0 character reference", pos);
    218           exit(-1);
     269                  return XMLTestSuiteError::XML10CHARREF;
    219270        }
    220271        return 0;
     
    241292          }
    242293          if (ch_val==60){
    243             ReportError("Attribute values contain '<' characters after reference expansion", pos);
    244             exit(-1);
     294            return XMLTestSuiteError::ATTREF;
    245295          }
    246296        }
    247297        else if(at_Ref_lt<ASCII>(s)){
    248           ReportError("Attribute values contain '<' characters after reference expansion", pos);
    249           exit(-1);
    250         }
    251         return 0;
    252 }
    253 
    254 
    255 
    256 @global
    257 
    258 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
    259   s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
    260         basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
    261 }
    262 
    263 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
    264   s2p_do_block(U8, basis_bits);
    265   basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
    266   basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
    267   basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
    268   basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
    269   basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
    270   basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
    271   basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
    272   basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
     298          return XMLTestSuiteError::ATTREF;
     299        }
     300        return 0;
    273301}
    274302
     
    283311        int length = end - start;
    284312
    285         int gid = -1;
    286 
    287 
    288313        //lookup or insert to symbol table
    289314#if DEBUG
     
    296321#endif
    297322
    298         gid = symbol_table.Lookup_or_Insert_Name(source + start - buffer_base, length);
    299 
     323        int gid = symbol_table.Lookup_or_Insert_Name(source + start - buffer_base, length);
    300324        gids.push_back(gid);
    301325    }
     
    304328static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, Tag_Callouts & tag_Callouts, int chars_avail){
    305329
    306     //NOTE: We are NOT handling the case for an extremely long symbol name (length > 1541).
    307     //      TagMatcher will crash if we feed a long symbol name.
    308     //      Sample file: test/long_sym_name.xml
    309 
    310             tracker.StoreNewlines(lex.LF);
    311 
    312             if ( bitblock_has_bit(tag_Callouts.ElemName_starts))
    313             {
    314                 StreamScan((ScanBlock *) &tag_Callouts.ElemName_starts, sizeof(BitBlock)/sizeof(ScanBlock), ElemStrt_check);
    315             }
    316 
    317             if ( bitblock_has_bit(tag_Callouts.ElemName_ends) )
    318             {
    319                 StreamScan((ScanBlock *) &tag_Callouts.ElemName_ends, sizeof(BitBlock)/sizeof(ScanBlock), ElemEnd_check);
    320             }
    321 
    322             do_symbol_table_lookup();
    323 
    324                 if (bitblock_has_bit(simd_or(check_streams.non_ascii_name_starts, check_streams.non_ascii_names))) {
    325                   StreamScan((ScanBlock *) &check_streams.non_ascii_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), NameStrt_check);
    326                   StreamScan((ScanBlock *) &check_streams.non_ascii_names, sizeof(BitBlock)/sizeof(ScanBlock), Name_check);
    327                 }
    328 
    329                 if (bitblock_has_bit(ctCDPI_Callouts.PI_name_starts)){
    330                   StreamScan((ScanBlock *) &ctCDPI_Callouts.PI_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), PIName_check);
    331                 }
    332 
    333                 if (bitblock_has_bit(ctCDPI_Callouts.CD_starts)){
    334                   StreamScan((ScanBlock *) &ctCDPI_Callouts.CD_starts, sizeof(BitBlock)/sizeof(ScanBlock), CD_check);
    335                 }
    336 
    337                 if (bitblock_has_bit(ref_Callouts.GenRef_starts)){
    338                   StreamScan((ScanBlock *) &ref_Callouts.GenRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), GenRef_check);
    339                 }
    340 
    341                 if (bitblock_has_bit(ref_Callouts.DecRef_starts)){
    342                   StreamScan((ScanBlock *) &ref_Callouts.DecRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), DecRef_check);
    343                 }
    344 
    345                 if (bitblock_has_bit(ref_Callouts.HexRef_starts)){
    346                   StreamScan((ScanBlock *) &ref_Callouts.HexRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), HexRef_check);
    347                 }
    348 
    349                 if (bitblock_has_bit(check_streams.att_refs)){
    350                   StreamScan((ScanBlock *) &check_streams.att_refs, sizeof(BitBlock)/sizeof(ScanBlock), AttRef_check);
    351                 }
    352 
    353                 error_tracker.If_Error_Report_First();
    354 
    355                 matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
    356                 tracker.AdvanceBlock();
     330    tracker.StoreNewlines(lex.LF);
     331    int rv, error_pos_in_block, error_line, error_column;
     332
     333    if ( bitblock_has_bit(tag_Callouts.ElemName_starts))
     334    {
     335        StreamScan((ScanBlock *) &tag_Callouts.ElemName_starts, sizeof(BitBlock)/sizeof(ScanBlock), ElemStrt_check, &error_pos_in_block);
     336    }
     337
     338    if ( bitblock_has_bit(tag_Callouts.ElemName_ends) )
     339    {
     340        StreamScan((ScanBlock *) &tag_Callouts.ElemName_ends, sizeof(BitBlock)/sizeof(ScanBlock), ElemEnd_check, &error_pos_in_block);
     341    }
     342
     343    do_symbol_table_lookup();
     344
     345    if (bitblock_has_bit(simd_or(check_streams.non_ascii_name_starts, check_streams.non_ascii_names))) {
     346      rv = StreamScan((ScanBlock *) &check_streams.non_ascii_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), NameStrt_check, &error_pos_in_block);
     347      if (rv) {
     348              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     349              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     350              exit(-1);
     351      }
     352
     353      rv = StreamScan((ScanBlock *) &check_streams.non_ascii_names, sizeof(BitBlock)/sizeof(ScanBlock), Name_check, &error_pos_in_block);
     354      if (rv) {
     355              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     356              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     357              exit(-1);
     358      }
     359    }
     360
     361    if (bitblock_has_bit(ctCDPI_Callouts.PI_name_starts)){
     362      rv = StreamScan((ScanBlock *) &ctCDPI_Callouts.PI_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), PIName_check, &error_pos_in_block);
     363      if (rv) {
     364              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     365              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     366              exit(-1);
     367      }
     368    }
     369
     370    if (bitblock_has_bit(ctCDPI_Callouts.CD_starts)){
     371      rv = StreamScan((ScanBlock *) &ctCDPI_Callouts.CD_starts, sizeof(BitBlock)/sizeof(ScanBlock), CD_check, &error_pos_in_block);
     372      if (rv) {
     373              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     374              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     375              exit(-1);
     376      }
     377    }
     378
     379    if (bitblock_has_bit(ref_Callouts.GenRef_starts)){
     380      rv = StreamScan((ScanBlock *) &ref_Callouts.GenRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), GenRef_check, &error_pos_in_block);
     381      if (rv) {
     382              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     383              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     384              exit(-1);
     385      }
     386    }
     387
     388    if (bitblock_has_bit(ref_Callouts.DecRef_starts)){
     389      rv = StreamScan((ScanBlock *) &ref_Callouts.DecRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), DecRef_check, &error_pos_in_block);
     390      if (rv) {
     391              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     392              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     393              exit(-1);
     394      }
     395    }
     396
     397    if (bitblock_has_bit(ref_Callouts.HexRef_starts)){
     398      rv = StreamScan((ScanBlock *) &ref_Callouts.HexRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), HexRef_check, &error_pos_in_block);
     399      if (rv) {
     400              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     401              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     402              exit(-1);
     403      }
     404    }
     405
     406    if (bitblock_has_bit(check_streams.att_refs)){
     407      rv = StreamScan((ScanBlock *) &check_streams.att_refs, sizeof(BitBlock)/sizeof(ScanBlock), AttRef_check, &error_pos_in_block);
     408      if (rv) {
     409              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     410              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     411              exit(-1);
     412      }
     413    }
     414
     415    if(error_tracker.Has_Noted_Error()){
     416            tracker.get_Line_and_Column(error_tracker.Noted_Pos_In_Block(), error_line, error_column);
     417            ReportError(error_tracker.Noted_Error_Msg(), error_line, error_column);
     418            exit(-1);
     419    }
     420
     421    matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
     422    tracker.AdvanceBlock();
    357423}
    358424
     
    381447  buffer_base = buf_pos;
    382448  source = srcbuf;
     449
    383450  chars_read = fread((void *)srcbuf, 1, BUFFER_SIZE + OVERLAP_BUFSIZE, infile);
    384451  chars_avail = chars_read;
     
    430497
    431498/* Full Buffers */
    432     int block_segment_num = 0;
     499
    433500    while (chars_avail == BUFFER_SIZE) {
    434501      PERF_SEC_START(parser_timer);
     
    451518      buf_pos += chars_avail;
    452519      buffer_base = buf_pos;
    453       block_segment_num++;
    454520    }
    455521/* Final Partial Buffer */
     
    491557#endif
    492558}
    493 
    494 
    495 
    496 int  // Command-line entry point: takes <filename> [<outputfile>], calls do_process on the opened streams, returns 0.
    497 main(int argc, char * argv[]) {
    498         char * infilename, * outfilename;
    499         FILE *infile, *outfile;
    500         struct stat fileinfo;  // filled by stat() below; not read again inside main
    501 
    502         if (argc < 2) {  // the input filename argument is mandatory
    503                 printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
    504                 exit(-1);
    505         }
    506 
    507         infilename = argv[1];
    508         stat(infilename, &fileinfo);  // return value is ignored; an unopenable file is only caught by the fopen check below
    509         infile = fopen(infilename, "rb");
    510         if (!infile) {
    511                 fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
    512                 exit(-1);
    513         }
    514 
    515         if (argc < 3) outfile = stdout;  // no second argument: output goes to stdout
    516         else {
    517                 outfilename = argv[2];
    518                 outfile = fopen(outfilename, "wb");
    519                 if (!outfile) {
    520                         fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
    521                         exit(-1);
    522                 }
    523         }
    524 
    525 //      PERF_SEC_BIND(1);
    526 
    527         PERF_SEC_INIT(parser_timer);  // PERF_SEC_* are defined outside this view; presumably timing instrumentation -- TODO confirm
    528 
    529         do_process(infile, outfile);
    530 
    531         PERF_SEC_DUMP(parser_timer);
    532 
    533         PERF_SEC_DESTROY(parser_timer);
    534 
    535         fclose(infile);
    536         fclose(outfile);  // NOTE(review): outfile is stdout when argc < 3, so the printf below then targets a closed stream -- confirm intent
    537 
    538         printf ("Done procressing\n");  // "procressing" (sic) is the literal runtime text
    539         return(0);
    540 }
Note: See TracChangeset for help on using the changeset viewer.