Ignore:
Timestamp:
Sep 16, 2011, 5:42:17 PM (8 years ago)
Author:
vla24
Message:

SymbolTable: updated implementation after Ken's refactoring

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/SymbolTable/symtab_pbgs_div_template.cpp

    r1442 r1457  
    1 #define USE_FUNCTION_TEMPLATES
    21#define TEMPLATED_SIMD_LIB
    3 
    4 #define DEBUG 0
    5 #define BLOCK_SIZE (sizeof(SIMD_type) * 8)
    6 #define SEGMENT_BLOCKS 12
    7 #define BUFFER_SIZE (BLOCK_SIZE * SEGMENT_BLOCKS)
    8 #define OVERLAP_BUFSIZE 2*(sizeof(SIMD_type))
    92
    103#include <stdio.h>
     
    2417using namespace std;
    2518
     19#define DEBUG 0
     20#define BLOCK_SIZE (sizeof(SIMD_type) * 8)
     21#define SEGMENT_BLOCKS 12
     22#define BUFFER_SIZE (BLOCK_SIZE * SEGMENT_BLOCKS)
     23#define OVERLAP_BUFSIZE 2*(sizeof(SIMD_type))
     24
    2625#include <../carryQ.h>
    2726#include <xmldecl.h>
    28 #include <xml_error.c>
    29 #include <xmldecl.c>
    3027#include <namechars.h>
    31 
    3228#include <../perflib/perfsec.h>
    3329#include <../s2p.h>
     
    3531#include <TagMatcher.h>
    3632#include <LineColTracker.h>
     33#include <ErrorUtil.h>
     34#include <ErrorTracker.h>
     35#include <XMLTestSuiteError.h>
     36
     37#include <xml_error.c>
     38#include <ErrorUtil.cpp>
     39#include <ErrorTracker.cpp>
     40#include <XMLTestSuiteError.cpp>
    3741
    3842#ifdef BUFFER_PROFILING
     
    5054#endif
    5155
     56
    5257int block_base=0;
    5358int buffer_base=0;
     
    5560char * source;
    5661LineColTracker tracker;
    57 
    58 static inline void ReportError(const char * error_msg, int error_pos_in_block) {
    59   int error_line, error_column;
    60   tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
    61   fprintf(stderr, "%s at line %i, column %i\n", error_msg, error_line, error_column);
    62 }
    63 
    64 class ErrorTracker {
    65 public:
    66     ErrorTracker() { noted_pos_in_block = -1;}
    67 
    68     inline void NoteError(const char * error_msg, BitBlock err_strm) {
    69       int pos_in_block = count_forward_zeroes(err_strm);
    70       if ((noted_pos_in_block == -1) || (noted_pos_in_block > pos_in_block)) {
    71         noted_pos_in_block = pos_in_block;
    72         noted_error = error_msg;
    73       }
    74     }
    75 
    76     inline void If_Error_Report_First() {
    77       if (noted_pos_in_block > -1) {
    78               int error_line, error_column;
    79               ReportError(noted_error, noted_pos_in_block);
    80               exit(-1);
    81       }
    82     }
    83 
    84 private:
    85   const char * noted_error;
    86   int noted_pos_in_block;
    87 };
    88 
    89 
    9062TagMatcher matcher;
    9163BitBlock EOF_mask = simd_const_1(1);
    92 
    9364ErrorTracker error_tracker;
     65
    9466BitBlock elem_ends;
    9567int last_elem_start;
     
    10072PBGSDivSymbolTable pbgs_symbol_table;
    10173
    102 
     74/* StreamScan & Post Process Declarations */
     75//      static inline int StreamScanToFirst(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block)
     76static inline int StreamScan(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block);
     77
     78static inline int NameStrt_check(int pos);
     79static inline int Name_check(int pos);
     80static inline int PIName_check(int pos);
     81static inline int CD_check(int pos);
     82static inline int GenRef_check(int pos);
     83static inline int HexRef_check(int pos);
     84static inline int DecRef_check(int pos);
     85static inline int AttRef_check(int pos);
     86
     87@global
     88
     89static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits);
     90static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask);
     91static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, Tag_Callouts & tag_Callouts, int chars_avail);
     92void do_process(FILE *infile, FILE *outfile);
    10393static inline int ScanForwardPos(BitBlock * block, int pos);
    10494static inline int compute_hash_value (int lgth, int start);
     
    10797template <int L> static inline int StreamScanLengthGrouping(ScanBlock * stream, int blk_count);
    10898
     99int main(int argc, char * argv[]) {
     100        char * infilename, * outfilename;
     101        FILE *infile, *outfile;
     102        struct stat fileinfo;
     103
     104        if (argc < 2) {
     105                printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
     106                exit(-1);
     107        }
     108
     109        infilename = argv[1];
     110        stat(infilename, &fileinfo);
     111        infile = fopen(infilename, "rb");
     112        if (!infile) {
     113                fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
     114                exit(-1);
     115        }
     116
     117        if (argc < 3) outfile = stdout;
     118        else {
     119                outfilename = argv[2];
     120                outfile = fopen(outfilename, "wb");
     121                if (!outfile) {
     122                        fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
     123                        exit(-1);
     124                }
     125        }
     126
     127//      PERF_SEC_BIND(1);
     128
     129        PERF_SEC_INIT(parser_timer);
     130
     131        do_process(infile, outfile);
     132
     133        PERF_SEC_DUMP(parser_timer);
     134
     135        PERF_SEC_DESTROY(parser_timer);
     136
     137        fclose(infile);
     138        fclose(outfile);
     139
     140        return(0);
     141}
     142
     143/* s2p Definitions */
     144static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
     145  s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
     146        basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
     147}
     148
     149static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
     150  s2p_do_block(U8, basis_bits);
     151  basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
     152  basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
     153  basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
     154  basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
     155  basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
     156  basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
     157  basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
     158  basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
     159}
    109160
    110161static inline int ScanForwardPos(BitBlock * block, int pos)
     
    135186}
    136187
    137 static inline int StreamScan(ScanBlock * stream, int blk_count, int ProcessPos(int)) {
     188/* StreamScan & Post Process Definitions */
     189static inline int StreamScan(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block) {
     190
    138191        int blk;
    139192        int block_pos = 0;
     193        int pos;
     194
    140195        for (blk = 0; blk < blk_count; blk++) {
    141196                ScanBlock s = stream[blk];
    142197                while(s) {
    143                         int code = (ProcessPos(cfzl(s) + block_pos));
    144                         if (code) return code;
     198                        pos = (cfzl(s) + block_pos);
     199                        int code = (ProcessPos(pos));
     200                        if (code) {
     201                                *error_pos_in_block = pos;
     202                                return code; // error code
     203                        }
    145204                        s = s & (s-1);  // clear rightmost bit.
    146205                }
     
    238297        int block_pos = block_base + pos;
    239298        if(XML_10_UTF8_NameStrt_bytes((unsigned char*)&source[block_pos]) == 0){
    240               ReportError("name start error", pos);
    241               exit(-1);
     299              return XMLTestSuiteError::NAME_START;
    242300        }
    243301        return 0;
     
    247305        int block_pos = block_base + pos;
    248306        if(XML_10_UTF8_NameChar_bytes((unsigned char*)&source[block_pos]) == 0){
    249               ReportError("name error", pos);
    250               exit(-1);
     307                  return XMLTestSuiteError::NAME;
    251308        }
    252309        return 0;
     
    256313        int block_pos = block_base + pos;
    257314        int file_pos = block_pos+buffer_base;
    258         printf ("%s:%i\n",__FUNCTION__,pos);
    259315        if (at_XxMmLll<ASCII>((unsigned char*)&source[block_pos]) && (source[block_pos+3]=='?' || source[block_pos+3]<= ' ')) {
    260316              // "<?xml" legal at start of file.
    261               if ((file_pos == 2) && at_XmlDecl_start<ASCII>((unsigned char*)&source[0])) return 0;
    262               ReportError("[Xx][Mm][Ll] illegal as PI name", pos);
    263               exit(-1);
     317              if (!((file_pos == 2) && at_XmlDecl_start<ASCII>((unsigned char*)&source[0]))) {
     318                  return XMLTestSuiteError::XMLPINAME;
     319              }
    264320        }
    265321        return 0;
     
    269325        int block_pos = block_base + pos;
    270326        if (!at_CDATA1<ASCII>((unsigned char*)&source[block_pos])){
    271               ReportError("CDATA error", pos);
    272               exit(-1);
     327                  return XMLTestSuiteError::CDATA;
    273328        }
    274329        return 0;
     
    279334        unsigned char* s = (unsigned char*)&source[block_pos];
    280335        if (!(at_Ref_gt<ASCII>(s)||at_Ref_lt<ASCII>(s)||at_Ref_amp<ASCII>(s)||at_Ref_quot<ASCII>(s)||at_Ref_apos<ASCII>(s))){
    281               ReportError("Undefined reference", pos);
    282               exit(-1);
     336              return XMLTestSuiteError::UNDEFREF;
    283337        }
    284338        return 0;
     
    292346          ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4);
    293347          if (ch_val> 0x10FFFF ){
    294             ReportError("Illegal character reference", pos);
    295             exit(-1);
     348                return XMLTestSuiteError::CHARREF;
    296349          }
    297350          s++;
    298351        }
    299352        if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    300           ReportError("Illegal character reference", pos);
    301           exit(-1);
     353          return XMLTestSuiteError::CHARREF;
    302354        }
    303355        else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    304           ReportError("Illegal XML 1.0 character reference", pos);
    305           exit(-1);
     356          return XMLTestSuiteError::XML10CHARREF;
    306357        }
    307358        return 0;
     
    315366          ch_val = DigitVal<ASCII>(s[0]) + ch_val*10;
    316367          if (ch_val> 0x10FFFF ){
    317             ReportError("Illegal character reference", pos);
    318             exit(-1);
     368                        return XMLTestSuiteError::CHARREF;
    319369          }
    320370          s++;
    321371        }
    322372        if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    323           ReportError("Illegal character reference", pos);
    324           exit(-1);
     373                  return XMLTestSuiteError::CHARREF;
    325374        }
    326375        else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    327           ReportError("Illegal XML 1.0 character reference", pos);
    328           exit(-1);
     376                  return XMLTestSuiteError::XML10CHARREF;
    329377        }
    330378        return 0;
     
    351399          }
    352400          if (ch_val==60){
    353             ReportError("Attribute values contain '<' characters after reference expansion", pos);
    354             exit(-1);
     401            return XMLTestSuiteError::ATTREF;
    355402          }
    356403        }
    357404        else if(at_Ref_lt<ASCII>(s)){
    358           ReportError("Attribute values contain '<' characters after reference expansion", pos);
    359           exit(-1);
    360         }
    361         return 0;
    362 }
    363 
    364 
    365 
    366 @global
    367 
    368 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
    369   s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
    370         basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
    371 }
    372 
    373 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
    374   s2p_do_block(U8, basis_bits);
    375   basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
    376   basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
    377   basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
    378   basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
    379   basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
    380   basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
    381   basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
    382   basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
     405          return XMLTestSuiteError::ATTREF;
     406        }
     407        return 0;
    383408}
    384409
    385410static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, Tag_Callouts & tag_Callouts, Hash_data & hash_data, int chars_avail){
    386411
    387 
    388     //NOTE: We are NOT handling the case for an extremely long symbol name (length > 1541).
    389     //      TagMatcher will crash if we feed a long symbol name.
    390     //      Sample file: test/long_sym_name.xml
    391 
    392412    tracker.StoreNewlines(lex.LF);
     413    int rv, error_pos_in_block, error_line, error_column;
    393414    elem_ends = tag_Callouts.ElemName_ends;
    394415    hashvalues[1] = hash_data.Hash_value;
     
    459480    memmove (&hashvalues[0], &hashvalues[1], 16);
    460481
     482
    461483    if (bitblock_has_bit(simd_or(check_streams.non_ascii_name_starts, check_streams.non_ascii_names))) {
    462       StreamScan((ScanBlock *) &check_streams.non_ascii_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), NameStrt_check);
    463       StreamScan((ScanBlock *) &check_streams.non_ascii_names, sizeof(BitBlock)/sizeof(ScanBlock), Name_check);
     484      rv = StreamScan((ScanBlock *) &check_streams.non_ascii_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), NameStrt_check, &error_pos_in_block);
     485      if (rv) {
     486              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     487              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     488              exit(-1);
     489      }
     490
     491      rv = StreamScan((ScanBlock *) &check_streams.non_ascii_names, sizeof(BitBlock)/sizeof(ScanBlock), Name_check, &error_pos_in_block);
     492      if (rv) {
     493              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     494              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     495              exit(-1);
     496      }
    464497    }
    465498
    466499    if (bitblock_has_bit(ctCDPI_Callouts.PI_name_starts)){
    467       StreamScan((ScanBlock *) &ctCDPI_Callouts.PI_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), PIName_check);
     500      rv = StreamScan((ScanBlock *) &ctCDPI_Callouts.PI_name_starts, sizeof(BitBlock)/sizeof(ScanBlock), PIName_check, &error_pos_in_block);
     501      if (rv) {
     502              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     503              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     504              exit(-1);
     505      }
    468506    }
    469507
    470508    if (bitblock_has_bit(ctCDPI_Callouts.CD_starts)){
    471       StreamScan((ScanBlock *) &ctCDPI_Callouts.CD_starts, sizeof(BitBlock)/sizeof(ScanBlock), CD_check);
     509      rv = StreamScan((ScanBlock *) &ctCDPI_Callouts.CD_starts, sizeof(BitBlock)/sizeof(ScanBlock), CD_check, &error_pos_in_block);
     510      if (rv) {
     511              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     512              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     513              exit(-1);
     514      }
    472515    }
    473516
    474517    if (bitblock_has_bit(ref_Callouts.GenRef_starts)){
    475       StreamScan((ScanBlock *) &ref_Callouts.GenRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), GenRef_check);
     518      rv = StreamScan((ScanBlock *) &ref_Callouts.GenRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), GenRef_check, &error_pos_in_block);
     519      if (rv) {
     520              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     521              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     522              exit(-1);
     523      }
    476524    }
    477525
    478526    if (bitblock_has_bit(ref_Callouts.DecRef_starts)){
    479       StreamScan((ScanBlock *) &ref_Callouts.DecRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), DecRef_check);
     527      rv = StreamScan((ScanBlock *) &ref_Callouts.DecRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), DecRef_check, &error_pos_in_block);
     528      if (rv) {
     529              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     530              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     531              exit(-1);
     532      }
    480533    }
    481534
    482535    if (bitblock_has_bit(ref_Callouts.HexRef_starts)){
    483       StreamScan((ScanBlock *) &ref_Callouts.HexRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), HexRef_check);
     536      rv = StreamScan((ScanBlock *) &ref_Callouts.HexRef_starts, sizeof(BitBlock)/sizeof(ScanBlock), HexRef_check, &error_pos_in_block);
     537      if (rv) {
     538              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     539              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     540              exit(-1);
     541      }
    484542    }
    485543
    486544    if (bitblock_has_bit(check_streams.att_refs)){
    487       StreamScan((ScanBlock *) &check_streams.att_refs, sizeof(BitBlock)/sizeof(ScanBlock), AttRef_check);
    488     }
    489 
    490     error_tracker.If_Error_Report_First();
     545      rv = StreamScan((ScanBlock *) &check_streams.att_refs, sizeof(BitBlock)/sizeof(ScanBlock), AttRef_check, &error_pos_in_block);
     546      if (rv) {
     547              tracker.get_Line_and_Column(error_pos_in_block, error_line, error_column);
     548              ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
     549              exit(-1);
     550      }
     551    }
     552
     553    if(error_tracker.Has_Noted_Error()){
     554            tracker.get_Line_and_Column(error_tracker.Noted_Pos_In_Block(), error_line, error_column);
     555            ReportError(error_tracker.Noted_Error_Msg(), error_line, error_column);
     556            exit(-1);
     557    }
    491558
    492559    matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
     
    571638
    572639/* Full Buffers */
    573     int block_segment_num = 0;
    574640    while (chars_avail == BUFFER_SIZE) {
    575641      PERF_SEC_START(parser_timer);
     
    594660      buf_pos += chars_avail;
    595661      buffer_base = buf_pos;
    596       block_segment_num++;
    597662
    598663  }
     
    634699//    pbgs_symbol_table.Print_Symbol_Table_Distribution();
    635700}
    636 
    637 
    638 
    639 int
    640 main(int argc, char * argv[]) {
    641         char * infilename, * outfilename;
    642         FILE *infile, *outfile;
    643         struct stat fileinfo;
    644 
    645         if (argc < 2) {
    646                 printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
    647                 exit(-1);
    648         }
    649 
    650         infilename = argv[1];
    651         stat(infilename, &fileinfo);
    652         infile = fopen(infilename, "rb");
    653         if (!infile) {
    654                 fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
    655                 exit(-1);
    656         }
    657 
    658         if (argc < 3) outfile = stdout;
    659         else {
    660                 outfilename = argv[2];
    661                 outfile = fopen(outfilename, "wb");
    662                 if (!outfile) {
    663                         fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
    664                         exit(-1);
    665                 }
    666         }
    667 
    668 //      PERF_SEC_BIND(1);
    669 
    670         PERF_SEC_INIT(parser_timer);
    671 
    672         do_process(infile, outfile);
    673 
    674         PERF_SEC_DUMP(parser_timer);
    675 
    676         PERF_SEC_DESTROY(parser_timer);
    677 
    678         fclose(infile);
    679         fclose(outfile);
    680         return(0);
    681 }
Note: See TracChangeset for help on using the changeset viewer.