Ignore:
Timestamp:
Nov 21, 2011, 4:09:54 PM (8 years ago)
Author:
vla24
Message:

SymbolTable: completed dictionary implementation and refactored templates

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/SymbolTable/symtab_pbgs_div_template.cpp

    r1684 r1721  
    1 #include "../symtab_global.h"
     1#include "../common_definitions.h"
    22#include <pbgs_div_symbol_table.h>
     3
     4#include "../symtab_common_functions.h"
     5#include "../xmlwf_common_functions.h"
    36
    47#ifdef BUFFER_PROFILING
     
    2023int buffer_base=0;
    2124int buffer_last;
    22 char * source;
    23 LineColTracker tracker;
     25
    2426TagMatcher matcher;
    25 BitBlock EOF_mask = simd<1>::constant<1>();
    26 ErrorTracker error_tracker;
    2727
    2828BitBlock elem_ends;
     
    3434PBGSDivSymbolTable pbgs_symbol_table;
    3535
    36 static inline int NameStrt_check(int pos);
    37 static inline int Name_check(int pos);
    38 static inline int PIName_check(int pos);
    39 static inline int CD_check(int pos);
    40 static inline int GenRef_check(int pos);
    41 static inline int HexRef_check(int pos);
    42 static inline int DecRef_check(int pos);
    43 static inline int AttRef_check(int pos);
    44 
    45 @global
    46 
    47 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits);
    48 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask);
    4936static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, Tag_Callouts & tag_Callouts, Hash_data & hash_data, int chars_avail);
    5037
    5138void do_process(FILE *infile, FILE *outfile);
    5239
    53 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int));
    54 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int,int));
    5540template <int L> static inline void validate_block_length_grouping(BitBlockForwardIterator & start, int block_base);
    5641
    57 static inline int ScanForwardPos(BitBlock * block, int pos);
    58 static inline int compute_hash_value (int lgth, int start);
    5942static inline int ElemStart_grouping(int start_pos, int lgth); // lgth > 16
    6043template <int L> static inline int ElemEnd_grouping(int pos, int length);
    61 template <int L> static inline int StreamScanLengthGrouping(ScanBlock * stream, int blk_count);
    6244
    6345int main(int argc, char * argv[]) {
    64         char * infilename, * outfilename;
    65         FILE *infile, *outfile;
    66         struct stat fileinfo;
    67 
    68         if (argc < 2) {
    69                 printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
    70                 exit(-1);
    71         }
    72 
    73         infilename = argv[1];
    74         stat(infilename, &fileinfo);
    75         infile = fopen(infilename, "rb");
    76         if (!infile) {
    77                 fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
    78                 exit(-1);
    79         }
    80 
    81         if (argc < 3) outfile = stdout;
    82         else {
    83                 outfilename = argv[2];
    84                 outfile = fopen(outfilename, "wb");
    85                 if (!outfile) {
    86                         fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
    87                         exit(-1);
    88                 }
    89         }
     46    char * infilename, * outfilename;
     47    FILE *infile, *outfile;
     48
     49    getFilenames(argc, argv, infilename, outfilename);
     50    openInputOutputFiles(infilename, outfilename,
     51                         infile, outfile);
    9052
    9153//      PERF_SEC_BIND(1);
    9254
    93         PERF_SEC_INIT(parser_timer);
    94 
    95         do_process(infile, outfile);
    96 
    97         PERF_SEC_DUMP(parser_timer);
    98 
    99         PERF_SEC_DESTROY(parser_timer);
    100 
    101         fclose(infile);
    102         fclose(outfile);
     55    PERF_SEC_INIT(parser_timer);
     56
     57    // store symbols from text to Symbol Table
     58    do_process(infile, outfile);
     59
     60    PERF_SEC_DUMP(parser_timer);
     61
     62    PERF_SEC_DESTROY(parser_timer);
     63
     64    fclose(infile);
     65    fclose(outfile);
     66
     67#if PRINT_SYMBOL_DISTRIBUTION
     68//    print_GIDS();
     69    pbgs_symbol_table.Print_Symbol_Table_Distribution();
     70#endif
    10371
    10472        return(0);
    105 }
    106 
    107 /* s2p Definitions */
    108 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
    109   s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
    110         basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
    111 }
    112 
    113 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
    114   s2p_do_block(U8, basis_bits);
    115   basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
    116   basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
    117   basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
    118   basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
    119   basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
    120   basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
    121   basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
    122   basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
    123 }
    124 
    125 static inline int ScanForwardPos(BitBlock * block, int pos)
    126 {
    127     BitBlock s = block[0];
    128     BitBlock temp = simd_and(s, simd<128>::sll(simd<2>::constant<3>(), convert(pos)));
    129 
    130     if (bitblock_has_bit(temp))
    131     {
    132         return count_forward_zeroes (temp);
    133     }
    134     return 0;
    135 }
    136 
    137 static inline int compute_hash_value (int lgth, int start)
    138 {
    139     unsigned int offset_bit = start + 128;
    140     uint64_t stream = *((uint64_t*)(((uint32_t*)hashvalues)+(offset_bit>>5)));
    141     return stream >> (offset_bit & 0x1F) & ~(~0 << lgth);
    14273}
    14374
     
    14677static inline int ElemEnd_grouping(int end) {
    14778    int start = end - L;
    148     int hashvalue = compute_hash_value(L, start - block_base);
     79    int hashvalue = compute_hash_value(L, start - block_base, hashvalues);
    14980    int gid = pbgs_symbol_table.Lookup_or_Insert_Name<L>(source + start, hashvalue);
    15081    gids.push_back(gid);
     
    16192// length > 16
    16293static inline int ElemStart_grouping(int start, int lgth) {
    163     int hashvalue = compute_hash_value(lgth, start - block_base);
     94    int hashvalue = compute_hash_value(lgth, start - block_base, hashvalues);
    16495    int gid = pbgs_symbol_table.Lookup_or_Insert_Name(source + start, hashvalue, lgth);
    16596    gids.push_back(gid);
     
    171102#endif
    172103    return 0;
    173 }
    174 
    175 static inline int NameStrt_check(int pos) {
    176         if(XML_10_UTF8_NameStrt_bytes((unsigned char*)&source[pos]) == 0){
    177               return XMLTestSuiteError::NAME_START;
    178         }
    179         return 0;
    180 }
    181 
    182 static inline int Name_check(int pos) {
    183         if(XML_10_UTF8_NameChar_bytes((unsigned char*)&source[pos]) == 0){
    184                   return XMLTestSuiteError::NAME;
    185         }
    186         return 0;
    187 }
    188 
    189 static inline int PIName_check(int pos, int file_pos) {
    190         if (at_XxMmLll<ASCII>((unsigned char*)&source[pos]) && (source[pos+3]=='?' || source[pos+3]<= ' ')) {
    191               // "<?xml" legal at start of file.
    192               if (!((file_pos == 2) && at_XmlDecl_start<ASCII>((unsigned char*)&source[0]))) {
    193                   return XMLTestSuiteError::XMLPINAME;
    194               }
    195         }
    196         return 0;
    197 }
    198 
    199 static inline int CD_check(int pos) {
    200         if (!at_CDATA1<ASCII>((unsigned char*)&source[pos])){
    201                   return XMLTestSuiteError::CDATA;
    202         }
    203         return 0;
    204 }
    205 
    206 static inline int GenRef_check(int pos) {
    207         unsigned char* s = (unsigned char*)&source[pos];
    208         if (!(at_Ref_gt<ASCII>(s)||at_Ref_lt<ASCII>(s)||at_Ref_amp<ASCII>(s)||at_Ref_quot<ASCII>(s)||at_Ref_apos<ASCII>(s))){
    209               return XMLTestSuiteError::UNDEFREF;
    210         }
    211         return 0;
    212 }
    213 
    214 static inline int HexRef_check(int pos) {
    215         unsigned char* s = (unsigned char*)&source[pos];
    216         int ch_val = 0;
    217         while(at_HexDigit<ASCII>(s)){
    218           ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4);
    219           if (ch_val> 0x10FFFF ){
    220                 return XMLTestSuiteError::CHARREF;
    221           }
    222           s++;
    223         }
    224         if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    225           return XMLTestSuiteError::CHARREF;
    226         }
    227         else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    228           return XMLTestSuiteError::XML10CHARREF;
    229         }
    230         return 0;
    231 }
    232 
    233 static inline int DecRef_check(int pos) {
    234         unsigned char* s = (unsigned char*)&source[pos];
    235         int ch_val = 0;
    236         while(at_HexDigit<ASCII>(s)){
    237           ch_val = DigitVal<ASCII>(s[0]) + ch_val*10;
    238           if (ch_val> 0x10FFFF ){
    239                         return XMLTestSuiteError::CHARREF;
    240           }
    241           s++;
    242         }
    243         if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    244                   return XMLTestSuiteError::CHARREF;
    245         }
    246         else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    247                   return XMLTestSuiteError::XML10CHARREF;
    248         }
    249         return 0;
    250 }
    251 
    252 static inline int AttRef_check(int pos) {
    253         unsigned char* s = (unsigned char*)&source[pos];
    254         int ch_val = 0;
    255         if(s[0]=='#'){
    256           s++;
    257           if(s[0]=='x' || s[0]=='X'){
    258             s++;
    259             while(at_HexDigit<ASCII>(s)){
    260               ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4);
    261               s++;
    262             }
    263           }
    264           else{
    265             while(at_HexDigit<ASCII>(s)){
    266               ch_val = DigitVal<ASCII>(s[0]) + ch_val*10;
    267               s++;
    268             }
    269           }
    270           if (ch_val==60){
    271             return XMLTestSuiteError::ATTREF;
    272           }
    273         }
    274         else if(at_Ref_lt<ASCII>(s)){
    275           return XMLTestSuiteError::ATTREF;
    276         }
    277         return 0;
    278 }
    279 
    280 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int)) {
    281 
    282         BitBlockForwardIterator end;
    283         int pos, block_pos;
    284 
    285         while(start != end) {
    286 
    287                 block_pos = block_base + *start;
    288                 int rv = is_valid(block_pos);
    289 
    290                 if (rv) {
    291                         int error_line, error_column;
    292                         tracker.get_Line_and_Column(block_pos, error_line, error_column);
    293                         ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
    294                         exit(-1);
    295                 }
    296                 start++;
    297         }
    298 }
    299 
    300 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int buffer_base, int is_valid(int,int)) {
    301 
    302         BitBlockForwardIterator end;
    303         int pos, block_pos, file_pos;
    304 
    305         while(start != end) {
    306 
    307                 block_pos = block_base + *start;
    308                 file_pos = block_pos+buffer_base;
    309 
    310 
    311                 int rv = is_valid(block_pos, file_pos);
    312 
    313                 if (rv) {
    314                         int error_line, error_column;
    315                         tracker.get_Line_and_Column(block_pos, error_line, error_column);
    316                         ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
    317                         exit(-1);
    318                 }
    319                 start++;
    320         }
    321104}
    322105
     
    388171        int lgth = count_forward_zeroes(elem_ends)-last_elem_start;
    389172        int start = block_base + last_elem_start;
    390         int hashvalue = compute_hash_value(lgth, last_elem_start);
     173        int hashvalue = compute_hash_value(lgth, last_elem_start, hashvalues);
    391174        int gid = pbgs_symbol_table.Lookup_or_Insert_Name(source + start, hashvalue, lgth);
    392175        gids.push_back(gid);
     
    494277    matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
    495278    tracker.AdvanceBlock();
    496 }
    497 
    498 static inline void print_GIDS()
    499 {
    500     int span_count = gids.size();
    501     for(int i=0;i<span_count;i++) {
    502              cout << gids[i] << " ";
    503     }
    504     cout << endl;
    505 }
    506 
    507 static inline int test(int)
    508 {
    509     return 0;
    510279}
    511280
     
    630399      exit(-1);
    631400    }
    632 
    633 //  print_GIDS();
    634 #if DEBUG
    635     pbgs_symbol_table.Print_Symbol_Table_Distribution();
    636 #endif
    637 }
     401}
Note: See TracChangeset for help on using the changeset viewer.