Changeset 1721


Ignore:
Timestamp:
Nov 21, 2011, 4:09:54 PM (8 years ago)
Author:
vla24
Message:

SymbolTable: completed dictionary implementation and refactored templates

Location:
proto/SymbolTable
Files:
4 added
22 edited
1 moved

Legend:

Unmodified
Added
Removed
  • proto/SymbolTable/Makefile

    r1688 r1721  
    11WCD_OUTFILE=src/wcd.cpp
     2PARSER_COMMON_FUNCTIONS_OUTFILE=src/parser_common_functions_generated.h
     3
     4PARSER_COMMON_FUNCTIONS=parser_common_functions.h
    25
    36PABLO_WCD=wcd.py
     
    3841
    3942wcd_stl:        $(PABLO_WCD)
     43        python $(PABLO_COMPILER) $(PABLO_WCD) -t $(PARSER_COMMON_FUNCTIONS) -o $(PARSER_COMMON_FUNCTIONS_OUTFILE)
    4044        python $(PABLO_COMPILER) $(PABLO_WCD) -t $(WCD_STL_TEMPLATE) -o $(WCD_OUTFILE)
    4145
    4246wcd_ls: $(PABLO_WCD)
     47        python $(PABLO_COMPILER) $(PABLO_WCD) -t $(PARSER_COMMON_FUNCTIONS) -o $(PARSER_COMMON_FUNCTIONS_OUTFILE)
    4348        python $(PABLO_COMPILER) $(PABLO_WCD) -t $(WCD_LS_TEMPLATE) -o $(WCD_OUTFILE)
    4449
    4550wcd_hash:       $(PABLO_WCD)
     51        python $(PABLO_COMPILER) $(PABLO_WCD) -t $(PARSER_COMMON_FUNCTIONS) -o $(PARSER_COMMON_FUNCTIONS_OUTFILE)
    4652        python $(PABLO_COMPILER) $(PABLO_WCD) -t $(WCD_HASH_TEMPLATE) -o $(WCD_OUTFILE)
    4753
    4854wcd_id: $(PABLO_WCD_ID)
     55        python $(PABLO_COMPILER) $(PABLO_WCD_ID) -t $(PARSER_COMMON_FUNCTIONS) -o $(PARSER_COMMON_FUNCTIONS_OUTFILE)
    4956        python $(PABLO_COMPILER) $(PABLO_WCD_ID) -t $(WCD_IDENTITY_TEMPLATE) -o $(WCD_OUTFILE)
    5057
    5158wcd_pbgs_id:    $(PABLO_WCD_PBGS_ID) # Parallel bitstream based group sorting
     59        python $(PABLO_COMPILER) $(PABLO_WCD_PBGS_ID) -t $(PARSER_COMMON_FUNCTIONS) -o $(PARSER_COMMON_FUNCTIONS_OUTFILE)
    5260        python $(PABLO_COMPILER) $(PABLO_WCD_PBGS_ID) -t $(WCD_PBGS_ID_TEMPLATE) -o $(WCD_OUTFILE)
    5361
    5462wcd_pbgs_id_adv:        $(PABLO_WCD_PBGS_ID_ADV) # Parallel bitstream based group sorting
     63        python $(PABLO_COMPILER) $(PABLO_WCD_PBGS_ID_ADV) -t $(PARSER_COMMON_FUNCTIONS) -o $(PARSER_COMMON_FUNCTIONS_OUTFILE)
    5564        python $(PABLO_COMPILER) $(PABLO_WCD_PBGS_ID_ADV) -t $(WCD_PBGS_ID_TEMPLATE) -o $(WCD_OUTFILE)
    5665
    5766wcd_pbgs_log:   $(PABLO_WCD_PBGS_LOG) # Parallel bitstream based group sorting
     67        python $(PABLO_COMPILER) $(PABLO_WCD_PBGS_LOG) -t $(PARSER_COMMON_FUNCTIONS) -o $(PARSER_COMMON_FUNCTIONS_OUTFILE)
    5868        python $(PABLO_COMPILER) $(PABLO_WCD_PBGS_LOG) -t $(WCD_PBGS_LOG_TEMPLATE) -o $(WCD_OUTFILE)
    5969
    6070wcd_pbgs_div:   $(PABLO_WCD_PBGS_DIV) # Parallel bitstream based group sorting
     71        python $(PABLO_COMPILER) $(PABLO_WCD_PBGS_DIV) -t $(PARSER_COMMON_FUNCTIONS) -o $(PARSER_COMMON_FUNCTIONS_OUTFILE)
    6172        python $(PABLO_COMPILER) $(PABLO_WCD_PBGS_DIV) -t $(WCD_PBGS_DIV_TEMPLATE) -o $(WCD_OUTFILE)
    6273
    6374symtab_stl:     $(PABLO_SRCFILE)
     75        python $(PABLO_COMPILER) $(PABLO_SRCFILE) -t $(PARSER_COMMON_FUNCTIONS) -o $(PARSER_COMMON_FUNCTIONS_OUTFILE)
    6476        python $(PABLO_COMPILER) $(PABLO_SRCFILE) -t $(SYMBOLTABLE_STL_TEMPLATE) -o $(XMLWF_OUTFILE)
    6577
    6678symtab_ls:      $(PABLO_SRCFILE)
     79        python $(PABLO_COMPILER) $(PABLO_SRCFILE) -t $(PARSER_COMMON_FUNCTIONS) -o $(PARSER_COMMON_FUNCTIONS_OUTFILE)
    6780        python $(PABLO_COMPILER) $(PABLO_SRCFILE) -t $(SYMBOLTABLE_LS_TEMPLATE) -o $(XMLWF_OUTFILE)
    6881
    6982symtab_hash:    $(PABLO_SRCFILE)
     83        python $(PABLO_COMPILER) $(PABLO_SRCFILE) -t $(PARSER_COMMON_FUNCTIONS) -o $(PARSER_COMMON_FUNCTIONS_OUTFILE)
    7084        python $(PABLO_COMPILER) $(PABLO_SRCFILE) -t $(HASH_SYMBOLTABLE_TEMPLATE) -o $(XMLWF_OUTFILE)
    7185
    7286symtab_id:      $(PABLO_SYMTAB_ID) # Parallel bitstream based group sorting
     87        python $(PABLO_COMPILER) $(PABLO_SYMTAB_ID) -t $(PARSER_COMMON_FUNCTIONS) -o $(PARSER_COMMON_FUNCTIONS_OUTFILE)
    7388        python $(PABLO_COMPILER) $(PABLO_FLAGS) $(PABLO_SYMTAB_ID) -t $(SYMBOLTABLE_IDENTITY_TEMPLATE) -o $(XMLWF_OUTFILE)
    7489
    7590symtab_pbgs_id: $(PABLO_SYMTAB_PBS) # Parallel bitstream based group sorting
     91        python $(PABLO_COMPILER) $(PABLO_SYMTAB_PBS) -t $(PARSER_COMMON_FUNCTIONS) -o $(PARSER_COMMON_FUNCTIONS_OUTFILE)
    7692        python $(PABLO_COMPILER) $(PABLO_FLAGS) $(PABLO_SYMTAB_PBS) -t $(PBGS_SYMBOLTABLE_ID_TEMPLATE) -o $(XMLWF_OUTFILE)
    7793
    7894symtab_pbgs_id_adv:$(PABLO_SYMTAB_PBS_ADV) # Parallel bitstream based group sorting using Advance32 and Interpose32
     95        python $(PABLO_COMPILER) $(PABLO_SYMTAB_PBS_ADV) -t $(PARSER_COMMON_FUNCTIONS) -o $(PARSER_COMMON_FUNCTIONS_OUTFILE)
    7996        python $(PABLO_COMPILER) $(PABLO_FLAGS) $(PABLO_SYMTAB_PBS_ADV) -t $(PBGS_SYMBOLTABLE_ID_TEMPLATE) -o $(XMLWF_OUTFILE)
    8097
    8198symtab_pbgs_log:$(PABLO_SYMTAB_PBS_LOG)
     99        python $(PABLO_COMPILER) $(PABLO_SYMTAB_PBS_LOG) -t $(PARSER_COMMON_FUNCTIONS) -o $(PARSER_COMMON_FUNCTIONS_OUTFILE)
    82100        python $(PABLO_COMPILER) $(PABLO_FLAGS) $(PABLO_SYMTAB_PBS_LOG) -t $(PBGS_SYMBOLTABLE_LOG_TEMPLATE) -o $(XMLWF_OUTFILE)
    83101
    84102symtab_pbgs_div:$(PABLO_SYMTAB_PBS_DIV)
     103        python $(PABLO_COMPILER) $(PABLO_SYMTAB_PBS_DIV) -t $(PARSER_COMMON_FUNCTIONS) -o $(PARSER_COMMON_FUNCTIONS_OUTFILE)
    85104        python $(PABLO_COMPILER) $(PABLO_FLAGS) $(PABLO_SYMTAB_PBS_DIV) -t $(PBGS_SYMBOLTABLE_DIV_TEMPLATE) -o $(XMLWF_OUTFILE)
    86105
  • proto/SymbolTable/build_wcd.sh

    r1688 r1721  
    1 make wcd_pbgs_div && cd src && make wcd && cd .. && ./src/wcd test/test_files/dict_bezier.txt
     1make wcd_pbgs_log && cd src && make wcd && cd .. && ./src/wcd test/test_files/test_dictionary.txt test/test_files/test_text.txt
  • proto/SymbolTable/build_xmlwf.sh

    r1688 r1721  
    1 make symtab_pbgs_div
     1make symtab_ls
    22cd src
    33make all
    4 ./xmlwf ../test/test_files/soap_div2.xml
     4./xmlwf ../test/test_files/soap.xml
    55cd ..
    66
  • proto/SymbolTable/common_definitions.h

    r1690 r1721  
    77#define SIMD_type BitBlock
    88#define DEBUG 0
     9#define PRINT_SYMBOL_DISTRIBUTION 0
    910#define SEGMENT_BLOCKS 12
    1011#define BUFFER_SIZE (BLOCK_SIZE * SEGMENT_BLOCKS)
     
    3536#include <XMLTestSuiteError.h>
    3637
    37 
    3838#endif // SYMTAB_GLOBAL_H
  • proto/SymbolTable/perf-build_xmlwf.sh

    r1688 r1721  
    1 cd src && make buffer_profiling_perf && python perf_script.py xmlwf && cd ..
     1cd src && make buffer_profiling_perf && python perf_script.py && cd ..
    22
  • proto/SymbolTable/src/Makefile

    r1688 r1721  
    77
    88CC= g++ $(CFLAGS)
    9 CFLAGS= $(SSE) -O3 #-g
     9CFLAGS= $(SSE) -O3 -g
    1010SSE=-msse2 #-msse4.1 -DUSE_PTEST #-mssse3 -msse4.1 -msse4.2
    1111SYMTAB_LIB=../symtab
     
    3535
    3636all:
    37         $(CC) $(INCLUDES) -o $(OUTFILE) $(SRCFILE) $(AFLAGS)
     37        $(CC) $(INCLUDES) -o $(OUTFILE) $(SRCFILE) $(AFLAGS) -DUSE_XMLWF
    3838
    3939avx:    $(SRCFILE)
     
    5050
    5151buffer_profiling_perf: $(SRCFILE)
    52         $(CC) $(INCLUDES) -o $(OUTFILE) $(SRCFILE) $(AFLAGS) -DBUFFER_PROFILING  -DPERF_SCRIPT
     52        $(CC) $(INCLUDES) -o $(OUTFILE) $(SRCFILE) $(AFLAGS) -DBUFFER_PROFILING  -DPERF_SCRIPT -DUSE_XMLWF
    5353
    5454asm:  $(SRCFILE)
  • proto/SymbolTable/src/perf_script.py

    r1688 r1721  
    1515        call(['./'+program, testfile_home+testfile], stderr=mes_f)
    1616
    17 def analyze_rslt():     
     17def analyze_rslt():
    1818      mes_f = open('mesfile', 'r')
    1919      results = mes_f.read().split(' ')
  • proto/SymbolTable/src/perf_script_wcd.py

    r1688 r1721  
    66programs = ['wcd']
    77#TODO: Change the testfiles
    8 testfiles = ['dict_bezier.txt']
     8dictionaryfiles = ['test_dictionary.txt']
     9testfiles = ['test_text.txt']
    910
    1011def run_test():     
    1112  mes_f = open('mesfile', 'w')
    1213  for program in programs:
    13     for testfile in testfiles:
     14    for index in range(len(testfiles)):
    1415      for i in range(10):
    15         call(['./'+program, testfile_home+testfile], stderr=mes_f)
     16        call(['./'+program, testfile_home+dictionaryfiles[index], testfile_home+testfiles[index]], stderr=mes_f)
    1617
    1718def analyze_rslt():     
  • proto/SymbolTable/symtab_hash_template.cpp

    r1684 r1721  
    1 #include "../symtab_global.h"
     1#include "../common_definitions.h"
    22#include <hash_symbol_table.h>
     3
     4#include "../symtab_common_functions.h"
     5#include "../xmlwf_common_functions.h"
    36
    47#ifdef BUFFER_PROFILING
     
    1821int block_base=0;
    1922int buffer_base=0;
    20 char * source;
    21 LineColTracker tracker;
     23
    2224TagMatcher matcher;
    23 ErrorTracker error_tracker;
    24 BitBlock EOF_mask = simd<1>::constant<1>();
    2525
    2626queue <size_t> elem_starts_buf;
     
    2929HashSymbolTable symbol_table;
    3030
    31 static inline int NameStrt_check(int pos);
    32 static inline int Name_check(int pos);
    33 static inline int PIName_check(int pos);
    34 static inline int CD_check(int pos);
    35 static inline int GenRef_check(int pos);
    36 static inline int HexRef_check(int pos);
    37 static inline int DecRef_check(int pos);
    38 static inline int AttRef_check(int pos);
    39 
    40 @global
    41 
    42 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits);
    43 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask);
    4431static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, Tag_Callouts & tag_Callouts, int chars_avail);
    4532void do_process(FILE *infile, FILE *outfile);
    4633
    47 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int));
    48 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int,int));
    49 
    5034static inline void do_symbol_table_lookup();
    5135
    52 void do_process(FILE *infile, FILE *outfile);
    53 
    5436int main(int argc, char * argv[]) {
    55         char * infilename, * outfilename;
    56         FILE *infile, *outfile;
    57         struct stat fileinfo;
    58 
    59         if (argc < 2) {
    60                 printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
    61                 exit(-1);
    62         }
    63 
    64         infilename = argv[1];
    65         stat(infilename, &fileinfo);
    66         infile = fopen(infilename, "rb");
    67         if (!infile) {
    68                 fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
    69                 exit(-1);
    70         }
    71 
    72         if (argc < 3) outfile = stdout;
    73         else {
    74                 outfilename = argv[2];
    75                 outfile = fopen(outfilename, "wb");
    76                 if (!outfile) {
    77                         fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
    78                         exit(-1);
    79                 }
    80         }
     37    char * infilename, * outfilename;
     38    FILE *infile, *outfile;
     39
     40    getFilenames(argc, argv, infilename, outfilename);
     41    openInputOutputFiles(infilename, outfilename,
     42                         infile, outfile);
    8143
    8244//      PERF_SEC_BIND(1);
    8345
    84         PERF_SEC_INIT(parser_timer);
    85 
    86         do_process(infile, outfile);
    87 
    88         PERF_SEC_DUMP(parser_timer);
    89 
    90         PERF_SEC_DESTROY(parser_timer);
    91 
    92         fclose(infile);
    93         fclose(outfile);
    94 
    95         return(0);
    96 }
    97 
    98 /* s2p Definitions */
    99 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
    100   s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
    101         basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
    102 }
    103 
    104 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
    105   s2p_do_block(U8, basis_bits);
    106   basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
    107   basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
    108   basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
    109   basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
    110   basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
    111   basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
    112   basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
    113   basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
     46    PERF_SEC_INIT(parser_timer);
     47
     48    // store symbols from text to Symbol Table
     49    do_process(infile, outfile);
     50
     51    PERF_SEC_DUMP(parser_timer);
     52
     53    PERF_SEC_DESTROY(parser_timer);
     54
     55    fclose(infile);
     56    fclose(outfile);
     57
     58#if PRINT_SYMBOL_DISTRIBUTION
     59    print_GIDS(gids);
     60#endif
     61    return(0);
    11462}
    11563
     
    12270        elem_ends_buf.push(buffer_base + pos);
    12371        return 0;
    124 }
    125 
    126 static inline int NameStrt_check(int pos) {
    127         if(XML_10_UTF8_NameStrt_bytes((unsigned char*)&source[pos]) == 0){
    128               return XMLTestSuiteError::NAME_START;
    129         }
    130         return 0;
    131 }
    132 
    133 static inline int Name_check(int pos) {
    134         if(XML_10_UTF8_NameChar_bytes((unsigned char*)&source[pos]) == 0){
    135                   return XMLTestSuiteError::NAME;
    136         }
    137         return 0;
    138 }
    139 
    140 static inline int PIName_check(int pos, int file_pos) {
    141         if (at_XxMmLll<ASCII>((unsigned char*)&source[pos]) && (source[pos+3]=='?' || source[pos+3]<= ' ')) {
    142               // "<?xml" legal at start of file.
    143               if (!((file_pos == 2) && at_XmlDecl_start<ASCII>((unsigned char*)&source[0]))) {
    144                   return XMLTestSuiteError::XMLPINAME;
    145               }
    146         }
    147         return 0;
    148 }
    149 
    150 static inline int CD_check(int pos) {
    151         if (!at_CDATA1<ASCII>((unsigned char*)&source[pos])){
    152                   return XMLTestSuiteError::CDATA;
    153         }
    154         return 0;
    155 }
    156 
    157 static inline int GenRef_check(int pos) {
    158         unsigned char* s = (unsigned char*)&source[pos];
    159         if (!(at_Ref_gt<ASCII>(s)||at_Ref_lt<ASCII>(s)||at_Ref_amp<ASCII>(s)||at_Ref_quot<ASCII>(s)||at_Ref_apos<ASCII>(s))){
    160               return XMLTestSuiteError::UNDEFREF;
    161         }
    162         return 0;
    163 }
    164 
    165 static inline int HexRef_check(int pos) {
    166         unsigned char* s = (unsigned char*)&source[pos];
    167         int ch_val = 0;
    168         while(at_HexDigit<ASCII>(s)){
    169           ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4);
    170           if (ch_val> 0x10FFFF ){
    171                 return XMLTestSuiteError::CHARREF;
    172           }
    173           s++;
    174         }
    175         if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    176           return XMLTestSuiteError::CHARREF;
    177         }
    178         else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    179           return XMLTestSuiteError::XML10CHARREF;
    180         }
    181         return 0;
    182 }
    183 
    184 static inline int DecRef_check(int pos) {
    185         unsigned char* s = (unsigned char*)&source[pos];
    186         int ch_val = 0;
    187         while(at_HexDigit<ASCII>(s)){
    188           ch_val = DigitVal<ASCII>(s[0]) + ch_val*10;
    189           if (ch_val> 0x10FFFF ){
    190                         return XMLTestSuiteError::CHARREF;
    191           }
    192           s++;
    193         }
    194         if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    195                   return XMLTestSuiteError::CHARREF;
    196         }
    197         else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    198                   return XMLTestSuiteError::XML10CHARREF;
    199         }
    200         return 0;
    201 }
    202 
    203 static inline int AttRef_check(int pos) {
    204         unsigned char* s = (unsigned char*)&source[pos];
    205         int ch_val = 0;
    206         if(s[0]=='#'){
    207           s++;
    208           if(s[0]=='x' || s[0]=='X'){
    209             s++;
    210             while(at_HexDigit<ASCII>(s)){
    211               ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4);
    212               s++;
    213             }
    214           }
    215           else{
    216             while(at_HexDigit<ASCII>(s)){
    217               ch_val = DigitVal<ASCII>(s[0]) + ch_val*10;
    218               s++;
    219             }
    220           }
    221           if (ch_val==60){
    222             return XMLTestSuiteError::ATTREF;
    223           }
    224         }
    225         else if(at_Ref_lt<ASCII>(s)){
    226           return XMLTestSuiteError::ATTREF;
    227         }
    228         return 0;
    229 }
    230 
    231 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int)) {
    232 
    233         BitBlockForwardIterator end;
    234         int pos, block_pos;
    235 
    236         while(start != end) {
    237 
    238                 block_pos = block_base + *start;
    239                 int rv = is_valid(block_pos);
    240 
    241                 if (rv) {
    242                         int error_line, error_column;
    243                         tracker.get_Line_and_Column(block_pos, error_line, error_column);
    244                         ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
    245                         exit(-1);
    246                 }
    247                 start++;
    248         }
    249 }
    250 
    251 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int buffer_base, int is_valid(int,int)) {
    252 
    253         BitBlockForwardIterator end;
    254         int pos, block_pos, file_pos;
    255 
    256         while(start != end) {
    257 
    258                 block_pos = block_base + *start;
    259                 file_pos = block_pos+buffer_base;
    260 
    261 
    262                 int rv = is_valid(block_pos, file_pos);
    263 
    264                 if (rv) {
    265                         int error_line, error_column;
    266                         tracker.get_Line_and_Column(block_pos, error_line, error_column);
    267                         ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
    268                         exit(-1);
    269                 }
    270                 start++;
    271         }
    27272}
    27373
     
    486286      exit(-1);
    487287    }
    488 #if DEBUG
    489     print_GIDS();
    490 #endif
    491 }
     288}
  • proto/SymbolTable/symtab_identity_template.cpp

    r1684 r1721  
    1 #include "../symtab_global.h"
     1#include "../common_definitions.h"
    22#include <pbgs_identity_symbol_table.h>
     3
     4#include "../symtab_common_functions.h"
     5#include "../xmlwf_common_functions.h"
    36
    47#ifdef BUFFER_PROFILING
     
    1922int buffer_base=0;
    2023int buffer_last;
    21 char * source;
    22 LineColTracker tracker;
     24
    2325TagMatcher matcher;
    24 ErrorTracker error_tracker;
    25 BitBlock EOF_mask = simd<1>::constant<1>();
    2626
    2727BitBlock elem_ends;
     
    3333PBGSIdentitySymbolTable pbgs_symbol_table;
    3434
    35 static inline int NameStrt_check(int pos);
    36 static inline int Name_check(int pos);
    37 static inline int PIName_check(int pos);
    38 static inline int CD_check(int pos);
    39 static inline int GenRef_check(int pos);
    40 static inline int HexRef_check(int pos);
    41 static inline int DecRef_check(int pos);
    42 static inline int AttRef_check(int pos);
    43 
    44 @global
    45 
    46 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits);
    47 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask);
    4835static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, Tag_Callouts & tag_Callouts, Hash_data & hash_data, int chars_avail);
    4936
    5037void do_process(FILE *infile, FILE *outfile);
    5138
    52 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int));
    53 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int,int));
    5439static inline void validate_block_length_grouping(BitBlockForwardIterator & start, int block_base);
    5540
    5641static inline int ElemStart_grouping(int start_pos, int L) ;
    57 static inline int StreamScanLengthGrouping(ScanBlock * stream, int blk_count);
    58 static inline int ScanForwardPos(BitBlock * block, int pos);
    59 static inline int compute_hash_value (int lgth, int start);
    6042
    6143int main(int argc, char * argv[]) {
    62         char * infilename, * outfilename;
    63         FILE *infile, *outfile;
    64         struct stat fileinfo;
    65 
    66         if (argc < 2) {
    67                 printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
    68                 exit(-1);
    69         }
    70 
    71         infilename = argv[1];
    72         stat(infilename, &fileinfo);
    73         infile = fopen(infilename, "rb");
    74         if (!infile) {
    75                 fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
    76                 exit(-1);
    77         }
    78 
    79         if (argc < 3) outfile = stdout;
    80         else {
    81                 outfilename = argv[2];
    82                 outfile = fopen(outfilename, "wb");
    83                 if (!outfile) {
    84                         fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
    85                         exit(-1);
    86                 }
    87         }
     44    char * infilename, * outfilename;
     45    FILE *infile, *outfile;
     46
     47    getFilenames(argc, argv, infilename, outfilename);
     48    openInputOutputFiles(infilename, outfilename,
     49                         infile, outfile);
    8850
    8951//      PERF_SEC_BIND(1);
    9052
    91         PERF_SEC_INIT(parser_timer);
    92 
    93         do_process(infile, outfile);
    94 
    95         PERF_SEC_DUMP(parser_timer);
    96 
    97         PERF_SEC_DESTROY(parser_timer);
    98 
    99         fclose(infile);
    100         fclose(outfile);
    101 
    102         return(0);
    103 }
    104 
    105 /* s2p Definitions */
    106 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
    107   s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
    108         basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
    109 }
    110 
    111 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
    112   s2p_do_block(U8, basis_bits);
    113   basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
    114   basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
    115   basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
    116   basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
    117   basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
    118   basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
    119   basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
    120   basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
    121 }
    122 
    123 static inline int ScanForwardPos(BitBlock * block, int pos)
    124 {
    125     BitBlock s = block[0];
    126     BitBlock temp = simd_and(s, simd<128>::sll(simd<2>::constant<3>(), convert(pos)));
    127 
    128     if (bitblock_has_bit(temp))
    129     {
    130         return count_forward_zeroes (temp);
    131     }
    132     else
    133     {
    134         //handle boundary case
    135         block_boundary_case = true;
    136         last_elem_start = pos - BLOCK_SIZE;
    137         return 0;
    138     }
    139 }
    140 
    141 static inline int compute_hash_value (int lgth, int start)
    142 {
    143     unsigned int offset_bit = start + 128;
    144     uint64_t stream = *((uint64_t*)(((uint32_t*)hashvalues)+(offset_bit>>5)));
    145     return stream >> (offset_bit & 0x1F) & ~(~0 << lgth);
     53    PERF_SEC_INIT(parser_timer);
     54
     55    // store symbols from text to Symbol Table
     56    do_process(infile, outfile);
     57
     58    PERF_SEC_DUMP(parser_timer);
     59
     60    PERF_SEC_DESTROY(parser_timer);
     61
     62    fclose(infile);
     63    fclose(outfile);
     64
     65#if PRINT_SYMBOL_DISTRIBUTION
     66//    print_GIDS();
     67    pbgs_symbol_table.Print_Symbol_Table_Distribution();
     68#endif
     69
     70    return(0);
    14671}
    14772
    14873static inline int ElemStart_grouping(int start_pos, int L) {
    149     int hashvalue = compute_hash_value(L, start_pos - block_base);
     74    int hashvalue = compute_hash_value(L, start_pos - block_base, hashvalues);
    15075    int gid = 0;
    15176
     
    217142}
    218143
    219 static inline int NameStrt_check(int pos) {
    220         if(XML_10_UTF8_NameStrt_bytes((unsigned char*)&source[pos]) == 0){
    221               return XMLTestSuiteError::NAME_START;
    222         }
    223         return 0;
    224 }
    225 
    226 static inline int Name_check(int pos) {
    227         if(XML_10_UTF8_NameChar_bytes((unsigned char*)&source[pos]) == 0){
    228                   return XMLTestSuiteError::NAME;
    229         }
    230         return 0;
    231 }
    232 
    233 static inline int PIName_check(int pos, int file_pos) {
    234         if (at_XxMmLll<ASCII>((unsigned char*)&source[pos]) && (source[pos+3]=='?' || source[pos+3]<= ' ')) {
    235               // "<?xml" legal at start of file.
    236               if (!((file_pos == 2) && at_XmlDecl_start<ASCII>((unsigned char*)&source[0]))) {
    237                   return XMLTestSuiteError::XMLPINAME;
    238               }
    239         }
    240         return 0;
    241 }
    242 
    243 static inline int CD_check(int pos) {
    244         if (!at_CDATA1<ASCII>((unsigned char*)&source[pos])){
    245                   return XMLTestSuiteError::CDATA;
    246         }
    247         return 0;
    248 }
    249 
    250 static inline int GenRef_check(int pos) {
    251         unsigned char* s = (unsigned char*)&source[pos];
    252         if (!(at_Ref_gt<ASCII>(s)||at_Ref_lt<ASCII>(s)||at_Ref_amp<ASCII>(s)||at_Ref_quot<ASCII>(s)||at_Ref_apos<ASCII>(s))){
    253               return XMLTestSuiteError::UNDEFREF;
    254         }
    255         return 0;
    256 }
    257 
    258 static inline int HexRef_check(int pos) {
    259         unsigned char* s = (unsigned char*)&source[pos];
    260         int ch_val = 0;
    261         while(at_HexDigit<ASCII>(s)){
    262           ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4);
    263           if (ch_val> 0x10FFFF ){
    264                 return XMLTestSuiteError::CHARREF;
    265           }
    266           s++;
    267         }
    268         if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    269           return XMLTestSuiteError::CHARREF;
    270         }
    271         else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    272           return XMLTestSuiteError::XML10CHARREF;
    273         }
    274         return 0;
    275 }
    276 
    277 static inline int DecRef_check(int pos) {
    278         unsigned char* s = (unsigned char*)&source[pos];
    279         int ch_val = 0;
    280         while(at_HexDigit<ASCII>(s)){
    281           ch_val = DigitVal<ASCII>(s[0]) + ch_val*10;
    282           if (ch_val> 0x10FFFF ){
    283                         return XMLTestSuiteError::CHARREF;
    284           }
    285           s++;
    286         }
    287         if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    288                   return XMLTestSuiteError::CHARREF;
    289         }
    290         else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    291                   return XMLTestSuiteError::XML10CHARREF;
    292         }
    293         return 0;
    294 }
    295 
    296 static inline int AttRef_check(int pos) {
    297         unsigned char* s = (unsigned char*)&source[pos];
    298         int ch_val = 0;
    299         if(s[0]=='#'){
    300           s++;
    301           if(s[0]=='x' || s[0]=='X'){
    302             s++;
    303             while(at_HexDigit<ASCII>(s)){
    304               ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4);
    305               s++;
    306             }
    307           }
    308           else{
    309             while(at_HexDigit<ASCII>(s)){
    310               ch_val = DigitVal<ASCII>(s[0]) + ch_val*10;
    311               s++;
    312             }
    313           }
    314           if (ch_val==60){
    315             return XMLTestSuiteError::ATTREF;
    316           }
    317         }
    318         else if(at_Ref_lt<ASCII>(s)){
    319           return XMLTestSuiteError::ATTREF;
    320         }
    321         return 0;
    322 }
    323 
    324 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int)) {
    325 
    326         BitBlockForwardIterator end;
    327         int pos, block_pos;
    328 
    329         while(start != end) {
    330 
    331                 block_pos = block_base + *start;
    332                 int rv = is_valid(block_pos);
    333 
    334                 if (rv) {
    335                         int error_line, error_column;
    336                         tracker.get_Line_and_Column(block_pos, error_line, error_column);
    337                         ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
    338                         exit(-1);
    339                 }
    340                 start++;
    341         }
    342 }
    343 
    344 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int buffer_base, int is_valid(int,int)) {
    345 
    346         BitBlockForwardIterator end;
    347         int pos, block_pos, file_pos;
    348 
    349         while(start != end) {
    350 
    351                 block_pos = block_base + *start;
    352                 file_pos = block_pos+buffer_base;
    353 
    354 
    355                 int rv = is_valid(block_pos, file_pos);
    356 
    357                 if (rv) {
    358                         int error_line, error_column;
    359                         tracker.get_Line_and_Column(block_pos, error_line, error_column);
    360                         ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
    361                         exit(-1);
    362                 }
    363                 start++;
    364         }
    365 }
    366 
    367144inline void validate_block_length_grouping(BitBlockForwardIterator & start, int block_base) {
    368145
     
    378155            start_pos += block_base;
    379156            ElemStart_grouping(start_pos, lgth);
     157        }
     158        else
     159        {
     160            //handle boundary case
     161            block_boundary_case = true;
     162            last_elem_start = start_pos - BLOCK_SIZE;
    380163        }
    381164        start++;
     
    576359    }
    577360//  print_GIDS();
    578 #if DEBUG
     361#if PRINT_SYMBOL_DISTRIBUTION
    579362    pbgs_symbol_table.Print_Symbol_Table_Distribution();
    580363#endif
  • proto/SymbolTable/symtab_ls_template.cpp

    r1684 r1721  
    1 #include "../symtab_global.h"
     1#include "../common_definitions.h"
    22#include <ls_symbol_table.h>
     3
     4#include "../symtab_common_functions.h"
     5#include "../xmlwf_common_functions.h"
    36
    47#ifdef BUFFER_PROFILING
     
    1922int buffer_base=0;
    2023int buffer_last;
    21 char * source;
    22 LineColTracker tracker;
     24
    2325TagMatcher matcher;
    24 ErrorTracker error_tracker;
    25 BitBlock EOF_mask = simd<1>::constant<1>();
    2626
    2727queue <size_t> elem_starts_buf;
     
    2929LSSymbolTable ls_symbol_table;
    3030
    31 static inline int NameStrt_check(int pos);
    32 static inline int Name_check(int pos);
    33 static inline int PIName_check(int pos);
    34 static inline int CD_check(int pos);
    35 static inline int GenRef_check(int pos);
    36 static inline int HexRef_check(int pos);
    37 static inline int DecRef_check(int pos);
    38 static inline int AttRef_check(int pos);
    39 
    40 @global
    41 
    42 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits);
    43 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask);
    4431static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, Tag_Callouts & tag_Callouts, int chars_avail);
    4532void do_process(FILE *infile, FILE *outfile);
    4633
    47 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int));
    48 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int,int));
    4934static inline void do_symbol_table_lookup();
    5035
    5136int main(int argc, char * argv[]) {
    52         char * infilename, * outfilename;
    53         FILE *infile, *outfile;
    54         struct stat fileinfo;
    55 
    56         if (argc < 2) {
    57                 printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
    58                 exit(-1);
    59         }
    60 
    61         infilename = argv[1];
    62         stat(infilename, &fileinfo);
    63         infile = fopen(infilename, "rb");
    64         if (!infile) {
    65                 fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
    66                 exit(-1);
    67         }
    68 
    69         if (argc < 3) outfile = stdout;
    70         else {
    71                 outfilename = argv[2];
    72                 outfile = fopen(outfilename, "wb");
    73                 if (!outfile) {
    74                         fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
    75                         exit(-1);
    76                 }
    77         }
     37    char * infilename, * outfilename;
     38    FILE *infile, *outfile;
     39
     40    getFilenames(argc, argv, infilename, outfilename);
     41    openInputOutputFiles(infilename, outfilename,
     42                         infile, outfile);
    7843
    7944//      PERF_SEC_BIND(1);
    8045
    81         PERF_SEC_INIT(parser_timer);
    82 
    83         do_process(infile, outfile);
    84 
    85         PERF_SEC_DUMP(parser_timer);
    86 
    87         PERF_SEC_DESTROY(parser_timer);
    88 
    89         fclose(infile);
    90         fclose(outfile);
    91 
    92         return(0);
    93 }
    94 
    95 /* s2p Definitions */
    96 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
    97   s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
    98         basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
    99 }
    100 
    101 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
    102   s2p_do_block(U8, basis_bits);
    103   basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
    104   basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
    105   basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
    106   basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
    107   basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
    108   basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
    109   basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
    110   basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
     46    PERF_SEC_INIT(parser_timer);
     47
     48    // store symbols from text to Symbol Table
     49    do_process(infile, outfile);
     50
     51    PERF_SEC_DUMP(parser_timer);
     52
     53    PERF_SEC_DESTROY(parser_timer);
     54
     55    fclose(infile);
     56    fclose(outfile);
     57
     58#if PRINT_SYMBOL_DISTRIBUTION
     59    print_GIDS(ls_symbol_table);
     60#endif
     61    ls_symbol_table.clear();
     62    return(0);
    11163}
    11264
     
    12072        return 0;
    12173}
    122 
    123 static inline int NameStrt_check(int pos) {
    124         if(XML_10_UTF8_NameStrt_bytes((unsigned char*)&source[pos]) == 0){
    125               return XMLTestSuiteError::NAME_START;
    126         }
    127         return 0;
    128 }
    129 
    130 static inline int Name_check(int pos) {
    131         if(XML_10_UTF8_NameChar_bytes((unsigned char*)&source[pos]) == 0){
    132                   return XMLTestSuiteError::NAME;
    133         }
    134         return 0;
    135 }
    136 
    137 static inline int PIName_check(int pos, int file_pos) {
    138         if (at_XxMmLll<ASCII>((unsigned char*)&source[pos]) && (source[pos+3]=='?' || source[pos+3]<= ' ')) {
    139               // "<?xml" legal at start of file.
    140               if (!((file_pos == 2) && at_XmlDecl_start<ASCII>((unsigned char*)&source[0]))) {
    141                   return XMLTestSuiteError::XMLPINAME;
    142               }
    143         }
    144         return 0;
    145 }
    146 
    147 static inline int CD_check(int pos) {
    148         if (!at_CDATA1<ASCII>((unsigned char*)&source[pos])){
    149                   return XMLTestSuiteError::CDATA;
    150         }
    151         return 0;
    152 }
    153 
    154 static inline int GenRef_check(int pos) {
    155         unsigned char* s = (unsigned char*)&source[pos];
    156         if (!(at_Ref_gt<ASCII>(s)||at_Ref_lt<ASCII>(s)||at_Ref_amp<ASCII>(s)||at_Ref_quot<ASCII>(s)||at_Ref_apos<ASCII>(s))){
    157               return XMLTestSuiteError::UNDEFREF;
    158         }
    159         return 0;
    160 }
    161 
    162 static inline int HexRef_check(int pos) {
    163         unsigned char* s = (unsigned char*)&source[pos];
    164         int ch_val = 0;
    165         while(at_HexDigit<ASCII>(s)){
    166           ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4);
    167           if (ch_val> 0x10FFFF ){
    168                 return XMLTestSuiteError::CHARREF;
    169           }
    170           s++;
    171         }
    172         if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    173           return XMLTestSuiteError::CHARREF;
    174         }
    175         else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    176           return XMLTestSuiteError::XML10CHARREF;
    177         }
    178         return 0;
    179 }
    180 
    181 static inline int DecRef_check(int pos) {
    182         unsigned char* s = (unsigned char*)&source[pos];
    183         int ch_val = 0;
    184         while(at_HexDigit<ASCII>(s)){
    185           ch_val = DigitVal<ASCII>(s[0]) + ch_val*10;
    186           if (ch_val> 0x10FFFF ){
    187                         return XMLTestSuiteError::CHARREF;
    188           }
    189           s++;
    190         }
    191         if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    192                   return XMLTestSuiteError::CHARREF;
    193         }
    194         else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    195                   return XMLTestSuiteError::XML10CHARREF;
    196         }
    197         return 0;
    198 }
    199 
    200 static inline int AttRef_check(int pos) {
    201         unsigned char* s = (unsigned char*)&source[pos];
    202         int ch_val = 0;
    203         if(s[0]=='#'){
    204           s++;
    205           if(s[0]=='x' || s[0]=='X'){
    206             s++;
    207             while(at_HexDigit<ASCII>(s)){
    208               ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4);
    209               s++;
    210             }
    211           }
    212           else{
    213             while(at_HexDigit<ASCII>(s)){
    214               ch_val = DigitVal<ASCII>(s[0]) + ch_val*10;
    215               s++;
    216             }
    217           }
    218           if (ch_val==60){
    219             return XMLTestSuiteError::ATTREF;
    220           }
    221         }
    222         else if(at_Ref_lt<ASCII>(s)){
    223           return XMLTestSuiteError::ATTREF;
    224         }
    225         return 0;
    226 }
    227 
    228 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int)) {
    229 
    230         BitBlockForwardIterator end;
    231         int pos, block_pos;
    232 
    233         while(start != end) {
    234 
    235                 block_pos = block_base + *start;
    236                 int rv = is_valid(block_pos);
    237 
    238                 if (rv) {
    239                         int error_line, error_column;
    240                         tracker.get_Line_and_Column(block_pos, error_line, error_column);
    241                         ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
    242                         exit(-1);
    243                 }
    244                 start++;
    245         }
    246 }
    247 
    248 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int buffer_base, int is_valid(int,int)) {
    249 
    250         BitBlockForwardIterator end;
    251         int pos, block_pos, file_pos;
    252 
    253         while(start != end) {
    254 
    255                 block_pos = block_base + *start;
    256                 file_pos = block_pos+buffer_base;
    257 
    258 
    259                 int rv = is_valid(block_pos, file_pos);
    260 
    261                 if (rv) {
    262                         int error_line, error_column;
    263                         tracker.get_Line_and_Column(block_pos, error_line, error_column);
    264                         ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
    265                         exit(-1);
    266                 }
    267                 start++;
    268         }
    269 }
    270 
    27174
    27275static inline void do_symbol_table_lookup()
     
    354157    matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
    355158    tracker.AdvanceBlock();
    356 }
    357 
    358 static inline void print_GIDS()
    359 {
    360     ls_symbol_table.display_flattened_symbol_values();
    361     ls_symbol_table.display_flattened_gids();
    362159}
    363160
     
    483280      exit(-1);
    484281    }
    485 
    486 #if DEBUG
    487     print_GIDS();
    488 #endif
    489     ls_symbol_table.clear();
    490 }
     282}
  • proto/SymbolTable/symtab_pbgs_div_template.cpp

    r1684 r1721  
    1 #include "../symtab_global.h"
     1#include "../common_definitions.h"
    22#include <pbgs_div_symbol_table.h>
     3
     4#include "../symtab_common_functions.h"
     5#include "../xmlwf_common_functions.h"
    36
    47#ifdef BUFFER_PROFILING
     
    2023int buffer_base=0;
    2124int buffer_last;
    22 char * source;
    23 LineColTracker tracker;
     25
    2426TagMatcher matcher;
    25 BitBlock EOF_mask = simd<1>::constant<1>();
    26 ErrorTracker error_tracker;
    2727
    2828BitBlock elem_ends;
     
    3434PBGSDivSymbolTable pbgs_symbol_table;
    3535
    36 static inline int NameStrt_check(int pos);
    37 static inline int Name_check(int pos);
    38 static inline int PIName_check(int pos);
    39 static inline int CD_check(int pos);
    40 static inline int GenRef_check(int pos);
    41 static inline int HexRef_check(int pos);
    42 static inline int DecRef_check(int pos);
    43 static inline int AttRef_check(int pos);
    44 
    45 @global
    46 
    47 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits);
    48 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask);
    4936static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, Tag_Callouts & tag_Callouts, Hash_data & hash_data, int chars_avail);
    5037
    5138void do_process(FILE *infile, FILE *outfile);
    5239
    53 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int));
    54 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int,int));
    5540template <int L> static inline void validate_block_length_grouping(BitBlockForwardIterator & start, int block_base);
    5641
    57 static inline int ScanForwardPos(BitBlock * block, int pos);
    58 static inline int compute_hash_value (int lgth, int start);
    5942static inline int ElemStart_grouping(int start_pos, int lgth); // lgth > 16
    6043template <int L> static inline int ElemEnd_grouping(int pos, int length);
    61 template <int L> static inline int StreamScanLengthGrouping(ScanBlock * stream, int blk_count);
    6244
    6345int main(int argc, char * argv[]) {
    64         char * infilename, * outfilename;
    65         FILE *infile, *outfile;
    66         struct stat fileinfo;
    67 
    68         if (argc < 2) {
    69                 printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
    70                 exit(-1);
    71         }
    72 
    73         infilename = argv[1];
    74         stat(infilename, &fileinfo);
    75         infile = fopen(infilename, "rb");
    76         if (!infile) {
    77                 fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
    78                 exit(-1);
    79         }
    80 
    81         if (argc < 3) outfile = stdout;
    82         else {
    83                 outfilename = argv[2];
    84                 outfile = fopen(outfilename, "wb");
    85                 if (!outfile) {
    86                         fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
    87                         exit(-1);
    88                 }
    89         }
     46    char * infilename, * outfilename;
     47    FILE *infile, *outfile;
     48
     49    getFilenames(argc, argv, infilename, outfilename);
     50    openInputOutputFiles(infilename, outfilename,
     51                         infile, outfile);
    9052
    9153//      PERF_SEC_BIND(1);
    9254
    93         PERF_SEC_INIT(parser_timer);
    94 
    95         do_process(infile, outfile);
    96 
    97         PERF_SEC_DUMP(parser_timer);
    98 
    99         PERF_SEC_DESTROY(parser_timer);
    100 
    101         fclose(infile);
    102         fclose(outfile);
     55    PERF_SEC_INIT(parser_timer);
     56
     57    // store symbols from text to Symbol Table
     58    do_process(infile, outfile);
     59
     60    PERF_SEC_DUMP(parser_timer);
     61
     62    PERF_SEC_DESTROY(parser_timer);
     63
     64    fclose(infile);
     65    fclose(outfile);
     66
     67#if PRINT_SYMBOL_DISTRIBUTION
     68//    print_GIDS();
     69    pbgs_symbol_table.Print_Symbol_Table_Distribution();
     70#endif
    10371
    10472        return(0);
    105 }
    106 
    107 /* s2p Definitions */
    108 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
    109   s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
    110         basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
    111 }
    112 
    113 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
    114   s2p_do_block(U8, basis_bits);
    115   basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
    116   basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
    117   basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
    118   basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
    119   basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
    120   basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
    121   basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
    122   basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
    123 }
    124 
    125 static inline int ScanForwardPos(BitBlock * block, int pos)
    126 {
    127     BitBlock s = block[0];
    128     BitBlock temp = simd_and(s, simd<128>::sll(simd<2>::constant<3>(), convert(pos)));
    129 
    130     if (bitblock_has_bit(temp))
    131     {
    132         return count_forward_zeroes (temp);
    133     }
    134     return 0;
    135 }
    136 
    137 static inline int compute_hash_value (int lgth, int start)
    138 {
    139     unsigned int offset_bit = start + 128;
    140     uint64_t stream = *((uint64_t*)(((uint32_t*)hashvalues)+(offset_bit>>5)));
    141     return stream >> (offset_bit & 0x1F) & ~(~0 << lgth);
    14273}
    14374
     
    14677static inline int ElemEnd_grouping(int end) {
    14778    int start = end - L;
    148     int hashvalue = compute_hash_value(L, start - block_base);
     79    int hashvalue = compute_hash_value(L, start - block_base, hashvalues);
    14980    int gid = pbgs_symbol_table.Lookup_or_Insert_Name<L>(source + start, hashvalue);
    15081    gids.push_back(gid);
     
    16192// length > 16
    16293static inline int ElemStart_grouping(int start, int lgth) {
    163     int hashvalue = compute_hash_value(lgth, start - block_base);
     94    int hashvalue = compute_hash_value(lgth, start - block_base, hashvalues);
    16495    int gid = pbgs_symbol_table.Lookup_or_Insert_Name(source + start, hashvalue, lgth);
    16596    gids.push_back(gid);
     
    171102#endif
    172103    return 0;
    173 }
    174 
    175 static inline int NameStrt_check(int pos) {
    176         if(XML_10_UTF8_NameStrt_bytes((unsigned char*)&source[pos]) == 0){
    177               return XMLTestSuiteError::NAME_START;
    178         }
    179         return 0;
    180 }
    181 
    182 static inline int Name_check(int pos) {
    183         if(XML_10_UTF8_NameChar_bytes((unsigned char*)&source[pos]) == 0){
    184                   return XMLTestSuiteError::NAME;
    185         }
    186         return 0;
    187 }
    188 
    189 static inline int PIName_check(int pos, int file_pos) {
    190         if (at_XxMmLll<ASCII>((unsigned char*)&source[pos]) && (source[pos+3]=='?' || source[pos+3]<= ' ')) {
    191               // "<?xml" legal at start of file.
    192               if (!((file_pos == 2) && at_XmlDecl_start<ASCII>((unsigned char*)&source[0]))) {
    193                   return XMLTestSuiteError::XMLPINAME;
    194               }
    195         }
    196         return 0;
    197 }
    198 
    199 static inline int CD_check(int pos) {
    200         if (!at_CDATA1<ASCII>((unsigned char*)&source[pos])){
    201                   return XMLTestSuiteError::CDATA;
    202         }
    203         return 0;
    204 }
    205 
    206 static inline int GenRef_check(int pos) {
    207         unsigned char* s = (unsigned char*)&source[pos];
    208         if (!(at_Ref_gt<ASCII>(s)||at_Ref_lt<ASCII>(s)||at_Ref_amp<ASCII>(s)||at_Ref_quot<ASCII>(s)||at_Ref_apos<ASCII>(s))){
    209               return XMLTestSuiteError::UNDEFREF;
    210         }
    211         return 0;
    212 }
    213 
    214 static inline int HexRef_check(int pos) {
    215         unsigned char* s = (unsigned char*)&source[pos];
    216         int ch_val = 0;
    217         while(at_HexDigit<ASCII>(s)){
    218           ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4);
    219           if (ch_val> 0x10FFFF ){
    220                 return XMLTestSuiteError::CHARREF;
    221           }
    222           s++;
    223         }
    224         if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    225           return XMLTestSuiteError::CHARREF;
    226         }
    227         else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    228           return XMLTestSuiteError::XML10CHARREF;
    229         }
    230         return 0;
    231 }
    232 
    233 static inline int DecRef_check(int pos) {
    234         unsigned char* s = (unsigned char*)&source[pos];
    235         int ch_val = 0;
    236         while(at_HexDigit<ASCII>(s)){
    237           ch_val = DigitVal<ASCII>(s[0]) + ch_val*10;
    238           if (ch_val> 0x10FFFF ){
    239                         return XMLTestSuiteError::CHARREF;
    240           }
    241           s++;
    242         }
    243         if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    244                   return XMLTestSuiteError::CHARREF;
    245         }
    246         else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    247                   return XMLTestSuiteError::XML10CHARREF;
    248         }
    249         return 0;
    250 }
    251 
    252 static inline int AttRef_check(int pos) {
    253         unsigned char* s = (unsigned char*)&source[pos];
    254         int ch_val = 0;
    255         if(s[0]=='#'){
    256           s++;
    257           if(s[0]=='x' || s[0]=='X'){
    258             s++;
    259             while(at_HexDigit<ASCII>(s)){
    260               ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4);
    261               s++;
    262             }
    263           }
    264           else{
    265             while(at_HexDigit<ASCII>(s)){
    266               ch_val = DigitVal<ASCII>(s[0]) + ch_val*10;
    267               s++;
    268             }
    269           }
    270           if (ch_val==60){
    271             return XMLTestSuiteError::ATTREF;
    272           }
    273         }
    274         else if(at_Ref_lt<ASCII>(s)){
    275           return XMLTestSuiteError::ATTREF;
    276         }
    277         return 0;
    278 }
    279 
    280 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int)) {
    281 
    282         BitBlockForwardIterator end;
    283         int pos, block_pos;
    284 
    285         while(start != end) {
    286 
    287                 block_pos = block_base + *start;
    288                 int rv = is_valid(block_pos);
    289 
    290                 if (rv) {
    291                         int error_line, error_column;
    292                         tracker.get_Line_and_Column(block_pos, error_line, error_column);
    293                         ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
    294                         exit(-1);
    295                 }
    296                 start++;
    297         }
    298 }
    299 
    300 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int buffer_base, int is_valid(int,int)) {
    301 
    302         BitBlockForwardIterator end;
    303         int pos, block_pos, file_pos;
    304 
    305         while(start != end) {
    306 
    307                 block_pos = block_base + *start;
    308                 file_pos = block_pos+buffer_base;
    309 
    310 
    311                 int rv = is_valid(block_pos, file_pos);
    312 
    313                 if (rv) {
    314                         int error_line, error_column;
    315                         tracker.get_Line_and_Column(block_pos, error_line, error_column);
    316                         ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
    317                         exit(-1);
    318                 }
    319                 start++;
    320         }
    321104}
    322105
     
    388171        int lgth = count_forward_zeroes(elem_ends)-last_elem_start;
    389172        int start = block_base + last_elem_start;
    390         int hashvalue = compute_hash_value(lgth, last_elem_start);
     173        int hashvalue = compute_hash_value(lgth, last_elem_start, hashvalues);
    391174        int gid = pbgs_symbol_table.Lookup_or_Insert_Name(source + start, hashvalue, lgth);
    392175        gids.push_back(gid);
     
    494277    matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
    495278    tracker.AdvanceBlock();
    496 }
    497 
    498 static inline void print_GIDS()
    499 {
    500     int span_count = gids.size();
    501     for(int i=0;i<span_count;i++) {
    502              cout << gids[i] << " ";
    503     }
    504     cout << endl;
    505 }
    506 
    507 static inline int test(int)
    508 {
    509     return 0;
    510279}
    511280
     
    630399      exit(-1);
    631400    }
    632 
    633 //  print_GIDS();
    634 #if DEBUG
    635     pbgs_symbol_table.Print_Symbol_Table_Distribution();
    636 #endif
    637 }
     401}
  • proto/SymbolTable/symtab_pbgs_identity_template.cpp

    r1684 r1721  
    1 #include "../symtab_global.h"
     1#include "../common_definitions.h"
    22#include <pbgs_identity_symbol_table.h>
     3
     4#include "../symtab_common_functions.h"
     5#include "../xmlwf_common_functions.h"
     6
    37//#define STREAMSCAN
    48#ifdef BUFFER_PROFILING
     
    1822int block_base=0;
    1923int buffer_base=0;
    20 char * source;
    2124int buffer_last;
    2225
    23 LineColTracker tracker;
    2426TagMatcher matcher;
    25 ErrorTracker error_tracker;
    26 BitBlock EOF_mask = simd<1>::constant<1>();
    2727
    2828BitBlock elem_starts;
     
    3434
    3535
     36#ifdef STREAMSCAN
    3637static inline int NameStrt_check(int pos);
    3738static inline int Name_check(int pos);
     
    4243static inline int DecRef_check(int pos);
    4344static inline int AttRef_check(int pos);
    44 
    45 @global
    46 
    47 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits);
    48 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask);
     45#endif
     46
    4947static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, int chars_avail);
    5048
    5149void do_process(FILE *infile, FILE *outfile);
    5250
    53 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int));
    54 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int,int));
    5551template <int L> static inline void validate_block_length_grouping(BitBlockForwardIterator & start, int block_base);
    5652
    57 static inline int ScanBackwardPos(BitBlock * block, int pos);
    58 static inline int compute_hash_value (int lgth, int start);
    5953template <int L> static inline int ElemEnd_grouping(int pos);
    6054
    6155int main(int argc, char * argv[]) {
    6256
    63         char * infilename, * outfilename;
    64         FILE *infile, *outfile;
    65         struct stat fileinfo;
    66 
    67         if (argc < 2) {
    68                 printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
    69                 exit(-1);
    70         }
    71 
    72         infilename = argv[1];
    73         stat(infilename, &fileinfo);
    74         infile = fopen(infilename, "rb");
    75         if (!infile) {
    76                 fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
    77                 exit(-1);
    78         }
    79 
    80         if (argc < 3) outfile = stdout;
    81         else {
    82                 outfilename = argv[2];
    83                 outfile = fopen(outfilename, "wb");
    84                 if (!outfile) {
    85                         fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
    86                         exit(-1);
    87                 }
    88         }
     57    char * infilename, * outfilename;
     58    FILE *infile, *outfile;
     59
     60    getFilenames(argc, argv, infilename, outfilename);
     61    openInputOutputFiles(infilename, outfilename,
     62                         infile, outfile);
    8963
    9064//      PERF_SEC_BIND(1);
    9165
    92         PERF_SEC_INIT(parser_timer);
    93 
    94         do_process(infile, outfile);
    95 
    96         PERF_SEC_DUMP(parser_timer);
    97 
    98         PERF_SEC_DESTROY(parser_timer);
    99 
    100         fclose(infile);
    101         fclose(outfile);
    102 
    103         return(0);
    104 }
    105 
    106 /* s2p Definitions */
    107 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
    108   s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
    109         basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
    110 }
    111 
    112 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
    113   s2p_do_block(U8, basis_bits);
    114   basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
    115   basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
    116   basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
    117   basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
    118   basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
    119   basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
    120   basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
    121   basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
    122 }
    123 
    124 static inline int ScanBackwardPos(BitBlock * block, int pos)
    125 {
    126     BitBlock s = block[0];
    127     BitBlock temp = simd_and( s, simd_not(simd<128>::sll(simd<2>::constant<3>(), convert(pos))) );
    128 
    129     if (bitblock_has_bit(temp))
    130     {
    131         // sizeof (BitBlock)*8 - cbzl( s & ~(~0 << pos)) - 1;
    132         return BLOCK_SIZE - count_reverse_zeroes (temp) - 1;
    133     }
    134     else
    135     {
    136         //handle boundary case
    137         return previous_block_last_elem_start - 1;
    138     }
    139 }
    140 
    141 static inline int compute_hash_value (int lgth, int start)
    142 {
    143     unsigned int offset_bit = start + 128;
    144     uint64_t stream = *((uint64_t*)(((uint32_t*)hashvalues)+(offset_bit>>5)));
    145     return stream >> (offset_bit & 0x1F) & ~(~0 << lgth);
     66    PERF_SEC_INIT(parser_timer);
     67
     68    // store symbols form text to Symbol Table
     69    do_process(infile, outfile);
     70
     71    PERF_SEC_DUMP(parser_timer);
     72
     73    PERF_SEC_DESTROY(parser_timer);
     74
     75    fclose(infile);
     76    fclose(outfile);
     77
     78#if PRINT_SYMBOL_DISTRIBUTION
     79//    print_GIDS();
     80    pbgs_symbol_table.Print_Symbol_Table_Distribution();
     81#endif
     82
     83    return 0;
    14684}
    14785
     
    14987static inline int ElemEnd_grouping(int end) {
    15088    int start = end - L;
    151     int hashvalue = compute_hash_value(L, start - block_base);
     89    int hashvalue = compute_hash_value(L, start - block_base, hashvalues);
    15290    int gid = pbgs_symbol_table.Lookup_or_Insert_Name<L>(source + start, hashvalue);
    15391    gids.push_back(gid);
     
    164102template<>
    165103inline int ElemEnd_grouping<17>(int end) {
    166     int start = ScanBackwardPos (&elem_starts, end - block_base) + block_base;
     104    int start = ScanBackwardPos (&elem_starts, end - block_base, previous_block_last_elem_start) + block_base;
    167105    int lgth = end - start;
    168     int hashvalue = compute_hash_value(lgth, start - block_base);
     106    int hashvalue = compute_hash_value(lgth, start - block_base, hashvalues);
    169107    int gid = 0;
    170108
     
    325263}
    326264#else
    327 static inline int NameStrt_check(int pos) {
    328         if(XML_10_UTF8_NameStrt_bytes((unsigned char*)&source[pos]) == 0){
    329               return XMLTestSuiteError::NAME_START;
    330         }
    331         return 0;
    332 }
    333 
    334 static inline int Name_check(int pos) {
    335         if(XML_10_UTF8_NameChar_bytes((unsigned char*)&source[pos]) == 0){
    336                   return XMLTestSuiteError::NAME;
    337         }
    338         return 0;
    339 }
    340 
    341 static inline int PIName_check(int pos, int file_pos) {
    342         if (at_XxMmLll<ASCII>((unsigned char*)&source[pos]) && (source[pos+3]=='?' || source[pos+3]<= ' ')) {
    343               // "<?xml" legal at start of file.
    344               if (!((file_pos == 2) && at_XmlDecl_start<ASCII>((unsigned char*)&source[0]))) {
    345                   return XMLTestSuiteError::XMLPINAME;
    346               }
    347         }
    348         return 0;
    349 }
    350 
    351 static inline int CD_check(int pos) {
    352         if (!at_CDATA1<ASCII>((unsigned char*)&source[pos])){
    353                   return XMLTestSuiteError::CDATA;
    354         }
    355         return 0;
    356 }
    357 
    358 static inline int GenRef_check(int pos) {
    359         unsigned char* s = (unsigned char*)&source[pos];
    360         if (!(at_Ref_gt<ASCII>(s)||at_Ref_lt<ASCII>(s)||at_Ref_amp<ASCII>(s)||at_Ref_quot<ASCII>(s)||at_Ref_apos<ASCII>(s))){
    361               return XMLTestSuiteError::UNDEFREF;
    362         }
    363         return 0;
    364 }
    365 
    366 static inline int HexRef_check(int pos) {
    367         unsigned char* s = (unsigned char*)&source[pos];
    368         int ch_val = 0;
    369         while(at_HexDigit<ASCII>(s)){
    370           ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4);
    371           if (ch_val> 0x10FFFF ){
    372                 return XMLTestSuiteError::CHARREF;
    373           }
    374           s++;
    375         }
    376         if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    377           return XMLTestSuiteError::CHARREF;
    378         }
    379         else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    380           return XMLTestSuiteError::XML10CHARREF;
    381         }
    382         return 0;
    383 }
    384 
    385 static inline int DecRef_check(int pos) {
    386         unsigned char* s = (unsigned char*)&source[pos];
    387         int ch_val = 0;
    388         while(at_HexDigit<ASCII>(s)){
    389           ch_val = DigitVal<ASCII>(s[0]) + ch_val*10;
    390           if (ch_val> 0x10FFFF ){
    391                         return XMLTestSuiteError::CHARREF;
    392           }
    393           s++;
    394         }
    395         if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    396                   return XMLTestSuiteError::CHARREF;
    397         }
    398         else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    399                   return XMLTestSuiteError::XML10CHARREF;
    400         }
    401         return 0;
    402 }
    403 
    404 static inline int AttRef_check(int pos) {
    405         unsigned char* s = (unsigned char*)&source[pos];
    406         int ch_val = 0;
    407         if(s[0]=='#'){
    408           s++;
    409           if(s[0]=='x' || s[0]=='X'){
    410             s++;
    411             while(at_HexDigit<ASCII>(s)){
    412               ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4);
    413               s++;
    414             }
    415           }
    416           else{
    417             while(at_HexDigit<ASCII>(s)){
    418               ch_val = DigitVal<ASCII>(s[0]) + ch_val*10;
    419               s++;
    420             }
    421           }
    422           if (ch_val==60){
    423             return XMLTestSuiteError::ATTREF;
    424           }
    425         }
    426         else if(at_Ref_lt<ASCII>(s)){
    427           return XMLTestSuiteError::ATTREF;
    428         }
    429         return 0;
    430 }
    431 
    432 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int)) {
    433 
    434         BitBlockForwardIterator end;
    435         int pos, block_pos;
    436 
    437         while(start != end) {
    438 
    439                 block_pos = block_base + *start;
    440                 int rv = is_valid(block_pos);
    441 
    442                 if (rv) {
    443                         int error_line, error_column;
    444                         tracker.get_Line_and_Column(block_pos, error_line, error_column);
    445                         ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
    446                         exit(-1);
    447                 }
    448                 start++;
    449         }
    450 }
    451 
    452 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int buffer_base, int is_valid(int,int)) {
    453 
    454         BitBlockForwardIterator end;
    455         int pos, block_pos, file_pos;
    456 
    457         while(start != end) {
    458 
    459                 block_pos = block_base + *start;
    460                 file_pos = block_pos+buffer_base;
    461 
    462 
    463                 int rv = is_valid(block_pos, file_pos);
    464 
    465                 if (rv) {
    466                         int error_line, error_column;
    467                         tracker.get_Line_and_Column(block_pos, error_line, error_column);
    468                         ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
    469                         exit(-1);
    470                 }
    471                 start++;
    472         }
    473 }
     265
     266
    474267#endif
    475268
     
    482275
    483276    while(start != end) {
    484         block_pos = block_base + *start;
     277        block_pos = block_base + *start;
    485278        ElemEnd_grouping<L>(block_pos);
    486         start++;
     279        start++;
    487280    }
    488281}
     
    883676#endif
    884677#endif
    885 }
    886 
    887 static inline void print_GIDS()
    888 {
    889     int span_count = gids.size();
    890     for(int i=0;i<span_count;i++) {
    891              cout << gids[i] << " ";
    892     }
    893     cout << endl;
    894 }
    895 
    896 static inline int test(int)
    897 {
    898     return 0;
    899678}
    900679
     
    1017796      exit(-1);
    1018797    }
    1019 //  print_GIDS();
    1020 #if DEBUG
    1021     pbgs_symbol_table.Print_Symbol_Table_Distribution();
    1022 #endif
    1023 }
    1024 
    1025 
     798}
     799
     800
  • proto/SymbolTable/symtab_pbgs_log_template.cpp

    r1684 r1721  
    11#define USE_MASK_COMPARE    //Comparison using masking technique.
    22
    3 #include "../symtab_global.h"
     3#include "../common_definitions.h"
    44#include <pbgs_log_symbol_table.h>
     5
     6#include "../symtab_common_functions.h"
     7#include "../xmlwf_common_functions.h"
    58
    69#ifdef BUFFER_PROFILING
     
    2124int buffer_base=0;
    2225int buffer_last;
    23 char * source;
    24 LineColTracker tracker;
     26
    2527TagMatcher matcher;
    26 BitBlock EOF_mask = simd<1>::constant<1>();
    27 ErrorTracker error_tracker;
    2828
    2929BitBlock elem_starts;
     
    3434PBGSLogSymbolTable pbgs_symbol_table;
    3535
    36 /* StreamScan & Post Process Declarations */
    37 //      static inline int StreamScanToFirst(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block)
    38 static inline int StreamScan(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block);
    39 
    40 static inline int NameStrt_check(int pos);
    41 static inline int Name_check(int pos);
    42 static inline int PIName_check(int pos);
    43 static inline int CD_check(int pos);
    44 static inline int GenRef_check(int pos);
    45 static inline int HexRef_check(int pos);
    46 static inline int DecRef_check(int pos);
    47 static inline int AttRef_check(int pos);
    48 
    49 @global
    50 
    51 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits);
    52 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask);
    5336static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, Tag_Callouts & tag_Callouts, Hash_data & hash_data, int chars_avail);
    5437
    5538void do_process(FILE *infile, FILE *outfile);
    5639
    57 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int));
    58 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int,int));
    5940template <int L> static inline void validate_block_length_grouping(BitBlockForwardIterator & start, int block_base);
    6041
    61 static inline int ScanBackwardPos(BitBlock * block, int pos);
    62 static inline int compute_hash_value (int lgth, int start);
    6342template <int L> static inline int ElemEnd_grouping(int pos, int length);
    64 template <int L> static inline int StreamScanLengthGrouping(ScanBlock * stream, int blk_count);
    6543
    6644int main(int argc, char * argv[]) {
    67         char * infilename, * outfilename;
    68         FILE *infile, *outfile;
    69         struct stat fileinfo;
    70 
    71         if (argc < 2) {
    72                 printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
    73                 exit(-1);
    74         }
    75 
    76         infilename = argv[1];
    77         stat(infilename, &fileinfo);
    78         infile = fopen(infilename, "rb");
    79         if (!infile) {
    80                 fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
    81                 exit(-1);
    82         }
    83 
    84         if (argc < 3) outfile = stdout;
    85         else {
    86                 outfilename = argv[2];
    87                 outfile = fopen(outfilename, "wb");
    88                 if (!outfile) {
    89                         fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
    90                         exit(-1);
    91                 }
    92         }
     45    char * infilename, * outfilename;
     46    FILE *infile, *outfile;
     47
     48    getFilenames(argc, argv, infilename, outfilename);
     49    openInputOutputFiles(infilename, outfilename,
     50                         infile, outfile);
    9351
    9452//      PERF_SEC_BIND(1);
    9553
    96         PERF_SEC_INIT(parser_timer);
    97 
    98         do_process(infile, outfile);
    99 
    100         PERF_SEC_DUMP(parser_timer);
    101 
    102         PERF_SEC_DESTROY(parser_timer);
    103 
    104         fclose(infile);
    105         fclose(outfile);
    106 
    107         return(0);
    108 }
    109 
    110 /* s2p Definitions */
    111 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
    112   s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
    113         basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
    114 }
    115 
    116 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
    117   s2p_do_block(U8, basis_bits);
    118   basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
    119   basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
    120   basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
    121   basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
    122   basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
    123   basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
    124   basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
    125   basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
    126 }
    127 
    128 static inline int ScanBackwardPos(BitBlock * block, int pos)
    129 {
    130     BitBlock s = block[0];
    131     BitBlock temp = simd_and( s, simd_not(simd<128>::sll(simd<2>::constant<3>(), convert(pos))) );
    132 
    133     if (bitblock_has_bit(temp))
    134     {
    135         // sizeof (BitBlock)*8 - cbzl( s & ~(~0 << pos)) - 1;
    136         return sizeof(BitBlock)*8 - count_reverse_zeroes (temp) - 1;
    137     }
    138     else
    139     {
    140         //handle boundary case
    141 #if DEBUG
    142         printf ("%s | block boundary case, return %i\n", __FUNCTION__, previous_block_last_elem_start - 1);
    143 #endif
    144         return previous_block_last_elem_start - 1;
    145     }
    146 }
    147 
    148 static inline int compute_hash_value (int lgth, int start)
    149 {
    150     unsigned int offset_bit = start + 128;
    151     uint64_t stream = *((uint64_t*)(((uint32_t*)hashvalues)+(offset_bit>>5)));
    152     return stream >> (offset_bit & 0x1F) & ~(~0 << lgth);
     54    PERF_SEC_INIT(parser_timer);
     55
     56    // store symbols form text to Symbol Table
     57    do_process(infile, outfile);
     58
     59    PERF_SEC_DUMP(parser_timer);
     60
     61    PERF_SEC_DESTROY(parser_timer);
     62
     63    fclose(infile);
     64    fclose(outfile);
     65
     66#if PRINT_SYMBOL_DISTRIBUTION
     67//    print_GIDS();
     68    pbgs_symbol_table.Print_Symbol_Table_Distribution();
     69#endif
     70    return 0;
    15371}
    15472
     
    16280inline int ElemEnd_grouping<1>(int pos, int length) {
    16381    int start = block_base + pos - length;
    164     int hashvalue = compute_hash_value(length, start - block_base);
     82    int hashvalue = compute_hash_value(length, start - block_base, hashvalues);
    16583    int gid = pbgs_symbol_table.Lookup_or_Insert_Name_1(source + start, hashvalue);
    16684    gids.push_back(gid);
     
    18199inline int ElemEnd_grouping<2>(int pos, int length) {
    182100    int start = block_base + pos - length;
    183     int hashvalue = compute_hash_value(length, start - block_base);
     101    int hashvalue = compute_hash_value(length, start - block_base, hashvalues);
    184102    int gid = pbgs_symbol_table.Lookup_or_Insert_Name_2(source + start, hashvalue);
    185103    gids.push_back(gid);
     
    201119inline int ElemEnd_grouping<4>(int pos, int L) {
    202120    int start = pos + block_base;
    203     int hashvalue = compute_hash_value(L, pos);
     121    int hashvalue = compute_hash_value(L, pos, hashvalues);
    204122    int gid = pbgs_symbol_table.Lookup_or_Insert_Name_4(source + start, hashvalue, L);
    205123    gids.push_back(gid);
     
    219137inline int ElemEnd_grouping<8>(int pos,  int L) {
    220138    int start = pos + block_base;
    221     int hashvalue = compute_hash_value(L, pos);
     139    int hashvalue = compute_hash_value(L, pos, hashvalues);
    222140    int gid = pbgs_symbol_table.Lookup_or_Insert_Name_8(source + start, hashvalue, L);
    223141    gids.push_back(gid);
     
    237155inline int ElemEnd_grouping<16>(int pos, int L) {
    238156    int start = pos + block_base;
    239     int hashvalue = compute_hash_value(L, pos);   
     157    int hashvalue = compute_hash_value(L, pos, hashvalues);
    240158    int gid = pbgs_symbol_table.Lookup_or_Insert_Name_16(source + start, hashvalue, L);
    241159    gids.push_back(gid);
     
    255173inline int ElemEnd_grouping<17>(int pos, int lgth) {
    256174    int start = pos + block_base;
    257     int hashvalue = compute_hash_value(lgth, start - block_base);
     175    int hashvalue = compute_hash_value(lgth, start - block_base, hashvalues);
    258176    int gid = 0;
    259177
     
    274192#endif
    275193    return 0;
    276 }
    277 
    278 static inline int NameStrt_check(int pos) {
    279         if(XML_10_UTF8_NameStrt_bytes((unsigned char*)&source[pos]) == 0){
    280               return XMLTestSuiteError::NAME_START;
    281         }
    282         return 0;
    283 }
    284 
    285 static inline int Name_check(int pos) {
    286         if(XML_10_UTF8_NameChar_bytes((unsigned char*)&source[pos]) == 0){
    287                   return XMLTestSuiteError::NAME;
    288         }
    289         return 0;
    290 }
    291 
    292 static inline int PIName_check(int pos, int file_pos) {
    293         if (at_XxMmLll<ASCII>((unsigned char*)&source[pos]) && (source[pos+3]=='?' || source[pos+3]<= ' ')) {
    294               // "<?xml" legal at start of file.
    295               if (!((file_pos == 2) && at_XmlDecl_start<ASCII>((unsigned char*)&source[0]))) {
    296                   return XMLTestSuiteError::XMLPINAME;
    297               }
    298         }
    299         return 0;
    300 }
    301 
    302 static inline int CD_check(int pos) {
    303         if (!at_CDATA1<ASCII>((unsigned char*)&source[pos])){
    304                   return XMLTestSuiteError::CDATA;
    305         }
    306         return 0;
    307 }
    308 
    309 static inline int GenRef_check(int pos) {
    310         unsigned char* s = (unsigned char*)&source[pos];
    311         if (!(at_Ref_gt<ASCII>(s)||at_Ref_lt<ASCII>(s)||at_Ref_amp<ASCII>(s)||at_Ref_quot<ASCII>(s)||at_Ref_apos<ASCII>(s))){
    312               return XMLTestSuiteError::UNDEFREF;
    313         }
    314         return 0;
    315 }
    316 
    317 static inline int HexRef_check(int pos) {
    318         unsigned char* s = (unsigned char*)&source[pos];
    319         int ch_val = 0;
    320         while(at_HexDigit<ASCII>(s)){
    321           ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4);
    322           if (ch_val> 0x10FFFF ){
    323                 return XMLTestSuiteError::CHARREF;
    324           }
    325           s++;
    326         }
    327         if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    328           return XMLTestSuiteError::CHARREF;
    329         }
    330         else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    331           return XMLTestSuiteError::XML10CHARREF;
    332         }
    333         return 0;
    334 }
    335 
    336 static inline int DecRef_check(int pos) {
    337         unsigned char* s = (unsigned char*)&source[pos];
    338         int ch_val = 0;
    339         while(at_HexDigit<ASCII>(s)){
    340           ch_val = DigitVal<ASCII>(s[0]) + ch_val*10;
    341           if (ch_val> 0x10FFFF ){
    342                         return XMLTestSuiteError::CHARREF;
    343           }
    344           s++;
    345         }
    346         if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    347                   return XMLTestSuiteError::CHARREF;
    348         }
    349         else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    350                   return XMLTestSuiteError::XML10CHARREF;
    351         }
    352         return 0;
    353 }
    354 
    355 static inline int AttRef_check(int pos) {
    356         unsigned char* s = (unsigned char*)&source[pos];
    357         int ch_val = 0;
    358         if(s[0]=='#'){
    359           s++;
    360           if(s[0]=='x' || s[0]=='X'){
    361             s++;
    362             while(at_HexDigit<ASCII>(s)){
    363               ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4);
    364               s++;
    365             }
    366           }
    367           else{
    368             while(at_HexDigit<ASCII>(s)){
    369               ch_val = DigitVal<ASCII>(s[0]) + ch_val*10;
    370               s++;
    371             }
    372           }
    373           if (ch_val==60){
    374             return XMLTestSuiteError::ATTREF;
    375           }
    376         }
    377         else if(at_Ref_lt<ASCII>(s)){
    378           return XMLTestSuiteError::ATTREF;
    379         }
    380         return 0;
    381 }
    382 
    383 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int)) {
    384 
    385         BitBlockForwardIterator end;
    386         int pos, block_pos;
    387 
    388         while(start != end) {
    389 
    390                 block_pos = block_base + *start;
    391                 int rv = is_valid(block_pos);
    392 
    393                 if (rv) {
    394                         int error_line, error_column;
    395                         tracker.get_Line_and_Column(block_pos, error_line, error_column);
    396                         ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
    397                         exit(-1);
    398                 }
    399                 start++;
    400         }
    401 }
    402 
    403 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int buffer_base, int is_valid(int,int)) {
    404 
    405         BitBlockForwardIterator end;
    406         int pos, block_pos, file_pos;
    407 
    408         while(start != end) {
    409 
    410                 block_pos = block_base + *start;
    411                 file_pos = block_pos+buffer_base;
    412 
    413 
    414                 int rv = is_valid(block_pos, file_pos);
    415 
    416                 if (rv) {
    417                         int error_line, error_column;
    418                         tracker.get_Line_and_Column(block_pos, error_line, error_column);
    419                         ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
    420                         exit(-1);
    421                 }
    422                 start++;
    423         }
    424194}
    425195
     
    436206    while(start != end) {
    437207        end_pos = /*block_base + */*start;
    438         start_pos = ScanBackwardPos (&elem_starts, end_pos);
     208        start_pos = ScanBackwardPos (&elem_starts, end_pos, previous_block_last_elem_start);
    439209        length = end_pos - start_pos;
    440210        ElemEnd_grouping<L>(start_pos, length);
     
    452222    while(start != end) {
    453223        end_pos = /*block_base + */*start;
    454         start_pos = ScanBackwardPos (&elem_starts, end_pos);
     224        start_pos = ScanBackwardPos (&elem_starts, end_pos, previous_block_last_elem_start);
    455225        length = end_pos - start_pos;
    456226        ElemEnd_grouping<1>(end_pos, 1);
     
    468238    while(start != end) {
    469239        end_pos = /*block_base + */*start;
    470         start_pos = ScanBackwardPos (&elem_starts, end_pos);
     240        start_pos = ScanBackwardPos (&elem_starts, end_pos, previous_block_last_elem_start);
    471241        length = end_pos - start_pos;
    472242        ElemEnd_grouping<2>(end_pos, 2);
     
    565335    tracker.AdvanceBlock();
    566336
    567 }
    568 
    569 static inline void print_GIDS()
    570 {
    571     int span_count = gids.size();
    572     for(int i=0;i<span_count;i++) {
    573              cout << gids[i] << " ";
    574     }
    575     cout << endl;
    576337}
    577338
     
    697458    }
    698459
    699 //    pbgs_symbol_table.Print_Symbol_Table_Distribution();
    700 //  print_GIDS();
    701 }
     460}
  • proto/SymbolTable/symtab_stl_template.cpp

    r1684 r1721  
    1 #include "../symtab_global.h"
     1#include "../common_definitions.h"
    22#include <symtab.h>
     3
     4#include "../symtab_common_functions.h"
     5#include "../xmlwf_common_functions.h"
    36
    47#ifdef BUFFER_PROFILING
     
    1821int block_base=0;
    1922int buffer_base=0;
    20 char * source;
    21 LineColTracker tracker;
     23
    2224TagMatcher matcher;
    23 ErrorTracker error_tracker;
    24 BitBlock EOF_mask = simd<1>::constant<1>();
    2525
    2626queue <size_t> elem_starts_buf;
     
    2929SymbolTable symbol_table;
    3030
    31 /* StreamScan & Post Process Declarations */
    32 //      static inline int StreamScanToFirst(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block)
    33 static inline int StreamScan(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block);
    34 
    35 static inline int NameStrt_check(int pos);
    36 static inline int Name_check(int pos);
    37 static inline int PIName_check(int pos);
    38 static inline int CD_check(int pos);
    39 static inline int GenRef_check(int pos);
    40 static inline int HexRef_check(int pos);
    41 static inline int DecRef_check(int pos);
    42 static inline int AttRef_check(int pos);
    43 
    44 @global
    45 
    46 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits);
    47 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask);
    4831static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, Tag_Callouts & tag_Callouts, int chars_avail);
    4932void do_process(FILE *infile, FILE *outfile);
    5033
    51 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int));
    52 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int,int));
    53 
    5434static inline void do_symbol_table_lookup();
    5535
    5636
    5737int main(int argc, char * argv[]) {
    58         char * infilename, * outfilename;
    59         FILE *infile, *outfile;
    60         struct stat fileinfo;
    61 
    62         if (argc < 2) {
    63                 printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
    64                 exit(-1);
    65         }
    66 
    67         infilename = argv[1];
    68         stat(infilename, &fileinfo);
    69         infile = fopen(infilename, "rb");
    70         if (!infile) {
    71                 fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
    72                 exit(-1);
    73         }
    74 
    75         if (argc < 3) outfile = stdout;
    76         else {
    77                 outfilename = argv[2];
    78                 outfile = fopen(outfilename, "wb");
    79                 if (!outfile) {
    80                         fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
    81                         exit(-1);
    82                 }
    83         }
     38    char * infilename, * outfilename;
     39    FILE *infile, *outfile;
     40
     41    getFilenames(argc, argv, infilename, outfilename);
     42    openInputOutputFiles(infilename, outfilename,
     43                         infile, outfile);
    8444
    8545//      PERF_SEC_BIND(1);
    8646
    87         PERF_SEC_INIT(parser_timer);
    88 
    89         do_process(infile, outfile);
    90 
    91         PERF_SEC_DUMP(parser_timer);
    92 
    93         PERF_SEC_DESTROY(parser_timer);
    94 
    95         fclose(infile);
    96         fclose(outfile);
    97 
    98         return(0);
    99 }
    100 
    101 /* s2p Definitions */
    102 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
    103   s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
    104         basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
    105 }
    106 
    107 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
    108   s2p_do_block(U8, basis_bits);
    109   basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
    110   basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
    111   basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
    112   basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
    113   basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
    114   basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
    115   basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
    116   basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
     47    PERF_SEC_INIT(parser_timer);
     48
     49    // store symbols form text to Symbol Table
     50    do_process(infile, outfile);
     51
     52    PERF_SEC_DUMP(parser_timer);
     53
     54    PERF_SEC_DESTROY(parser_timer);
     55
     56    fclose(infile);
     57    fclose(outfile);
     58
     59#if PRINT_SYMBOL_DISTRIBUTION
     60    print_GIDS(gids);
     61#endif
     62
     63    return(0);
    11764}
    11865
     
    12572        elem_ends_buf.push(buffer_base + pos);
    12673        return 0;
    127 }
    128 
    129 static inline int NameStrt_check(int pos) {
    130         if(XML_10_UTF8_NameStrt_bytes((unsigned char*)&source[pos]) == 0){
    131               return XMLTestSuiteError::NAME_START;
    132         }
    133         return 0;
    134 }
    135 
    136 static inline int Name_check(int pos) {
    137         if(XML_10_UTF8_NameChar_bytes((unsigned char*)&source[pos]) == 0){
    138                   return XMLTestSuiteError::NAME;
    139         }
    140         return 0;
    141 }
    142 
    143 static inline int PIName_check(int pos, int file_pos) {
    144         if (at_XxMmLll<ASCII>((unsigned char*)&source[pos]) && (source[pos+3]=='?' || source[pos+3]<= ' ')) {
    145               // "<?xml" legal at start of file.
    146               if (!((file_pos == 2) && at_XmlDecl_start<ASCII>((unsigned char*)&source[0]))) {
    147                   return XMLTestSuiteError::XMLPINAME;
    148               }
    149         }
    150         return 0;
    151 }
    152 
    153 static inline int CD_check(int pos) {
    154         if (!at_CDATA1<ASCII>((unsigned char*)&source[pos])){
    155                   return XMLTestSuiteError::CDATA;
    156         }
    157         return 0;
    158 }
    159 
    160 static inline int GenRef_check(int pos) {
    161         unsigned char* s = (unsigned char*)&source[pos];
    162         if (!(at_Ref_gt<ASCII>(s)||at_Ref_lt<ASCII>(s)||at_Ref_amp<ASCII>(s)||at_Ref_quot<ASCII>(s)||at_Ref_apos<ASCII>(s))){
    163               return XMLTestSuiteError::UNDEFREF;
    164         }
    165         return 0;
    166 }
    167 
    168 static inline int HexRef_check(int pos) {
    169         unsigned char* s = (unsigned char*)&source[pos];
    170         int ch_val = 0;
    171         while(at_HexDigit<ASCII>(s)){
    172           ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4);
    173           if (ch_val> 0x10FFFF ){
    174                 return XMLTestSuiteError::CHARREF;
    175           }
    176           s++;
    177         }
    178         if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    179           return XMLTestSuiteError::CHARREF;
    180         }
    181         else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    182           return XMLTestSuiteError::XML10CHARREF;
    183         }
    184         return 0;
    185 }
    186 
    187 static inline int DecRef_check(int pos) {
    188         unsigned char* s = (unsigned char*)&source[pos];
    189         int ch_val = 0;
    190         while(at_HexDigit<ASCII>(s)){
    191           ch_val = DigitVal<ASCII>(s[0]) + ch_val*10;
    192           if (ch_val> 0x10FFFF ){
    193                         return XMLTestSuiteError::CHARREF;
    194           }
    195           s++;
    196         }
    197         if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF)){
    198                   return XMLTestSuiteError::CHARREF;
    199         }
    200         else if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA))){
    201                   return XMLTestSuiteError::XML10CHARREF;
    202         }
    203         return 0;
    204 }
    205 
    206 static inline int AttRef_check(int pos) {
    207         unsigned char* s = (unsigned char*)&source[pos];
    208         int ch_val = 0;
    209         if(s[0]=='#'){
    210           s++;
    211           if(s[0]=='x' || s[0]=='X'){
    212             s++;
    213             while(at_HexDigit<ASCII>(s)){
    214               ch_val = HexVal<ASCII>(s[0]) + (ch_val<<4);
    215               s++;
    216             }
    217           }
    218           else{
    219             while(at_HexDigit<ASCII>(s)){
    220               ch_val = DigitVal<ASCII>(s[0]) + ch_val*10;
    221               s++;
    222             }
    223           }
    224           if (ch_val==60){
    225             return XMLTestSuiteError::ATTREF;
    226           }
    227         }
    228         else if(at_Ref_lt<ASCII>(s)){
    229           return XMLTestSuiteError::ATTREF;
    230         }
    231         return 0;
    232 }
    233 
    234 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int)) {
    235 
    236         BitBlockForwardIterator end;
    237         int pos, block_pos;
    238 
    239         while(start != end) {
    240 
    241                 block_pos = block_base + *start;
    242                 int rv = is_valid(block_pos);
    243 
    244                 if (rv) {
    245                         int error_line, error_column;
    246                         tracker.get_Line_and_Column(block_pos, error_line, error_column);
    247                         ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
    248                         exit(-1);
    249                 }
    250                 start++;
    251         }
    252 }
    253 
    254 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int buffer_base, int is_valid(int,int)) {
    255 
    256         BitBlockForwardIterator end;
    257         int pos, block_pos, file_pos;
    258 
    259         while(start != end) {
    260 
    261                 block_pos = block_base + *start;
    262                 file_pos = block_pos+buffer_base;
    263 
    264 
    265                 int rv = is_valid(block_pos, file_pos);
    266 
    267                 if (rv) {
    268                         int error_line, error_column;
    269                         tracker.get_Line_and_Column(block_pos, error_line, error_column);
    270                         ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
    271                         exit(-1);
    272                 }
    273                 start++;
    274         }
    27574}
    27675
     
    358157    matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
    359158    tracker.AdvanceBlock();
    360 }
    361 
    362 static inline void print_GIDS()
    363 {
    364     int span_count = gids.size();
    365     for(int i=0;i<span_count;i++) {
    366              cout << gids[i] << " ";
    367     }
    368     cout << endl;
    369159}
    370160
     
    490280    }
    491281
    492 #if DEBUG
    493     print_GIDS();
    494 #endif
    495 }
     282}
  • proto/SymbolTable/test/test_files/soap.xml

    r1442 r1721  
    2828</ns2:ListEntry>
    2929
     30<ns2:ListEntry xmlns:ns2='urn:develop-com:javaclass:roxtest' s:id='sid9999'>
     31<llong>3</llong>
     32<sshort>1</sshort>
     33<iint>9999</iint>
     34<ffloat>4.0</ffloat>
     35<cchar>s</cchar>
     36<ddouble>5.0</ddouble>
     37<sstring>Hello </sstring>
     38<next xsi:null='1' />
     39</ns2:ListEntry>
     40
     41<ns2:ListEntry xmlns:ns2='urn:develop-com:javaclass:roxtest' s:id='sid9999'>
     42<llong>3</llong>
     43<sshort>1</sshort>
     44<iint>9999</iint>
     45<ffloat>4.0</ffloat>
     46<cchar>s</cchar>
     47<ddouble>5.0</ddouble>
     48<sstring>Hello </sstring>
     49<next xsi:null='1' />
     50</ns2:ListEntry>
     51
    3052</s:Body>
    3153</s:Envelope>
  • proto/SymbolTable/wcd_hash_template.cpp

    r1688 r1721  
    1 #include "../symtab_global.h"
     1#include "../common_definitions.h"
    22#include <hash_symbol_table.h>
     3
     4#include "../wcd_common_functions.h"
     5#include "../symtab_common_functions.h"
     6#include "parser_common_functions_generated.h"
    37
    48#ifdef BUFFER_PROFILING
     
    1923int buffer_base=0;
    2024char * source;
    21 LineColTracker tracker;
    22 BitBlock EOF_mask = simd<1>::constant<1>();
    2325
    2426queue <size_t> elem_starts_buf;
     
    2729HashSymbolTable symbol_table;
    2830
    29 /* StreamScan & Post Process Declarations */
    30 //      static inline int StreamScanToFirst(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block)
    31 static inline int StreamScan(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block);
    32 
    33 @global
    34 
    35 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits);
    36 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask);
     31template<bool allow_performance_check> void do_process(FILE *infile, FILE *outfile);
    3732static inline void postprocess_do_block(Dictionary& dictionary);
    38 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int));
    39 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int,int));
    4033
    4134static inline void do_symbol_table_lookup();
    4235
    43 void do_process(FILE *infile, FILE *outfile);
    44 
    4536int main(int argc, char * argv[]) {
    46         char * infilename, * outfilename;
    47         FILE *infile, *outfile;
    48         struct stat fileinfo;
    49 
    50         if (argc < 2) {
    51                 printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
    52                 exit(-1);
    53         }
    54 
    55         infilename = argv[1];
    56         stat(infilename, &fileinfo);
    57         infile = fopen(infilename, "rb");
    58         if (!infile) {
    59                 fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
    60                 exit(-1);
    61         }
    62 
    63         if (argc < 3) outfile = stdout;
    64         else {
    65                 outfilename = argv[2];
    66                 outfile = fopen(outfilename, "wb");
    67                 if (!outfile) {
    68                         fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
    69                         exit(-1);
    70                 }
    71         }
     37    char * dictionaryfilename, * infilename, * outfilename;
     38    FILE * dictionaryfile, *infile, *outfile;
     39
     40    getFilenames(argc, argv, dictionaryfilename, infilename, outfilename);
     41    openInputOutputFiles(dictionaryfilename, infilename, outfilename,
     42                         dictionaryfile, infile, outfile);
     43
     44    int greatest_GID_in_dictionary;
     45    populateDictionary(dictionaryfile, outfile, gids, greatest_GID_in_dictionary);
    7246
    7347//      PERF_SEC_BIND(1);
    7448
    75         PERF_SEC_INIT(parser_timer);
    76 
    77         do_process(infile, outfile);
    78 
    79         PERF_SEC_DUMP(parser_timer);
    80 
    81         PERF_SEC_DESTROY(parser_timer);
    82 
    83         fclose(infile);
    84         fclose(outfile);
    85 
    86         return(0);
    87 }
    88 
    89 /* s2p Definitions */
    90 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
    91   s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
    92         basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
    93 }
    94 
    95 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
    96   s2p_do_block(U8, basis_bits);
    97   basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
    98   basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
    99   basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
    100   basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
    101   basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
    102   basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
    103   basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
    104   basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
     49    PERF_SEC_INIT(parser_timer);
     50
     51    // store symbols from text to Symbol Table
     52    do_process<true>(infile, outfile);
     53
     54    PERF_SEC_DUMP(parser_timer);
     55
     56    PERF_SEC_DESTROY(parser_timer);
     57
     58    // gather dictionary statistics
     59    int totalKnownWordsInDictionary, totalUnknownWordsInDictionary;
     60    wordCountInDictionary(greatest_GID_in_dictionary, gids, totalUnknownWordsInDictionary, totalKnownWordsInDictionary);
     61    printWordCountInDictionary(totalUnknownWordsInDictionary, totalKnownWordsInDictionary);
     62
     63    fclose(dictionaryfile);
     64    fclose(infile);
     65    fclose(outfile);
     66
     67#if PRINT_SYMBOL_DISTRIBUTION
     68    print_GIDS();
     69#endif
     70
     71    return(0);
    10572}
    10673
     
    140107}
    141108
    142 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int)) {
    143 
    144         BitBlockForwardIterator end;
    145         int pos, block_pos;
    146 
    147         while(start != end) {
    148 
    149                 block_pos = block_base + *start;
    150                 int rv = is_valid(block_pos);
    151 
    152                 if (rv) {
    153                         int error_line, error_column;
    154                         tracker.get_Line_and_Column(block_pos, error_line, error_column);
    155                         ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
    156                         exit(-1);
    157                 }
    158                 start++;
    159         }
    160 }
    161 
    162 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int buffer_base, int is_valid(int,int)) {
    163 
    164         BitBlockForwardIterator end;
    165         int pos, block_pos, file_pos;
    166 
    167         while(start != end) {
    168 
    169                 block_pos = block_base + *start;
    170                 file_pos = block_pos+buffer_base;
    171 
    172 
    173                 int rv = is_valid(block_pos, file_pos);
    174 
    175                 if (rv) {
    176                         int error_line, error_column;
    177                         tracker.get_Line_and_Column(block_pos, error_line, error_column);
    178                         ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
    179                         exit(-1);
    180                 }
    181                 start++;
    182         }
    183 }
    184 
    185109static inline void postprocess_do_block(Dictionary& dictionary){
    186110
     
    199123}
    200124
    201 static inline void print_GIDS()
    202 {
    203     int span_count = gids.size();
    204     for(int i=0;i<span_count;i++) {
    205              cout << gids[i] << " ";
    206     }
    207     cout << endl;
    208 }
    209 
    210 void do_process(FILE *infile, FILE *outfile) {
     125template<bool allow_performance_check> void do_process(FILE *infile, FILE *outfile) {
    211126
    212127@decl
     
    232147
    233148  if (e->content_start != 0) {
    234         memmove(&srcbuf[0], &srcbuf[e->content_start], chars_read - e->content_start);
     149        memmove(&srcbuf[0], &srcbuf[e->content_start], chars_read - e->content_start);
    235150        buf_pos = e->content_start;
    236151        buffer_base = buf_pos;
    237         if (chars_avail == BUFFER_SIZE) {
    238                 chars_read = chars_read - e->content_start +
     152        if (chars_avail == BUFFER_SIZE) {
     153                chars_read = chars_read - e->content_start +
    239154                             fread(&srcbuf[chars_read-e->content_start], 1, e->content_start, infile);
    240                 chars_avail = chars_read;
     155                chars_avail = chars_read;
    241156                if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
    242         }
    243         else {
    244           chars_read -=e->content_start;
     157        }
     158        else {
     159          chars_read -=e->content_start;
    245160          chars_avail -=e->content_start;
    246161        }
     
    252167
    253168    while (chars_avail == BUFFER_SIZE) {
    254       PERF_SEC_START(parser_timer);
     169      if (allow_performance_check)
     170      {
     171        PERF_SEC_START(parser_timer);
     172      }
     173
    255174      for (int blk = 0; blk < SEGMENT_BLOCKS; blk++) {
    256175          block_base = blk*BLOCK_SIZE;
    257           s2p_do_block((BytePack *) &srcbuf[block_base], basis_bits);
     176          s2p_do_block((BytePack *) &srcbuf[block_base], basis_bits);
    258177          @block_stmts
    259           postprocess_do_block(dictionary);
     178          postprocess_do_block(dictionary);
    260179      }
    261       PERF_SEC_END(parser_timer, chars_avail);
    262            
     180
     181      if (allow_performance_check)
     182      {
     183        PERF_SEC_END(parser_timer, chars_avail);
     184      }
    263185      int bytes_left = chars_read - chars_avail;
    264186      memmove(buf, &srcbuf[BUFFER_SIZE - OVERLAP_BUFSIZE], bytes_left + OVERLAP_BUFSIZE);
     
    270192    }
    271193/* Final Partial Buffer */
    272     PERF_SEC_START(parser_timer);
     194    if (allow_performance_check)
     195    {
     196        PERF_SEC_START(parser_timer);
     197    }
    273198
    274199    block_pos = 0;
     
    276201/* Full Blocks */
    277202    while (remaining >= BLOCK_SIZE) {
    278           block_base = block_pos;
    279           s2p_do_block((BytePack *) &srcbuf[block_pos], basis_bits);
    280           @block_stmts
    281           postprocess_do_block(dictionary);
    282           block_pos += BLOCK_SIZE;
    283           remaining -= BLOCK_SIZE;
     203          block_base = block_pos;
     204          s2p_do_block((BytePack *) &srcbuf[block_pos], basis_bits);
     205          @block_stmts
     206          postprocess_do_block(dictionary);
     207          block_pos += BLOCK_SIZE;
     208          remaining -= BLOCK_SIZE;
    284209    }
    285210    block_base = block_pos;
    286211    if (remaining > 0 || @any_carry) {
    287212          EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-remaining));
    288           s2p_do_final_block((BytePack *) &srcbuf[block_pos], basis_bits, EOF_mask);
    289           @final_block_stmts
    290           postprocess_do_block(dictionary);
     213          s2p_do_final_block((BytePack *) &srcbuf[block_pos], basis_bits, EOF_mask);
     214          @final_block_stmts
     215          postprocess_do_block(dictionary);
    291216    }
    292217    buf_pos += chars_avail;
    293218    buffer_base = buf_pos;
    294 
    295     PERF_SEC_END(parser_timer, chars_avail);
    296 
    297 #if DEBUG
    298     print_GIDS();
    299 #endif
    300 }
     219    if (allow_performance_check)
     220    {
     221        PERF_SEC_END(parser_timer, chars_avail);
     222    }
     223}
  • proto/SymbolTable/wcd_identity_template.cpp

    r1688 r1721  
    1 //#define USE_ITER
    2 
    3 #include "../symtab_global.h"
     1#include "../common_definitions.h"
    42#include <pbgs_identity_symbol_table.h>
     3
     4#include "../wcd_common_functions.h"
     5#include "../symtab_common_functions.h"
     6#include "parser_common_functions_generated.h"
    57
    68#ifdef BUFFER_PROFILING
     
    2224int buffer_last;
    2325char * source;
    24 BitBlock EOF_mask = simd<1>::constant<1>();
    2526
    2627BitBlock elem_ends;
     
    3233PBGSIdentitySymbolTable pbgs_symbol_table;
    3334
    34 @global
    35 
    36 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits);
    37 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask);
    3835static inline void postprocess_do_block(Dictionary& dictionary, Hash_data hash_data);
    39 
    40 void do_process(FILE *infile, FILE *outfile);
     36template<bool allow_performance_check> void do_process(FILE *infile, FILE *outfile);
    4137
    4238static inline void validate_block_length_grouping(BitBlockForwardIterator & start, int block_base);
    43 
    4439static inline int ElemStart_grouping(int start_pos, int L) ;
    45 static inline int StreamScanLengthGrouping(ScanBlock * stream, int blk_count);
    46 static inline int ScanForwardPos(BitBlock * block, int pos);
    47 static inline int compute_hash_value (int lgth, int start);
    4840
    4941int main(int argc, char * argv[]) {
    50         char * infilename, * outfilename;
    51         FILE *infile, *outfile;
    52         struct stat fileinfo;
    53 
    54         if (argc < 2) {
    55                 printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
    56                 exit(-1);
    57         }
    58 
    59         infilename = argv[1];
    60         stat(infilename, &fileinfo);
    61         infile = fopen(infilename, "rb");
    62         if (!infile) {
    63                 fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
    64                 exit(-1);
    65         }
    66 
    67         if (argc < 3) outfile = stdout;
    68         else {
    69                 outfilename = argv[2];
    70                 outfile = fopen(outfilename, "wb");
    71                 if (!outfile) {
    72                         fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
    73                         exit(-1);
    74                 }
    75         }
     42    char * dictionaryfilename, * infilename, * outfilename;
     43    FILE * dictionaryfile, *infile, *outfile;
     44
     45    getFilenames(argc, argv, dictionaryfilename, infilename, outfilename);
     46    openInputOutputFiles(dictionaryfilename, infilename, outfilename,
     47                         dictionaryfile, infile, outfile);
     48
     49    int greatest_GID_in_dictionary;
     50    populateDictionary(dictionaryfile, outfile, gids, greatest_GID_in_dictionary);
    7651
    7752//      PERF_SEC_BIND(1);
    7853
    79         PERF_SEC_INIT(parser_timer);
    80 
    81         do_process(infile, outfile);
    82 
    83         PERF_SEC_DUMP(parser_timer);
    84 
    85         PERF_SEC_DESTROY(parser_timer);
    86 
    87         fclose(infile);
    88         fclose(outfile);
    89 
    90         return(0);
    91 }
    92 
    93 /* s2p Definitions */
    94 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
    95   s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
    96         basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
    97 }
    98 
    99 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
    100   s2p_do_block(U8, basis_bits);
    101   basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
    102   basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
    103   basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
    104   basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
    105   basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
    106   basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
    107   basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
    108   basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
    109 }
    110 
    111 static inline int ScanForwardPos(BitBlock * block, int pos)
    112 {
    113     BitBlock s = block[0];
    114     BitBlock temp = simd_and(s, simd<128>::sll(simd<2>::constant<3>(), convert(pos)));
    115 
    116     if (bitblock_has_bit(temp))
    117     {
    118         return count_forward_zeroes (temp);
    119     }
    120     else
    121     {
    122         //handle boundary case
    123         block_boundary_case = true;
    124         last_elem_start = pos - BLOCK_SIZE;
    125         return 0;
    126     }
     54    PERF_SEC_INIT(parser_timer);
     55
     56    // store symbols from text to Symbol Table
     57    do_process<true>(infile, outfile);
     58
     59    PERF_SEC_DUMP(parser_timer);
     60
     61    PERF_SEC_DESTROY(parser_timer);
     62
     63    // gather dictionary statistics
     64    int totalKnownWordsInDictionary, totalUnknownWordsInDictionary;
     65    wordCountInDictionary(greatest_GID_in_dictionary, gids, totalUnknownWordsInDictionary, totalKnownWordsInDictionary);
     66    printWordCountInDictionary(totalUnknownWordsInDictionary, totalKnownWordsInDictionary);
     67
     68    fclose(dictionaryfile);
     69    fclose(infile);
     70    fclose(outfile);
     71
     72#if PRINT_SYMBOL_DISTRIBUTION
     73//    print_GIDS();
     74    pbgs_symbol_table.Print_Symbol_Table_Distribution();
     75#endif
    12776}
    12877
     
    219168            ElemStart_grouping(start_pos, lgth);
    220169        }
     170        else
     171        {
     172            //handle boundary case
     173            block_boundary_case = true;
     174            last_elem_start = start_pos - BLOCK_SIZE;
     175        }
    221176        start++;
    222177    }
     
    245200}
    246201
    247 static inline void print_GIDS()
    248 {
    249     int span_count = gids.size();
    250     for(int i=0;i<span_count;i++) {
    251              cout << gids[i] << " ";
    252     }
    253     cout << endl;
    254 }
    255 
    256 void do_process(FILE *infile, FILE *outfile) {
     202template<bool allow_performance_check> void do_process(FILE *infile, FILE *outfile) {
    257203
    258204@decl
     
    278224
    279225  if (e->content_start != 0) {
    280         memmove(&srcbuf[0], &srcbuf[e->content_start], chars_read - e->content_start);
     226        memmove(&srcbuf[0], &srcbuf[e->content_start], chars_read - e->content_start);
    281227        buf_pos = e->content_start;
    282228        buffer_base = buf_pos;
    283         if (chars_avail == BUFFER_SIZE) {
    284                 chars_read = chars_read - e->content_start +
     229        if (chars_avail == BUFFER_SIZE) {
     230                chars_read = chars_read - e->content_start +
    285231                             fread(&srcbuf[chars_read-e->content_start], 1, e->content_start, infile);
    286                 chars_avail = chars_read;
     232                chars_avail = chars_read;
    287233                if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
    288         }
    289         else {
    290           chars_read -=e->content_start;
     234        }
     235        else {
     236          chars_read -=e->content_start;
    291237          chars_avail -=e->content_start;
    292238        }
     
    298244
    299245    while (chars_avail == BUFFER_SIZE) {
    300       PERF_SEC_START(parser_timer);
     246      if (allow_performance_check)
     247      {
     248        PERF_SEC_START(parser_timer);
     249      }
     250
    301251      for (int blk = 0; blk < SEGMENT_BLOCKS; blk++) {
    302252          block_base = blk*BLOCK_SIZE;
    303           s2p_do_block((BytePack *) &srcbuf[block_base], basis_bits);
     253          s2p_do_block((BytePack *) &srcbuf[block_base], basis_bits);
    304254          @block_stmts
    305255          postprocess_do_block(dictionary, hash_data);
    306256      }
    307       PERF_SEC_END(parser_timer, chars_avail);
    308            
     257
     258      if (allow_performance_check)
     259      {
     260        PERF_SEC_END(parser_timer, chars_avail);
     261      }
    309262      int bytes_left = chars_read - chars_avail;
    310263      memmove(buf, &srcbuf[BUFFER_SIZE - OVERLAP_BUFSIZE], bytes_left + OVERLAP_BUFSIZE);
     
    316269    }
    317270/* Final Partial Buffer */
    318     PERF_SEC_START(parser_timer);
     271    if (allow_performance_check)
     272    {
     273        PERF_SEC_START(parser_timer);
     274    }
    319275
    320276    block_pos = 0;
     
    322278/* Full Blocks */
    323279    while (remaining >= BLOCK_SIZE) {
    324           block_base = block_pos;
    325           s2p_do_block((BytePack *) &srcbuf[block_pos], basis_bits);
    326           @block_stmts
    327           postprocess_do_block(dictionary, hash_data);
    328           block_pos += BLOCK_SIZE;
    329           remaining -= BLOCK_SIZE;
     280          block_base = block_pos;
     281          s2p_do_block((BytePack *) &srcbuf[block_pos], basis_bits);
     282          @block_stmts
     283          postprocess_do_block(dictionary, hash_data);
     284          block_pos += BLOCK_SIZE;
     285          remaining -= BLOCK_SIZE;
    330286    }
    331287    block_base = block_pos;
    332288    if (remaining > 0 || @any_carry) {
    333289          EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-remaining));
    334           s2p_do_final_block((BytePack *) &srcbuf[block_pos], basis_bits, EOF_mask);
    335           @final_block_stmts
    336           postprocess_do_block(dictionary, hash_data);
     290          s2p_do_final_block((BytePack *) &srcbuf[block_pos], basis_bits, EOF_mask);
     291          @final_block_stmts
     292          postprocess_do_block(dictionary, hash_data);
    337293    }
    338294    buf_pos += chars_avail;
    339295    buffer_base = buf_pos;
    340 
    341     PERF_SEC_END(parser_timer, chars_avail);
    342 
    343 #if DEBUG
    344 //    print_GIDS();
    345     pbgs_symbol_table.Print_Symbol_Table_Distribution();
    346 #endif
    347 }
     296    if (allow_performance_check)
     297    {
     298        PERF_SEC_END(parser_timer, chars_avail);
     299    }
     300}
  • proto/SymbolTable/wcd_ls_template.cpp

    r1688 r1721  
    1 #include "../symtab_global.h"
     1// WARNING: This implementation does not work; it will give the wrong result and/or crash.
     2#error "This implementation does not run, it will give the wrong result and/or crash."
     3
     4#define USE_LS_SYMBOL_TABLE
     5
     6#include "../common_definitions.h"
    27#include <ls_symbol_table.h>
     8
     9#include "../wcd_common_functions.h"
     10#include "../symtab_common_functions.h"
     11#include "parser_common_functions_generated.h"
    312
    413#ifdef BUFFER_PROFILING
     
    2332queue <size_t> elem_starts_buf;
    2433queue <size_t> elem_ends_buf;
     34vector <int> gids;
    2535LSSymbolTable ls_symbol_table;
    2636LineColTracker tracker;
    2737
    28 @global
    29 
    30 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits);
    31 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask);
    3238static inline void postprocess_do_block(Dictionary& dictionary);
    3339
    34 void do_process(FILE *infile, FILE *outfile);
     40template<bool allow_performance_check, bool finalize_gids> void do_process(FILE *infile, FILE *outfile);
    3541
    3642static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int));
     
    3945
    4046int main(int argc, char * argv[]) {
    41         char * infilename, * outfilename;
    42         FILE *infile, *outfile;
    43         struct stat fileinfo;
    44 
    45         if (argc < 2) {
    46                 printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
    47                 exit(-1);
    48         }
    49 
    50         infilename = argv[1];
    51         stat(infilename, &fileinfo);
    52         infile = fopen(infilename, "rb");
    53         if (!infile) {
    54                 fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
    55                 exit(-1);
    56         }
    57 
    58         if (argc < 3) outfile = stdout;
    59         else {
    60                 outfilename = argv[2];
    61                 outfile = fopen(outfilename, "wb");
    62                 if (!outfile) {
    63                         fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
    64                         exit(-1);
    65                 }
    66         }
     47    char * dictionaryfilename, * infilename, * outfilename;
     48    FILE * dictionaryfile, *infile, *outfile;
     49
     50    getFilenames(argc, argv, dictionaryfilename, infilename, outfilename);
     51    openInputOutputFiles(dictionaryfilename, infilename, outfilename,
     52                         dictionaryfile, infile, outfile);
     53    int greatest_GID_in_dictionary;
     54    populateDictionary(dictionaryfile, outfile, gids, greatest_GID_in_dictionary);
     55
     56    ls_symbol_table.clear();
     57    rewind (dictionaryfile);
     58    do_process<true, false>(dictionaryfile, outfile);
    6759
    6860//      PERF_SEC_BIND(1);
    6961
    70         PERF_SEC_INIT(parser_timer);
    71 
    72         do_process(infile, outfile);
    73 
    74         PERF_SEC_DUMP(parser_timer);
    75 
    76         PERF_SEC_DESTROY(parser_timer);
    77 
    78         fclose(infile);
    79         fclose(outfile);
     62    PERF_SEC_INIT(parser_timer);
     63
     64    // store symbols from text to Symbol Table
     65    do_process<true, true>(infile, outfile);
     66
     67    PERF_SEC_DUMP(parser_timer);
     68
     69    PERF_SEC_DESTROY(parser_timer);
     70
     71    //print_GIDS(ls_symbol_table);
     72
     73    // gather dictionary statistics
     74    int totalKnownWordsInDictionary, totalUnknownWordsInDictionary;
     75    wordCountInDictionary(greatest_GID_in_dictionary, gids, totalUnknownWordsInDictionary, totalKnownWordsInDictionary);
     76    printWordCountInDictionary(totalUnknownWordsInDictionary, totalKnownWordsInDictionary);
     77
     78    fclose(dictionaryfile);
     79    fclose(infile);
     80    fclose(outfile);
     81
     82#if PRINT_SYMBOL_DISTRIBUTION
     83        print_GIDS(ls_symbol_table);
     84#endif
    8085
    8186        return(0);
    82 }
    83 
    84 /* s2p Definitions */
    85 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
    86   s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
    87         basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
    88 }
    89 
    90 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
    91   s2p_do_block(U8, basis_bits);
    92   basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
    93   basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
    94   basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
    95   basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
    96   basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
    97   basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
    98   basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
    99   basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
    10087}
    10188
     
    194181}
    195182
    196 static inline void print_GIDS()
    197 {
    198     ls_symbol_table.display_flattened_symbol_values();
    199     ls_symbol_table.display_flattened_gids();
    200 }
    201 
    202 void do_process(FILE *infile, FILE *outfile) {
     183template<bool allow_performance_check, bool finalize_gids> void do_process(FILE *infile, FILE *outfile) {
    203184
    204185@decl
     
    244225
    245226    while (chars_avail == BUFFER_SIZE) {
    246       PERF_SEC_START(parser_timer);
     227      if (allow_performance_check)
     228      {
     229        PERF_SEC_START(parser_timer);
     230      }
    247231      for (int blk = 0; blk < SEGMENT_BLOCKS; blk++) {
    248232          block_base = blk*BLOCK_SIZE;
     
    251235          postprocess_do_block(dictionary);
    252236      }
    253       PERF_SEC_END(parser_timer, chars_avail);
     237      if (allow_performance_check)
     238      {
     239        PERF_SEC_END(parser_timer, chars_avail);
     240      }
    254241
    255242      int bytes_left = chars_read - chars_avail;
     
    262249    }
    263250/* Final Partial Buffer */
    264     PERF_SEC_START(parser_timer);
     251    if (allow_performance_check)
     252    {
     253      PERF_SEC_START(parser_timer);
     254    }
    265255
    266256    block_pos = 0;
     
    285275    buffer_base = buf_pos;
    286276
    287     ls_symbol_table.bind();
    288     ls_symbol_table.finalize();
    289     PERF_SEC_END(parser_timer, chars_avail);
    290 
    291 #if DEBUG
    292     print_GIDS();
    293 #endif
    294 }
     277    if (finalize_gids)
     278    {
     279        ls_symbol_table.bind();
     280        ls_symbol_table.finalize();
     281    }
     282
     283    if (allow_performance_check)
     284    {
     285      PERF_SEC_END(parser_timer, chars_avail);
     286    }
     287
     288    // get gids
     289    if (finalize_gids)
     290    {
     291        gids = ls_symbol_table.get_flattened_gids();
     292    }
     293}
  • proto/SymbolTable/wcd_pbgs_div_template.cpp

    r1688 r1721  
    1 #include "../symtab_global.h"
     1#include "../common_definitions.h"
    22#include <pbgs_div_symbol_table.h>
     3
     4#include "../wcd_common_functions.h"
     5#include "../symtab_common_functions.h"
     6#include "parser_common_functions_generated.h"
    37
    48#ifdef BUFFER_PROFILING
     
    2024int buffer_last;
    2125char * source;
    22 BitBlock EOF_mask = simd<1>::constant<1>();
    2326
    2427BitBlock elem_ends;
     
    3033PBGSDivSymbolTable pbgs_symbol_table;
    3134
    32 @global
    33 
    34 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits);
    35 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask);
    3635static inline void postprocess_do_block(Dictionary& dictionary, Hash_data hash_data);
    37 
    38 void do_process(FILE *infile, FILE *outfile);
     36template<bool allow_performance_check> void do_process(FILE *infile, FILE *outfile);
    3937
    4038template <int L> static inline void validate_block_length_grouping(BitBlockForwardIterator & start, int block_base);
    4139
    42 static inline int ScanForwardPos(BitBlock * block, int pos);
    43 static inline int compute_hash_value (int lgth, int start);
    4440static inline int ElemStart_grouping(int start_pos, int lgth); // lgth > 16
    4541template <int L> static inline int ElemEnd_grouping(int pos, int length);
    46 template <int L> static inline int StreamScanLengthGrouping(ScanBlock * stream, int blk_count);
    4742
    4843int main(int argc, char * argv[]) {
    49         char * infilename, * outfilename;
    50         FILE *infile, *outfile;
    51         struct stat fileinfo;
    52 
    53         if (argc < 2) {
    54                 printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
    55                 exit(-1);
    56         }
    57 
    58         infilename = argv[1];
    59         stat(infilename, &fileinfo);
    60         infile = fopen(infilename, "rb");
    61         if (!infile) {
    62                 fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
    63                 exit(-1);
    64         }
    65 
    66         if (argc < 3) outfile = stdout;
    67         else {
    68                 outfilename = argv[2];
    69                 outfile = fopen(outfilename, "wb");
    70                 if (!outfile) {
    71                         fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
    72                         exit(-1);
    73                 }
    74         }
     44    char * dictionaryfilename, * infilename, * outfilename;
     45    FILE * dictionaryfile, *infile, *outfile;
     46
     47    getFilenames(argc, argv, dictionaryfilename, infilename, outfilename);
     48    openInputOutputFiles(dictionaryfilename, infilename, outfilename,
     49                         dictionaryfile, infile, outfile);
     50
     51    int greatest_GID_in_dictionary;
     52    populateDictionary(dictionaryfile, outfile, gids, greatest_GID_in_dictionary);
    7553
    7654//      PERF_SEC_BIND(1);
    7755
    78         PERF_SEC_INIT(parser_timer);
    79 
    80         do_process(infile, outfile);
    81 
    82         PERF_SEC_DUMP(parser_timer);
    83 
    84         PERF_SEC_DESTROY(parser_timer);
    85 
    86         fclose(infile);
    87         fclose(outfile);
     56    PERF_SEC_INIT(parser_timer);
     57
     58    // store symbols from text to Symbol Table
     59    do_process<true>(infile, outfile);
     60
     61    PERF_SEC_DUMP(parser_timer);
     62
     63    PERF_SEC_DESTROY(parser_timer);
     64
     65    // gather dictionary statistics
     66    int totalKnownWordsInDictionary, totalUnknownWordsInDictionary;
     67    wordCountInDictionary(greatest_GID_in_dictionary, gids, totalUnknownWordsInDictionary, totalKnownWordsInDictionary);
     68    printWordCountInDictionary(totalUnknownWordsInDictionary, totalKnownWordsInDictionary);
     69
     70    fclose(dictionaryfile);
     71    fclose(infile);
     72    fclose(outfile);
     73
     74#if PRINT_SYMBOL_DISTRIBUTION
     75//    print_GIDS();
     76    pbgs_symbol_table.Print_Symbol_Table_Distribution();
     77#endif
    8878
    8979        return(0);
    90 }
    91 
    92 /* s2p Definitions */
    93 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
    94   s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
    95         basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
    96 }
    97 
    98 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
    99   s2p_do_block(U8, basis_bits);
    100   basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
    101   basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
    102   basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
    103   basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
    104   basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
    105   basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
    106   basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
    107   basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
    108 }
    109 
    110 static inline int ScanForwardPos(BitBlock * block, int pos)
    111 {
    112     BitBlock s = block[0];
    113     BitBlock temp = simd_and(s, simd<128>::sll(simd<2>::constant<3>(), convert(pos)));
    114 
    115     if (bitblock_has_bit(temp))
    116     {
    117         return count_forward_zeroes (temp);
    118     }
    119     return 0;
    120 }
    121 
    122 static inline int compute_hash_value (int lgth, int start)
    123 {
    124     unsigned int offset_bit = start + 128;
    125     uint64_t stream = *((uint64_t*)(((uint32_t*)hashvalues)+(offset_bit>>5)));
    126     return stream >> (offset_bit & 0x1F) & ~(~0 << lgth);
    12780}
    12881
     
    13184static inline int ElemEnd_grouping(int end) {
    13285    int start = end - L;
    133     int hashvalue = compute_hash_value(L, start - block_base);
     86    int hashvalue = compute_hash_value(L, start - block_base, hashvalues);
    13487    int gid = pbgs_symbol_table.Lookup_or_Insert_Name<L>(source + start, hashvalue);
    13588    gids.push_back(gid);
     
    14699// length > 16
    147100static inline int ElemStart_grouping(int start, int lgth) {
    148     int hashvalue = compute_hash_value(lgth, start - block_base);
     101    int hashvalue = compute_hash_value(lgth, start - block_base, hashvalues);
    149102    int gid = pbgs_symbol_table.Lookup_or_Insert_Name(source + start, hashvalue, lgth);
    150103    gids.push_back(gid);
     
    223176        int lgth = count_forward_zeroes(elem_ends)-last_elem_start;
    224177        int start = block_base + last_elem_start;
    225         int hashvalue = compute_hash_value(lgth, last_elem_start);
     178        int hashvalue = compute_hash_value(lgth, last_elem_start, hashvalues);
    226179        int gid = pbgs_symbol_table.Lookup_or_Insert_Name(source + start, hashvalue, lgth);
    227180        gids.push_back(gid);
     
    290243}
    291244
    292 static inline void print_GIDS()
    293 {
    294     int span_count = gids.size();
    295     for(int i=0;i<span_count;i++) {
    296              cout << gids[i] << " ";
    297     }
    298     cout << endl;
    299 }
    300 
    301 void do_process(FILE *infile, FILE *outfile) {
     245template<bool allow_performance_check> void do_process(FILE *infile, FILE *outfile) {
    302246
    303247@decl
     
    323267
    324268  if (e->content_start != 0) {
    325         memmove(&srcbuf[0], &srcbuf[e->content_start], chars_read - e->content_start);
     269        memmove(&srcbuf[0], &srcbuf[e->content_start], chars_read - e->content_start);
    326270        buf_pos = e->content_start;
    327271        buffer_base = buf_pos;
    328         if (chars_avail == BUFFER_SIZE) {
    329                 chars_read = chars_read - e->content_start +
     272        if (chars_avail == BUFFER_SIZE) {
     273                chars_read = chars_read - e->content_start +
    330274                             fread(&srcbuf[chars_read-e->content_start], 1, e->content_start, infile);
    331                 chars_avail = chars_read;
     275                chars_avail = chars_read;
    332276                if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
    333         }
    334         else {
    335           chars_read -=e->content_start;
     277        }
     278        else {
     279          chars_read -=e->content_start;
    336280          chars_avail -=e->content_start;
    337281        }
     
    343287
    344288    while (chars_avail == BUFFER_SIZE) {
    345       PERF_SEC_START(parser_timer);
     289      if (allow_performance_check)
     290      {
     291        PERF_SEC_START(parser_timer);
     292      }
     293
    346294      for (int blk = 0; blk < SEGMENT_BLOCKS; blk++) {
    347295          block_base = blk*BLOCK_SIZE;
    348           s2p_do_block((BytePack *) &srcbuf[block_base], basis_bits);
     296          s2p_do_block((BytePack *) &srcbuf[block_base], basis_bits);
    349297          @block_stmts
    350298          postprocess_do_block(dictionary, hash_data);
    351299      }
    352       PERF_SEC_END(parser_timer, chars_avail);
    353            
     300
     301      if (allow_performance_check)
     302      {
     303        PERF_SEC_END(parser_timer, chars_avail);
     304      }
    354305      int bytes_left = chars_read - chars_avail;
    355306      memmove(buf, &srcbuf[BUFFER_SIZE - OVERLAP_BUFSIZE], bytes_left + OVERLAP_BUFSIZE);
     
    361312    }
    362313/* Final Partial Buffer */
    363     PERF_SEC_START(parser_timer);
     314    if (allow_performance_check)
     315    {
     316        PERF_SEC_START(parser_timer);
     317    }
    364318
    365319    block_pos = 0;
     
    367321/* Full Blocks */
    368322    while (remaining >= BLOCK_SIZE) {
    369           block_base = block_pos;
    370           s2p_do_block((BytePack *) &srcbuf[block_pos], basis_bits);
    371           @block_stmts
    372           postprocess_do_block(dictionary, hash_data);
    373           block_pos += BLOCK_SIZE;
    374           remaining -= BLOCK_SIZE;
     323          block_base = block_pos;
     324          s2p_do_block((BytePack *) &srcbuf[block_pos], basis_bits);
     325          @block_stmts
     326          postprocess_do_block(dictionary, hash_data);
     327          block_pos += BLOCK_SIZE;
     328          remaining -= BLOCK_SIZE;
    375329    }
    376330    block_base = block_pos;
    377331    if (remaining > 0 || @any_carry) {
    378332          EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-remaining));
    379           s2p_do_final_block((BytePack *) &srcbuf[block_pos], basis_bits, EOF_mask);
    380           @final_block_stmts
    381           postprocess_do_block(dictionary, hash_data);
     333          s2p_do_final_block((BytePack *) &srcbuf[block_pos], basis_bits, EOF_mask);
     334          @final_block_stmts
     335          postprocess_do_block(dictionary, hash_data);
    382336    }
    383337    buf_pos += chars_avail;
    384338    buffer_base = buf_pos;
    385 
    386     PERF_SEC_END(parser_timer, chars_avail);
    387 
    388 #if DEBUG
    389 //    print_GIDS();
    390     pbgs_symbol_table.Print_Symbol_Table_Distribution();
    391 #endif
    392 }
     339    if (allow_performance_check)
     340    {
     341        PERF_SEC_END(parser_timer, chars_avail);
     342    }
     343}
  • proto/SymbolTable/wcd_pbgs_identity_template.cpp

    r1688 r1721  
    1 #include "../symtab_global.h"
     1#include "../common_definitions.h"
    22#include <pbgs_identity_symbol_table.h>
     3
     4#include "../wcd_common_functions.h"
     5#include "../symtab_common_functions.h"
     6#include "parser_common_functions_generated.h"
    37
    48#ifdef BUFFER_PROFILING
     
    2024int buffer_last;
    2125char * source;
    22 LineColTracker tracker;
    23 TagMatcher matcher;
    24 BitBlock EOF_mask = simd<1>::constant<1>();
    25 ErrorTracker error_tracker;
    2626
    2727BitBlock elem_starts;
     
    3232PBGSIdentitySymbolTable pbgs_symbol_table;
    3333
    34 @global
    35 
    36 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits);
    37 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask);
    3834static inline void postprocess_do_block(Dictionary& dictionary, Hash_data hash_data);
    39 
    40 void do_process(FILE *infile, FILE *outfile);
     35template<bool allow_performance_check> void do_process(FILE *infile, FILE *outfile);
    4136
    4237template <int L> static inline void validate_block_length_grouping(BitBlockForwardIterator & start, int block_base);
    43 
    44 static inline int ScanBackwardPos(BitBlock * block, int pos);
    45 static inline int compute_hash_value (int lgth, int start);
    4638template <int L> static inline int ElemEnd_grouping(int pos);
    4739
    4840int main(int argc, char * argv[]) {
    49         char * infilename, * outfilename;
    50         FILE *infile, *outfile;
    51         struct stat fileinfo;
    52 
    53         if (argc < 2) {
    54                 printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
    55                 exit(-1);
    56         }
    57 
    58         infilename = argv[1];
    59         stat(infilename, &fileinfo);
    60         infile = fopen(infilename, "rb");
    61         if (!infile) {
    62                 fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
    63                 exit(-1);
    64         }
    65 
    66         if (argc < 3) outfile = stdout;
    67         else {
    68                 outfilename = argv[2];
    69                 outfile = fopen(outfilename, "wb");
    70                 if (!outfile) {
    71                         fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
    72                         exit(-1);
    73                 }
    74         }
     41    char * dictionaryfilename, * infilename, * outfilename;
     42    FILE * dictionaryfile, *infile, *outfile;
     43
     44    getFilenames(argc, argv, dictionaryfilename, infilename, outfilename);
     45    openInputOutputFiles(dictionaryfilename, infilename, outfilename,
     46                         dictionaryfile, infile, outfile);
     47
     48    int greatest_GID_in_dictionary;
     49    populateDictionary(dictionaryfile, outfile, gids, greatest_GID_in_dictionary);
    7550
    7651//      PERF_SEC_BIND(1);
    7752
    78         PERF_SEC_INIT(parser_timer);
    79 
    80         do_process(infile, outfile);
    81 
    82         PERF_SEC_DUMP(parser_timer);
    83 
    84         PERF_SEC_DESTROY(parser_timer);
    85 
    86         fclose(infile);
    87         fclose(outfile);
    88 
    89         return(0);
    90 }
    91 
    92 /* s2p Definitions */
    93 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
    94   s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
    95         basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
    96 }
    97 
    98 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
    99   s2p_do_block(U8, basis_bits);
    100   basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
    101   basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
    102   basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
    103   basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
    104   basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
    105   basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
    106   basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
    107   basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
    108 }
    109 
    110 static inline int ScanBackwardPos(BitBlock * block, int pos)
    111 {
    112     BitBlock s = block[0];
    113     BitBlock temp = simd_and( s, simd_not(simd<128>::sll(simd<2>::constant<3>(), convert(pos))) );
    114 
    115     if (bitblock_has_bit(temp))
    116     {
    117         // sizeof (BitBlock)*8 - cbzl( s & ~(~0 << pos)) - 1;
    118         return BLOCK_SIZE - count_reverse_zeroes (temp) - 1;
    119     }
    120     else
    121     {
    122         //handle boundary case
    123         return previous_block_last_elem_start - 1;
    124     }
    125 }
    126 
    127 static inline int compute_hash_value (int lgth, int start)
    128 {
    129     unsigned int offset_bit = start + 128;
    130     uint64_t stream = *((uint64_t*)(((uint32_t*)hashvalues)+(offset_bit>>5)));
    131     return stream >> (offset_bit & 0x1F) & ~(~0 << lgth);
     53    PERF_SEC_INIT(parser_timer);
     54
     55    // store symbols from text to Symbol Table
     56    do_process<true>(infile, outfile);
     57
     58    PERF_SEC_DUMP(parser_timer);
     59
     60    PERF_SEC_DESTROY(parser_timer);
     61
     62    // gather dictionary statistics
     63    int totalKnownWordsInDictionary, totalUnknownWordsInDictionary;
     64    wordCountInDictionary(greatest_GID_in_dictionary, gids, totalUnknownWordsInDictionary, totalKnownWordsInDictionary);
     65    printWordCountInDictionary(totalUnknownWordsInDictionary, totalKnownWordsInDictionary);
     66
     67    fclose(dictionaryfile);
     68    fclose(infile);
     69    fclose(outfile);
     70
     71#if PRINT_SYMBOL_DISTRIBUTION
     72//    print_GIDS();
     73    pbgs_symbol_table.Print_Symbol_Table_Distribution();
     74#endif
     75
     76    return(0);
    13277}
    13378
     
    13580static inline int ElemEnd_grouping(int end) {
    13681    int start = end - L;
    137     int hashvalue = compute_hash_value(L, start - block_base);
     82    int hashvalue = compute_hash_value(L, start - block_base, hashvalues);
    13883    int gid = pbgs_symbol_table.Lookup_or_Insert_Name<L>(source + start, hashvalue);
    13984    gids.push_back(gid);
     
    15095template<>
    15196inline int ElemEnd_grouping<17>(int end) {
    152     int start = ScanBackwardPos (&elem_starts, end - block_base) + block_base;
     97    int start = ScanBackwardPos (&elem_starts, end - block_base, previous_block_last_elem_start) + block_base;
    15398    int lgth = end - start;
    154     int hashvalue = compute_hash_value(lgth, start - block_base);
     99    int hashvalue = compute_hash_value(lgth, start - block_base, hashvalues);
    155100    int gid = 0;
    156101
     
    300245}
    301246
    302 static inline void print_GIDS()
    303 {
    304     int span_count = gids.size();
    305     for(int i=0;i<span_count;i++) {
    306              cout << gids[i] << " ";
    307     }
    308     cout << endl;
    309 }
    310 
    311 void do_process(FILE *infile, FILE *outfile) {
     247template<bool allow_performance_check> void do_process(FILE *infile, FILE *outfile) {
    312248
    313249@decl
     
    333269
    334270  if (e->content_start != 0) {
    335         memmove(&srcbuf[0], &srcbuf[e->content_start], chars_read - e->content_start);
     271        memmove(&srcbuf[0], &srcbuf[e->content_start], chars_read - e->content_start);
    336272        buf_pos = e->content_start;
    337273        buffer_base = buf_pos;
    338         if (chars_avail == BUFFER_SIZE) {
    339                 chars_read = chars_read - e->content_start +
     274        if (chars_avail == BUFFER_SIZE) {
     275                chars_read = chars_read - e->content_start +
    340276                             fread(&srcbuf[chars_read-e->content_start], 1, e->content_start, infile);
    341                 chars_avail = chars_read;
     277                chars_avail = chars_read;
    342278                if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
    343         }
    344         else {
    345           chars_read -=e->content_start;
     279        }
     280        else {
     281          chars_read -=e->content_start;
    346282          chars_avail -=e->content_start;
    347283        }
     
    353289
    354290    while (chars_avail == BUFFER_SIZE) {
    355       PERF_SEC_START(parser_timer);
     291      if (allow_performance_check)
     292      {
     293        PERF_SEC_START(parser_timer);
     294      }
     295
    356296      for (int blk = 0; blk < SEGMENT_BLOCKS; blk++) {
    357297          block_base = blk*BLOCK_SIZE;
    358           s2p_do_block((BytePack *) &srcbuf[block_base], basis_bits);
     298          s2p_do_block((BytePack *) &srcbuf[block_base], basis_bits);
    359299          @block_stmts
    360300          postprocess_do_block(dictionary, hash_data);
    361301      }
    362       PERF_SEC_END(parser_timer, chars_avail);
    363            
     302
     303      if (allow_performance_check)
     304      {
     305        PERF_SEC_END(parser_timer, chars_avail);
     306      }
    364307      int bytes_left = chars_read - chars_avail;
    365308      memmove(buf, &srcbuf[BUFFER_SIZE - OVERLAP_BUFSIZE], bytes_left + OVERLAP_BUFSIZE);
     
    371314    }
    372315/* Final Partial Buffer */
    373     PERF_SEC_START(parser_timer);
     316    if (allow_performance_check)
     317    {
     318        PERF_SEC_START(parser_timer);
     319    }
    374320
    375321    block_pos = 0;
     
    377323/* Full Blocks */
    378324    while (remaining >= BLOCK_SIZE) {
    379           block_base = block_pos;
    380           s2p_do_block((BytePack *) &srcbuf[block_pos], basis_bits);
    381           @block_stmts
    382           postprocess_do_block(dictionary, hash_data);
    383           block_pos += BLOCK_SIZE;
    384           remaining -= BLOCK_SIZE;
     325          block_base = block_pos;
     326          s2p_do_block((BytePack *) &srcbuf[block_pos], basis_bits);
     327          @block_stmts
     328          postprocess_do_block(dictionary, hash_data);
     329          block_pos += BLOCK_SIZE;
     330          remaining -= BLOCK_SIZE;
    385331    }
    386332    block_base = block_pos;
    387333    if (remaining > 0 || @any_carry) {
    388334          EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-remaining));
    389           s2p_do_final_block((BytePack *) &srcbuf[block_pos], basis_bits, EOF_mask);
    390           @final_block_stmts
    391           postprocess_do_block(dictionary, hash_data);
     335          s2p_do_final_block((BytePack *) &srcbuf[block_pos], basis_bits, EOF_mask);
     336          @final_block_stmts
     337          postprocess_do_block(dictionary, hash_data);
    392338    }
    393339    buf_pos += chars_avail;
    394340    buffer_base = buf_pos;
    395 
    396     PERF_SEC_END(parser_timer, chars_avail);
    397 
    398 #if DEBUG
    399 //    print_GIDS();
    400     pbgs_symbol_table.Print_Symbol_Table_Distribution();
    401 #endif
    402 }
     341    if (allow_performance_check)
     342    {
     343        PERF_SEC_END(parser_timer, chars_avail);
     344    }
     345}
  • proto/SymbolTable/wcd_pbgs_log_template.cpp

    r1688 r1721  
    11#define USE_MASK_COMPARE    //Comparison using masking technique.
    22
    3 #include "../symtab_global.h"
     3#include "../common_definitions.h"
    44#include <pbgs_log_symbol_table.h>
     5
     6#include "../wcd_common_functions.h"
     7#include "../symtab_common_functions.h"
     8#include "parser_common_functions_generated.h"
    59
    610#ifdef BUFFER_PROFILING
     
    2226int buffer_last;
    2327char * source;
    24 BitBlock EOF_mask = simd<1>::constant<1>();
     28
    2529BitBlock elem_starts;
    2630int previous_block_last_elem_start;
     
    3034PBGSLogSymbolTable pbgs_symbol_table;
    3135
    32 @global
    33 
    34 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits);
    35 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask);
    3636static inline void postprocess_do_block(Dictionary& dictionary, Hash_data hash_data);
    37 
    38 void do_process(FILE *infile, FILE *outfile);
     37template<bool allow_performance_check> void do_process(FILE *infile, FILE *outfile);
    3938
    4039template <int L> static inline void validate_block_length_grouping(BitBlockForwardIterator & start, int block_base);
    41 
    42 static inline int ScanBackwardPos(BitBlock * block, int pos);
    43 static inline int compute_hash_value (int lgth, int start);
    4440template <int L> static inline int ElemEnd_grouping(int pos, int length);
    45 template <int L> static inline int StreamScanLengthGrouping(ScanBlock * stream, int blk_count);
    4641
    4742int main(int argc, char * argv[]) {
    48         char * infilename, * outfilename;
    49         FILE *infile, *outfile;
    50         struct stat fileinfo;
    51 
    52         if (argc < 2) {
    53                 printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
    54                 exit(-1);
    55         }
    56 
    57         infilename = argv[1];
    58         stat(infilename, &fileinfo);
    59         infile = fopen(infilename, "rb");
    60         if (!infile) {
    61                 fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
    62                 exit(-1);
    63         }
    64 
    65         if (argc < 3) outfile = stdout;
    66         else {
    67                 outfilename = argv[2];
    68                 outfile = fopen(outfilename, "wb");
    69                 if (!outfile) {
    70                         fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
    71                         exit(-1);
    72                 }
    73         }
     43    char * dictionaryfilename, * infilename, * outfilename;
     44    FILE * dictionaryfile, *infile, *outfile;
     45
     46    getFilenames(argc, argv, dictionaryfilename, infilename, outfilename);
     47    openInputOutputFiles(dictionaryfilename, infilename, outfilename,
     48                         dictionaryfile, infile, outfile);
     49
     50    int greatest_GID_in_dictionary;
     51    populateDictionary(dictionaryfile, outfile, gids, greatest_GID_in_dictionary);
    7452
    7553//      PERF_SEC_BIND(1);
    7654
    77         PERF_SEC_INIT(parser_timer);
    78 
    79         do_process(infile, outfile);
    80 
    81         PERF_SEC_DUMP(parser_timer);
    82 
    83         PERF_SEC_DESTROY(parser_timer);
    84 
    85         fclose(infile);
    86         fclose(outfile);
    87 
    88         return(0);
    89 }
    90 
    91 /* s2p Definitions */
    92 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
    93   s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
    94         basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
    95 }
    96 
    97 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
    98   s2p_do_block(U8, basis_bits);
    99   basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
    100   basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
    101   basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
    102   basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
    103   basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
    104   basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
    105   basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
    106   basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
    107 }
    108 
    109 
    110 static inline int ScanBackwardPos(BitBlock * block, int pos)
    111 {
    112     BitBlock s = block[0];
    113     BitBlock temp = simd_and( s, simd_not(simd<128>::sll(simd<2>::constant<3>(), convert(pos))) );
    114 
    115     if (bitblock_has_bit(temp))
    116     {
    117         // sizeof (BitBlock)*8 - cbzl( s & ~(~0 << pos)) - 1;
    118         return sizeof(BitBlock)*8 - count_reverse_zeroes (temp) - 1;
    119     }
    120     else
    121     {
    122         //handle boundary case
    123 #if DEBUG
    124         printf ("%s | block boundary case, return %i\n", __FUNCTION__, previous_block_last_elem_start - 1);
    125 #endif
    126         return previous_block_last_elem_start - 1;
    127     }
    128 }
    129 
    130 static inline int compute_hash_value (int lgth, int start)
    131 {
    132     unsigned int offset_bit = start + 128;
    133     uint64_t stream = *((uint64_t*)(((uint32_t*)hashvalues)+(offset_bit>>5)));
    134     return stream >> (offset_bit & 0x1F) & ~(~0 << lgth);
     55    PERF_SEC_INIT(parser_timer);
     56
     57    // store symbols from text to Symbol Table
     58    do_process<true>(infile, outfile);
     59
     60    PERF_SEC_DUMP(parser_timer);
     61
     62    PERF_SEC_DESTROY(parser_timer);
     63
     64    // gather dictionary statistics
     65    int totalKnownWordsInDictionary, totalUnknownWordsInDictionary;
     66    wordCountInDictionary(greatest_GID_in_dictionary, gids, totalUnknownWordsInDictionary, totalKnownWordsInDictionary);
     67    printWordCountInDictionary(totalUnknownWordsInDictionary, totalKnownWordsInDictionary);
     68
     69    fclose(dictionaryfile);
     70    fclose(infile);
     71    fclose(outfile);
     72
     73#if PRINT_SYMBOL_DISTRIBUTION
     74//    print_GIDS();
     75    pbgs_symbol_table.Print_Symbol_Table_Distribution();
     76#endif
    13577}
    13678
     
    14486inline int ElemEnd_grouping<1>(int pos, int length) {
    14587    int start = block_base + pos - length;
    146     int hashvalue = compute_hash_value(length, start - block_base);
     88    int hashvalue = compute_hash_value(length, start - block_base, hashvalues);
    14789    int gid = pbgs_symbol_table.Lookup_or_Insert_Name_1(source + start, hashvalue);
    14890    gids.push_back(gid);
     
    163105inline int ElemEnd_grouping<2>(int pos, int length) {
    164106    int start = block_base + pos - length;
    165     int hashvalue = compute_hash_value(length, start - block_base);
     107    int hashvalue = compute_hash_value(length, start - block_base, hashvalues);
    166108    int gid = pbgs_symbol_table.Lookup_or_Insert_Name_2(source + start, hashvalue);
    167109    gids.push_back(gid);
     
    183125inline int ElemEnd_grouping<4>(int pos, int L) {
    184126    int start = pos + block_base;
    185     int hashvalue = compute_hash_value(L, pos);
     127    int hashvalue = compute_hash_value(L, pos, hashvalues);
    186128    int gid = pbgs_symbol_table.Lookup_or_Insert_Name_4(source + start, hashvalue, L);
    187129    gids.push_back(gid);
     
    201143inline int ElemEnd_grouping<8>(int pos,  int L) {
    202144    int start = pos + block_base;
    203     int hashvalue = compute_hash_value(L, pos);
     145    int hashvalue = compute_hash_value(L, pos, hashvalues);
    204146    int gid = pbgs_symbol_table.Lookup_or_Insert_Name_8(source + start, hashvalue, L);
    205147    gids.push_back(gid);
     
    219161inline int ElemEnd_grouping<16>(int pos, int L) {
    220162    int start = pos + block_base;
    221     int hashvalue = compute_hash_value(L, pos);
     163    int hashvalue = compute_hash_value(L, pos, hashvalues);
    222164    int gid = pbgs_symbol_table.Lookup_or_Insert_Name_16(source + start, hashvalue, L);
    223165    gids.push_back(gid);
     
    237179inline int ElemEnd_grouping<17>(int pos, int lgth) {
    238180    int start = pos + block_base;
    239     int hashvalue = compute_hash_value(lgth, start - block_base);
     181    int hashvalue = compute_hash_value(lgth, start - block_base, hashvalues);
    240182    int gid = 0;
    241183
     
    270212    while(start != end) {
    271213        end_pos = /*block_base + */*start;
    272         start_pos = ScanBackwardPos (&elem_starts, end_pos);
     214        start_pos = ScanBackwardPos (&elem_starts, end_pos, previous_block_last_elem_start);
    273215        length = end_pos - start_pos;
    274216        ElemEnd_grouping<L>(start_pos, length);
     
    286228    while(start != end) {
    287229        end_pos = /*block_base + */*start;
    288         start_pos = ScanBackwardPos (&elem_starts, end_pos);
     230        start_pos = ScanBackwardPos (&elem_starts, end_pos, previous_block_last_elem_start);
    289231        length = end_pos - start_pos;
    290232        ElemEnd_grouping<1>(end_pos, 1);
     
    302244    while(start != end) {
    303245        end_pos = /*block_base + */*start;
    304         start_pos = ScanBackwardPos (&elem_starts, end_pos);
     246        start_pos = ScanBackwardPos (&elem_starts, end_pos, previous_block_last_elem_start);
    305247        length = end_pos - start_pos;
    306248        ElemEnd_grouping<2>(end_pos, 2);
     
    357299}
    358300
    359 static inline void print_GIDS()
    360 {
    361     int span_count = gids.size();
    362     for(int i=0;i<span_count;i++) {
    363              cout << gids[i] << " ";
    364     }
    365     cout << endl;
    366 }
    367 
    368 void do_process(FILE *infile, FILE *outfile) {
     301template<bool allow_performance_check> void do_process(FILE *infile, FILE *outfile) {
    369302
    370303@decl
     
    390323
    391324  if (e->content_start != 0) {
    392         memmove(&srcbuf[0], &srcbuf[e->content_start], chars_read - e->content_start);
     325        memmove(&srcbuf[0], &srcbuf[e->content_start], chars_read - e->content_start);
    393326        buf_pos = e->content_start;
    394327        buffer_base = buf_pos;
    395         if (chars_avail == BUFFER_SIZE) {
    396                 chars_read = chars_read - e->content_start +
     328        if (chars_avail == BUFFER_SIZE) {
     329                chars_read = chars_read - e->content_start +
    397330                             fread(&srcbuf[chars_read-e->content_start], 1, e->content_start, infile);
    398                 chars_avail = chars_read;
     331                chars_avail = chars_read;
    399332                if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
    400         }
    401         else {
    402           chars_read -=e->content_start;
     333        }
     334        else {
     335          chars_read -=e->content_start;
    403336          chars_avail -=e->content_start;
    404337        }
     
    410343
    411344    while (chars_avail == BUFFER_SIZE) {
    412       PERF_SEC_START(parser_timer);
     345      if (allow_performance_check)
     346      {
     347        PERF_SEC_START(parser_timer);
     348      }
     349
    413350      for (int blk = 0; blk < SEGMENT_BLOCKS; blk++) {
    414351          block_base = blk*BLOCK_SIZE;
    415           s2p_do_block((BytePack *) &srcbuf[block_base], basis_bits);
     352          s2p_do_block((BytePack *) &srcbuf[block_base], basis_bits);
    416353          @block_stmts
    417354          postprocess_do_block(dictionary, hash_data);
    418355      }
    419       PERF_SEC_END(parser_timer, chars_avail);
    420            
     356
     357      if (allow_performance_check)
     358      {
     359        PERF_SEC_END(parser_timer, chars_avail);
     360      }
    421361      int bytes_left = chars_read - chars_avail;
    422362      memmove(buf, &srcbuf[BUFFER_SIZE - OVERLAP_BUFSIZE], bytes_left + OVERLAP_BUFSIZE);
     
    428368    }
    429369/* Final Partial Buffer */
    430     PERF_SEC_START(parser_timer);
     370    if (allow_performance_check)
     371    {
     372        PERF_SEC_START(parser_timer);
     373    }
    431374
    432375    block_pos = 0;
     
    434377/* Full Blocks */
    435378    while (remaining >= BLOCK_SIZE) {
    436           block_base = block_pos;
    437           s2p_do_block((BytePack *) &srcbuf[block_pos], basis_bits);
    438           @block_stmts
    439           postprocess_do_block(dictionary, hash_data);
    440           block_pos += BLOCK_SIZE;
    441           remaining -= BLOCK_SIZE;
     379          block_base = block_pos;
     380          s2p_do_block((BytePack *) &srcbuf[block_pos], basis_bits);
     381          @block_stmts
     382          postprocess_do_block(dictionary, hash_data);
     383          block_pos += BLOCK_SIZE;
     384          remaining -= BLOCK_SIZE;
    442385    }
    443386    block_base = block_pos;
    444387    if (remaining > 0 || @any_carry) {
    445388          EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-remaining));
    446           s2p_do_final_block((BytePack *) &srcbuf[block_pos], basis_bits, EOF_mask);
    447           @final_block_stmts
    448           postprocess_do_block(dictionary, hash_data);
     389          s2p_do_final_block((BytePack *) &srcbuf[block_pos], basis_bits, EOF_mask);
     390          @final_block_stmts
     391          postprocess_do_block(dictionary, hash_data);
    449392    }
    450393    buf_pos += chars_avail;
    451394    buffer_base = buf_pos;
    452 
    453     PERF_SEC_END(parser_timer, chars_avail);
    454 
    455 #if DEBUG
    456 //    print_GIDS();
    457     pbgs_symbol_table.Print_Symbol_Table_Distribution();
    458 #endif
    459 }
     395    if (allow_performance_check)
     396    {
     397        PERF_SEC_END(parser_timer, chars_avail);
     398    }
     399}
  • proto/SymbolTable/wcd_stl_template.cpp

    r1688 r1721  
    1 #include "../symtab_global.h"
     1#include "../common_definitions.h"
    22#include <symtab.h>
     3
     4#include "../wcd_common_functions.h"
     5#include "../symtab_common_functions.h"
     6#include "parser_common_functions_generated.h"
    37
    48#ifdef BUFFER_PROFILING
     
    1923int buffer_base=0;
    2024char * source;
    21 BitBlock EOF_mask = simd<1>::constant<1>();
    22 LineColTracker tracker;
    2325
    2426queue <size_t> elem_starts_buf;
     
    2729SymbolTable symbol_table;
    2830
    29 @global
    30 
    31 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits);
    32 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask);
     31template<bool allow_performance_check> void do_process(FILE *infile, FILE *outfile);
    3332static inline void postprocess_do_block(Dictionary& dictionary);
    3433static inline void do_symbol_table_lookup();
    3534
    36 void do_process(FILE *infile, FILE *outfile);
    37 
    3835int main(int argc, char * argv[]) {
    39         char * infilename, * outfilename;
    40         FILE *infile, *outfile;
    41         struct stat fileinfo;
    42 
    43         if (argc < 2) {
    44                 printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
    45                 exit(-1);
    46         }
    47 
    48         infilename = argv[1];
    49         stat(infilename, &fileinfo);
    50         infile = fopen(infilename, "rb");
    51         if (!infile) {
    52                 fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
    53                 exit(-1);
    54         }
    55 
    56         if (argc < 3) outfile = stdout;
    57         else {
    58                 outfilename = argv[2];
    59                 outfile = fopen(outfilename, "wb");
    60                 if (!outfile) {
    61                         fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
    62                         exit(-1);
    63                 }
    64         }
     36    char * dictionaryfilename, * infilename, * outfilename;
     37    FILE * dictionaryfile, *infile, *outfile;
     38
     39    getFilenames(argc, argv, dictionaryfilename, infilename, outfilename);
     40    openInputOutputFiles(dictionaryfilename, infilename, outfilename,
     41                         dictionaryfile, infile, outfile);
     42
     43    int greatest_GID_in_dictionary;
     44    populateDictionary(dictionaryfile, outfile, gids, greatest_GID_in_dictionary);
    6545
    6646//      PERF_SEC_BIND(1);
    6747
    68         PERF_SEC_INIT(parser_timer);
    69 
    70         do_process(infile, outfile);
    71 
    72         PERF_SEC_DUMP(parser_timer);
    73 
    74         PERF_SEC_DESTROY(parser_timer);
    75 
    76         fclose(infile);
    77         fclose(outfile);
    78 
    79         return(0);
    80 }
    81 
    82 /* s2p Definitions */
    83 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
    84   s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
    85         basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
    86 }
    87 
    88 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
    89   s2p_do_block(U8, basis_bits);
    90   basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
    91   basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
    92   basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
    93   basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
    94   basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
    95   basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
    96   basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
    97   basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
     48    PERF_SEC_INIT(parser_timer);
     49
     50    // store symbols from text to Symbol Table
     51    do_process<true>(infile, outfile);
     52
     53    PERF_SEC_DUMP(parser_timer);
     54
     55    PERF_SEC_DESTROY(parser_timer);
     56
     57    // gather dictionary statistics
     58    int totalKnownWordsInDictionary, totalUnknownWordsInDictionary;
     59    wordCountInDictionary(greatest_GID_in_dictionary, gids, totalUnknownWordsInDictionary, totalKnownWordsInDictionary);
     60    printWordCountInDictionary(totalUnknownWordsInDictionary, totalKnownWordsInDictionary);
     61
     62    fclose(dictionaryfile);
     63    fclose(infile);
     64    fclose(outfile);
     65
     66#if PRINT_SYMBOL_DISTRIBUTION
     67    print_GIDS();
     68#endif
     69
     70    return(0);
    9871}
    9972
     
    133106}
    134107
    135 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int)) {
    136 
    137         BitBlockForwardIterator end;
    138         int pos, block_pos;
    139 
    140         while(start != end) {
    141 
    142                 block_pos = block_base + *start;
    143                 int rv = is_valid(block_pos);
    144 
    145                 if (rv) {
    146                         int error_line, error_column;
    147                         tracker.get_Line_and_Column(block_pos, error_line, error_column);
    148                         ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
    149                         exit(-1);
    150                 }
    151                 start++;
    152         }
    153 }
    154 
    155 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int buffer_base, int is_valid(int,int)) {
    156 
    157         BitBlockForwardIterator end;
    158         int pos, block_pos, file_pos;
    159 
    160         while(start != end) {
    161 
    162                 block_pos = block_base + *start;
    163                 file_pos = block_pos+buffer_base;
    164 
    165 
    166                 int rv = is_valid(block_pos, file_pos);
    167 
    168                 if (rv) {
    169                         int error_line, error_column;
    170                         tracker.get_Line_and_Column(block_pos, error_line, error_column);
    171                         ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
    172                         exit(-1);
    173                 }
    174                 start++;
    175         }
    176 }
    177 
    178108static inline void postprocess_do_block(Dictionary& dictionary){
    179109
     
    193123}
    194124
    195 static inline void print_GIDS()
    196 {
    197     int span_count = gids.size();
    198     for(int i=0;i<span_count;i++) {
    199              cout << gids[i] << " ";
    200     }
    201     cout << endl;
    202 }
    203 
    204 void do_process(FILE *infile, FILE *outfile) {
     125template<bool allow_performance_check> void do_process(FILE *infile, FILE *outfile) {
    205126
    206127@decl
     
    246167
    247168    while (chars_avail == BUFFER_SIZE) {
    248       PERF_SEC_START(parser_timer);
     169      if (allow_performance_check)
     170      {
     171        PERF_SEC_START(parser_timer);
     172      }
    249173      for (int blk = 0; blk < SEGMENT_BLOCKS; blk++) {
    250174          block_base = blk*BLOCK_SIZE;
     
    253177          postprocess_do_block(dictionary);
    254178      }
    255       PERF_SEC_END(parser_timer, chars_avail);
    256            
     179      if (allow_performance_check)
     180      {
     181        PERF_SEC_END(parser_timer, chars_avail);
     182      }
     183
    257184      int bytes_left = chars_read - chars_avail;
    258185      memmove(buf, &srcbuf[BUFFER_SIZE - OVERLAP_BUFSIZE], bytes_left + OVERLAP_BUFSIZE);
     
    264191    }
    265192/* Final Partial Buffer */
    266     PERF_SEC_START(parser_timer);
     193    if (allow_performance_check)
     194    {
     195      PERF_SEC_START(parser_timer);
     196    }
    267197
    268198    block_pos = 0;
     
    287217    buffer_base = buf_pos;
    288218
    289     PERF_SEC_END(parser_timer, chars_avail);
    290 
    291 #if DEBUG
    292     print_GIDS();
    293 #endif
    294 }
     219    if (allow_performance_check)
     220    {
     221      PERF_SEC_END(parser_timer, chars_avail);
     222    }
     223}
Note: See TracChangeset for help on using the changeset viewer.