Ignore:
Timestamp:
Nov 21, 2011, 4:09:54 PM (8 years ago)
Author:
vla24
Message:

SymbolTable: completed dictionary implementation and refactored templates

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/SymbolTable/wcd_hash_template.cpp

    r1688 r1721  
    1 #include "../symtab_global.h"
     1#include "../common_definitions.h"
    22#include <hash_symbol_table.h>
     3
     4#include "../wcd_common_functions.h"
     5#include "../symtab_common_functions.h"
     6#include "parser_common_functions_generated.h"
    37
    48#ifdef BUFFER_PROFILING
     
    1923int buffer_base=0;
    2024char * source;
    21 LineColTracker tracker;
    22 BitBlock EOF_mask = simd<1>::constant<1>();
    2325
    2426queue <size_t> elem_starts_buf;
     
    2729HashSymbolTable symbol_table;
    2830
    29 /* StreamScan & Post Process Declarations */
    30 //      static inline int StreamScanToFirst(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block)
    31 static inline int StreamScan(ScanBlock * stream, int blk_count, int ProcessPos(int), int * error_pos_in_block);
    32 
    33 @global
    34 
    35 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits);
    36 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask);
     31template<bool allow_performance_check> void do_process(FILE *infile, FILE *outfile);
    3732static inline void postprocess_do_block(Dictionary& dictionary);
    38 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int));
    39 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int,int));
    4033
    4134static inline void do_symbol_table_lookup();
    4235
    43 void do_process(FILE *infile, FILE *outfile);
    44 
    4536int main(int argc, char * argv[]) {
    46         char * infilename, * outfilename;
    47         FILE *infile, *outfile;
    48         struct stat fileinfo;
    49 
    50         if (argc < 2) {
    51                 printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
    52                 exit(-1);
    53         }
    54 
    55         infilename = argv[1];
    56         stat(infilename, &fileinfo);
    57         infile = fopen(infilename, "rb");
    58         if (!infile) {
    59                 fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
    60                 exit(-1);
    61         }
    62 
    63         if (argc < 3) outfile = stdout;
    64         else {
    65                 outfilename = argv[2];
    66                 outfile = fopen(outfilename, "wb");
    67                 if (!outfile) {
    68                         fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
    69                         exit(-1);
    70                 }
    71         }
     37    char * dictionaryfilename, * infilename, * outfilename;
     38    FILE * dictionaryfile, *infile, *outfile;
     39
     40    getFilenames(argc, argv, dictionaryfilename, infilename, outfilename);
     41    openInputOutputFiles(dictionaryfilename, infilename, outfilename,
     42                         dictionaryfile, infile, outfile);
     43
     44    int greatest_GID_in_dictionary;
     45    populateDictionary(dictionaryfile, outfile, gids, greatest_GID_in_dictionary);
    7246
    7347//      PERF_SEC_BIND(1);
    7448
    75         PERF_SEC_INIT(parser_timer);
    76 
    77         do_process(infile, outfile);
    78 
    79         PERF_SEC_DUMP(parser_timer);
    80 
    81         PERF_SEC_DESTROY(parser_timer);
    82 
    83         fclose(infile);
    84         fclose(outfile);
    85 
    86         return(0);
    87 }
    88 
    89 /* s2p Definitions */
    90 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
    91   s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
    92         basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
    93 }
    94 
    95 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
    96   s2p_do_block(U8, basis_bits);
    97   basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
    98   basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
    99   basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
    100   basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
    101   basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
    102   basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
    103   basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
    104   basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
     49    PERF_SEC_INIT(parser_timer);
     50
     51    // store symbols from text to Symbol Table
     52    do_process<true>(infile, outfile);
     53
     54    PERF_SEC_DUMP(parser_timer);
     55
     56    PERF_SEC_DESTROY(parser_timer);
     57
     58    // gather dictionary statistics
     59    int totalKnownWordsInDictionary, totalUnknownWordsInDictionary;
     60    wordCountInDictionary(greatest_GID_in_dictionary, gids, totalUnknownWordsInDictionary, totalKnownWordsInDictionary);
     61    printWordCountInDictionary(totalUnknownWordsInDictionary, totalKnownWordsInDictionary);
     62
     63    fclose(dictionaryfile);
     64    fclose(infile);
     65    fclose(outfile);
     66
     67#if PRINT_SYMBOL_DISTRIBUTION
     68    print_GIDS();
     69#endif
     70
     71    return(0);
    10572}
    10673
     
    140107}
    141108
    142 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int is_valid(int)) {
    143 
    144         BitBlockForwardIterator end;
    145         int pos, block_pos;
    146 
    147         while(start != end) {
    148 
    149                 block_pos = block_base + *start;
    150                 int rv = is_valid(block_pos);
    151 
    152                 if (rv) {
    153                         int error_line, error_column;
    154                         tracker.get_Line_and_Column(block_pos, error_line, error_column);
    155                         ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
    156                         exit(-1);
    157                 }
    158                 start++;
    159         }
    160 }
    161 
    162 static inline void validate_block(BitBlockForwardIterator & start, int block_base, int buffer_base, int is_valid(int,int)) {
    163 
    164         BitBlockForwardIterator end;
    165         int pos, block_pos, file_pos;
    166 
    167         while(start != end) {
    168 
    169                 block_pos = block_base + *start;
    170                 file_pos = block_pos+buffer_base;
    171 
    172 
    173                 int rv = is_valid(block_pos, file_pos);
    174 
    175                 if (rv) {
    176                         int error_line, error_column;
    177                         tracker.get_Line_and_Column(block_pos, error_line, error_column);
    178                         ReportError(XMLTestSuiteError::get_msg(rv), error_line, error_column);
    179                         exit(-1);
    180                 }
    181                 start++;
    182         }
    183 }
    184 
    185109static inline void postprocess_do_block(Dictionary& dictionary){
    186110
     
    199123}
    200124
    201 static inline void print_GIDS()
    202 {
    203     int span_count = gids.size();
    204     for(int i=0;i<span_count;i++) {
    205              cout << gids[i] << " ";
    206     }
    207     cout << endl;
    208 }
    209 
    210 void do_process(FILE *infile, FILE *outfile) {
     125template<bool allow_performance_check> void do_process(FILE *infile, FILE *outfile) {
    211126
    212127@decl
     
    232147
    233148  if (e->content_start != 0) {
    234         memmove(&srcbuf[0], &srcbuf[e->content_start], chars_read - e->content_start);
     149        memmove(&srcbuf[0], &srcbuf[e->content_start], chars_read - e->content_start);
    235150        buf_pos = e->content_start;
    236151        buffer_base = buf_pos;
    237         if (chars_avail == BUFFER_SIZE) {
    238                 chars_read = chars_read - e->content_start +
     152        if (chars_avail == BUFFER_SIZE) {
     153                chars_read = chars_read - e->content_start +
    239154                             fread(&srcbuf[chars_read-e->content_start], 1, e->content_start, infile);
    240                 chars_avail = chars_read;
     155                chars_avail = chars_read;
    241156                if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
    242         }
    243         else {
    244           chars_read -=e->content_start;
     157        }
     158        else {
     159          chars_read -=e->content_start;
    245160          chars_avail -=e->content_start;
    246161        }
     
    252167
    253168    while (chars_avail == BUFFER_SIZE) {
    254       PERF_SEC_START(parser_timer);
     169      if (allow_performance_check)
     170      {
     171        PERF_SEC_START(parser_timer);
     172      }
     173
    255174      for (int blk = 0; blk < SEGMENT_BLOCKS; blk++) {
    256175          block_base = blk*BLOCK_SIZE;
    257           s2p_do_block((BytePack *) &srcbuf[block_base], basis_bits);
     176          s2p_do_block((BytePack *) &srcbuf[block_base], basis_bits);
    258177          @block_stmts
    259           postprocess_do_block(dictionary);
     178          postprocess_do_block(dictionary);
    260179      }
    261       PERF_SEC_END(parser_timer, chars_avail);
    262            
     180
     181      if (allow_performance_check)
     182      {
     183        PERF_SEC_END(parser_timer, chars_avail);
     184      }
    263185      int bytes_left = chars_read - chars_avail;
    264186      memmove(buf, &srcbuf[BUFFER_SIZE - OVERLAP_BUFSIZE], bytes_left + OVERLAP_BUFSIZE);
     
    270192    }
    271193/* Final Partial Buffer */
    272     PERF_SEC_START(parser_timer);
     194    if (allow_performance_check)
     195    {
     196        PERF_SEC_START(parser_timer);
     197    }
    273198
    274199    block_pos = 0;
     
    276201/* Full Blocks */
    277202    while (remaining >= BLOCK_SIZE) {
    278           block_base = block_pos;
    279           s2p_do_block((BytePack *) &srcbuf[block_pos], basis_bits);
    280           @block_stmts
    281           postprocess_do_block(dictionary);
    282           block_pos += BLOCK_SIZE;
    283           remaining -= BLOCK_SIZE;
     203          block_base = block_pos;
     204          s2p_do_block((BytePack *) &srcbuf[block_pos], basis_bits);
     205          @block_stmts
     206          postprocess_do_block(dictionary);
     207          block_pos += BLOCK_SIZE;
     208          remaining -= BLOCK_SIZE;
    284209    }
    285210    block_base = block_pos;
    286211    if (remaining > 0 || @any_carry) {
    287212          EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-remaining));
    288           s2p_do_final_block((BytePack *) &srcbuf[block_pos], basis_bits, EOF_mask);
    289           @final_block_stmts
    290           postprocess_do_block(dictionary);
     213          s2p_do_final_block((BytePack *) &srcbuf[block_pos], basis_bits, EOF_mask);
     214          @final_block_stmts
     215          postprocess_do_block(dictionary);
    291216    }
    292217    buf_pos += chars_avail;
    293218    buffer_base = buf_pos;
    294 
    295     PERF_SEC_END(parser_timer, chars_avail);
    296 
    297 #if DEBUG
    298     print_GIDS();
    299 #endif
    300 }
     219    if (allow_performance_check)
     220    {
     221        PERF_SEC_END(parser_timer, chars_avail);
     222    }
     223}
Note: See TracChangeset for help on using the changeset viewer.