Ignore:
Timestamp:
Nov 21, 2011, 4:09:54 PM (8 years ago)
Author:
vla24
Message:

SymbolTable: completed dictionary implementation and refactored templates

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/SymbolTable/wcd_identity_template.cpp

    r1688 r1721  
    1 //#define USE_ITER
    2 
    3 #include "../symtab_global.h"
     1#include "../common_definitions.h"
    42#include <pbgs_identity_symbol_table.h>
     3
     4#include "../wcd_common_functions.h"
     5#include "../symtab_common_functions.h"
     6#include "parser_common_functions_generated.h"
    57
    68#ifdef BUFFER_PROFILING
     
    2224int buffer_last;
    2325char * source;
    24 BitBlock EOF_mask = simd<1>::constant<1>();
    2526
    2627BitBlock elem_ends;
     
    3233PBGSIdentitySymbolTable pbgs_symbol_table;
    3334
    34 @global
    35 
    36 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits);
    37 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask);
    3835static inline void postprocess_do_block(Dictionary& dictionary, Hash_data hash_data);
    39 
    40 void do_process(FILE *infile, FILE *outfile);
     36template<bool allow_performance_check> void do_process(FILE *infile, FILE *outfile);
    4137
    4238static inline void validate_block_length_grouping(BitBlockForwardIterator & start, int block_base);
    43 
    4439static inline int ElemStart_grouping(int start_pos, int L) ;
    45 static inline int StreamScanLengthGrouping(ScanBlock * stream, int blk_count);
    46 static inline int ScanForwardPos(BitBlock * block, int pos);
    47 static inline int compute_hash_value (int lgth, int start);
    4840
    4941int main(int argc, char * argv[]) {
    50         char * infilename, * outfilename;
    51         FILE *infile, *outfile;
    52         struct stat fileinfo;
    53 
    54         if (argc < 2) {
    55                 printf("Usage: %s <filename> [<outputfile>]\n", argv[0]);
    56                 exit(-1);
    57         }
    58 
    59         infilename = argv[1];
    60         stat(infilename, &fileinfo);
    61         infile = fopen(infilename, "rb");
    62         if (!infile) {
    63                 fprintf(stderr, "Error: cannot open %s for input.\n", infilename);
    64                 exit(-1);
    65         }
    66 
    67         if (argc < 3) outfile = stdout;
    68         else {
    69                 outfilename = argv[2];
    70                 outfile = fopen(outfilename, "wb");
    71                 if (!outfile) {
    72                         fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
    73                         exit(-1);
    74                 }
    75         }
     42    char * dictionaryfilename, * infilename, * outfilename;
     43    FILE * dictionaryfile, *infile, *outfile;
     44
     45    getFilenames(argc, argv, dictionaryfilename, infilename, outfilename);
     46    openInputOutputFiles(dictionaryfilename, infilename, outfilename,
     47                         dictionaryfile, infile, outfile);
     48
     49    int greatest_GID_in_dictionary;
     50    populateDictionary(dictionaryfile, outfile, gids, greatest_GID_in_dictionary);
    7651
    7752//      PERF_SEC_BIND(1);
    7853
    79         PERF_SEC_INIT(parser_timer);
    80 
    81         do_process(infile, outfile);
    82 
    83         PERF_SEC_DUMP(parser_timer);
    84 
    85         PERF_SEC_DESTROY(parser_timer);
    86 
    87         fclose(infile);
    88         fclose(outfile);
    89 
    90         return(0);
    91 }
    92 
    93 /* s2p Definitions */
    94 static inline void s2p_do_block(BytePack U8[], Basis_bits & basis_bits) {
    95   s2p(U8[0], U8[1], U8[2], U8[3], U8[4], U8[5], U8[6], U8[7],
    96         basis_bits.bit_0, basis_bits.bit_1, basis_bits.bit_2, basis_bits.bit_3, basis_bits.bit_4, basis_bits.bit_5, basis_bits.bit_6, basis_bits.bit_7);
    97 }
    98 
    99 static inline void s2p_do_final_block(BytePack U8[], Basis_bits & basis_bits, BitBlock EOF_mask) {
    100   s2p_do_block(U8, basis_bits);
    101   basis_bits.bit_0 = simd_and(basis_bits.bit_0, EOF_mask);
    102   basis_bits.bit_1 = simd_and(basis_bits.bit_1, EOF_mask);
    103   basis_bits.bit_2 = simd_and(basis_bits.bit_2, EOF_mask);
    104   basis_bits.bit_3 = simd_and(basis_bits.bit_3, EOF_mask);
    105   basis_bits.bit_4 = simd_and(basis_bits.bit_4, EOF_mask);
    106   basis_bits.bit_5 = simd_and(basis_bits.bit_5, EOF_mask);
    107   basis_bits.bit_6 = simd_and(basis_bits.bit_6, EOF_mask);
    108   basis_bits.bit_7 = simd_and(basis_bits.bit_7, EOF_mask);
    109 }
    110 
    111 static inline int ScanForwardPos(BitBlock * block, int pos)
    112 {
    113     BitBlock s = block[0];
    114     BitBlock temp = simd_and(s, simd<128>::sll(simd<2>::constant<3>(), convert(pos)));
    115 
    116     if (bitblock_has_bit(temp))
    117     {
    118         return count_forward_zeroes (temp);
    119     }
    120     else
    121     {
    122         //handle boundary case
    123         block_boundary_case = true;
    124         last_elem_start = pos - BLOCK_SIZE;
    125         return 0;
    126     }
     54    PERF_SEC_INIT(parser_timer);
     55
     56    // store symbols from text to Symbol Table
     57    do_process<true>(infile, outfile);
     58
     59    PERF_SEC_DUMP(parser_timer);
     60
     61    PERF_SEC_DESTROY(parser_timer);
     62
     63    // gather dictionary statistics
     64    int totalKnownWordsInDictionary, totalUnknownWordsInDictionary;
     65    wordCountInDictionary(greatest_GID_in_dictionary, gids, totalUnknownWordsInDictionary, totalKnownWordsInDictionary);
     66    printWordCountInDictionary(totalUnknownWordsInDictionary, totalKnownWordsInDictionary);
     67
     68    fclose(dictionaryfile);
     69    fclose(infile);
     70    fclose(outfile);
     71
     72#if PRINT_SYMBOL_DISTRIBUTION
     73//    print_GIDS();
     74    pbgs_symbol_table.Print_Symbol_Table_Distribution();
     75#endif
    12776}
    12877
     
    219168            ElemStart_grouping(start_pos, lgth);
    220169        }
     170        else
     171        {
     172            //handle boundary case
     173            block_boundary_case = true;
     174            last_elem_start = start_pos - BLOCK_SIZE;
     175        }
    221176        start++;
    222177    }
     
    245200}
    246201
    247 static inline void print_GIDS()
    248 {
    249     int span_count = gids.size();
    250     for(int i=0;i<span_count;i++) {
    251              cout << gids[i] << " ";
    252     }
    253     cout << endl;
    254 }
    255 
    256 void do_process(FILE *infile, FILE *outfile) {
     202template<bool allow_performance_check> void do_process(FILE *infile, FILE *outfile) {
    257203
    258204@decl
     
    278224
    279225  if (e->content_start != 0) {
    280         memmove(&srcbuf[0], &srcbuf[e->content_start], chars_read - e->content_start);
     226        memmove(&srcbuf[0], &srcbuf[e->content_start], chars_read - e->content_start);
    281227        buf_pos = e->content_start;
    282228        buffer_base = buf_pos;
    283         if (chars_avail == BUFFER_SIZE) {
    284                 chars_read = chars_read - e->content_start +
     229        if (chars_avail == BUFFER_SIZE) {
     230                chars_read = chars_read - e->content_start +
    285231                             fread(&srcbuf[chars_read-e->content_start], 1, e->content_start, infile);
    286                 chars_avail = chars_read;
     232                chars_avail = chars_read;
    287233                if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
    288         }
    289         else {
    290           chars_read -=e->content_start;
     234        }
     235        else {
     236          chars_read -=e->content_start;
    291237          chars_avail -=e->content_start;
    292238        }
     
    298244
    299245    while (chars_avail == BUFFER_SIZE) {
    300       PERF_SEC_START(parser_timer);
     246      if (allow_performance_check)
     247      {
     248        PERF_SEC_START(parser_timer);
     249      }
     250
    301251      for (int blk = 0; blk < SEGMENT_BLOCKS; blk++) {
    302252          block_base = blk*BLOCK_SIZE;
    303           s2p_do_block((BytePack *) &srcbuf[block_base], basis_bits);
     253          s2p_do_block((BytePack *) &srcbuf[block_base], basis_bits);
    304254          @block_stmts
    305255          postprocess_do_block(dictionary, hash_data);
    306256      }
    307       PERF_SEC_END(parser_timer, chars_avail);
    308            
     257
     258      if (allow_performance_check)
     259      {
     260        PERF_SEC_END(parser_timer, chars_avail);
     261      }
    309262      int bytes_left = chars_read - chars_avail;
    310263      memmove(buf, &srcbuf[BUFFER_SIZE - OVERLAP_BUFSIZE], bytes_left + OVERLAP_BUFSIZE);
     
    316269    }
    317270/* Final Partial Buffer */
    318     PERF_SEC_START(parser_timer);
     271    if (allow_performance_check)
     272    {
     273        PERF_SEC_START(parser_timer);
     274    }
    319275
    320276    block_pos = 0;
     
    322278/* Full Blocks */
    323279    while (remaining >= BLOCK_SIZE) {
    324           block_base = block_pos;
    325           s2p_do_block((BytePack *) &srcbuf[block_pos], basis_bits);
    326           @block_stmts
    327           postprocess_do_block(dictionary, hash_data);
    328           block_pos += BLOCK_SIZE;
    329           remaining -= BLOCK_SIZE;
     280          block_base = block_pos;
     281          s2p_do_block((BytePack *) &srcbuf[block_pos], basis_bits);
     282          @block_stmts
     283          postprocess_do_block(dictionary, hash_data);
     284          block_pos += BLOCK_SIZE;
     285          remaining -= BLOCK_SIZE;
    330286    }
    331287    block_base = block_pos;
    332288    if (remaining > 0 || @any_carry) {
    333289          EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-remaining));
    334           s2p_do_final_block((BytePack *) &srcbuf[block_pos], basis_bits, EOF_mask);
    335           @final_block_stmts
    336           postprocess_do_block(dictionary, hash_data);
     290          s2p_do_final_block((BytePack *) &srcbuf[block_pos], basis_bits, EOF_mask);
     291          @final_block_stmts
     292          postprocess_do_block(dictionary, hash_data);
    337293    }
    338294    buf_pos += chars_avail;
    339295    buffer_base = buf_pos;
    340 
    341     PERF_SEC_END(parser_timer, chars_avail);
    342 
    343 #if DEBUG
    344 //    print_GIDS();
    345     pbgs_symbol_table.Print_Symbol_Table_Distribution();
    346 #endif
    347 }
     296    if (allow_performance_check)
     297    {
     298        PERF_SEC_END(parser_timer, chars_avail);
     299    }
     300}
Note: See TracChangeset for help on using the changeset viewer.