source: proto/SymbolTable/wcd_hash_template.cpp @ 1741

Last change on this file since 1741 was 1741, checked in by vla24, 8 years ago

SymbolTable: Fixed custom hashing function for parallel-bitstream-based grouping

File size: 6.5 KB
Line 
1#include "../common_definitions.h"
2#include <hash_symbol_table.h>
3
4#include "../wcd_common_functions.h"
5#include "../symtab_common_functions.h"
6#include "parser_common_functions_generated.h"
7
// Global timing handle handed to the PERF_SEC_* macros in main() and
// do_process(). Its concrete type depends on the profiling mode selected at
// build time; with no profiling mode it is only an untyped placeholder pointer.
8#ifdef BUFFER_PROFILING
9        BOM_Table * parser_timer;
10
11#elif CODE_CLOCKER
        // Event list handed to the CC constructor below.
        // NOTE(review): PAPI_TOT_CYC is presumably the PAPI total-cycles counter — confirm.
12        #define NUM_EVENTS 1
13        int Events[NUM_EVENTS] = {PAPI_TOT_CYC};
14        //int Events[NUM_EVENTS] = {PAPI_L2_DCM};
15        //int Events[NUM_EVENTS] = {PAPI_TOT_CYC, PAPI_BR_MSP};
        // Third constructor argument of CC; meaning not visible here
        // (presumably a calibration size — confirm against the CC class).
16        int cal_size = 20;
17        CC * parser_timer = new CC(Events,NUM_EVENTS,cal_size);
18#else
19        void * parser_timer;
20#endif
21
// Offset of the block currently being processed, relative to the start of the
// source buffer; assigned in do_process() and passed to validate_block().
22int block_base=0;
// Running position assigned from buf_pos in do_process(); added to every
// position queued by ElemStrt_check()/ElemEnd_check() and subtracted again
// in do_symbol_table_lookup() to index into `source`.
23int buffer_base=0;
// Points at the current source buffer (set to srcbuf in do_process()).
24char * source;
25
// Queued start / end positions of words; filled by ElemStrt_check() and
// ElemEnd_check(), drained pairwise by do_symbol_table_lookup().
26queue <size_t> elem_starts_buf;
27queue <size_t> elem_ends_buf;
// One GID appended per looked-up symbol; also passed to populateDictionary()
// and wordCountInDictionary() in main().
28vector <int> gids;
// Table queried (and extended) through Lookup_or_Insert_Name().
29HashSymbolTable symbol_table;
30
// Forward declarations of functions defined later in this file.
31template<bool allow_performance_check> void do_process(FILE *infile, FILE *outfile);
32static inline void postprocess_do_block(Dictionary& dictionary);
33
34static inline void do_symbol_table_lookup();
35
36int main(int argc, char * argv[]) {
37    char * dictionaryfilename, * infilename, * outfilename;
38    FILE * dictionaryfile, *infile, *outfile;
39
40    getFilenames(argc, argv, dictionaryfilename, infilename, outfilename);
41    openInputOutputFiles(dictionaryfilename, infilename, outfilename,
42                         dictionaryfile, infile, outfile);
43
44    int greatest_GID_in_dictionary;
45    populateDictionary(dictionaryfile, outfile, gids, greatest_GID_in_dictionary);
46
47//      PERF_SEC_BIND(1);
48
49    PERF_SEC_INIT(parser_timer);
50
51    // store symbols form text to Symbol Table
52    do_process<true>(infile, outfile);
53
54    PERF_SEC_DUMP(parser_timer);
55
56    PERF_SEC_DESTROY(parser_timer);
57
58    // gather dictionary statistics
59    int totalKnownWordsInDictionary, totalUnknownWordsInDictionary;
60    wordCountInDictionary(greatest_GID_in_dictionary, gids, totalUnknownWordsInDictionary, totalKnownWordsInDictionary);
61    printWordCountInDictionary(totalUnknownWordsInDictionary, totalKnownWordsInDictionary);
62
63    fclose(dictionaryfile);
64    fclose(infile);
65    fclose(outfile);
66
67#if PRINT_SYMBOL_DISTRIBUTION
68    print_GIDS(gids);
69#endif
70
71    return(0);
72}
73
74static inline int ElemStrt_check(int pos) {
75        elem_starts_buf.push(buffer_base + pos);
76        return 0;
77}
78
79static inline int ElemEnd_check(int pos) {
80        elem_ends_buf.push(buffer_base + pos);
81        return 0;
82}
83
84static inline void do_symbol_table_lookup()
85{
86    while( !elem_starts_buf.empty() && !elem_ends_buf.empty() )
87    {
88        int start = elem_starts_buf.front();
89        int end = elem_ends_buf.front();
90        elem_starts_buf.pop();
91        elem_ends_buf.pop();
92        int length = end - start;
93
94        //lookup or insert to symbol table
95#if DEBUG
96        char* symbol = new char[length+1];
97        strncpy ( symbol, source + start - buffer_base, length );
98        symbol[length] ='\0';
99        printf ("start: %i[%i] | end: %i[%i] | length: %i | symbol: %s\n", start, start-buffer_base, end, end-buffer_base, length, symbol );
100
101        delete symbol; symbol = 0;
102#endif
103
104        int gid = symbol_table.Lookup_or_Insert_Name(source + start - buffer_base, length);
105        gids.push_back(gid);
106    }
107}
108
109static inline void postprocess_do_block(Dictionary& dictionary){
110
111    if ( bitblock_has_bit(dictionary.Word_starts))
112    {
113        BitBlockForwardIterator iter_length_grouping_starts(&dictionary.Word_starts);
114        validate_block(iter_length_grouping_starts, block_base, ElemStrt_check);
115    }
116
117    if ( bitblock_has_bit(dictionary.Word_ends) )
118    {
119        BitBlockForwardIterator iter_length_grouping_ends(&dictionary.Word_ends);
120        validate_block(iter_length_grouping_ends, block_base, ElemEnd_check);
121    }
122    do_symbol_table_lookup();
123}
124
125template<bool allow_performance_check> void do_process(FILE *infile, FILE *outfile) {
126
127@decl
128
129  int buf_pos = 0;
130  int block_pos = 0;
131  int errpos = 0;
132  int chars_avail = 0;
133  int check_pos = 0;
134  int chars_read = 0;
135  BytePack buf[(BUFFER_SIZE+BLOCK_SIZE+OVERLAP_BUFSIZE*2)/sizeof(SIMD_type)];
136
137  char * srcbuf = ((char *) buf) + OVERLAP_BUFSIZE;
138  buffer_base = buf_pos;
139  source = srcbuf;
140
141  chars_read = fread((void *)srcbuf, 1, BUFFER_SIZE + OVERLAP_BUFSIZE, infile);
142  chars_avail = chars_read;
143  if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
144
145  Entity_Info * e = new Entity_Info;
146  e->AnalyzeSignature((unsigned char *)srcbuf);
147
148  if (e->content_start != 0) {
149        memmove(&srcbuf[0], &srcbuf[e->content_start], chars_read - e->content_start);
150        buf_pos = e->content_start;
151        buffer_base = buf_pos;
152        if (chars_avail == BUFFER_SIZE) {
153                chars_read = chars_read - e->content_start +
154                             fread(&srcbuf[chars_read-e->content_start], 1, e->content_start, infile);
155                chars_avail = chars_read;
156                if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
157        }
158        else {
159          chars_read -=e->content_start;
160          chars_avail -=e->content_start;
161        }
162  }
163
164@stream_stmts
165
166/* Full Buffers */
167
168    while (chars_avail == BUFFER_SIZE) {
169      if (allow_performance_check)
170      {
171        PERF_SEC_START(parser_timer);
172      }
173
174      for (int blk = 0; blk < SEGMENT_BLOCKS; blk++) {
175          block_base = blk*BLOCK_SIZE;
176          s2p_do_block((BytePack *) &srcbuf[block_base], basis_bits);
177          @block_stmts
178          postprocess_do_block(dictionary);
179      }
180
181      if (allow_performance_check)
182      {
183        PERF_SEC_END(parser_timer, chars_avail);
184      }
185      int bytes_left = chars_read - chars_avail;
186      memmove(buf, &srcbuf[BUFFER_SIZE - OVERLAP_BUFSIZE], bytes_left + OVERLAP_BUFSIZE);
187      chars_read = fread(&srcbuf[bytes_left],1, BUFFER_SIZE + OVERLAP_BUFSIZE - bytes_left, infile) + bytes_left;
188      chars_avail = chars_read;
189      if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
190      buf_pos += chars_avail;
191      buffer_base = buf_pos;
192    }
193/* Final Partial Buffer */
194    if (allow_performance_check)
195    {
196        PERF_SEC_START(parser_timer);
197    }
198
199    block_pos = 0;
200    int remaining = chars_avail;
201/* Full Blocks */
202    while (remaining >= BLOCK_SIZE) {
203          block_base = block_pos;
204          s2p_do_block((BytePack *) &srcbuf[block_pos], basis_bits);
205          @block_stmts
206          postprocess_do_block(dictionary);
207          block_pos += BLOCK_SIZE;
208          remaining -= BLOCK_SIZE;
209    }
210    block_base = block_pos;
211    if (remaining > 0 || @any_carry) {
212          EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-remaining));
213          s2p_do_final_block((BytePack *) &srcbuf[block_pos], basis_bits, EOF_mask);
214          @final_block_stmts
215          postprocess_do_block(dictionary);
216    }
217    buf_pos += chars_avail;
218    buffer_base = buf_pos;
219    if (allow_performance_check)
220    {
221        PERF_SEC_END(parser_timer, chars_avail);
222    }
223}
Note: See TracBrowser for help on using the repository browser.