source: proto/SymbolTable/wcd_symbol_stat_gather.cpp @ 4204

Last change on this file since 4204 was 1787, checked in by vla24, 8 years ago

Added a blank file for wcd. This file is used to measure fixed costs in various symtab impl.

File size: 13.1 KB
Line 
1#include "../common_definitions.h"
2#include <pbgs_identity_symbol_table.h>
3
4#include "../wcd_common_functions.h"
5#include "../symtab_common_functions.h"
6#include "parser_common_functions_generated.h"
7
8#include "../symbol_stat_gatherer.h"
9
10#ifdef BUFFER_PROFILING
11        BOM_Table * parser_timer;   // handle passed to the PERF_SEC_* macros in this build configuration
12
13#elif CODE_CLOCKER
14        #define NUM_EVENTS 1
15        int Events[NUM_EVENTS] = {PAPI_TOT_CYC};   // event list handed to the CC constructor below (presumably PAPI counter ids -- confirm)
16        //int Events[NUM_EVENTS] = {PAPI_L2_DCM};
17        //int Events[NUM_EVENTS] = {PAPI_TOT_CYC, PAPI_BR_MSP};
18        int cal_size = 20;   // third CC constructor argument; meaning not visible here (presumably a calibration size -- confirm)
19        CC * parser_timer = new CC(Events,NUM_EVENTS,cal_size);   // handle passed to the PERF_SEC_* macros in this build configuration
20#else
21        void * parser_timer;   // placeholder so the PERF_SEC_* macro call sites still have a name to refer to
22#endif
23
24int block_base=0;   // offset of the block currently being processed, relative to `source`; set by do_process before each block
25int buffer_base=0;   // mirrors do_process's buf_pos; in this file only read by the DEBUG printf in the generic ElemEnd_grouping
26int buffer_last;   // not referenced anywhere in the visible part of this file
27char * source;   // start of the text buffer (set to srcbuf in do_process); symbols are addressed as source + start
28
29BitBlock elem_starts;   // copy of dictionary.Word_starts for the current block; scanned backwards by the 17-and-longer handlers
30int previous_block_last_elem_start;   // set at the end of postprocess_do_block to -count_reverse_zeroes(elem_starts); passed to ScanBackwardPos
31BytePack hashvalues[2];   // [1] = hash data of the current block, [0] = hash data of the previous block
32SymbolStatGatherer symbol_stat;   // collects per-symbol gid/length/density statistics, printed at the end of main
33
34vector <int> gids;   // one gid is appended per symbol found; also passed to populateDictionary and wordCountInDictionary
35PBGSIdentitySymbolTable pbgs_symbol_table;   // symbol table queried through Lookup_or_Insert_Name for every symbol
36
37static inline void postprocess_do_block(Dictionary& dictionary, Hash_data hash_data);
38template<bool allow_performance_check> void do_process(FILE *infile, FILE *outfile);
39
40template <bool allow_performance_check, int L> static inline void validate_block_length_grouping(BitBlockForwardIterator & start, int block_base);
41template <bool allow_performance_check, int L> static inline int ElemEnd_grouping(int pos);
42
43int main(int argc, char * argv[]) {
44    char * dictionaryfilename, * infilename, * outfilename;
45    FILE * dictionaryfile, *infile, *outfile;
46
47    getFilenames(argc, argv, dictionaryfilename, infilename, outfilename);
48    openInputOutputFiles(dictionaryfilename, infilename, outfilename,
49                         dictionaryfile, infile, outfile);
50
51    int greatest_GID_in_dictionary;
52    populateDictionary(dictionaryfile, outfile, gids, greatest_GID_in_dictionary);
53    symbol_stat.clear_all();
54
55//      PERF_SEC_BIND(1);
56
57    PERF_SEC_INIT(parser_timer);
58
59    // store symbols form text to Symbol Table
60    do_process<true>(infile, outfile);
61
62    PERF_SEC_DUMP(parser_timer);
63
64    PERF_SEC_DESTROY(parser_timer);
65
66    // gather dictionary statistics
67    int totalKnownWordsInDictionary, totalUnknownWordsInDictionary;
68    wordCountInDictionary(greatest_GID_in_dictionary, gids, totalUnknownWordsInDictionary, totalKnownWordsInDictionary);
69    printWordCountInDictionary(totalUnknownWordsInDictionary, totalKnownWordsInDictionary);
70
71    symbol_stat.print_symbol_statistic();
72
73#if PRINT_SYMBOL_DISTRIBUTION
74//    print_GIDS();
75    pbgs_symbol_table.Print_Symbol_Table_Distribution();
76#endif
77
78    fclose(dictionaryfile);
79    fclose(infile);
80    fclose(outfile);
81
82    return(0);
83}
84
85template <bool allow_performance_check, int L>
86static inline int ElemEnd_grouping(int end) {
87    int start = end - L;
88    int hashvalue = compute_hash_value(L, start - block_base, hashvalues);
89    int gid = pbgs_symbol_table.Lookup_or_Insert_Name<L>(source + start, hashvalue);
90    gids.push_back(gid);
91    if (allow_performance_check)
92    {
93        symbol_stat.store_symbol_gid(gid);
94        symbol_stat.store_symbol_length(L);
95        symbol_stat.store_symbol_density(start, end);
96    }
97#if DEBUG
98    char* symbol = new char[L+1];
99    strncpy ( symbol, source + start, L );
100    symbol[L] ='\0';
101    printf ("%s | start: %i[%i] | end: %i[%i] | gid: %i | hashvalue: %i | symbol: %s\n", __FUNCTION__, start, start-buffer_base, end, end-buffer_base, gid, hashvalue, symbol );
102    delete symbol; symbol = 0;
103#endif
104    return 0;
105}
106
107template<>
108inline int ElemEnd_grouping<true,17>(int end) {
109    int start = ScanBackwardPos (&elem_starts, end - block_base, previous_block_last_elem_start) + block_base;
110    int lgth = end - start;
111    int hashvalue = compute_hash_value(lgth, start - block_base, hashvalues);
112    int gid = gid = pbgs_symbol_table.Lookup_or_Insert_Name(source + start, hashvalue, lgth);
113    gids.push_back(gid);
114
115    symbol_stat.store_symbol_length(lgth);
116    symbol_stat.store_symbol_density(start, end);
117    symbol_stat.store_symbol_gid(gid);
118
119#if DEBUG
120    char* symbol = new char[lgth+1];
121    strncpy ( symbol, source + start, lgth );
122    symbol[lgth] ='\0';
123    printf ("%s | start: %i[%i] | end: %i[%i] | lgth: %i | hashvalue: %i | gid: %i | symbol: %s\n", __FUNCTION__, start, start - block_base, end, end - block_base, lgth, hashvalue, gid, symbol);
124#endif
125    return 0;
126}
127
128template<>
129inline int ElemEnd_grouping<false,17>(int end) {
130    int start = ScanBackwardPos (&elem_starts, end - block_base, previous_block_last_elem_start) + block_base;
131    int lgth = end - start;
132    int hashvalue = compute_hash_value(lgth, start - block_base, hashvalues);
133    int gid = pbgs_symbol_table.Lookup_or_Insert_Name(source + start, hashvalue, lgth);
134    gids.push_back(gid);
135#if DEBUG
136    char* symbol = new char[lgth+1];
137    strncpy ( symbol, source + start, lgth );
138    symbol[lgth] ='\0';
139    printf ("%s | start: %i[%i] | end: %i[%i] | lgth: %i | hashvalue: %i | gid: %i | symbol: %s\n", __FUNCTION__, start, start - block_base, end, end - block_base, lgth, hashvalue, gid, symbol);
140#endif
141    return 0;
142}
143
144template <bool allow_performance_check,int L>
145static inline void validate_block_length_grouping(BitBlockForwardIterator & start, int block_base) {
146
147    BitBlockForwardIterator end;
148    int block_pos;
149
150    while(start != end) {
151        block_pos = block_base + *start;
152        ElemEnd_grouping<allow_performance_check, L>(block_pos);
153        start++;
154    }
155}
156
157template<bool allow_performance_check>
158inline void postprocess_do_block(Dictionary& dictionary, Hash_data hash_data){
159
160    elem_starts = dictionary.Word_starts;
161    hashvalues[1] = hash_data.Hash_value;
162
163    if ( bitblock_has_bit(dictionary.Word_ends_1) )
164    {
165        BitBlockForwardIterator iter_length_grouping_1(&dictionary.Word_ends_1);
166        validate_block_length_grouping<allow_performance_check,1>(iter_length_grouping_1, block_base);
167    }
168
169    if ( bitblock_has_bit(dictionary.Word_ends_2) )
170    {
171        BitBlockForwardIterator iter_length_grouping_2(&dictionary.Word_ends_2);
172        validate_block_length_grouping<allow_performance_check,2>(iter_length_grouping_2, block_base);
173    }
174
175    if ( bitblock_has_bit(dictionary.Word_ends_3) )
176    {
177        BitBlockForwardIterator iter_length_grouping_3(&dictionary.Word_ends_3);
178        validate_block_length_grouping<allow_performance_check,3>(iter_length_grouping_3, block_base);
179    }
180
181    if ( bitblock_has_bit(dictionary.Word_ends_4) )
182    {
183        BitBlockForwardIterator iter_length_grouping_4(&dictionary.Word_ends_4);
184        validate_block_length_grouping<allow_performance_check,4>(iter_length_grouping_4, block_base);
185    }
186
187    if ( bitblock_has_bit(dictionary.Word_ends_5) )
188    {
189        BitBlockForwardIterator iter_length_grouping_5(&dictionary.Word_ends_5);
190        validate_block_length_grouping<allow_performance_check,5>(iter_length_grouping_5, block_base);
191    }
192
193    if ( bitblock_has_bit(dictionary.Word_ends_6) )
194    {
195        BitBlockForwardIterator iter_length_grouping_6(&dictionary.Word_ends_6);
196        validate_block_length_grouping<allow_performance_check,6>(iter_length_grouping_6, block_base);
197    }
198
199    if ( bitblock_has_bit(dictionary.Word_ends_7) )
200    {
201        BitBlockForwardIterator iter_length_grouping_7(&dictionary.Word_ends_7);
202        validate_block_length_grouping<allow_performance_check,7>(iter_length_grouping_7, block_base);
203    }
204
205    if ( bitblock_has_bit(dictionary.Word_ends_8) )
206    {
207        BitBlockForwardIterator iter_length_grouping_8(&dictionary.Word_ends_8);
208        validate_block_length_grouping<allow_performance_check,8>(iter_length_grouping_8, block_base);
209    }
210
211    if ( bitblock_has_bit(dictionary.Word_ends_9) )
212    {
213        BitBlockForwardIterator iter_length_grouping_9(&dictionary.Word_ends_9);
214        validate_block_length_grouping<allow_performance_check,9>(iter_length_grouping_9, block_base);
215    }
216
217    if ( bitblock_has_bit(dictionary.Word_ends_10) )
218    {
219        BitBlockForwardIterator iter_length_grouping_10(&dictionary.Word_ends_10);
220        validate_block_length_grouping<allow_performance_check,10>(iter_length_grouping_10, block_base);
221    }
222
223    if ( bitblock_has_bit(dictionary.Word_ends_11) )
224    {
225        BitBlockForwardIterator iter_length_grouping_11(&dictionary.Word_ends_11);
226        validate_block_length_grouping<allow_performance_check,11>(iter_length_grouping_11, block_base);
227    }
228
229    if ( bitblock_has_bit(dictionary.Word_ends_12) )
230    {
231        BitBlockForwardIterator iter_length_grouping_12(&dictionary.Word_ends_12);
232        validate_block_length_grouping<allow_performance_check,12>(iter_length_grouping_12, block_base);
233    }
234
235    if ( bitblock_has_bit(dictionary.Word_ends_13) )
236    {
237        BitBlockForwardIterator iter_length_grouping_13(&dictionary.Word_ends_13);
238        validate_block_length_grouping<allow_performance_check,13>(iter_length_grouping_13, block_base);
239    }
240
241    if ( bitblock_has_bit(dictionary.Word_ends_14) )
242    {
243        BitBlockForwardIterator iter_length_grouping_14(&dictionary.Word_ends_14);
244        validate_block_length_grouping<allow_performance_check,14>(iter_length_grouping_14, block_base);
245    }
246
247    if ( bitblock_has_bit(dictionary.Word_ends_15) )
248    {
249        BitBlockForwardIterator iter_length_grouping_15(&dictionary.Word_ends_15);
250        validate_block_length_grouping<allow_performance_check,15>(iter_length_grouping_15, block_base);
251    }
252
253    if ( bitblock_has_bit(dictionary.Word_ends_16) )
254    {
255        BitBlockForwardIterator iter_length_grouping_16(&dictionary.Word_ends_16);
256        validate_block_length_grouping<allow_performance_check,16>(iter_length_grouping_16, block_base);
257    }
258
259    if ( bitblock_has_bit(dictionary.Word_ends_17_and_longer) )
260    {
261        BitBlockForwardIterator iter_length_grouping_remaining(&dictionary.Word_ends_17_and_longer);
262        validate_block_length_grouping<allow_performance_check,17>(iter_length_grouping_remaining, block_base);
263    }
264
265    // Store the last starting position in case we hit boundary case
266    previous_block_last_elem_start = - count_reverse_zeroes (elem_starts);
267
268    //copy current hash value data as previous one.
269    memmove (&hashvalues[0], &hashvalues[1], 16);
270}
271
272template<bool allow_performance_check> void do_process(FILE *infile, FILE *outfile) {
273
274@decl
275
276  int buf_pos = 0;
277  int block_pos = 0;
278  int errpos = 0;
279  int chars_avail = 0;
280  int check_pos = 0;
281  int chars_read = 0;
282  BytePack buf[(BUFFER_SIZE+BLOCK_SIZE+OVERLAP_BUFSIZE*2)/sizeof(SIMD_type)];
283
284  char * srcbuf = ((char *) buf) + OVERLAP_BUFSIZE;
285  buffer_base = buf_pos;
286  source = srcbuf;
287
288  chars_read = fread((void *)srcbuf, 1, BUFFER_SIZE + OVERLAP_BUFSIZE, infile);
289  chars_avail = chars_read;
290  if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
291
292  Entity_Info * e = new Entity_Info;
293  e->AnalyzeSignature((unsigned char *)srcbuf);
294
295  if (e->content_start != 0) {
296        memmove(&srcbuf[0], &srcbuf[e->content_start], chars_read - e->content_start);
297        buf_pos = e->content_start;
298        buffer_base = buf_pos;
299        if (chars_avail == BUFFER_SIZE) {
300                chars_read = chars_read - e->content_start +
301                             fread(&srcbuf[chars_read-e->content_start], 1, e->content_start, infile);
302                chars_avail = chars_read;
303                if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
304        }
305        else {
306          chars_read -=e->content_start;
307          chars_avail -=e->content_start;
308        }
309  }
310
311@stream_stmts
312
313/* Full Buffers */
314
315    while (chars_avail == BUFFER_SIZE) {
316      if (allow_performance_check)
317      {
318        PERF_SEC_START(parser_timer);
319      }
320
321      for (int blk = 0; blk < SEGMENT_BLOCKS; blk++) {
322          block_base = blk*BLOCK_SIZE;
323          s2p_do_block((BytePack *) &srcbuf[block_base], basis_bits);
324          @block_stmts
325          postprocess_do_block<allow_performance_check> (dictionary, hash_data);
326      }
327
328      if (allow_performance_check)
329      {
330        PERF_SEC_END(parser_timer, chars_avail);
331      }
332      int bytes_left = chars_read - chars_avail;
333      memmove(buf, &srcbuf[BUFFER_SIZE - OVERLAP_BUFSIZE], bytes_left + OVERLAP_BUFSIZE);
334      chars_read = fread(&srcbuf[bytes_left],1, BUFFER_SIZE + OVERLAP_BUFSIZE - bytes_left, infile) + bytes_left;
335      chars_avail = chars_read;
336      if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
337      buf_pos += chars_avail;
338      buffer_base = buf_pos;
339    }
340/* Final Partial Buffer */
341    if (allow_performance_check)
342    {
343        PERF_SEC_START(parser_timer);
344    }
345
346    block_pos = 0;
347    int remaining = chars_avail;
348/* Full Blocks */
349    while (remaining >= BLOCK_SIZE) {
350          block_base = block_pos;
351          s2p_do_block((BytePack *) &srcbuf[block_pos], basis_bits);
352          @block_stmts
353          postprocess_do_block<allow_performance_check>(dictionary, hash_data);
354          block_pos += BLOCK_SIZE;
355          remaining -= BLOCK_SIZE;
356    }
357    block_base = block_pos;
358    if (remaining > 0 || @any_carry) {
359          EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-remaining));
360          s2p_do_final_block((BytePack *) &srcbuf[block_pos], basis_bits, EOF_mask);
361          @final_block_stmts
362          postprocess_do_block<allow_performance_check>(dictionary, hash_data);
363    }
364    buf_pos += chars_avail;
365    buffer_base = buf_pos;
366    if (allow_performance_check)
367    {
368        symbol_stat.store_symbol_density(buffer_base, -1);
369        PERF_SEC_END(parser_timer, chars_avail);
370    }
371}
Note: See TracBrowser for help on using the repository browser.