source: proto/SymbolTable/wcd_pbgs_div_template.cpp @ 2940

Last change on this file since 2940 was 1792, checked in by vla24, 8 years ago

Fixed macro names. I got L1 and L2 mixed up

File size: 11.3 KB
Line 
1#include "../common_definitions.h"
2#include <pbgs_div_symbol_table.h>
3
4#include "../wcd_common_functions.h"
5#include "../symtab_common_functions.h"
6#include "parser_common_functions_generated.h"
7
// Compile-time selection of the object handed to the PERF_SEC_* macros in
// main() and do_process(). Exactly one definition of parser_timer is compiled.
#ifdef BUFFER_PROFILING
        BOM_Table * parser_timer;

#elif CODE_CLOCKER
        // A single event is recorded; L2_PERF / L1_PERF choose which one,
        // otherwise PAPI_BR_MSP is used.
        #define NUM_EVENTS 1
#ifdef L2_PERF
        int Events[NUM_EVENTS] = {PAPI_L2_DCM};
#elif L1_PERF
        int Events[NUM_EVENTS] = {PAPI_L1_DCM};
#else
        int Events[NUM_EVENTS] = {PAPI_BR_MSP};
#endif
        int cal_size = 20;
        CC * parser_timer = new CC(Events,NUM_EVENTS,cal_size);
#else
        // No profiling backend selected.
        // NOTE(review): presumably only here so the PERF_SEC_* macro arguments
        // still name a declared variable - confirm against the macro definitions.
        void * parser_timer;
#endif
25
// Offset of the block currently being processed, relative to `source`
// (assigned in do_process() before each block is handled).
int block_base=0;
// Copy of do_process()'s buf_pos; in the visible part of this file it is only
// read by the DEBUG trace in ElemEnd_grouping().
int buffer_base=0;
// Not referenced in the visible part of this file.
int buffer_last;
// Start of the current input buffer (set to srcbuf in do_process()).
char * source;

// Copy of the current block's dictionary.Word_ends (set in postprocess_do_block()).
BitBlock elem_ends;
// Start of a long symbol whose end was not found in its own block; stored as
// start_pos - 16 - BLOCK_SIZE, i.e. relative to the block that follows.
int last_elem_start;
// True while such a symbol is waiting to be completed by the next block.
bool block_boundary_case = false;
// [1] receives the current block's hash data, [0] keeps the previous block's
// (see the memmove at the end of postprocess_do_block()).
BytePack hashvalues[2];

// One GID is appended for every symbol that is looked up or inserted.
vector <int> gids;
// Symbol table queried by the grouping helpers below.
PBGSDivSymbolTable pbgs_symbol_table;
38
39static inline void postprocess_do_block(Dictionary& dictionary, Hash_data hash_data);
40template<bool allow_performance_check> void do_process(FILE *infile, FILE *outfile);
41
42template <int L> static inline void validate_block_length_grouping(BitBlockForwardIterator & start, int block_base);
43
44static inline int ElemStart_grouping(int start_pos, int lgth); // lgth > 16
45template <int L> static inline int ElemEnd_grouping(int pos, int length);
46
47int main(int argc, char * argv[]) {
48    char * dictionaryfilename, * infilename, * outfilename;
49    FILE * dictionaryfile, *infile, *outfile;
50
51    getFilenames(argc, argv, dictionaryfilename, infilename, outfilename);
52    openInputOutputFiles(dictionaryfilename, infilename, outfilename,
53                         dictionaryfile, infile, outfile);
54
55    int greatest_GID_in_dictionary;
56    populateDictionary(dictionaryfile, outfile, gids, greatest_GID_in_dictionary);
57
58//      PERF_SEC_BIND(1);
59
60    PERF_SEC_INIT(parser_timer);
61
62    // store symbols form text to Symbol Table
63    do_process<true>(infile, outfile);
64
65    PERF_SEC_DUMP(parser_timer);
66
67    PERF_SEC_DESTROY(parser_timer);
68
69    // gather dictionary statistics
70    int totalKnownWordsInDictionary, totalUnknownWordsInDictionary;
71    wordCountInDictionary(greatest_GID_in_dictionary, gids, totalUnknownWordsInDictionary, totalKnownWordsInDictionary);
72    printWordCountInDictionary(totalUnknownWordsInDictionary, totalKnownWordsInDictionary);
73
74#if PRINT_SYMBOL_DISTRIBUTION
75//    print_GIDS();
76    pbgs_symbol_table.Print_Symbol_Table_Distribution();
77#endif
78
79    fclose(dictionaryfile);
80    fclose(infile);
81    fclose(outfile);
82
83    return(0);
84}
85
86// length in [1,16]
87template <int L>
88static inline int ElemEnd_grouping(int end) {
89    int start = end - L;
90    int hashvalue = compute_hash_value(L, start - block_base, hashvalues);
91    int gid = pbgs_symbol_table.Lookup_or_Insert_Name<L>(source + start, hashvalue);
92    gids.push_back(gid);
93#if DEBUG
94    char* symbol = new char[L+1];
95    strncpy ( symbol, source + start, L );
96    symbol[L] ='\0';
97    printf ("%s | start: %i[%i] | end: %i[%i] | lgth: %i | gid: %i | hashvalue: %i | symbol: %s\n", __FUNCTION__, start, start-buffer_base, end, end-buffer_base, L, gid, hashvalue, symbol );
98    delete symbol; symbol = 0;
99#endif
100    return 0;
101}
102
103// length > 16
104static inline int ElemStart_grouping(int start, int lgth) {
105    int hashvalue = compute_hash_value(lgth, start - block_base, hashvalues);
106    int gid = pbgs_symbol_table.Lookup_or_Insert_Name(source + start, hashvalue, lgth);
107    gids.push_back(gid);
108#if DEBUG
109    char* symbol = new char[lgth+1];
110    strncpy ( symbol, source + start, lgth );
111    symbol[lgth] ='\0';
112    printf ("%s | start: %i[%i] | lgth: %i | hashvalue: %i | gid: %i | symbol: %s\n", __FUNCTION__, start, start - block_base, lgth, hashvalue, gid, symbol);
113#endif
114    return 0;
115}
116
117// L = 2, pass in bitstream for symbols length [1,2]
118// L = 4, pass in bitstream for symbols length [3,4]
119// L = 6, pass in bitstream for symbols length [5,6]
120// L = 8, pass in bitstream for symbols length [7,8]
121// L = 10, pass in bitstream for symbols length [9,10]
122// L = 12, pass in bitstream for symbols length [11,12]
123// L = 14, pass in bitstream for symbols length [13,14]
124// L = 16, pass in bitstream for symbols length [15,16]
125// L = 17, pass in bitstream for symbols length longer than 16
126template <int L>
127static inline void validate_block_length_grouping(BitBlockForwardIterator & start, int block_base) {
128
129    BitBlockForwardIterator end;
130    int end_pos;
131
132    while(start != end) {
133        end_pos = *start;
134        ElemEnd_grouping<L>(end_pos + block_base);
135        start++;
136    }
137}
138
139template <>
140inline void validate_block_length_grouping<17>(BitBlockForwardIterator & start, int block_base) {
141
142    BitBlockForwardIterator end;
143    int start_pos, end_pos;
144
145    while(start != end) {
146        start_pos = *start;
147        end_pos = ScanForwardPos (&elem_ends, start_pos);
148        if (end_pos)
149        {
150            ElemStart_grouping(start_pos - 16 + block_base, end_pos - start_pos + 16);
151        }
152        else
153        {
154#if DEBUG
155            printf ("There is no more 1 bit in the block. pos: %i | sym: %c%c[%c]\n", start_pos,
156                    source[start_pos + block_base-2], source[start_pos + block_base-1], source[start_pos + block_base]);
157#endif
158            //handle boundary case
159            block_boundary_case = true;
160            last_elem_start = start_pos - 16 - BLOCK_SIZE;
161#if DEBUG
162            printf ("last_elem_start: %i\n", last_elem_start);
163#endif
164        }
165        start++;
166    }
167}
168
169static inline void postprocess_do_block(Dictionary& dictionary, Hash_data hash_data){
170
171    elem_ends = dictionary.Word_ends;
172    hashvalues[1] = hash_data.Hash_value;
173
174    // Check for block boundary case for length 16 and above
175    if (block_boundary_case)
176    {
177#if DEBUG
178        printf ("block boundary case! Special handle!\n");
179#endif
180        int lgth = count_forward_zeroes(elem_ends)-last_elem_start;
181        int start = block_base + last_elem_start;
182        int hashvalue = compute_hash_value(lgth, last_elem_start, hashvalues);
183        int gid = pbgs_symbol_table.Lookup_or_Insert_Name(source + start, hashvalue, lgth);
184        gids.push_back(gid);
185#if DEBUG
186        printf ("%s | start: %i[%i] | lgth: %i | hashvalue: %i | gid: %i \n", __FUNCTION__, start, start - block_base, lgth, hashvalue, gid);
187#endif
188        block_boundary_case = false;
189    }
190
191    if ( bitblock_has_bit(dictionary.Word_ends_1_to_2) )
192    {
193        BitBlockForwardIterator iter_length_grouping_2(&dictionary.Word_ends_1_to_2);
194        validate_block_length_grouping<2>(iter_length_grouping_2, block_base);
195    }
196
197    if ( bitblock_has_bit(dictionary.Word_ends_3_to_4) )
198    {
199        BitBlockForwardIterator iter_length_grouping_4(&dictionary.Word_ends_3_to_4);
200        validate_block_length_grouping<4>(iter_length_grouping_4, block_base);
201    }
202
203    if ( bitblock_has_bit(dictionary.Word_ends_5_to_6) )
204    {
205        BitBlockForwardIterator iter_length_grouping_6(&dictionary.Word_ends_5_to_6);
206        validate_block_length_grouping<6>(iter_length_grouping_6, block_base);
207    }
208
209    if ( bitblock_has_bit(dictionary.Word_ends_7_to_8) )
210    {
211        BitBlockForwardIterator iter_length_grouping_8(&dictionary.Word_ends_7_to_8);
212        validate_block_length_grouping<8>(iter_length_grouping_8, block_base);
213    }
214
215    if ( bitblock_has_bit(dictionary.Word_ends_9_to_10) )
216    {
217        BitBlockForwardIterator iter_length_grouping_10(&dictionary.Word_ends_9_to_10);
218        validate_block_length_grouping<10>(iter_length_grouping_10, block_base);
219    }
220
221    if ( bitblock_has_bit(dictionary.Word_ends_11_to_12) )
222    {
223        BitBlockForwardIterator iter_length_grouping_12(&dictionary.Word_ends_11_to_12);
224        validate_block_length_grouping<12>(iter_length_grouping_12, block_base);
225    }
226
227    if ( bitblock_has_bit(dictionary.Word_ends_13_to_14) )
228    {
229        BitBlockForwardIterator iter_length_grouping_14(&dictionary.Word_ends_13_to_14);
230        validate_block_length_grouping<14>(iter_length_grouping_14, block_base);
231    }
232
233    if ( bitblock_has_bit(dictionary.Word_ends_15_to_16) )
234    {
235        BitBlockForwardIterator iter_length_grouping_16(&dictionary.Word_ends_15_to_16);
236        validate_block_length_grouping<16>(iter_length_grouping_16, block_base);
237    }
238
239    if ( bitblock_has_bit(dictionary.Word_remaining_ends) )
240    {
241        BitBlockForwardIterator iter_length_grouping_remaining(&dictionary.Word_remaining_ends);
242        validate_block_length_grouping<17>(iter_length_grouping_remaining, block_base);
243    }
244
245    //copy current hash value data as previous one.
246    memmove (&hashvalues[0], &hashvalues[1], 16);
247}
248
249template<bool allow_performance_check> void do_process(FILE *infile, FILE *outfile) {
250
251@decl
252
253  int buf_pos = 0;
254  int block_pos = 0;
255  int errpos = 0;
256  int chars_avail = 0;
257  int check_pos = 0;
258  int chars_read = 0;
259  BytePack buf[(BUFFER_SIZE+BLOCK_SIZE+OVERLAP_BUFSIZE*2)/sizeof(SIMD_type)];
260
261  char * srcbuf = ((char *) buf) + OVERLAP_BUFSIZE;
262  buffer_base = buf_pos;
263  source = srcbuf;
264
265  chars_read = fread((void *)srcbuf, 1, BUFFER_SIZE + OVERLAP_BUFSIZE, infile);
266  chars_avail = chars_read;
267  if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
268
269  Entity_Info * e = new Entity_Info;
270  e->AnalyzeSignature((unsigned char *)srcbuf);
271
272  if (e->content_start != 0) {
273        memmove(&srcbuf[0], &srcbuf[e->content_start], chars_read - e->content_start);
274        buf_pos = e->content_start;
275        buffer_base = buf_pos;
276        if (chars_avail == BUFFER_SIZE) {
277                chars_read = chars_read - e->content_start +
278                             fread(&srcbuf[chars_read-e->content_start], 1, e->content_start, infile);
279                chars_avail = chars_read;
280                if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
281        }
282        else {
283          chars_read -=e->content_start;
284          chars_avail -=e->content_start;
285        }
286  }
287
288@stream_stmts
289
290/* Full Buffers */
291
292    while (chars_avail == BUFFER_SIZE) {
293      if (allow_performance_check)
294      {
295        PERF_SEC_START(parser_timer);
296      }
297
298      for (int blk = 0; blk < SEGMENT_BLOCKS; blk++) {
299          block_base = blk*BLOCK_SIZE;
300          s2p_do_block((BytePack *) &srcbuf[block_base], basis_bits);
301          @block_stmts
302          postprocess_do_block(dictionary, hash_data);
303      }
304
305      if (allow_performance_check)
306      {
307#ifndef CACHE_PERF
308         PERF_SEC_END(parser_timer, chars_avail);
309#else
310         PERF_SEC_END(parser_timer, 1);
311#endif
312      }
313      int bytes_left = chars_read - chars_avail;
314      memmove(buf, &srcbuf[BUFFER_SIZE - OVERLAP_BUFSIZE], bytes_left + OVERLAP_BUFSIZE);
315      chars_read = fread(&srcbuf[bytes_left],1, BUFFER_SIZE + OVERLAP_BUFSIZE - bytes_left, infile) + bytes_left;
316      chars_avail = chars_read;
317      if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
318      buf_pos += chars_avail;
319      buffer_base = buf_pos;
320    }
321/* Final Partial Buffer */
322    if (allow_performance_check)
323    {
324        PERF_SEC_START(parser_timer);
325    }
326
327    block_pos = 0;
328    int remaining = chars_avail;
329/* Full Blocks */
330    while (remaining >= BLOCK_SIZE) {
331          block_base = block_pos;
332          s2p_do_block((BytePack *) &srcbuf[block_pos], basis_bits);
333          @block_stmts
334          postprocess_do_block(dictionary, hash_data);
335          block_pos += BLOCK_SIZE;
336          remaining -= BLOCK_SIZE;
337    }
338    block_base = block_pos;
339    if (remaining > 0 || @any_carry) {
340          EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-remaining));
341          s2p_do_final_block((BytePack *) &srcbuf[block_pos], basis_bits, EOF_mask);
342          @final_block_stmts
343          postprocess_do_block(dictionary, hash_data);
344    }
345    buf_pos += chars_avail;
346    buffer_base = buf_pos;
347    if (allow_performance_check)
348    {
349#ifndef CACHE_PERF
350         PERF_SEC_END(parser_timer, chars_avail);
351#else
352         PERF_SEC_END(parser_timer, 1);
353#endif
354    }
355}
Note: See TracBrowser for help on using the repository browser.