source: proto/SymbolTable/wcd_ls_template.cpp @ 1795

Last change on this file since 1795 was 1795, checked in by vla24, 8 years ago

updated wcd template implementation for Ken's symbol table.

File size: 7.1 KB
Line 
1// WARNING: This implementation does not run correctly; it will give the wrong result and/or crash.
2//#error "This implementation does not run, it will give the wrong result and/or crash."
3
4#define USE_LS_SYMBOL_TABLE
5
6#include "../common_definitions.h"
7#include <ls_symbol_table.h>
8
9#include "../wcd_common_functions.h"
10#include "../symtab_common_functions.h"
11#include "parser_common_functions_generated.h"
12
// Profiling handle passed to the PERF_SEC_INIT/START/END/DUMP/DESTROY macros
// used in main() and do_process(). Its concrete type depends on which
// profiling option is selected at compile time; with neither option it is an
// untyped placeholder pointer.
13#ifdef BUFFER_PROFILING
14        BOM_Table * parser_timer;
15
16#elif CODE_CLOCKER
        // Event list and calibration size handed to the CC constructor below.
        // NOTE(review): PAPI_TOT_CYC is presumably the PAPI total-cycles
        // counter -- confirm against the PAPI headers.
17        #define NUM_EVENTS 1
18        int Events[NUM_EVENTS] = {PAPI_TOT_CYC};
19        //int Events[NUM_EVENTS] = {PAPI_L2_DCM};
20        //int Events[NUM_EVENTS] = {PAPI_TOT_CYC, PAPI_BR_MSP};
21        int cal_size = 20;
22        CC * parser_timer = new CC(Events,NUM_EVENTS,cal_size);
23#else
24        void * parser_timer;
25#endif
26
// Offset of the block currently being processed within the source buffer;
// set by do_process() and passed to validate_block() in postprocess_do_block().
27int block_base=0;
// Copy of do_process()'s running buf_pos. ElemStrt_check()/ElemEnd_check()
// add it to the positions they queue, and do_symbol_table_lookup() subtracts
// it again to index into `source`.
28int buffer_base=0;
// Points at do_process()'s srcbuf; read by do_symbol_table_lookup().
29char * source;
30
// Queued start / end positions (buffer_base + pos) pushed by ElemStrt_check()
// and ElemEnd_check(), consumed pairwise by do_symbol_table_lookup().
31queue <size_t> elem_starts_buf;
32queue <size_t> elem_ends_buf;
// Filled from ls_symbol_table.get_flattened_gids() at the end of do_process()
// when its finalize_gids template argument is true.
33vector <int> gids;
// Symbol table that do_symbol_table_lookup() inserts every symbol into.
34LSSymbolTable ls_symbol_table;
35
36static inline void postprocess_do_block(Dictionary& dictionary);
37
38template<bool allow_performance_check, bool finalize_gids> void do_process(FILE *infile, FILE *outfile);
39
40static inline void do_symbol_table_lookup();
41
42int main(int argc, char * argv[]) {
43    char * dictionaryfilename, * infilename, * outfilename;
44    FILE * dictionaryfile, *infile, *outfile;
45
46    getFilenames(argc, argv, dictionaryfilename, infilename, outfilename);
47    openInputOutputFiles(dictionaryfilename, infilename, outfilename,
48                         dictionaryfile, infile, outfile);
49    int greatest_GID_in_dictionary;
50//    populateDictionary(dictionaryfile, outfile, gids, greatest_GID_in_dictionary);
51
52//    ls_symbol_table.clear();
53//    rewind (dictionaryfile);
54//    do_process<true, false>(dictionaryfile, outfile);
55
56//      PERF_SEC_BIND(1);
57
58    PERF_SEC_INIT(parser_timer);
59
60    // store symbols form text to Symbol Table
61    do_process<true, true>(infile, outfile);
62
63    PERF_SEC_DUMP(parser_timer);
64
65    PERF_SEC_DESTROY(parser_timer);
66
67    //print_GIDS(ls_symbol_table);
68
69    // gather dictionary statistics
70//    int totalKnownWordsInDictionary, totalUnknownWordsInDictionary;
71//    wordCountInDictionary(greatest_GID_in_dictionary, gids, totalUnknownWordsInDictionary, totalKnownWordsInDictionary);
72//    printWordCountInDictionary(totalUnknownWordsInDictionary, totalKnownWordsInDictionary);
73
74    fclose(dictionaryfile);
75    fclose(infile);
76    fclose(outfile);
77
78#if PRINT_SYMBOL_DISTRIBUTION
79        print_GIDS(ls_symbol_table);
80#endif
81
82        return(0);
83}
84
85static inline int ElemStrt_check(int pos) {
86        elem_starts_buf.push(buffer_base + pos);
87        return 0;
88}
89
90static inline int ElemEnd_check(int pos) {
91        elem_ends_buf.push(buffer_base + pos);
92        return 0;
93}
94
95static inline void do_symbol_table_lookup()
96{
97    while( !elem_starts_buf.empty() && !elem_ends_buf.empty() )
98    {
99        int start = elem_starts_buf.front();
100        int end = elem_ends_buf.front();
101        elem_starts_buf.pop();
102        elem_ends_buf.pop();
103        int length = end - start;
104
105        //lookup or insert to symbol table
106#if DEBUG
107        char* symbol = new char[length+1];
108        strncpy ( symbol, source + start - buffer_base, length );
109        symbol[length] ='\0';
110        printf ("start: %i[%i] | end: %i[%i] | length: %i | symbol: %s\n", start, start-buffer_base, end, end-buffer_base, length, symbol );
111
112        delete symbol; symbol = 0;
113#endif
114
115        ls_symbol_table.put((unsigned char*)source + start - buffer_base, length);
116    }
117}
118
119static inline void postprocess_do_block(Dictionary& dictionary){
120
121    if ( bitblock_has_bit(dictionary.Word_starts))
122    {
123        BitBlockForwardIterator iter_length_grouping_starts(&dictionary.Word_starts);
124        validate_block(iter_length_grouping_starts, block_base, ElemStrt_check);
125    }
126
127    if ( bitblock_has_bit(dictionary.Word_ends) )
128    {
129        BitBlockForwardIterator iter_length_grouping_ends(&dictionary.Word_ends);
130        validate_block(iter_length_grouping_ends, block_base, ElemEnd_check);
131    }
132
133    do_symbol_table_lookup();
134}
135
136template<bool allow_performance_check, bool finalize_gids> void do_process(FILE *infile, FILE *outfile) {
137
138@decl
139
140  int buf_pos = 0;
141  int block_pos = 0;
142  int errpos = 0;
143  int chars_avail = 0;
144  int check_pos = 0;
145  int chars_read = 0;
146  BytePack buf[(BUFFER_SIZE+BLOCK_SIZE+OVERLAP_BUFSIZE*2)/sizeof(SIMD_type)];
147
148  char * srcbuf = ((char *) buf) + OVERLAP_BUFSIZE;
149  buffer_base = buf_pos;
150  source = srcbuf;
151
152  chars_read = fread((void *)srcbuf, 1, BUFFER_SIZE + OVERLAP_BUFSIZE, infile);
153  chars_avail = chars_read;
154  if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
155
156  Entity_Info * e = new Entity_Info;
157  e->AnalyzeSignature((unsigned char *)srcbuf);
158
159  if (e->content_start != 0) {
160        memmove(&srcbuf[0], &srcbuf[e->content_start], chars_read - e->content_start);
161        buf_pos = e->content_start;
162        buffer_base = buf_pos;
163        if (chars_avail == BUFFER_SIZE) {
164                chars_read = chars_read - e->content_start +
165                             fread(&srcbuf[chars_read-e->content_start], 1, e->content_start, infile);
166                chars_avail = chars_read;
167                if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
168        }
169        else {
170          chars_read -=e->content_start;
171          chars_avail -=e->content_start;
172        }
173  }
174
175@stream_stmts
176
177/* Full Buffers */
178
179    while (chars_avail == BUFFER_SIZE) {
180      if (allow_performance_check)
181      {
182        PERF_SEC_START(parser_timer);
183      }
184      for (int blk = 0; blk < SEGMENT_BLOCKS; blk++) {
185          block_base = blk*BLOCK_SIZE;
186          s2p_do_block((BytePack *) &srcbuf[block_base], basis_bits);
187          @block_stmts
188          postprocess_do_block(dictionary);
189      }
190      if (allow_performance_check)
191      {
192        PERF_SEC_END(parser_timer, chars_avail);
193      }
194
195      int bytes_left = chars_read - chars_avail;
196      memmove(buf, &srcbuf[BUFFER_SIZE - OVERLAP_BUFSIZE], bytes_left + OVERLAP_BUFSIZE);
197      chars_read = fread(&srcbuf[bytes_left],1, BUFFER_SIZE + OVERLAP_BUFSIZE - bytes_left, infile) + bytes_left;
198      chars_avail = chars_read;
199      if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
200      buf_pos += chars_avail;
201      buffer_base = buf_pos;
202    }
203/* Final Partial Buffer */
204    if (allow_performance_check)
205    {
206      PERF_SEC_START(parser_timer);
207    }
208
209    block_pos = 0;
210    int remaining = chars_avail;
211/* Full Blocks */
212    while (remaining >= BLOCK_SIZE) {
213          block_base = block_pos;
214          s2p_do_block((BytePack *) &srcbuf[block_pos], basis_bits);
215          @block_stmts
216          postprocess_do_block(dictionary);
217          block_pos += BLOCK_SIZE;
218          remaining -= BLOCK_SIZE;
219    }
220    block_base = block_pos;
221    if (remaining > 0 || @any_carry) {
222          EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-remaining));
223          s2p_do_final_block((BytePack *) &srcbuf[block_pos], basis_bits, EOF_mask);
224          @final_block_stmts
225          postprocess_do_block(dictionary);
226    }
227    buf_pos += chars_avail;
228    buffer_base = buf_pos;
229
230    if (finalize_gids)
231    {
232        ls_symbol_table.bind();
233        ls_symbol_table.finalize();
234    }
235
236    if (allow_performance_check)
237    {
238      PERF_SEC_END(parser_timer, chars_avail);
239    }
240
241    // get gids
242    if (finalize_gids)
243    {
244        gids = ls_symbol_table.get_flattened_gids();
245    }
246}
Note: See TracBrowser for help on using the repository browser.