source: proto/SymbolTable/symtab_pbgs_log_template.cpp @ 1721

Last change on this file since 1721 was 1721, checked in by vla24, 8 years ago

SymbolTable: completed dictionary implementation and refactored templates

File size: 15.1 KB
Line 
1#define USE_MASK_COMPARE    //Comparison using masking technique.
2
3#include "../common_definitions.h"
4#include <pbgs_log_symbol_table.h>
5
6#include "../symtab_common_functions.h"
7#include "../xmlwf_common_functions.h"
8
9#ifdef BUFFER_PROFILING
10        BOM_Table * parser_timer;
11
12#elif CODE_CLOCKER
13        #define NUM_EVENTS 1
14        int Events[NUM_EVENTS] = {PAPI_TOT_CYC};
15        //int Events[NUM_EVENTS] = {PAPI_L2_DCM};
16        //int Events[NUM_EVENTS] = {PAPI_TOT_CYC, PAPI_BR_MSP};
17        int cal_size = 20;
18        CC * parser_timer = new CC(Events,NUM_EVENTS,cal_size);
19#else
20        void * parser_timer;
21#endif
22
23int block_base=0;    // offset of the block currently being processed inside the source buffer (assigned in do_process)
24int buffer_base=0;    // running input position; always assigned from buf_pos in do_process
25int buffer_last;    // not referenced anywhere in the visible part of this file
26
27TagMatcher matcher;    // receives the source pointer (setSrc), the per-block streams (store_streams) and the per-buffer scans (StreamScan)
28
29BitBlock elem_starts;    // copy of tag_Callouts.ElemName_starts for the block being post-processed
30int previous_block_last_elem_start;    // set after every block to -count_reverse_zeroes(elem_starts) and handed to ScanBackwardPos
31BytePack hashvalues[2];    // [1] = hash data of the current block, [0] = copy of the previous block's hash data
32
33vector <int> gids;    // gids returned by the symbol-table lookups, appended in the order the lookups are made
34PBGSLogSymbolTable pbgs_symbol_table;    // symbol table that element names are looked up in / inserted into
35
36static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, Tag_Callouts & tag_Callouts, Hash_data & hash_data, int chars_avail);
37
38void do_process(FILE *infile, FILE *outfile);
39
40template <int L> static inline void validate_block_length_grouping(BitBlockForwardIterator & start, int block_base);
41
42template <int L> static inline int ElemEnd_grouping(int pos, int length);
43
44int main(int argc, char * argv[]) {
45    char * infilename, * outfilename;
46    FILE *infile, *outfile;
47
48    getFilenames(argc, argv, infilename, outfilename);
49    openInputOutputFiles(infilename, outfilename,
50                         infile, outfile);
51
52//      PERF_SEC_BIND(1);
53
54    PERF_SEC_INIT(parser_timer);
55
56    // store symbols form text to Symbol Table
57    do_process(infile, outfile);
58
59    PERF_SEC_DUMP(parser_timer);
60
61    PERF_SEC_DESTROY(parser_timer);
62
63    fclose(infile);
64    fclose(outfile);
65
66#if PRINT_SYMBOL_DISTRIBUTION
67//    print_GIDS();
68    pbgs_symbol_table.Print_Symbol_Table_Distribution();
69#endif
70    return 0;
71}
72
/**
 * Generic fallback of the per-length-group name handler.
 *
 * The real work is done by the explicit specialisations in this file
 * (L = 1, 2, 4, 8, 16 and 17); for any other L this version ignores both
 * arguments and reports 0.
 *
 * @return always 0
 */
template <int L>
static inline int ElemEnd_grouping(int /* pos */, int /* length */) {
    return 0;
}
77
78// length = 1
79template <>
80inline int ElemEnd_grouping<1>(int pos, int length) {
81    int start = block_base + pos - length;
82    int hashvalue = compute_hash_value(length, start - block_base, hashvalues);
83    int gid = pbgs_symbol_table.Lookup_or_Insert_Name_1(source + start, hashvalue);
84    gids.push_back(gid);
85#if DEBUG
86    int L = length;
87    int end = start - L;
88    char* symbol = new char[L+1];
89    strncpy ( symbol, source + start, L );
90    symbol[L] ='\0';
91    printf ("%s | start: %i[%i] | end: %i[%i] | gid: %i | hashvalue: %i | symbol: %s\n", __FUNCTION__, start, start-buffer_base, end, end-buffer_base, gid, hashvalue, symbol );
92    delete symbol; symbol = 0;
93#endif
94    return 0;
95}
96
97// length = 2
98template <>
99inline int ElemEnd_grouping<2>(int pos, int length) {
100    int start = block_base + pos - length;
101    int hashvalue = compute_hash_value(length, start - block_base, hashvalues);
102    int gid = pbgs_symbol_table.Lookup_or_Insert_Name_2(source + start, hashvalue);
103    gids.push_back(gid);
104#if DEBUG
105    int L = 2;
106    int end = start - L;
107    char* symbol = new char[L+1];
108    strncpy ( symbol, source + start, L );
109    symbol[L] ='\0';
110    printf ("%s | start: %i[%i] | end: %i[%i] | gid: %i | hashvalue: %i | symbol: %s\n", __FUNCTION__, start, start-buffer_base, end, end-buffer_base, gid, hashvalue, symbol );
111    delete symbol; symbol = 0;
112#endif
113    return 0;
114}
115
116
117// length in [3,4]
118template <>
119inline int ElemEnd_grouping<4>(int pos, int L) {
120    int start = pos + block_base;
121    int hashvalue = compute_hash_value(L, pos, hashvalues);
122    int gid = pbgs_symbol_table.Lookup_or_Insert_Name_4(source + start, hashvalue, L);
123    gids.push_back(gid);
124#if DEBUG
125    int end = start + L;
126    char* symbol = new char[L+1];
127    strncpy ( symbol, source + start, L );
128    symbol[L] ='\0';
129    printf ("%s | start: %i[%i] | end: %i[%i] | gid: %i | hashvalue: %i | symbol: %s\n", __FUNCTION__, start, start-buffer_base, end, end-buffer_base, gid, hashvalue, symbol );
130    delete symbol; symbol = 0;
131#endif
132    return 0;
133}
134
135// length in [5,8]
136template <>
137inline int ElemEnd_grouping<8>(int pos,  int L) {
138    int start = pos + block_base;
139    int hashvalue = compute_hash_value(L, pos, hashvalues);
140    int gid = pbgs_symbol_table.Lookup_or_Insert_Name_8(source + start, hashvalue, L);
141    gids.push_back(gid);
142#if DEBUG
143    int end = start + L;
144    char* symbol = new char[L+1];
145    strncpy ( symbol, source + start, L );
146    symbol[L] ='\0';
147    printf ("%s | start: %i[%i] | end: %i[%i] | gid: %i | hashvalue: %i | symbol: %s\n", __FUNCTION__, start, start-buffer_base, end, end-buffer_base, gid, hashvalue, symbol );
148    delete symbol; symbol = 0;
149#endif
150    return 0;
151}
152
153// length in [9,16]
154template <>
155inline int ElemEnd_grouping<16>(int pos, int L) {
156    int start = pos + block_base;
157    int hashvalue = compute_hash_value(L, pos, hashvalues);
158    int gid = pbgs_symbol_table.Lookup_or_Insert_Name_16(source + start, hashvalue, L);
159    gids.push_back(gid);
160#if DEBUG
161    int end = start + L;
162    char* symbol = new char[L+1];
163    strncpy ( symbol, source + start, L );
164    symbol[L] ='\0';
165    printf ("%s | start: %i[%i] | end: %i[%i] | gid: %i | hashvalue: %i | symbol: %s\n", __FUNCTION__, start, start-buffer_base, end, end-buffer_base, gid, hashvalue, symbol );
166    delete symbol; symbol = 0;
167#endif
168    return 0;
169}
170
171// length > 16
172template <>
173inline int ElemEnd_grouping<17>(int pos, int lgth) {
174    int start = pos + block_base;
175    int hashvalue = compute_hash_value(lgth, start - block_base, hashvalues);
176    int gid = 0;
177
178//    if (lgth < 32)
179//    {
180//      gid = pbgs_symbol_table.Lookup_or_Insert_Name_32(source + start, hashvalue, lgth);
181//    }
182//    else
183    {
184        gid = pbgs_symbol_table.Lookup_or_Insert_Name(source + start, hashvalue, lgth);
185    }
186    gids.push_back(gid);
187#if DEBUG
188    char* symbol = new char[lgth+1];
189    strncpy ( symbol, source + start, lgth );
190    symbol[lgth] ='\0';
191    printf ("%s | start: %i[%i] | lgth: %i | hashvalue: %i | gid: %i | symbol: %s\n", __FUNCTION__, start, start - block_base, lgth, hashvalue, gid, symbol);
192#endif
193    return 0;
194}
195
196// L = 4, pass in bitstream for symbols length [3,4]
197// L = 8, pass in bitstream for symbols length [5,8]
198// L = 16, pass in bitstream for symbols length [9,16]
199// L = 17, pass in bitstream for symbols length longer than 16
200template <int L>
201static inline void validate_block_length_grouping(BitBlockForwardIterator & start, int block_base) {
202
203    BitBlockForwardIterator end;
204    int end_pos, start_pos, length;
205
206    while(start != end) {
207        end_pos = /*block_base + */*start;
208        start_pos = ScanBackwardPos (&elem_starts, end_pos, previous_block_last_elem_start);
209        length = end_pos - start_pos;
210        ElemEnd_grouping<L>(start_pos, length);
211        start++;
212    }
213}
214
215// pass in bitstream for symbols length 1
216template <>
217inline void validate_block_length_grouping<1>(BitBlockForwardIterator & start, int block_base) {
218
219    BitBlockForwardIterator end;
220    int end_pos, start_pos, length;
221
222    while(start != end) {
223        end_pos = /*block_base + */*start;
224        start_pos = ScanBackwardPos (&elem_starts, end_pos, previous_block_last_elem_start);
225        length = end_pos - start_pos;
226        ElemEnd_grouping<1>(end_pos, 1);
227        start++;
228    }
229}
230
231// pass in bitstream for symbols length 2
232template <>
233inline void validate_block_length_grouping<2>(BitBlockForwardIterator & start, int block_base) {
234
235    BitBlockForwardIterator end;
236    int end_pos, start_pos, length;
237
238    while(start != end) {
239        end_pos = /*block_base + */*start;
240        start_pos = ScanBackwardPos (&elem_starts, end_pos, previous_block_last_elem_start);
241        length = end_pos - start_pos;
242        ElemEnd_grouping<2>(end_pos, 2);
243        start++;
244    }
245}
246
247static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, Tag_Callouts & tag_Callouts, Hash_data & hash_data, int chars_avail){
248
249    tracker.StoreNewlines(lex.LF);
250
251    elem_starts = tag_Callouts.ElemName_starts;
252    hashvalues[1] = hash_data.Hash_value;
253
254    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_1) )
255    {
256        BitBlockForwardIterator iter_length_grouping_1(&tag_Callouts.ElemName_ends_1);
257        validate_block_length_grouping<1>(iter_length_grouping_1, block_base);
258    }
259
260    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_2) )
261    {
262        BitBlockForwardIterator iter_length_grouping_2(&tag_Callouts.ElemName_ends_2);
263        validate_block_length_grouping<2>(iter_length_grouping_2, block_base);
264    }
265
266    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_3_to_4) )
267    {
268        BitBlockForwardIterator iter_length_grouping_4(&tag_Callouts.ElemName_ends_3_to_4);
269        validate_block_length_grouping<4>(iter_length_grouping_4, block_base);
270    }
271
272    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_5_to_8) )
273    {
274        BitBlockForwardIterator iter_length_grouping_8(&tag_Callouts.ElemName_ends_5_to_8);
275        validate_block_length_grouping<8>(iter_length_grouping_8, block_base);
276    }
277
278    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_9_to_16) )
279    {
280        BitBlockForwardIterator iter_length_grouping_16(&tag_Callouts.ElemName_ends_9_to_16);
281        validate_block_length_grouping<16>(iter_length_grouping_16, block_base);
282    }
283
284    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_17_and_longer) )
285    {
286        BitBlockForwardIterator iter_length_grouping_remaining(&tag_Callouts.ElemName_ends_17_and_longer);
287        validate_block_length_grouping<17>(iter_length_grouping_remaining, block_base);
288    }
289
290    // Store the last starting position in case we hit boundary case
291    previous_block_last_elem_start = - count_reverse_zeroes (elem_starts);
292
293    //copy current hash value data as previous one.
294    memmove (&hashvalues[0], &hashvalues[1], 16);
295
296    if (bitblock_has_bit(simd_or(check_streams.non_ascii_name_starts, check_streams.non_ascii_names))) {
297        BitBlockForwardIterator iter_NameStrt_check(&check_streams.non_ascii_name_starts);
298        validate_block(iter_NameStrt_check, block_base, NameStrt_check);
299        BitBlockForwardIterator iter_Name_check(&check_streams.non_ascii_names);
300        validate_block(iter_Name_check, block_base, Name_check);
301    }
302    if (bitblock_has_bit(ctCDPI_Callouts.PI_name_starts)){
303        BitBlockForwardIterator iter_PI_name_starts(&(ctCDPI_Callouts.PI_name_starts));
304        validate_block(iter_PI_name_starts, block_base, buffer_base, PIName_check);
305    }
306    if (bitblock_has_bit(ctCDPI_Callouts.CD_starts)){
307        BitBlockForwardIterator iter_CD_check(&ctCDPI_Callouts.CD_starts);
308        validate_block(iter_CD_check, block_base, CD_check);
309    }
310    if(bitblock_has_bit(ref_Callouts.GenRef_starts)){
311        BitBlockForwardIterator iter_GenRef_check(&ref_Callouts.GenRef_starts);
312        validate_block(iter_GenRef_check, block_base, GenRef_check);
313    }
314    if(bitblock_has_bit(ref_Callouts.DecRef_starts)){
315        BitBlockForwardIterator iter_DecRef_check(&ref_Callouts.DecRef_starts);
316        validate_block(iter_DecRef_check, block_base, DecRef_check);
317    }
318    if(bitblock_has_bit(ref_Callouts.HexRef_starts)){
319        BitBlockForwardIterator iter_HexRef_check(&ref_Callouts.HexRef_starts);
320        validate_block(iter_HexRef_check, block_base, HexRef_check);
321    }
322    if(bitblock_has_bit(check_streams.att_refs)){
323        BitBlockForwardIterator iter_AttRef_check(&check_streams.att_refs);
324        validate_block(iter_AttRef_check, block_base, AttRef_check);
325    }
326
327    if(error_tracker.Has_Noted_Error()){
328        int error_line, error_column;
329        tracker.get_Line_and_Column(error_tracker.Noted_Pos_In_Block(), error_line, error_column);
330        ReportError(error_tracker.Noted_Error_Msg(), error_line, error_column);
331        exit(-1);
332    }
333
334    matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
335    tracker.AdvanceBlock();
336
337}
338
339void do_process(FILE *infile, FILE *outfile) {
340
341@decl
342  int buf_pos = 0;
343  int block_pos = 0;
344  int errpos = 0;
345  int chars_avail = 0;
346  int check_pos = 0;
347  int chars_read = 0;
348  BytePack buf[(BUFFER_SIZE+BLOCK_SIZE+OVERLAP_BUFSIZE*2)/sizeof(SIMD_type)];
349
350  char * srcbuf = ((char *) buf) + OVERLAP_BUFSIZE;
351  buffer_base = buf_pos;
352  source = srcbuf;
353  chars_read = fread((void *)srcbuf, 1, BUFFER_SIZE + OVERLAP_BUFSIZE, infile);
354  chars_avail = chars_read;
355  if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
356
357  matcher.setSrc(srcbuf);
358
359  if(chars_read<4){
360    fprintf(stderr,"File is too short. Not well formed.\n");
361    exit(-1);
362  }
363
364  Entity_Info * e = new Entity_Info;
365  e->AnalyzeSignature((unsigned char *)srcbuf);
366
367  if (e->code_unit_base == ASCII) {
368
369    XML_Decl_Parser<ASCII> decl_parser((unsigned char *)srcbuf);
370
371    decl_parser.ReadXMLInfo(*e);
372
373    if (e->code_unit_size != SingleByte || (e->has_encoding_decl && (!at_UTF_8(e->encoding)))){
374        fprintf(stderr,"Sorry, this xmlwf demo only works for UTF-8.\n");
375        exit(-1);
376    }
377  }
378  else {
379    fprintf(stderr,"Sorry, this xmlwf demo does not process EBCDIC.\n");
380        exit(-1);
381  }
382
383  if (e->content_start != 0) {
384        memmove(&srcbuf[0], &srcbuf[e->content_start], chars_read - e->content_start);
385        buf_pos = e->content_start;
386        buffer_base = buf_pos;
387        if (chars_avail == BUFFER_SIZE) {
388                chars_read = chars_read - e->content_start +
389                             fread(&srcbuf[chars_read-e->content_start], 1, e->content_start, infile);
390                chars_avail = chars_read;
391                if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
392        }
393        else {
394          chars_read -=e->content_start;
395          chars_avail -=e->content_start;
396        }
397  }
398
399@stream_stmts
400
401/* Full Buffers */
402    while (chars_avail == BUFFER_SIZE) {
403      PERF_SEC_START(parser_timer);
404      for (int blk = 0; blk < SEGMENT_BLOCKS; blk++) {
405          block_base = blk*BLOCK_SIZE;
406          s2p_do_block((BytePack *) &srcbuf[block_base], basis_bits);
407          @block_stmts
408          postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, tag_Callouts, hash_data, chars_avail);
409      }
410      matcher.StreamScan(chars_avail);
411      matcher.Advance_buffer();
412      tracker.Advance_buffer();
413      PERF_SEC_END(parser_timer, chars_avail);
414
415      int bytes_left = chars_read - chars_avail;
416      //memmove(srcbuf, &srcbuf[BUFFER_SIZE], bytes_left);
417      memmove(buf, &srcbuf[BUFFER_SIZE - OVERLAP_BUFSIZE], bytes_left + OVERLAP_BUFSIZE);
418      chars_read = fread(&srcbuf[bytes_left],1, BUFFER_SIZE + OVERLAP_BUFSIZE - bytes_left, infile) + bytes_left;
419
420      chars_avail = chars_read;
421      if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
422      buf_pos += chars_avail;
423      buffer_base = buf_pos;
424
425  }
426/* Final Partial Buffer */
427    PERF_SEC_START(parser_timer);
428
429    block_pos = 0;
430    int remaining = chars_avail;
431/* Full Blocks */
432    while (remaining >= BLOCK_SIZE) {
433          block_base = block_pos;
434          s2p_do_block((BytePack *) &srcbuf[block_pos], basis_bits);
435          @block_stmts
436          postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, tag_Callouts, hash_data, chars_avail);
437          block_pos += BLOCK_SIZE;
438          remaining -= BLOCK_SIZE;
439    }
440    block_base = block_pos;
441    if (remaining > 0 || @any_carry) {
442          EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-remaining));
443          s2p_do_final_block((BytePack *) &srcbuf[block_pos], basis_bits, EOF_mask);
444          @final_block_stmts
445          postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, tag_Callouts, hash_data, chars_avail);
446    }
447    buf_pos += chars_avail;
448    buffer_base = buf_pos;
449
450    matcher.StreamScan(chars_avail);
451    matcher.Advance_buffer();
452    tracker.Advance_buffer();
453
454    PERF_SEC_END(parser_timer, chars_avail);
455    if (matcher.depth != 0) {
456      fprintf(stderr, "tag matching error (depth %i) at position %i\n", matcher.depth, buffer_base);
457      exit(-1);
458    }
459
460}
Note: See TracBrowser for help on using the repository browser.