source: proto/SymbolTable/symtab_pbgs_log_template.cpp @ 1787

Last change on this file since 1787 was 1786, checked in by vla24, 8 years ago

Added some code to support performance tests using PAPI

File size: 15.3 KB
Line 
1#define USE_MASK_COMPARE    //Comparison using masking technique.
2
3#include "../common_definitions.h"
4#include <pbgs_log_symbol_table.h>
5
6#include "../symtab_common_functions.h"
7#include "../xmlwf_common_functions.h"
8
// Selects the object handed to the PERF_SEC_* macros as `parser_timer`.
//   BUFFER_PROFILING : a BOM_Table pointer.
//   CODE_CLOCKER     : a CC counter object sampling NUM_EVENTS PAPI events.
//   otherwise        : an untyped placeholder pointer.
// The second branch tests defined(CODE_CLOCKER) so that it matches the #ifdef
// above and still compiles when the macro is defined without a value.
#ifdef BUFFER_PROFILING
        BOM_Table * parser_timer;

#elif defined(CODE_CLOCKER)
        #define NUM_EVENTS 1
        // Alternative PAPI events; enable exactly one Events[] definition.
//      int Events[NUM_EVENTS] = {PAPI_TOT_CYC};
//      int Events[NUM_EVENTS] = {PAPI_L2_DCM};
//      int Events[NUM_EVENTS] = {PAPI_L1_DCM};
        int Events[NUM_EVENTS] = {PAPI_BR_MSP};
        int cal_size = 20;
        CC * parser_timer = new CC(Events,NUM_EVENTS,cal_size);
#else
        void * parser_timer;
#endif
23
24int block_base=0;
25int buffer_base=0;
26int buffer_last;
27
28TagMatcher matcher;
29
30BitBlock elem_starts;
31int previous_block_last_elem_start;
32BytePack hashvalues[2];
33
34vector <int> gids;
35PBGSLogSymbolTable pbgs_symbol_table;
36
37static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, Tag_Callouts & tag_Callouts, Hash_data & hash_data, int chars_avail);
38
39void do_process(FILE *infile, FILE *outfile);
40
41template <int L> static inline void validate_block_length_grouping(BitBlockForwardIterator & start, int block_base);
42
43template <int L> static inline int ElemEnd_grouping(int pos, int length);
44
45int main(int argc, char * argv[]) {
46    char * infilename, * outfilename;
47    FILE *infile, *outfile;
48
49    getFilenames(argc, argv, infilename, outfilename);
50    openInputOutputFiles(infilename, outfilename,
51                         infile, outfile);
52
53//      PERF_SEC_BIND(1);
54
55    PERF_SEC_INIT(parser_timer);
56
57    // store symbols form text to Symbol Table
58    do_process(infile, outfile);
59
60    PERF_SEC_DUMP(parser_timer);
61
62    PERF_SEC_DESTROY(parser_timer);
63
64    fclose(infile);
65    fclose(outfile);
66
67#if PRINT_SYMBOL_DISTRIBUTION
68//    print_GIDS();
69    pbgs_symbol_table.Print_Symbol_Table_Distribution();
70#endif
71    return 0;
72}
73
// Generic fallback for length classes that have no explicit specialization:
// performs no symbol-table work and reports 0.
//   pos    - ignored
//   length - ignored
template <int L>
static inline int ElemEnd_grouping(int pos, int length) {
    (void) pos;
    (void) length;
    return 0;
}
78
79// length = 1
80template <>
81inline int ElemEnd_grouping<1>(int pos, int length) {
82    int start = block_base + pos - length;
83    int hashvalue = compute_hash_value(length, start - block_base, hashvalues);
84    int gid = pbgs_symbol_table.Lookup_or_Insert_Name_1(source + start, hashvalue);
85    gids.push_back(gid);
86#if DEBUG
87    int L = length;
88    int end = start - L;
89    char* symbol = new char[L+1];
90    strncpy ( symbol, source + start, L );
91    symbol[L] ='\0';
92    printf ("%s | start: %i[%i] | end: %i[%i] | gid: %i | hashvalue: %i | symbol: %s\n", __FUNCTION__, start, start-buffer_base, end, end-buffer_base, gid, hashvalue, symbol );
93    delete symbol; symbol = 0;
94#endif
95    return 0;
96}
97
98// length = 2
99template <>
100inline int ElemEnd_grouping<2>(int pos, int length) {
101    int start = block_base + pos - length;
102    int hashvalue = compute_hash_value(length, start - block_base, hashvalues);
103    int gid = pbgs_symbol_table.Lookup_or_Insert_Name_2(source + start, hashvalue);
104    gids.push_back(gid);
105#if DEBUG
106    int L = 2;
107    int end = start - L;
108    char* symbol = new char[L+1];
109    strncpy ( symbol, source + start, L );
110    symbol[L] ='\0';
111    printf ("%s | start: %i[%i] | end: %i[%i] | gid: %i | hashvalue: %i | symbol: %s\n", __FUNCTION__, start, start-buffer_base, end, end-buffer_base, gid, hashvalue, symbol );
112    delete symbol; symbol = 0;
113#endif
114    return 0;
115}
116
117
118// length in [3,4]
119template <>
120inline int ElemEnd_grouping<4>(int pos, int L) {
121    int start = pos + block_base;
122    int hashvalue = compute_hash_value(L, pos, hashvalues);
123    int gid = pbgs_symbol_table.Lookup_or_Insert_Name_4(source + start, hashvalue, L);
124    gids.push_back(gid);
125#if DEBUG
126    int end = start + L;
127    char* symbol = new char[L+1];
128    strncpy ( symbol, source + start, L );
129    symbol[L] ='\0';
130    printf ("%s | start: %i[%i] | end: %i[%i] | gid: %i | hashvalue: %i | symbol: %s\n", __FUNCTION__, start, start-buffer_base, end, end-buffer_base, gid, hashvalue, symbol );
131    delete symbol; symbol = 0;
132#endif
133    return 0;
134}
135
136// length in [5,8]
137template <>
138inline int ElemEnd_grouping<8>(int pos,  int L) {
139    int start = pos + block_base;
140    int hashvalue = compute_hash_value(L, pos, hashvalues);
141    int gid = pbgs_symbol_table.Lookup_or_Insert_Name_8(source + start, hashvalue, L);
142    gids.push_back(gid);
143#if DEBUG
144    int end = start + L;
145    char* symbol = new char[L+1];
146    strncpy ( symbol, source + start, L );
147    symbol[L] ='\0';
148    printf ("%s | start: %i[%i] | end: %i[%i] | gid: %i | hashvalue: %i | symbol: %s\n", __FUNCTION__, start, start-buffer_base, end, end-buffer_base, gid, hashvalue, symbol );
149    delete symbol; symbol = 0;
150#endif
151    return 0;
152}
153
154// length in [9,16]
155template <>
156inline int ElemEnd_grouping<16>(int pos, int L) {
157    int start = pos + block_base;
158    int hashvalue = compute_hash_value(L, pos, hashvalues);
159    int gid = pbgs_symbol_table.Lookup_or_Insert_Name_16(source + start, hashvalue, L);
160    gids.push_back(gid);
161#if DEBUG
162    int end = start + L;
163    char* symbol = new char[L+1];
164    strncpy ( symbol, source + start, L );
165    symbol[L] ='\0';
166    printf ("%s | start: %i[%i] | end: %i[%i] | gid: %i | hashvalue: %i | symbol: %s\n", __FUNCTION__, start, start-buffer_base, end, end-buffer_base, gid, hashvalue, symbol );
167    delete symbol; symbol = 0;
168#endif
169    return 0;
170}
171
172// length > 16
173template <>
174inline int ElemEnd_grouping<17>(int pos, int lgth) {
175    int start = pos + block_base;
176    int hashvalue = compute_hash_value(lgth, start - block_base, hashvalues);
177    int gid = 0;
178
179//    if (lgth < 32)
180//    {
181//      gid = pbgs_symbol_table.Lookup_or_Insert_Name_32(source + start, hashvalue, lgth);
182//    }
183//    else
184    {
185        gid = pbgs_symbol_table.Lookup_or_Insert_Name(source + start, hashvalue, lgth);
186    }
187    gids.push_back(gid);
188#if DEBUG
189    char* symbol = new char[lgth+1];
190    strncpy ( symbol, source + start, lgth );
191    symbol[lgth] ='\0';
192    printf ("%s | start: %i[%i] | lgth: %i | hashvalue: %i | gid: %i | symbol: %s\n", __FUNCTION__, start, start - block_base, lgth, hashvalue, gid, symbol);
193#endif
194    return 0;
195}
196
197// L = 4, pass in bitstream for symbols length [3,4]
198// L = 8, pass in bitstream for symbols length [5,8]
199// L = 16, pass in bitstream for symbols length [9,16]
200// L = 17, pass in bitstream for symbols length longer than 16
201template <int L>
202static inline void validate_block_length_grouping(BitBlockForwardIterator & start, int block_base) {
203
204    BitBlockForwardIterator end;
205    int end_pos, start_pos, length;
206
207    while(start != end) {
208        end_pos = /*block_base + */*start;
209        start_pos = ScanBackwardPos (&elem_starts, end_pos, previous_block_last_elem_start);
210        length = end_pos - start_pos;
211        ElemEnd_grouping<L>(start_pos, length);
212        start++;
213    }
214}
215
216// pass in bitstream for symbols length 1
217template <>
218inline void validate_block_length_grouping<1>(BitBlockForwardIterator & start, int block_base) {
219
220    BitBlockForwardIterator end;
221    int end_pos, start_pos, length;
222
223    while(start != end) {
224        end_pos = /*block_base + */*start;
225        start_pos = ScanBackwardPos (&elem_starts, end_pos, previous_block_last_elem_start);
226        length = end_pos - start_pos;
227        ElemEnd_grouping<1>(end_pos, 1);
228        start++;
229    }
230}
231
232// pass in bitstream for symbols length 2
233template <>
234inline void validate_block_length_grouping<2>(BitBlockForwardIterator & start, int block_base) {
235
236    BitBlockForwardIterator end;
237    int end_pos, start_pos, length;
238
239    while(start != end) {
240        end_pos = /*block_base + */*start;
241        start_pos = ScanBackwardPos (&elem_starts, end_pos, previous_block_last_elem_start);
242        length = end_pos - start_pos;
243        ElemEnd_grouping<2>(end_pos, 2);
244        start++;
245    }
246}
247
248static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, Tag_Callouts & tag_Callouts, Hash_data & hash_data, int chars_avail){
249
250    tracker.StoreNewlines(lex.LF);
251
252    elem_starts = tag_Callouts.ElemName_starts;
253    hashvalues[1] = hash_data.Hash_value;
254
255    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_1) )
256    {
257        BitBlockForwardIterator iter_length_grouping_1(&tag_Callouts.ElemName_ends_1);
258        validate_block_length_grouping<1>(iter_length_grouping_1, block_base);
259    }
260
261    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_2) )
262    {
263        BitBlockForwardIterator iter_length_grouping_2(&tag_Callouts.ElemName_ends_2);
264        validate_block_length_grouping<2>(iter_length_grouping_2, block_base);
265    }
266
267    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_3_to_4) )
268    {
269        BitBlockForwardIterator iter_length_grouping_4(&tag_Callouts.ElemName_ends_3_to_4);
270        validate_block_length_grouping<4>(iter_length_grouping_4, block_base);
271    }
272
273    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_5_to_8) )
274    {
275        BitBlockForwardIterator iter_length_grouping_8(&tag_Callouts.ElemName_ends_5_to_8);
276        validate_block_length_grouping<8>(iter_length_grouping_8, block_base);
277    }
278
279    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_9_to_16) )
280    {
281        BitBlockForwardIterator iter_length_grouping_16(&tag_Callouts.ElemName_ends_9_to_16);
282        validate_block_length_grouping<16>(iter_length_grouping_16, block_base);
283    }
284
285    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_17_and_longer) )
286    {
287        BitBlockForwardIterator iter_length_grouping_remaining(&tag_Callouts.ElemName_ends_17_and_longer);
288        validate_block_length_grouping<17>(iter_length_grouping_remaining, block_base);
289    }
290
291    // Store the last starting position in case we hit boundary case
292    previous_block_last_elem_start = - count_reverse_zeroes (elem_starts);
293
294    //copy current hash value data as previous one.
295    memmove (&hashvalues[0], &hashvalues[1], 16);
296
297    if (bitblock_has_bit(simd_or(check_streams.non_ascii_name_starts, check_streams.non_ascii_names))) {
298        BitBlockForwardIterator iter_NameStrt_check(&check_streams.non_ascii_name_starts);
299        validate_block(iter_NameStrt_check, block_base, NameStrt_check);
300        BitBlockForwardIterator iter_Name_check(&check_streams.non_ascii_names);
301        validate_block(iter_Name_check, block_base, Name_check);
302    }
303    if (bitblock_has_bit(ctCDPI_Callouts.PI_name_starts)){
304        BitBlockForwardIterator iter_PI_name_starts(&(ctCDPI_Callouts.PI_name_starts));
305        validate_block(iter_PI_name_starts, block_base, buffer_base, PIName_check);
306    }
307    if (bitblock_has_bit(ctCDPI_Callouts.CD_starts)){
308        BitBlockForwardIterator iter_CD_check(&ctCDPI_Callouts.CD_starts);
309        validate_block(iter_CD_check, block_base, CD_check);
310    }
311    if(bitblock_has_bit(ref_Callouts.GenRef_starts)){
312        BitBlockForwardIterator iter_GenRef_check(&ref_Callouts.GenRef_starts);
313        validate_block(iter_GenRef_check, block_base, GenRef_check);
314    }
315    if(bitblock_has_bit(ref_Callouts.DecRef_starts)){
316        BitBlockForwardIterator iter_DecRef_check(&ref_Callouts.DecRef_starts);
317        validate_block(iter_DecRef_check, block_base, DecRef_check);
318    }
319    if(bitblock_has_bit(ref_Callouts.HexRef_starts)){
320        BitBlockForwardIterator iter_HexRef_check(&ref_Callouts.HexRef_starts);
321        validate_block(iter_HexRef_check, block_base, HexRef_check);
322    }
323    if(bitblock_has_bit(check_streams.att_refs)){
324        BitBlockForwardIterator iter_AttRef_check(&check_streams.att_refs);
325        validate_block(iter_AttRef_check, block_base, AttRef_check);
326    }
327
328    if(error_tracker.Has_Noted_Error()){
329        int error_line, error_column;
330        tracker.get_Line_and_Column(error_tracker.Noted_Pos_In_Block(), error_line, error_column);
331        ReportError(error_tracker.Noted_Error_Msg(), error_line, error_column);
332        exit(-1);
333    }
334
335    matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
336    tracker.AdvanceBlock();
337
338}
339
340void do_process(FILE *infile, FILE *outfile) {
341
342@decl
343  int buf_pos = 0;
344  int block_pos = 0;
345  int errpos = 0;
346  int chars_avail = 0;
347  int check_pos = 0;
348  int chars_read = 0;
349  BytePack buf[(BUFFER_SIZE+BLOCK_SIZE+OVERLAP_BUFSIZE*2)/sizeof(SIMD_type)];
350
351  char * srcbuf = ((char *) buf) + OVERLAP_BUFSIZE;
352  buffer_base = buf_pos;
353  source = srcbuf;
354  chars_read = fread((void *)srcbuf, 1, BUFFER_SIZE + OVERLAP_BUFSIZE, infile);
355  chars_avail = chars_read;
356  if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
357
358  matcher.setSrc(srcbuf);
359
360  if(chars_read<4){
361    fprintf(stderr,"File is too short. Not well formed.\n");
362    exit(-1);
363  }
364
365  Entity_Info * e = new Entity_Info;
366  e->AnalyzeSignature((unsigned char *)srcbuf);
367
368  if (e->code_unit_base == ASCII) {
369
370    XML_Decl_Parser<ASCII> decl_parser((unsigned char *)srcbuf);
371
372    decl_parser.ReadXMLInfo(*e);
373
374    if (e->code_unit_size != SingleByte || (e->has_encoding_decl && (!at_UTF_8(e->encoding)))){
375        fprintf(stderr,"Sorry, this xmlwf demo only works for UTF-8.\n");
376        exit(-1);
377    }
378  }
379  else {
380    fprintf(stderr,"Sorry, this xmlwf demo does not process EBCDIC.\n");
381        exit(-1);
382  }
383
384  if (e->content_start != 0) {
385        memmove(&srcbuf[0], &srcbuf[e->content_start], chars_read - e->content_start);
386        buf_pos = e->content_start;
387        buffer_base = buf_pos;
388        if (chars_avail == BUFFER_SIZE) {
389                chars_read = chars_read - e->content_start +
390                             fread(&srcbuf[chars_read-e->content_start], 1, e->content_start, infile);
391                chars_avail = chars_read;
392                if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
393        }
394        else {
395          chars_read -=e->content_start;
396          chars_avail -=e->content_start;
397        }
398  }
399
400@stream_stmts
401
402/* Full Buffers */
403    while (chars_avail == BUFFER_SIZE) {
404      PERF_SEC_START(parser_timer);
405      for (int blk = 0; blk < SEGMENT_BLOCKS; blk++) {
406          block_base = blk*BLOCK_SIZE;
407          s2p_do_block((BytePack *) &srcbuf[block_base], basis_bits);
408          @block_stmts
409          postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, tag_Callouts, hash_data, chars_avail);
410      }
411      matcher.StreamScan(chars_avail);
412      matcher.Advance_buffer();
413      tracker.Advance_buffer();
414#ifndef CACHE_PERF
415    PERF_SEC_END(parser_timer, chars_avail);
416#else
417    PERF_SEC_END(parser_timer, 1);
418#endif
419
420      int bytes_left = chars_read - chars_avail;
421      //memmove(srcbuf, &srcbuf[BUFFER_SIZE], bytes_left);
422      memmove(buf, &srcbuf[BUFFER_SIZE - OVERLAP_BUFSIZE], bytes_left + OVERLAP_BUFSIZE);
423      chars_read = fread(&srcbuf[bytes_left],1, BUFFER_SIZE + OVERLAP_BUFSIZE - bytes_left, infile) + bytes_left;
424
425      chars_avail = chars_read;
426      if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
427      buf_pos += chars_avail;
428      buffer_base = buf_pos;
429
430  }
431/* Final Partial Buffer */
432    PERF_SEC_START(parser_timer);
433    block_pos = 0;
434    int remaining = chars_avail;
435/* Full Blocks */
436    while (remaining >= BLOCK_SIZE) {
437          block_base = block_pos;
438          s2p_do_block((BytePack *) &srcbuf[block_pos], basis_bits);
439          @block_stmts
440          postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, tag_Callouts, hash_data, chars_avail);
441          block_pos += BLOCK_SIZE;
442          remaining -= BLOCK_SIZE;
443    }
444    block_base = block_pos;
445    if (remaining > 0 || @any_carry) {
446          EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-remaining));
447          s2p_do_final_block((BytePack *) &srcbuf[block_pos], basis_bits, EOF_mask);
448          @final_block_stmts
449          postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, tag_Callouts, hash_data, chars_avail);
450    }
451    buf_pos += chars_avail;
452    buffer_base = buf_pos;
453
454    matcher.StreamScan(chars_avail);
455    matcher.Advance_buffer();
456    tracker.Advance_buffer();
457#ifndef CACHE_PERF
458    PERF_SEC_END(parser_timer, chars_avail);
459#else
460    PERF_SEC_END(parser_timer, 1);
461#endif
462    if (matcher.depth != 0) {
463      fprintf(stderr, "tag matching error (depth %i) at position %i\n", matcher.depth, buffer_base);
464      exit(-1);
465    }
466
467}
Note: See TracBrowser for help on using the repository browser.