source: proto/SymbolTable/symtab_pbgs_div_template.cpp @ 4740

Last change on this file since 4740 was 1793, checked in by vla24, 8 years ago

Added some text files for wdc. updated performance test.

File size: 13.6 KB
Line 
1#include "../common_definitions.h"
2#include <pbgs_div_symbol_table.h>
3
4#include "../symtab_common_functions.h"
5#include "../xmlwf_common_functions.h"
6
// Profiling handle passed to the PERF_SEC_* macros in main() and do_process().
// Its type depends on the instrumentation selected at build time:
//   BUFFER_PROFILING -> BOM_Table *
//   CODE_CLOCKER     -> CC *, created here with a single counter event
//   otherwise        -> untyped placeholder pointer
#ifdef BUFFER_PROFILING
BOM_Table * parser_timer;

#elif CODE_CLOCKER
#define NUM_EVENTS 1
// Alternative event kept for reference:
//      int Events[NUM_EVENTS] = {PAPI_TOT_CYC};
// L2_PERF / L1_PERF choose which single event is counted.
#ifdef L2_PERF
int Events[NUM_EVENTS] = {PAPI_L2_DCM};
#elif L1_PERF
int Events[NUM_EVENTS] = {PAPI_L1_DCM};
#else
int Events[NUM_EVENTS] = {PAPI_BR_MSP};
#endif
int cal_size = 20;
CC * parser_timer = new CC(Events, NUM_EVENTS, cal_size);
#else
void * parser_timer;
#endif
25
26
27int block_base=0;
28int buffer_base=0;
29int buffer_last;
30
31TagMatcher matcher;
32
33BitBlock elem_ends;
34int last_elem_start;
35bool block_boundary_case = false;
36BytePack hashvalues[2];
37
38vector <int> gids;
39PBGSDivSymbolTable pbgs_symbol_table;
40
41static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, Tag_Callouts & tag_Callouts, Hash_data & hash_data, int chars_avail);
42
43void do_process(FILE *infile, FILE *outfile);
44
45template <int L> static inline void validate_block_length_grouping(BitBlockForwardIterator & start, int block_base);
46
47static inline int ElemStart_grouping(int start_pos, int lgth); // lgth > 16
48template <int L> static inline int ElemEnd_grouping(int pos, int length);
49
50int main(int argc, char * argv[]) {
51    char * infilename, * outfilename;
52    FILE *infile, *outfile;
53
54    getFilenames(argc, argv, infilename, outfilename);
55    openInputOutputFiles(infilename, outfilename,
56                         infile, outfile);
57
58//      PERF_SEC_BIND(1);
59
60    PERF_SEC_INIT(parser_timer);
61
62    // store symbols form text to Symbol Table
63    do_process(infile, outfile);
64
65    PERF_SEC_DUMP(parser_timer);
66
67    PERF_SEC_DESTROY(parser_timer);
68
69#if PRINT_SYMBOL_DISTRIBUTION
70//    print_GIDS();
71    pbgs_symbol_table.Print_Symbol_Table_Distribution();
72#endif
73
74    fclose(infile);
75    fclose(outfile);
76
77        return(0);
78}
79
80// length in [1,16]
81template <int L>
82static inline int ElemEnd_grouping(int end) {
83    int start = end - L;
84    int hashvalue = compute_hash_value(L, start - block_base, hashvalues);
85    int gid = pbgs_symbol_table.Lookup_or_Insert_Name<L>(source + start, hashvalue);
86    gids.push_back(gid);
87#if DEBUG
88    char* symbol = new char[L+1];
89    strncpy ( symbol, source + start, L );
90    symbol[L] ='\0';
91    printf ("%s | start: %i[%i] | end: %i[%i] | lgth: %i | gid: %i | hashvalue: %i | symbol: %s\n", __FUNCTION__, start, start-buffer_base, end, end-buffer_base, L, gid, hashvalue, symbol );
92    delete symbol; symbol = 0;
93#endif
94    return 0;
95}
96
97// length > 16
98static inline int ElemStart_grouping(int start, int lgth) {
99    int hashvalue = compute_hash_value(lgth, start - block_base, hashvalues);
100    int gid = pbgs_symbol_table.Lookup_or_Insert_Name(source + start, hashvalue, lgth);
101    gids.push_back(gid);
102#if DEBUG
103    char* symbol = new char[lgth+1];
104    strncpy ( symbol, source + start, lgth );
105    symbol[lgth] ='\0';
106    printf ("%s | start: %i[%i] | lgth: %i | hashvalue: %i | gid: %i | symbol: %s\n", __FUNCTION__, start, start - block_base, lgth, hashvalue, gid, symbol);
107#endif
108    return 0;
109}
110
111// L = 2, pass in bitstream for symbols length [1,2]
112// L = 4, pass in bitstream for symbols length [3,4]
113// L = 6, pass in bitstream for symbols length [5,6]
114// L = 8, pass in bitstream for symbols length [7,8]
115// L = 10, pass in bitstream for symbols length [9,10]
116// L = 12, pass in bitstream for symbols length [11,12]
117// L = 14, pass in bitstream for symbols length [13,14]
118// L = 16, pass in bitstream for symbols length [15,16]
119// L = 17, pass in bitstream for symbols length longer than 16
120template <int L>
121static inline void validate_block_length_grouping(BitBlockForwardIterator & start, int block_base) {
122
123    BitBlockForwardIterator end;
124    int end_pos;
125
126    while(start != end) {
127        end_pos = *start;
128        ElemEnd_grouping<L>(end_pos + block_base);
129        start++;
130    }
131}
132
133template <>
134inline void validate_block_length_grouping<17>(BitBlockForwardIterator & start, int block_base) {
135
136    BitBlockForwardIterator end;
137    int start_pos, end_pos;
138
139    while(start != end) {
140        start_pos = *start;
141        end_pos = ScanForwardPos (&elem_ends, start_pos);
142        if (end_pos)
143        {
144            ElemStart_grouping(start_pos - 16 + block_base, end_pos - start_pos + 16);
145        }
146        else
147        {
148#if DEBUG
149            printf ("There is no more 1 bit in the block. pos: %i | sym: %c%c[%c]\n", start_pos,
150                    source[start_pos + block_base-2], source[start_pos + block_base-1], source[start_pos + block_base]);
151#endif
152            //handle boundary case
153            block_boundary_case = true;
154            last_elem_start = start_pos - 16 - BLOCK_SIZE;
155#if DEBUG
156            printf ("last_elem_start: %i\n", last_elem_start);
157#endif
158        }
159        start++;
160    }
161}
162
163static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, Tag_Callouts & tag_Callouts, Hash_data & hash_data, int chars_avail){
164
165    tracker.StoreNewlines(lex.LF);
166
167    elem_ends = tag_Callouts.ElemName_ends;
168    hashvalues[1] = hash_data.Hash_value;
169
170    // Check for block boundary case for length 16 and above
171    if (block_boundary_case)
172    {
173#if DEBUG
174        printf ("block boundary case! Special handle!\n");
175#endif
176        int lgth = count_forward_zeroes(elem_ends)-last_elem_start;
177        int start = block_base + last_elem_start;
178        int hashvalue = compute_hash_value(lgth, last_elem_start, hashvalues);
179        int gid = pbgs_symbol_table.Lookup_or_Insert_Name(source + start, hashvalue, lgth);
180        gids.push_back(gid);
181#if DEBUG
182        printf ("%s | start: %i[%i] | lgth: %i | hashvalue: %i | gid: %i \n", __FUNCTION__, start, start - block_base, lgth, hashvalue, gid);
183#endif
184        block_boundary_case = false;
185    }
186
187    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_1_to_2) )
188    {
189        BitBlockForwardIterator iter_length_grouping_2(&tag_Callouts.ElemName_ends_1_to_2);
190        validate_block_length_grouping<2>(iter_length_grouping_2, block_base);
191    }
192
193    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_3_to_4) )
194    {
195        BitBlockForwardIterator iter_length_grouping_4(&tag_Callouts.ElemName_ends_3_to_4);
196        validate_block_length_grouping<4>(iter_length_grouping_4, block_base);
197    }
198
199    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_5_to_6) )
200    {
201        BitBlockForwardIterator iter_length_grouping_6(&tag_Callouts.ElemName_ends_5_to_6);
202        validate_block_length_grouping<6>(iter_length_grouping_6, block_base);
203    }
204
205    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_7_to_8) )
206    {
207        BitBlockForwardIterator iter_length_grouping_8(&tag_Callouts.ElemName_ends_7_to_8);
208        validate_block_length_grouping<8>(iter_length_grouping_8, block_base);
209    }
210
211    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_9_to_10) )
212    {
213        BitBlockForwardIterator iter_length_grouping_10(&tag_Callouts.ElemName_ends_9_to_10);
214        validate_block_length_grouping<10>(iter_length_grouping_10, block_base);
215    }
216
217    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_11_to_12) )
218    {
219        BitBlockForwardIterator iter_length_grouping_12(&tag_Callouts.ElemName_ends_11_to_12);
220        validate_block_length_grouping<12>(iter_length_grouping_12, block_base);
221    }
222
223    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_13_to_14) )
224    {
225        BitBlockForwardIterator iter_length_grouping_14(&tag_Callouts.ElemName_ends_13_to_14);
226        validate_block_length_grouping<14>(iter_length_grouping_14, block_base);
227    }
228
229    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_15_to_16) )
230    {
231        BitBlockForwardIterator iter_length_grouping_16(&tag_Callouts.ElemName_ends_15_to_16);
232        validate_block_length_grouping<16>(iter_length_grouping_16, block_base);
233    }
234
235    if ( bitblock_has_bit(tag_Callouts.ElemName_remaining_ends) )
236    {
237        BitBlockForwardIterator iter_length_grouping_remaining(&tag_Callouts.ElemName_remaining_ends);
238        validate_block_length_grouping<17>(iter_length_grouping_remaining, block_base);
239    }
240
241    //copy current hash value data as previous one.
242    memmove (&hashvalues[0], &hashvalues[1], 16);
243
244    if (bitblock_has_bit(simd_or(check_streams.non_ascii_name_starts, check_streams.non_ascii_names))) {
245        BitBlockForwardIterator iter_NameStrt_check(&check_streams.non_ascii_name_starts);
246        validate_block(iter_NameStrt_check, block_base, NameStrt_check);
247        BitBlockForwardIterator iter_Name_check(&check_streams.non_ascii_names);
248        validate_block(iter_Name_check, block_base, Name_check);
249    }
250    if (bitblock_has_bit(ctCDPI_Callouts.PI_name_starts)){
251        BitBlockForwardIterator iter_PI_name_starts(&(ctCDPI_Callouts.PI_name_starts));
252        validate_block(iter_PI_name_starts, block_base, buffer_base, PIName_check);
253    }
254    if (bitblock_has_bit(ctCDPI_Callouts.CD_starts)){
255        BitBlockForwardIterator iter_CD_check(&ctCDPI_Callouts.CD_starts);
256        validate_block(iter_CD_check, block_base, CD_check);
257    }
258    if(bitblock_has_bit(ref_Callouts.GenRef_starts)){
259        BitBlockForwardIterator iter_GenRef_check(&ref_Callouts.GenRef_starts);
260        validate_block(iter_GenRef_check, block_base, GenRef_check);
261    }
262    if(bitblock_has_bit(ref_Callouts.DecRef_starts)){
263        BitBlockForwardIterator iter_DecRef_check(&ref_Callouts.DecRef_starts);
264        validate_block(iter_DecRef_check, block_base, DecRef_check);
265    }
266    if(bitblock_has_bit(ref_Callouts.HexRef_starts)){
267        BitBlockForwardIterator iter_HexRef_check(&ref_Callouts.HexRef_starts);
268        validate_block(iter_HexRef_check, block_base, HexRef_check);
269    }
270    if(bitblock_has_bit(check_streams.att_refs)){
271        BitBlockForwardIterator iter_AttRef_check(&check_streams.att_refs);
272        validate_block(iter_AttRef_check, block_base, AttRef_check);
273    }
274
275    if(error_tracker.Has_Noted_Error()){
276        int error_line, error_column;
277        tracker.get_Line_and_Column(error_tracker.Noted_Pos_In_Block(), error_line, error_column);
278        ReportError(error_tracker.Noted_Error_Msg(), error_line, error_column);
279        exit(-1);
280    }
281
282    matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
283    tracker.AdvanceBlock();
284}
285
286void do_process(FILE *infile, FILE *outfile) {
287
288@decl
289  int buf_pos = 0;
290  int block_pos = 0;
291  int errpos = 0;
292  int chars_avail = 0;
293  int check_pos = 0;
294  int chars_read = 0;
295  BytePack buf[(BUFFER_SIZE+BLOCK_SIZE+OVERLAP_BUFSIZE*2)/sizeof(SIMD_type)];
296
297  char * srcbuf = ((char *) buf) + OVERLAP_BUFSIZE;
298  buffer_base = buf_pos;
299  source = srcbuf;
300  chars_read = fread((void *)srcbuf, 1, BUFFER_SIZE + OVERLAP_BUFSIZE, infile);
301  chars_avail = chars_read;
302  if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
303
304  matcher.setSrc(srcbuf);
305
306  if(chars_read<4){
307    fprintf(stderr,"File is too short. Not well formed.\n");
308    exit(-1);
309  }
310
311  Entity_Info * e = new Entity_Info;
312  e->AnalyzeSignature((unsigned char *)srcbuf);
313
314  if (e->code_unit_base == ASCII) {
315
316    XML_Decl_Parser<ASCII> decl_parser((unsigned char *)srcbuf);
317
318    decl_parser.ReadXMLInfo(*e);
319
320    if (e->code_unit_size != SingleByte || (e->has_encoding_decl && (!at_UTF_8(e->encoding)))){
321        fprintf(stderr,"Sorry, this xmlwf demo only works for UTF-8.\n");
322        exit(-1);
323    }
324  }
325  else {
326    fprintf(stderr,"Sorry, this xmlwf demo does not process EBCDIC.\n");
327        exit(-1);
328  }
329
330  if (e->content_start != 0) {
331        memmove(&srcbuf[0], &srcbuf[e->content_start], chars_read - e->content_start);
332        buf_pos = e->content_start;
333        buffer_base = buf_pos;
334        if (chars_avail == BUFFER_SIZE) {
335                chars_read = chars_read - e->content_start +
336                             fread(&srcbuf[chars_read-e->content_start], 1, e->content_start, infile);
337                chars_avail = chars_read;
338                if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
339        }
340        else {
341          chars_read -=e->content_start;
342          chars_avail -=e->content_start;
343        }
344  }
345
346@stream_stmts
347
348/* Full Buffers */
349    while (chars_avail == BUFFER_SIZE) {
350      PERF_SEC_START(parser_timer);
351      for (int blk = 0; blk < SEGMENT_BLOCKS; blk++) {
352          block_base = blk*BLOCK_SIZE;
353          s2p_do_block((BytePack *) &srcbuf[block_base], basis_bits);
354          @block_stmts
355          postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, tag_Callouts, hash_data, chars_avail);
356      }
357      matcher.StreamScan(chars_avail);
358      matcher.Advance_buffer();
359      tracker.Advance_buffer();
360#ifndef CACHE_PERF
361    PERF_SEC_END(parser_timer, chars_avail);
362#else
363    PERF_SEC_END(parser_timer, 1);
364#endif
365
366      int bytes_left = chars_read - chars_avail;
367      //memmove(srcbuf, &srcbuf[BUFFER_SIZE], bytes_left);
368      memmove(buf, &srcbuf[BUFFER_SIZE - OVERLAP_BUFSIZE], bytes_left + OVERLAP_BUFSIZE);
369      chars_read = fread(&srcbuf[bytes_left],1, BUFFER_SIZE + OVERLAP_BUFSIZE - bytes_left, infile) + bytes_left;
370
371      chars_avail = chars_read;
372      if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
373      buf_pos += chars_avail;
374      buffer_base = buf_pos;
375
376  }
377/* Final Partial Buffer */
378    PERF_SEC_START(parser_timer);
379
380    block_pos = 0;
381    int remaining = chars_avail;
382/* Full Blocks */
383    while (remaining >= BLOCK_SIZE) {
384          block_base = block_pos;
385          s2p_do_block((BytePack *) &srcbuf[block_pos], basis_bits);
386          @block_stmts
387          postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, tag_Callouts, hash_data, chars_avail);
388          block_pos += BLOCK_SIZE;
389          remaining -= BLOCK_SIZE;
390    }
391    block_base = block_pos;
392    if (remaining > 0 || @any_carry) {
393          EOF_mask = bitblock::srl(simd<1>::constant<1>(),convert(BLOCK_SIZE-remaining));
394          s2p_do_final_block((BytePack *) &srcbuf[block_pos], basis_bits, EOF_mask);
395          @final_block_stmts
396          postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, tag_Callouts, hash_data, chars_avail);
397    }
398    buf_pos += chars_avail;
399    buffer_base = buf_pos;
400
401    matcher.StreamScan(chars_avail);
402    matcher.Advance_buffer();
403    tracker.Advance_buffer();
404#ifndef CACHE_PERF
405    PERF_SEC_END(parser_timer, chars_avail);
406#else
407    PERF_SEC_END(parser_timer, 1);
408#endif
409    if (matcher.depth != 0) {
410      fprintf(stderr, "tag matching error (depth %i) at position %i\n", matcher.depth, buffer_base);
411      exit(-1);
412    }
413}
Note: See TracBrowser for help on using the repository browser.