source: proto/SymbolTable/symtab_pbgs_div_template.cpp @ 1721

Last change on this file since 1721 was 1721, checked in by vla24, 8 years ago

SymbolTable: completed dictionary implementation and refactored templates

File size: 13.4 KB
Line 
1#include "../common_definitions.h"
2#include <pbgs_div_symbol_table.h>
3
4#include "../symtab_common_functions.h"
5#include "../xmlwf_common_functions.h"
6
// Global profiling handle that main() hands to the PERF_SEC_* macros.
// Its concrete type depends on which profiling mode the build selects.
#ifdef BUFFER_PROFILING
BOM_Table * parser_timer;

#elif CODE_CLOCKER
// Number of entries in Events[]; must be kept in sync with the initializer below.
#define NUM_EVENTS 1
// Event set handed to the CC timer. Alternatives that were tried previously:
//   int Events[NUM_EVENTS] = {PAPI_L2_DCM};
//   int Events[NUM_EVENTS] = {PAPI_TOT_CYC, PAPI_BR_MSP};
int Events[NUM_EVENTS] = {PAPI_TOT_CYC};
int cal_size = 20;
CC * parser_timer = new CC(Events, NUM_EVENTS, cal_size);

#else
// No profiling mode selected: an untyped placeholder so that references to
// parser_timer still compile.
void * parser_timer;
#endif
20
21
22int block_base=0;
23int buffer_base=0;
24int buffer_last;
25
26TagMatcher matcher;
27
28BitBlock elem_ends;
29int last_elem_start;
30bool block_boundary_case = false;
31BytePack hashvalues[2];
32
33vector <int> gids;
34PBGSDivSymbolTable pbgs_symbol_table;
35
36static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, Tag_Callouts & tag_Callouts, Hash_data & hash_data, int chars_avail);
37
38void do_process(FILE *infile, FILE *outfile);
39
40template <int L> static inline void validate_block_length_grouping(BitBlockForwardIterator & start, int block_base);
41
42static inline int ElemStart_grouping(int start_pos, int lgth); // lgth > 16
43template <int L> static inline int ElemEnd_grouping(int pos, int length);
44
45int main(int argc, char * argv[]) {
46    char * infilename, * outfilename;
47    FILE *infile, *outfile;
48
49    getFilenames(argc, argv, infilename, outfilename);
50    openInputOutputFiles(infilename, outfilename,
51                         infile, outfile);
52
53//      PERF_SEC_BIND(1);
54
55    PERF_SEC_INIT(parser_timer);
56
57    // store symbols form text to Symbol Table
58    do_process(infile, outfile);
59
60    PERF_SEC_DUMP(parser_timer);
61
62    PERF_SEC_DESTROY(parser_timer);
63
64    fclose(infile);
65    fclose(outfile);
66
67#if PRINT_SYMBOL_DISTRIBUTION
68//    print_GIDS();
69    pbgs_symbol_table.Print_Symbol_Table_Distribution();
70#endif
71
72        return(0);
73}
74
75// length in [1,16]
76template <int L>
77static inline int ElemEnd_grouping(int end) {
78    int start = end - L;
79    int hashvalue = compute_hash_value(L, start - block_base, hashvalues);
80    int gid = pbgs_symbol_table.Lookup_or_Insert_Name<L>(source + start, hashvalue);
81    gids.push_back(gid);
82#if DEBUG
83    char* symbol = new char[L+1];
84    strncpy ( symbol, source + start, L );
85    symbol[L] ='\0';
86    printf ("%s | start: %i[%i] | end: %i[%i] | lgth: %i | gid: %i | hashvalue: %i | symbol: %s\n", __FUNCTION__, start, start-buffer_base, end, end-buffer_base, L, gid, hashvalue, symbol );
87    delete symbol; symbol = 0;
88#endif
89    return 0;
90}
91
92// length > 16
93static inline int ElemStart_grouping(int start, int lgth) {
94    int hashvalue = compute_hash_value(lgth, start - block_base, hashvalues);
95    int gid = pbgs_symbol_table.Lookup_or_Insert_Name(source + start, hashvalue, lgth);
96    gids.push_back(gid);
97#if DEBUG
98    char* symbol = new char[lgth+1];
99    strncpy ( symbol, source + start, lgth );
100    symbol[lgth] ='\0';
101    printf ("%s | start: %i[%i] | lgth: %i | hashvalue: %i | gid: %i | symbol: %s\n", __FUNCTION__, start, start - block_base, lgth, hashvalue, gid, symbol);
102#endif
103    return 0;
104}
105
106// L = 2, pass in bitstream for symbols length [1,2]
107// L = 4, pass in bitstream for symbols length [3,4]
108// L = 6, pass in bitstream for symbols length [5,6]
109// L = 8, pass in bitstream for symbols length [7,8]
110// L = 10, pass in bitstream for symbols length [9,10]
111// L = 12, pass in bitstream for symbols length [11,12]
112// L = 14, pass in bitstream for symbols length [13,14]
113// L = 16, pass in bitstream for symbols length [15,16]
114// L = 17, pass in bitstream for symbols length longer than 16
115template <int L>
116static inline void validate_block_length_grouping(BitBlockForwardIterator & start, int block_base) {
117
118    BitBlockForwardIterator end;
119    int end_pos;
120
121    while(start != end) {
122        end_pos = *start;
123        ElemEnd_grouping<L>(end_pos + block_base);
124        start++;
125    }
126}
127
128template <>
129inline void validate_block_length_grouping<17>(BitBlockForwardIterator & start, int block_base) {
130
131    BitBlockForwardIterator end;
132    int start_pos, end_pos;
133
134    while(start != end) {
135        start_pos = *start;
136        end_pos = ScanForwardPos (&elem_ends, start_pos);
137        if (end_pos)
138        {
139            ElemStart_grouping(start_pos - 16 + block_base, end_pos - start_pos + 16);
140        }
141        else
142        {
143#if DEBUG
144            printf ("There is no more 1 bit in the block. pos: %i | sym: %c%c[%c]\n", start_pos,
145                    source[start_pos + block_base-2], source[start_pos + block_base-1], source[start_pos + block_base]);
146#endif
147            //handle boundary case
148            block_boundary_case = true;
149            last_elem_start = start_pos - 16 - BLOCK_SIZE;
150#if DEBUG
151            printf ("last_elem_start: %i\n", last_elem_start);
152#endif
153        }
154        start++;
155    }
156}
157
158static inline void postprocess_do_block(Lex & lex, CtCDPI_Callouts & ctCDPI_Callouts, Ref_Callouts & ref_Callouts, Check_streams & check_streams, Tag_Callouts & tag_Callouts, Hash_data & hash_data, int chars_avail){
159
160    tracker.StoreNewlines(lex.LF);
161
162    elem_ends = tag_Callouts.ElemName_ends;
163    hashvalues[1] = hash_data.Hash_value;
164
165    // Check for block boundary case for length 16 and above
166    if (block_boundary_case)
167    {
168#if DEBUG
169        printf ("block boundary case! Special handle!\n");
170#endif
171        int lgth = count_forward_zeroes(elem_ends)-last_elem_start;
172        int start = block_base + last_elem_start;
173        int hashvalue = compute_hash_value(lgth, last_elem_start, hashvalues);
174        int gid = pbgs_symbol_table.Lookup_or_Insert_Name(source + start, hashvalue, lgth);
175        gids.push_back(gid);
176#if DEBUG
177        printf ("%s | start: %i[%i] | lgth: %i | hashvalue: %i | gid: %i \n", __FUNCTION__, start, start - block_base, lgth, hashvalue, gid);
178#endif
179        block_boundary_case = false;
180    }
181
182    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_1_to_2) )
183    {
184        BitBlockForwardIterator iter_length_grouping_2(&tag_Callouts.ElemName_ends_1_to_2);
185        validate_block_length_grouping<2>(iter_length_grouping_2, block_base);
186    }
187
188    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_3_to_4) )
189    {
190        BitBlockForwardIterator iter_length_grouping_4(&tag_Callouts.ElemName_ends_3_to_4);
191        validate_block_length_grouping<4>(iter_length_grouping_4, block_base);
192    }
193
194    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_5_to_6) )
195    {
196        BitBlockForwardIterator iter_length_grouping_6(&tag_Callouts.ElemName_ends_5_to_6);
197        validate_block_length_grouping<6>(iter_length_grouping_6, block_base);
198    }
199
200    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_7_to_8) )
201    {
202        BitBlockForwardIterator iter_length_grouping_8(&tag_Callouts.ElemName_ends_7_to_8);
203        validate_block_length_grouping<8>(iter_length_grouping_8, block_base);
204    }
205
206    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_9_to_10) )
207    {
208        BitBlockForwardIterator iter_length_grouping_10(&tag_Callouts.ElemName_ends_9_to_10);
209        validate_block_length_grouping<10>(iter_length_grouping_10, block_base);
210    }
211
212    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_11_to_12) )
213    {
214        BitBlockForwardIterator iter_length_grouping_12(&tag_Callouts.ElemName_ends_11_to_12);
215        validate_block_length_grouping<12>(iter_length_grouping_12, block_base);
216    }
217
218    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_13_to_14) )
219    {
220        BitBlockForwardIterator iter_length_grouping_14(&tag_Callouts.ElemName_ends_13_to_14);
221        validate_block_length_grouping<14>(iter_length_grouping_14, block_base);
222    }
223
224    if ( bitblock_has_bit(tag_Callouts.ElemName_ends_15_to_16) )
225    {
226        BitBlockForwardIterator iter_length_grouping_16(&tag_Callouts.ElemName_ends_15_to_16);
227        validate_block_length_grouping<16>(iter_length_grouping_16, block_base);
228    }
229
230    if ( bitblock_has_bit(tag_Callouts.ElemName_remaining_ends) )
231    {
232        BitBlockForwardIterator iter_length_grouping_remaining(&tag_Callouts.ElemName_remaining_ends);
233        validate_block_length_grouping<17>(iter_length_grouping_remaining, block_base);
234    }
235
236    //copy current hash value data as previous one.
237    memmove (&hashvalues[0], &hashvalues[1], 16);
238
239    if (bitblock_has_bit(simd_or(check_streams.non_ascii_name_starts, check_streams.non_ascii_names))) {
240        BitBlockForwardIterator iter_NameStrt_check(&check_streams.non_ascii_name_starts);
241        validate_block(iter_NameStrt_check, block_base, NameStrt_check);
242        BitBlockForwardIterator iter_Name_check(&check_streams.non_ascii_names);
243        validate_block(iter_Name_check, block_base, Name_check);
244    }
245    if (bitblock_has_bit(ctCDPI_Callouts.PI_name_starts)){
246        BitBlockForwardIterator iter_PI_name_starts(&(ctCDPI_Callouts.PI_name_starts));
247        validate_block(iter_PI_name_starts, block_base, buffer_base, PIName_check);
248    }
249    if (bitblock_has_bit(ctCDPI_Callouts.CD_starts)){
250        BitBlockForwardIterator iter_CD_check(&ctCDPI_Callouts.CD_starts);
251        validate_block(iter_CD_check, block_base, CD_check);
252    }
253    if(bitblock_has_bit(ref_Callouts.GenRef_starts)){
254        BitBlockForwardIterator iter_GenRef_check(&ref_Callouts.GenRef_starts);
255        validate_block(iter_GenRef_check, block_base, GenRef_check);
256    }
257    if(bitblock_has_bit(ref_Callouts.DecRef_starts)){
258        BitBlockForwardIterator iter_DecRef_check(&ref_Callouts.DecRef_starts);
259        validate_block(iter_DecRef_check, block_base, DecRef_check);
260    }
261    if(bitblock_has_bit(ref_Callouts.HexRef_starts)){
262        BitBlockForwardIterator iter_HexRef_check(&ref_Callouts.HexRef_starts);
263        validate_block(iter_HexRef_check, block_base, HexRef_check);
264    }
265    if(bitblock_has_bit(check_streams.att_refs)){
266        BitBlockForwardIterator iter_AttRef_check(&check_streams.att_refs);
267        validate_block(iter_AttRef_check, block_base, AttRef_check);
268    }
269
270    if(error_tracker.Has_Noted_Error()){
271        int error_line, error_column;
272        tracker.get_Line_and_Column(error_tracker.Noted_Pos_In_Block(), error_line, error_column);
273        ReportError(error_tracker.Noted_Error_Msg(), error_line, error_column);
274        exit(-1);
275    }
276
277    matcher.store_streams(check_streams.tag_marks, check_streams.name_follows, check_streams.misc_mask, chars_avail);
278    tracker.AdvanceBlock();
279}
280
281void do_process(FILE *infile, FILE *outfile) {
282
283@decl
284  int buf_pos = 0;
285  int block_pos = 0;
286  int errpos = 0;
287  int chars_avail = 0;
288  int check_pos = 0;
289  int chars_read = 0;
290  BytePack buf[(BUFFER_SIZE+BLOCK_SIZE+OVERLAP_BUFSIZE*2)/sizeof(SIMD_type)];
291
292  char * srcbuf = ((char *) buf) + OVERLAP_BUFSIZE;
293  buffer_base = buf_pos;
294  source = srcbuf;
295  chars_read = fread((void *)srcbuf, 1, BUFFER_SIZE + OVERLAP_BUFSIZE, infile);
296  chars_avail = chars_read;
297  if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
298
299  matcher.setSrc(srcbuf);
300
301  if(chars_read<4){
302    fprintf(stderr,"File is too short. Not well formed.\n");
303    exit(-1);
304  }
305
306  Entity_Info * e = new Entity_Info;
307  e->AnalyzeSignature((unsigned char *)srcbuf);
308
309  if (e->code_unit_base == ASCII) {
310
311    XML_Decl_Parser<ASCII> decl_parser((unsigned char *)srcbuf);
312
313    decl_parser.ReadXMLInfo(*e);
314
315    if (e->code_unit_size != SingleByte || (e->has_encoding_decl && (!at_UTF_8(e->encoding)))){
316        fprintf(stderr,"Sorry, this xmlwf demo only works for UTF-8.\n");
317        exit(-1);
318    }
319  }
320  else {
321    fprintf(stderr,"Sorry, this xmlwf demo does not process EBCDIC.\n");
322        exit(-1);
323  }
324
325  if (e->content_start != 0) {
326        memmove(&srcbuf[0], &srcbuf[e->content_start], chars_read - e->content_start);
327        buf_pos = e->content_start;
328        buffer_base = buf_pos;
329        if (chars_avail == BUFFER_SIZE) {
330                chars_read = chars_read - e->content_start +
331                             fread(&srcbuf[chars_read-e->content_start], 1, e->content_start, infile);
332                chars_avail = chars_read;
333                if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
334        }
335        else {
336          chars_read -=e->content_start;
337          chars_avail -=e->content_start;
338        }
339  }
340
341@stream_stmts
342
343/* Full Buffers */
344    while (chars_avail == BUFFER_SIZE) {
345      PERF_SEC_START(parser_timer);
346      for (int blk = 0; blk < SEGMENT_BLOCKS; blk++) {
347          block_base = blk*BLOCK_SIZE;
348          s2p_do_block((BytePack *) &srcbuf[block_base], basis_bits);
349          @block_stmts
350          postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, tag_Callouts, hash_data, chars_avail);
351      }
352      matcher.StreamScan(chars_avail);
353      matcher.Advance_buffer();
354      tracker.Advance_buffer();
355      PERF_SEC_END(parser_timer, chars_avail);
356
357      int bytes_left = chars_read - chars_avail;
358      //memmove(srcbuf, &srcbuf[BUFFER_SIZE], bytes_left);
359      memmove(buf, &srcbuf[BUFFER_SIZE - OVERLAP_BUFSIZE], bytes_left + OVERLAP_BUFSIZE);
360      chars_read = fread(&srcbuf[bytes_left],1, BUFFER_SIZE + OVERLAP_BUFSIZE - bytes_left, infile) + bytes_left;
361
362      chars_avail = chars_read;
363      if (chars_avail > BUFFER_SIZE) chars_avail = BUFFER_SIZE;
364      buf_pos += chars_avail;
365      buffer_base = buf_pos;
366
367  }
368/* Final Partial Buffer */
369    PERF_SEC_START(parser_timer);
370
371    block_pos = 0;
372    int remaining = chars_avail;
373/* Full Blocks */
374    while (remaining >= BLOCK_SIZE) {
375          block_base = block_pos;
376          s2p_do_block((BytePack *) &srcbuf[block_pos], basis_bits);
377          @block_stmts
378          postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, tag_Callouts, hash_data, chars_avail);
379          block_pos += BLOCK_SIZE;
380          remaining -= BLOCK_SIZE;
381    }
382    block_base = block_pos;
383    if (remaining > 0 || @any_carry) {
384          EOF_mask = bitblock::srl(simd<1>::constant<1>(),convert(BLOCK_SIZE-remaining));
385          s2p_do_final_block((BytePack *) &srcbuf[block_pos], basis_bits, EOF_mask);
386          @final_block_stmts
387          postprocess_do_block(lex, ctCDPI_Callouts, ref_Callouts, check_streams, tag_Callouts, hash_data, chars_avail);
388    }
389    buf_pos += chars_avail;
390    buffer_base = buf_pos;
391
392    matcher.StreamScan(chars_avail);
393    matcher.Advance_buffer();
394    tracker.Advance_buffer();
395
396    PERF_SEC_END(parser_timer, chars_avail);
397    if (matcher.depth != 0) {
398      fprintf(stderr, "tag matching error (depth %i) at position %i\n", matcher.depth, buffer_base);
399      exit(-1);
400    }
401}
Note: See TracBrowser for help on using the repository browser.