source: trunk/markup_stats.cxx @ 1546

Last change on this file since 1546 was 474, checked in by ksherdy, 9 years ago

Reduce 'PAPI' and 'CODE_CLOCKER' constants to the single constant 'CODE_CLOCKER'.

File size: 15.4 KB
Line 
1/*  markup_stats.c - parabix demo program
2    Copyright (c) 2007, Robert D. Cameron.
3    Licensed to the public under the Open Software License 3.0.
4    Licensed to International Characters, Inc., under the Academic
5    Free License 3.0.
6
7*/
8
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <stdint.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <string>
#include <iostream>
17using namespace std;
18
/* Symbolic values for the two-state configuration switches below. */
#define ON 1
#define OFF 2

#define TEMPLATED_SIMD_LIB

#define VALIDATION_MODE OFF

/* Everything in this section is only compiled when CODE_CLOCKER was defined
   by the build (e.g. on the compiler command line). */
#ifdef CODE_CLOCKER
#include "../code_clocker/clocker/cc.h"
#include "../code_clocker/clocker/cc.cxx"

CC * code_clocker;

/* Possible values of OMISSION. */
#define NONE 0
#define END_TAG_MATCHING 1
#define ATTRIBUTE_UNIQUENESS 2
#define NAME_VALIDATION 3
#define NAME_LOOKUP 4

#define OMISSION NONE


/* Possible values of CODE_CLOCKER that are tested by "CODE_CLOCKER == ..."
   conditionals in this file. */
#define CHARSET_VALIDATION 1
#define WS_CONTROL 2
#define MARKUP_STREAMS 3
#define BITLEX_ALL 4
#define BITPLEX 5
#define BYTEPLEX 6
#define ADVANCE_BUFFERS 7
#define BUFFER_TOTAL 8
#define FILE_READING 9

/* CODE_CLOCKER is already defined at this point (we are inside its #ifdef),
   so it must be undefined before it is given a new replacement; redefining a
   macro with a different body is otherwise diagnosed by the preprocessor. */
#undef CODE_CLOCKER
#define CODE_CLOCKER OMISSION

#endif
54
55//#include "src/ilax.h"
56#include "src/engine.h"
57
58#ifndef REPEAT_RUNS
59#define REPEAT_RUNS 3
60#endif
61
62/* Internals */
63#include "src/xmlmodel.h"
64#include "src/xml_error.h"
65#include "src/bitplex.h"
66#include "src/byteplex.h"
67#include "src/xmldecl.h"
68#include "src/bitlex.h"
69
70
71#include "src/xmlmodel.c"
72#include "src/xml_error.c"
73#include "src/bitplex.c"
74#include "src/byteplex.c"
75#include "src/xmldecl.c"
76#include "src/bitlex.c"
77#include "src/engine.c"
78#include "src/symtab.c"
79
80/* Global declarations of parsing engine. */
81Parser_Interface<UTF_8> * parser;
82
83/* Global declarations for statistics. */
84
/* Per-item tallies maintained by the action routines in this file.  The
   *_length totals those routines update are only accumulated when CALC_AVG
   is defined. */
int comment_count = 0, comment_length = 0;
int CDATA_start_count = 0, CDATA_start_pos = 0;
int CDATA_length = 0, CDATA_end_count = 0;
int PI_count = 0, PI_length = 0;
int empty_elem_count = 0, empty_elem_length = 0;
int start_tag_count = 0, start_tag_length = 0;
int attribute_count = 0;
int end_tag_count = 0, end_tag_length = 0;
int reference_count = 0, reference_length = 0;
int text_item_count = 0, text_item_length = 0;
int error_item_count = 0, error_item_length = 0;

/* Current and deepest element nesting seen by the start/end tag actions. */
int nesting_depth = 0, max_nesting_depth = 0;

/* Attribute and namespace totals. */
int total_attribute_count = 0;
int total_att_name_length = 0, total_att_value_length = 0;
int namespace_count = 0;
int total_namespace_name_length = 0, total_namespace_URI_length = 0;

int last_item_start = 0, last_item_stop = 0;
int last_buffer_rel_pos = 0;

#ifdef LEAF_COUNTING
/* at_start is 1 while the most recent tag event was a start tag. */
int at_start = 1;
int leaf_count = 0, interior_count = 0;
#endif
124
125/* Action routine for an XML comment in "<!--"  "-->" brackets. */
126template<>
127inline void Parser_Interface<UTF_8>::Comment_action(unsigned char * item, int lgth) {
128        comment_count +=1;
129       
130        #if defined(CALC_AVG)
131                comment_length += lgth;
132        #endif
133}
134
135/* Action routine called upon recognizing "<![CDATA[" to start a CDATA section. */
136template<>
137inline void Parser_Interface<UTF_8>::CDATA_start_action(unsigned char * CDATA_ptr){
138        CDATA_start_pos = (int) CDATA_ptr;
139        CDATA_start_count +=1;
140}
141
142/* Action routine called upon recognizing "]]>" to end a CDATA section. */
143template<>
144inline void Parser_Interface<UTF_8>::CDATA_end_action(unsigned char * CDATA_end_ptr) {
145        CDATA_end_count +=1;
146       
147        #if defined(CALC_AVG)
148                CDATA_length += (int) CDATA_end_ptr - CDATA_start_pos;
149        #endif
150}
151
152/* Action routine for an XML processing instruction enclosed in "<?" and "?>" brackets. */
153template<>
154inline void Parser_Interface<UTF_8>::PI_action(unsigned char * item, int lgth) {
155        PI_count +=1;
156       
157        #if defined(CALC_AVG)
158                PI_length += lgth;
159        #endif
160}
161
162/* Action routine for an empty element enclosed in "<" and "/>" brackets. */
163template<>
164inline void Parser_Interface<UTF_8>::EmptyElement_action(unsigned char * item, int lgth) {
165        empty_elem_count +=1;
166#ifdef LEAF_COUNTING
167        int at_start = 0;
168        leaf_count++;
169#endif
170
171        #if defined(CALC_AVG)
172                empty_elem_length += lgth;
173        #endif
174}
175
176/* Action routine for a start tag enclosed in "<" and ">" brackets. */
177template<>
178inline void Parser_Interface<UTF_8>::StartTag_action(unsigned char * item, int lgth) {
179        start_tag_count +=1;
180#ifdef LEAF_COUNTING
181        at_start = 1;
182#endif
183       
184        #if defined(CALC_AVG)
185                start_tag_length += lgth;
186        #endif
187       
188        nesting_depth += 1;
189        if (nesting_depth > max_nesting_depth) max_nesting_depth = nesting_depth;
190        //cout << string((char *) item, lgth) << endl;
191}
192
193/* Action routine for an end tag enclosed in "</" and ">" brackets. */
194template<>
195inline void Parser_Interface<UTF_8>::EndTag_action(unsigned char * item, int lgth) {
196        end_tag_count +=1;
197#ifdef LEAF_COUNTING
198        leaf_count += at_start;
199        interior_count += (1 - at_start);
200        at_start = 0;
201#endif
202       
203        #if defined(CALC_AVG)
204                end_tag_length += lgth;
205        #endif
206       
207        nesting_depth -= 1;
208}
209
210/* Action routine for an error item */
211template<>
212inline void Parser_Interface<UTF_8>::Error_action(unsigned char * item, int lgth) {
213        error_item_count +=1;
214       
215        #if defined(CALC_AVG)
216                error_item_length += lgth;
217        #endif
218       
219        fprintf(stderr, "Error: illegal markup of length %i.\n", lgth);
220        cerr << string((char *) item, lgth) << endl;
221}
222
223/* Action routine for a text item */
224template<>
225inline void Parser_Interface<UTF_8>::Text_action(unsigned char * item, int lgth, bool more) {
226        text_item_count +=1;
227       
228        #if defined(CALC_AVG)
229                text_item_length += lgth;
230        #endif
231}
232
233template<>
234inline void Parser_Interface<UTF_8>::Reference_action(unsigned char * item, int lgth) {
235        reference_count +=1;
236       
237        #if defined(CALC_AVG)
238                reference_length += lgth;
239        #endif
240}
241
242
243
244
245/* Three action routines for markup components are defined as follows.
246|ElementName_action| is the action routine called upon recognition of
247an element name immediately after the opening angle bracket of a start
248tag or empty element tag.  {\bf OR MAYBE THIS SHOULD BE DEFERRED UNTIL
249AFTER ATTRIBUTE PROCESSING SO THAT NAMESPACES ARE SET?}
250It is called with two parameters identifying the
251first and last character positions of the expected XML_name.
252Similarly, |PI_Target_action| is the action routine called upon recognition
253of the XML Name that occurs immediately after the opening "<?"
254delimiter of a processing instruction.
255
256 The third action routine for markup components is Attribute_Value_action,
257which takes three parameters rather than two.  {\bf OR POSSIBLY JUST
258THE QUOTE MARK ITEMS, RELYING ON THE END OF THE LAST COMPONENT PROCESSED
259TO MARK THE SPACE BEFORE THE ATT NAME.- REQUIRES ELEMENT_NAME_ACTION}
260*/
261
262/* Semantic action routines for markup components. */
263/* Action routine for an element name occurring immediately after the
264   opening "<" of a start tag or empty element tag. */
265template<>
266inline void Parser_Interface<UTF_8>::ElementName_action(unsigned char * item, int lgth) {
267}
268
269/* Action routine for a processing instruction target name occurring immediately
270   after the opening "<?" of a processing instruction. */
271template<>
272inline void Parser_Interface<UTF_8>::PI_Target_action(unsigned char * item, int lgth) {
273}
274
275/* Action routine for an individual attribute/value pair occurring in
276   a element start tag or an empty element tag. */
277template<>
278inline void Parser_Interface<UTF_8>::AttributeValue_action(unsigned char * name, int name_lgth, 
279                                 unsigned char * val, int val_lgth) {
280        total_attribute_count+=1;
281       
282        #if defined(CALC_AVG)
283                total_att_name_length += name_lgth;
284                total_att_value_length += val_lgth;
285        #endif
286}
287
/* Action routine for an individual namespace name/URI pair. */
290template<>
291inline void Parser_Interface<UTF_8>::Namespace_action(unsigned char * name, int name_lgth,
292                             unsigned char * URI, int URI_lgth) {
293        namespace_count+=1;
294       
295        #if defined(CALC_AVG)
296                total_namespace_name_length += name_lgth;
297                total_namespace_URI_length += URI_lgth;
298        #endif
299}
300
301
302template<>
303void Parser_Interface<UTF_8>::FinalizeBuffer_action() {
304       
305       
306#ifdef DEBUG
307        printf ("FinalizeBuffer; last 16 bytes + lookahead 16 =\n");
308        cout << string((char *) GetCodeUnitPtr(AbsPos()-16), 16) << "::" << string((char *) GetCodeUnitPtr(AbsPos()), 16) << endl;
309#endif
310        #if defined(CODE_CLOCKER) and (CODE_CLOCKER == BUFFER_TOTAL)
311                code_clocker->end_interval(BUFFER_SIZE);
312        #endif
313        #if defined(CODE_CLOCKER) and (CODE_CLOCKER == BUFFER_TOTAL)
314                code_clocker->start_interval();
315        #endif
316       
317}
318
319
320template<>
321inline void Parser_Interface<UTF_8>::DocumentStart_action() {
322        #if defined(CODE_CLOCKER) and (CODE_CLOCKER == BUFFER_TOTAL)
323                code_clocker->start_interval();
324        #endif
325}
326
327template<>
328inline void Parser_Interface<UTF_8>::DocumentEnd_action() {
329        #if defined(CODE_CLOCKER) and (CODE_CLOCKER == BUFFER_TOTAL)
330                code_clocker->end_interval(buffer_rel_pos);
331        #endif
332        //printf("Document end\n");
333        //Parser_Interface<UTF_8>::model_info->ReportSymbolInfo();
334}
335
336template<>
337inline void Parser_Interface<UTF_8>::Doctype_action(unsigned char * item, int lgth) {
338#ifdef SHOW_DTD_ACTIONS
339        printf("Document Type:\n");
340        cout << string((char *) item, lgth) <<endl;
341#endif
342}
343
344template<>
345inline void Parser_Interface<UTF_8>::PEReference_action(unsigned char * item, int lgth) {
346}
347
348
349template<>
350inline void Parser_Interface<UTF_8>::ExtSubsetDecl_action(unsigned char * item, int lgth) {
351#ifdef SHOW_DTD_ACTIONS
352        printf("ExtSubsetDecl:\n");
353        cout << string((char *) item, lgth) <<endl;
354#endif
355}
356
357template<>
358inline void Parser_Interface<UTF_8>::Prolog_action(unsigned char * item, int lgth) {
359#ifdef SHOW_DTD_ACTIONS
360        printf("Prolog:\n");
361        cout << string((char *) item, lgth) <<endl;
362#endif
363}
364
/* Print one statistics line on stdout.
 *
 *   stat_string - singular name of the item kind (e.g. "comment")
 *   count       - number of items seen
 *   total_lgth  - summed length of those items
 *
 * Output is "<count> <name>s." for zero items, "<count> <name> of length
 * <total>." for exactly one, and "<count> <name>s of avg. lgth <avg>."
 * otherwise.
 *
 * This is a function rather than a multi-statement macro so that it is a
 * single statement wherever it is called (safe inside an unbraced if/else)
 * and evaluates each argument exactly once.
 */
static inline void print_stats(const char * stat_string, int count, int total_lgth) {
    printf("%i %s", count, stat_string);
    if (count == 0) {
        printf("s.\n");
    } else if (count == 1) {
        printf(" of length %i.\n", total_lgth);
    } else {
        printf("s of avg. lgth %f.\n", (double) total_lgth / (double) count);
    }
}
370
371
372#include <sched.h>
373
374int
375main(int argc, char * argv[]) {
376
377#ifdef SET_AFFINITY
378cpu_set_t mask;
379
380
381unsigned int len = sizeof(mask);
382if (sched_getaffinity(0, len, &mask) < 0) {
383    perror("sched_getaffinity");
384    return -1;
385    }
386printf("Original affinity mask is: %08lx\n", mask);
387
388CPU_CLR(0, &mask);
389
390if (sched_setaffinity(0, len, &mask) < 0) {
391    perror("sched_setaffinity");
392}
393if (sched_getaffinity(0, len, &mask) < 0) {
394    perror("sched_getaffinity");
395    return -1;
396    }
397printf("Modified affinity mask is:  %08lx\n", mask);
398#endif
399
400
401
402
403        if (argc != 2) {
404        printf("Usage: %s <filename>\n", argv[0]);
405                exit(-1);
406        }
407       
408        char * src_filename = argv[1];
409        char * cmdline = new char[strlen(argv[0]) + strlen(argv[1]) +1 +1]; 
410        strcat(cmdline, argv[0]);
411        strcat(cmdline," ");
412        strcat(cmdline,argv[1]);
413
414        #ifdef CODE_CLOCKER
415                #define NUM_EVENTS 1
416                int Events[NUM_EVENTS] = {PAPI_TOT_CYC};
417//              int Events[NUM_EVENTS] = {PAPI_TOT_CYC, PAPI_L1_DCM};
418//              int Events[NUM_EVENTS] = {PAPI_L2_DCM};
419                int cal_size = 1000;
420                code_clocker = new CC(Events,NUM_EVENTS,cal_size);
421                code_clocker->set_cmd(cmdline);
422        #endif
423
424        // Read the entire file into a memory buffer   
425        FILE * src_file;
426        struct stat fileinfo;
427        int src_filesize;
428
429        // open file and fstat 
430        src_file = fopen ( src_filename , "rb" );
431        if (!src_file) {
432                fprintf(stderr, "Cannot open '%s'. Terminating the process ...\n", src_filename); 
433                exit(-1);
434        }
435        if(fstat(fileno(src_file), &fileinfo)!=0) {
436                fprintf(stderr, "Cannot fstat '%s'. Terminating the process ...\n", src_filename); 
437                exit(-1);
438        }
439       
440        src_filesize = fileinfo.st_size;
441
442        fclose (src_file);
443       
444        for (int run = 0; run < REPEAT_RUNS; run++) {
445
446//              #ifdef PAPI
447//                      code_clocker->start_interval();
448//              #endif
449
450                parser = Parser_Interface<UTF_8>::ParserFactory(src_filename);
451       
452               
453                if (!parser->has_ByteOrderMark()) printf("No ");
454                printf("Byte Order Mark found.\n");
455       
456                if (parser->get_version() == XML_1_0) printf("XML version 1.0 declared.\n");
457                else if (parser->get_version() == XML_1_1) printf("XML version 1.1 declared.\n");
458                else printf ("XML version 1.0 implied by default.\n");
459                if (parser->has_EncodingDecl()) {
460                        printf("XML encoding declared:  %s\n", parser->get_Encoding());
461                }
462                if (parser->standalone_status() == Standalone_yes) 
463                        printf("XML standalone = yes declared.\n");
464                else if (parser->standalone_status() == Standalone_no) 
465                        printf("XML standalone = no declared.\n");
466                else printf ("XML standalone = no by default.\n");
467               
468
469                #ifdef CODE_CLOCKER
470                        code_clocker->start_interval();
471                #endif
472               
473                parser->Parse_Prolog();
474                parser->Parse_DocumentContent();
475
476                #ifdef CODE_CLOCKER
477                        int elems = src_filesize;
478                        code_clocker->end_interval(elems);
479                #endif
480
481                delete parser;
482                printf("Run %i complete.\n", run);
483
484        }
485
486        #ifdef CODE_CLOCKER
487       
488        #if defined(CODE_CLOCKER) and (CODE_CLOCKER == CHARSET_VALIDATION)
489                code_clocker->set_param("CODE_CLOCKER", "CHARSET_VALIDATION");
490        #endif 
491        #if defined(CODE_CLOCKER) and (CODE_CLOCKER == WS_CONTROL)
492                code_clocker->set_param("CODE_CLOCKER", "WS_CONTROL");
493        #endif
494        #if defined(CODE_CLOCKER) and (CODE_CLOCKER == MARKUP_STREAMS)
495                code_clocker->set_param("CODE_CLOCKER", "MARKUP_STREAMS");
496        #endif
497        #if defined(CODE_CLOCKER) and (CODE_CLOCKER == BITLEX_ALL)
498                code_clocker->set_param("CODE_CLOCKER", "BITLEX_ALL");
499        #endif
500        #if defined(CODE_CLOCKER) and (CODE_CLOCKER == BITPLEX)
501                code_clocker->set_param("CODE_CLOCKER", "BITPLEX");
502        #endif
503        #if defined(CODE_CLOCKER) and (CODE_CLOCKER == BYTEPLEX)
504                code_clocker->set_param("CODE_CLOCKER", "BYTEPLEX");
505        #endif
506        #if defined(CODE_CLOCKER) and (CODE_CLOCKER == ADVANCE_BUFFERS)
507                code_clocker->set_param("CODE_CLOCKER", "ADVANCE_BUFFERS");
508        #endif
509        #if defined(CODE_CLOCKER) and (CODE_CLOCKER == BUFFER_TOTAL)
510                code_clocker->set_param("CODE_CLOCKER", "BUFFER_TOTAL");
511        #endif
512        #if defined(CODE_CLOCKER) and (CODE_CLOCKER == FILE_READING)
513                code_clocker->set_param("CODE_CLOCKER", "FILE_READING");
514        #endif
515        #if defined(OMISSION) and (OMISSION == NONE)
516                code_clocker->set_param("OMISSION", "NONE");
517        #endif
518        #if defined(OMISSION) and (OMISSION == END_TAG_MATCHING)
519                code_clocker->set_param("OMISSION", "END_TAG_MATCHING");
520        #endif
521        #if defined(OMISSION) and (OMISSION == ATTRIBUTE_UNIQUENESS)
522                code_clocker->set_param("OMISSION", "ATTRIBUTE_UNIQUENESS");
523        #endif
524        #if defined(OMISSION) and (OMISSION == NAME_VALIDATION)
525                code_clocker->set_param("OMISSION", "NAME_VALIDATION");
526        #endif
527        #if defined(OMISSION) and (OMISSION == NAME_LOOKUP)
528                code_clocker->set_param("OMISSION", "NAME_LOOKUP");
529        #endif
530        #if defined(VALIDATION_MODE) and (VALIDATION_MODE == ON)
531                code_clocker->set_param("VALIDATION_MODE", "ON");
532        #endif
533        #if defined(VALIDATION_MODE) and (VALIDATION_MODE == OFF)
534                code_clocker->set_param("VALIDATION_MODE", "OFF");
535        #endif
536       
537                code_clocker->write_xml_file();
538                code_clocker->display_system_info();
539                code_clocker->display_raw_event_data();
540                delete code_clocker;
541        #endif 
542       
543        print_stats("comment", comment_count, comment_length);
544        print_stats("CDATA section", CDATA_end_count, CDATA_length);
545        print_stats("processing instruction", PI_count, PI_length);
546        print_stats("empty element", empty_elem_count, empty_elem_length);
547        print_stats("start tag", start_tag_count, start_tag_length);
548        printf("%i total attributes\n", total_attribute_count);
549        print_stats("attribute name", total_attribute_count, total_att_name_length);
550        print_stats("attribute value", total_attribute_count, total_att_value_length);
551        print_stats("namespace name", namespace_count, total_namespace_name_length);
552        print_stats("namespace URI", namespace_count, total_namespace_URI_length);
553        print_stats("end tag", end_tag_count, end_tag_length);
554        print_stats("text item", text_item_count, text_item_length);
555        print_stats("reference", reference_count, reference_length);
556        print_stats("error item", error_item_count, error_item_length);
557        printf("Maximum nesting depth = %i\n", max_nesting_depth);
558#ifdef LEAF_COUNTING
559        printf("%i leaf nodes, %i interior nodes.\n", leaf_count, interior_count);
560#endif
561       
562        return(0);
563}
Note: See TracBrowser for help on using the repository browser.