source: trunk/markup_stats.cxx @ 361

Last change on this file since 361 was 274, checked in by ksherdy, 10 years ago

Report averages as floating point values.

File size: 15.5 KB
Line 
1/*  markup_stats.c - parabix demo program
2    Copyright (c) 2007, Robert D. Cameron.
3    Licensed to the public under the Open Software License 3.0.
4    Licensed to International Characters, Inc., under the Academic
5    Free License 3.0.
6
7*/
8
9#include <stdio.h>
10#include <stdlib.h>
11#include <string.h>
12#include <errno.h>
13#include <sys/types.h>
14#include <sys/stat.h>
15#include <string>
16#include <iostream>
17using namespace std;
18
19#define ON 1
20#define OFF 2
21
22#define TEMPLATED_SIMD_LIB
23
24#define VALIDATION_MODE OFF
25
26#ifdef PAPI
27#include "../code_clocker/clocker/cc.h"
28#include "../code_clocker/clocker/cc.cxx"
29
30CC * code_clocker;
31
32#define NONE 0
33#define END_TAG_MATCHING 1
34#define ATTRIBUTE_UNIQUENESS 2
35#define NAME_VALIDATION 3
36#define NAME_LOOKUP 4
37
38#define OMISSION NONE
39
40
41#define CHARSET_VALIDATION 1
42#define WS_CONTROL 2
43#define MARKUP_STREAMS 3
44#define BITLEX_ALL 4
45#define BITPLEX 5
46#define BYTEPLEX 6
47#define ADVANCE_BUFFERS 7
48#define BUFFER_TOTAL 8
49#define FILE_READING 9
50
51#define CODE_CLOCKING BUFFER_TOTAL
52
53#endif
54
55//#include "src/ilax.h"
56#include "src/engine.h"
57
58#ifndef REPEAT_RUNS
59#define REPEAT_RUNS 1
60#endif
61
62/* Internals */
63#include "src/xmlmodel.h"
64#include "src/xml_error.h"
65#include "src/bitplex.h"
66#include "src/byteplex.h"
67#include "src/xmldecl.h"
68#include "src/bitlex.h"
69
70
71#include "src/xmlmodel.c"
72#include "src/xml_error.c"
73#include "src/bitplex.c"
74#include "src/byteplex.c"
75#include "src/xmldecl.c"
76#include "src/bitlex.c"
77#include "src/engine.c"
78#include "src/symtab.c"
79
80/* Global declarations of parsing engine. */
81Parser_Interface<UTF_8> * parser;
82
/* Global declarations for statistics.
   Every counter starts at zero and is updated by the action routines
   defined later in this file. */

/* Comments. */
int comment_count = 0;
int comment_length = 0;

/* CDATA sections; CDATA_start_pos holds the start position recorded by
   CDATA_start_action for use by CDATA_end_action. */
int CDATA_start_count = 0;
int CDATA_start_pos = 0;
int CDATA_length = 0;
int CDATA_end_count = 0;

/* Processing instructions. */
int PI_count = 0;
int PI_length = 0;

/* Tags. */
int empty_elem_count = 0;
int empty_elem_length = 0;
int start_tag_count = 0;
int start_tag_length = 0;
int attribute_count = 0;
int end_tag_count = 0;
int end_tag_length = 0;

/* References, text and error items. */
int reference_count = 0;
int reference_length = 0;
int text_item_count = 0;
int text_item_length = 0;
int error_item_count = 0;
int error_item_length = 0;

/* Element nesting. */
int nesting_depth = 0;
int max_nesting_depth = 0;

/* Attributes and namespace declarations. */
int total_attribute_count = 0;
int total_att_name_length = 0;
int total_att_value_length = 0;
int namespace_count = 0;
int total_namespace_name_length = 0;
int total_namespace_URI_length = 0;

int last_item_start = 0;
int last_item_stop = 0;
int last_buffer_rel_pos = 0;

/* When defined, the action routines also accumulate item lengths so that
   averages can be reported. */
#define CALC_AVG

#ifdef LEAF_COUNTING
/* at_start is 1 right after a start tag and 0 once an end tag has been seen. */
int at_start = 1;
int leaf_count = 0;
int interior_count = 0;
#endif
126
127/* Action routine for an XML comment in "<!--"  "-->" brackets. */
128template<>
129inline void Parser_Interface<UTF_8>::Comment_action(unsigned char * item, int lgth) {
130        comment_count +=1;
131       
132        #if defined(CALC_AVG)
133                comment_length += lgth;
134        #endif
135}
136
137/* Action routine called upon recognizing "<![CDATA[" to start a CDATA section. */
138template<>
139inline void Parser_Interface<UTF_8>::CDATA_start_action(unsigned char * CDATA_ptr){
140        CDATA_start_pos = (int) CDATA_ptr;
141        CDATA_start_count +=1;
142}
143
144/* Action routine called upon recognizing "]]>" to end a CDATA section. */
145template<>
146inline void Parser_Interface<UTF_8>::CDATA_end_action(unsigned char * CDATA_end_ptr) {
147        CDATA_end_count +=1;
148       
149        #if defined(CALC_AVG)
150                CDATA_length += (int) CDATA_end_ptr - CDATA_start_pos;
151        #endif
152}
153
154/* Action routine for an XML processing instruction enclosed in "<?" and "?>" brackets. */
155template<>
156inline void Parser_Interface<UTF_8>::PI_action(unsigned char * item, int lgth) {
157        PI_count +=1;
158       
159        #if defined(CALC_AVG)
160                PI_length += lgth;
161        #endif
162}
163
164/* Action routine for an empty element enclosed in "<" and "/>" brackets. */
165template<>
166inline void Parser_Interface<UTF_8>::EmptyElement_action(unsigned char * item, int lgth) {
167        empty_elem_count +=1;
168#ifdef LEAF_COUNTING
169        int at_start = 0;
170        leaf_count++;
171#endif
172
173        #if defined(CALC_AVG)
174                empty_elem_length += lgth;
175        #endif
176}
177
178/* Action routine for a start tag enclosed in "<" and ">" brackets. */
179template<>
180inline void Parser_Interface<UTF_8>::StartTag_action(unsigned char * item, int lgth) {
181        start_tag_count +=1;
182#ifdef LEAF_COUNTING
183        at_start = 1;
184#endif
185       
186        #if defined(CALC_AVG)
187                start_tag_length += lgth;
188        #endif
189       
190        nesting_depth += 1;
191        if (nesting_depth > max_nesting_depth) max_nesting_depth = nesting_depth;
192        //cout << string((char *) item, lgth) << endl;
193}
194
195/* Action routine for an end tag enclosed in "</" and ">" brackets. */
196template<>
197inline void Parser_Interface<UTF_8>::EndTag_action(unsigned char * item, int lgth) {
198        end_tag_count +=1;
199#ifdef LEAF_COUNTING
200        leaf_count += at_start;
201        interior_count += (1 - at_start);
202        at_start = 0;
203#endif
204       
205        #if defined(CALC_AVG)
206                end_tag_length += lgth;
207        #endif
208       
209        nesting_depth -= 1;
210}
211
212/* Action routine for an error item */
213template<>
214inline void Parser_Interface<UTF_8>::Error_action(unsigned char * item, int lgth) {
215        error_item_count +=1;
216       
217        #if defined(CALC_AVG)
218                error_item_length += lgth;
219        #endif
220       
221        fprintf(stderr, "Error: illegal markup of length %i.\n", lgth);
222        cerr << string((char *) item, lgth) << endl;
223}
224
225/* Action routine for a text item */
226template<>
227inline void Parser_Interface<UTF_8>::Text_action(unsigned char * item, int lgth, bool more) {
228        text_item_count +=1;
229       
230        #if defined(CALC_AVG)
231                text_item_length += lgth;
232        #endif
233}
234
235template<>
236inline void Parser_Interface<UTF_8>::Reference_action(unsigned char * item, int lgth) {
237        reference_count +=1;
238       
239        #if defined(CALC_AVG)
240                reference_length += lgth;
241        #endif
242}
243
244
245
246
/* Action routines for markup components are defined as follows.
|ElementName_action| is the action routine called upon recognition of
an element name immediately after the opening angle bracket of a start
tag or empty element tag.  (Open question: should this be deferred until
after attribute processing so that namespaces are set?)
It is called with two parameters: a pointer to the expected XML name
and the length of that name.
Similarly, |PI_Target_action| is the action routine called upon recognition
of the XML Name that occurs immediately after the opening "<?"
delimiter of a processing instruction.

 The remaining action routines for markup components, |AttributeValue_action|
and |Namespace_action|, take four parameters rather than two: a pointer and
length for the name, and a pointer and length for the value (or namespace
URI).  (Open question: possibly pass just the quote mark items, relying on
the end of the last component processed to mark the space before the
attribute name; this would require |ElementName_action|.)
*/
263
264/* Semantic action routines for markup components. */
265/* Action routine for an element name occurring immediately after the
266   opening "<" of a start tag or empty element tag. */
267template<>
268inline void Parser_Interface<UTF_8>::ElementName_action(unsigned char * item, int lgth) {
269}
270
271/* Action routine for a processing instruction target name occurring immediately
272   after the opening "<?" of a processing instruction. */
273template<>
274inline void Parser_Interface<UTF_8>::PI_Target_action(unsigned char * item, int lgth) {
275}
276
277/* Action routine for an individual attribute/value pair occurring in
278   a element start tag or an empty element tag. */
279template<>
280inline void Parser_Interface<UTF_8>::AttributeValue_action(unsigned char * name, int name_lgth, 
281                                 unsigned char * val, int val_lgth) {
282        total_attribute_count+=1;
283       
284        #if defined(CALC_AVG)
285                total_att_name_length += name_lgth;
286                total_att_value_length += val_lgth;
287        #endif
288}
289
290/* Action routine for an individual attribute/value pair occurring in
291   a element start tag or an empty element tag. */
292template<>
293inline void Parser_Interface<UTF_8>::Namespace_action(unsigned char * name, int name_lgth,
294                             unsigned char * URI, int URI_lgth) {
295        namespace_count+=1;
296       
297        #if defined(CALC_AVG)
298                total_namespace_name_length += name_lgth;
299                total_namespace_URI_length += URI_lgth;
300        #endif
301}
302
303
304template<>
305void Parser_Interface<UTF_8>::FinalizeBuffer_action() {
306       
307       
308#ifdef DEBUG
309        printf ("FinalizeBuffer; last 16 bytes + lookahead 16 =\n");
310        cout << string((char *) GetCodeUnitPtr(AbsPos()-16), 16) << "::" << string((char *) GetCodeUnitPtr(AbsPos()), 16) << endl;
311#endif
312        #if defined(PAPI) and defined(CODE_CLOCKING) and (CODE_CLOCKING == BUFFER_TOTAL)
313                code_clocker->end_interval(BUFFER_SIZE);
314        #endif
315        #if defined(PAPI) and defined(CODE_CLOCKING) and (CODE_CLOCKING == BUFFER_TOTAL)
316                code_clocker->start_interval();
317        #endif
318       
319}
320
321
322template<>
323inline void Parser_Interface<UTF_8>::DocumentStart_action() {
324        #if defined(PAPI) and defined(CODE_CLOCKING) and (CODE_CLOCKING == BUFFER_TOTAL)
325                code_clocker->start_interval();
326        #endif
327}
328
329template<>
330inline void Parser_Interface<UTF_8>::DocumentEnd_action() {
331        #if defined(PAPI) and defined(CODE_CLOCKING) and (CODE_CLOCKING == BUFFER_TOTAL)
332                code_clocker->end_interval(buffer_rel_pos);
333        #endif
334        //printf("Document end\n");
335        //Parser_Interface<UTF_8>::model_info->ReportSymbolInfo();
336}
337
338template<>
339inline void Parser_Interface<UTF_8>::Doctype_action(unsigned char * item, int lgth) {
340#ifdef SHOW_DTD_ACTIONS
341        printf("Document Type:\n");
342        cout << string((char *) item, lgth) <<endl;
343#endif
344}
345
346template<>
347inline void Parser_Interface<UTF_8>::PEReference_action(unsigned char * item, int lgth) {
348}
349
350
351template<>
352inline void Parser_Interface<UTF_8>::ExtSubsetDecl_action(unsigned char * item, int lgth) {
353#ifdef SHOW_DTD_ACTIONS
354        printf("ExtSubsetDecl:\n");
355        cout << string((char *) item, lgth) <<endl;
356#endif
357}
358
359template<>
360inline void Parser_Interface<UTF_8>::Prolog_action(unsigned char * item, int lgth) {
361#ifdef SHOW_DTD_ACTIONS
362        printf("Prolog:\n");
363        cout << string((char *) item, lgth) <<endl;
364#endif
365}
366
/* Print one statistics line for a kind of item.
     stat_string - singular name of the item kind, e.g. "comment"
     count       - number of items seen (int)
     total_lgth  - summed length of those items (int)
   Output is "<count> <name>s." for zero items, "<count> <name> of length
   <total>." for exactly one, and "<count> <name>s of avg. lgth <avg>."
   otherwise, with the average printed as a floating point value.
   The body is wrapped in do { } while (0) so that an invocation followed by
   ';' is a single statement and is safe inside an unbraced if/else.
   Arguments are parenthesized but may be evaluated more than once, so they
   must not have side effects. */
#define print_stats(stat_string, count, total_lgth) \
        do {\
                printf("%i %s", (count), (stat_string));\
                if ((count) == 0) printf("s.\n");\
                else if ((count) == 1) printf(" of length %i.\n", (total_lgth));\
                else printf("s of avg. lgth %f.\n", (double)(total_lgth)/(double)(count));\
        } while (0)
372
373
374#include <sched.h>
375
376int
377main(int argc, char * argv[]) {
378
379#ifdef SET_AFFINITY
380cpu_set_t mask;
381
382
383unsigned int len = sizeof(mask);
384if (sched_getaffinity(0, len, &mask) < 0) {
385    perror("sched_getaffinity");
386    return -1;
387    }
388printf("Original affinity mask is: %08lx\n", mask);
389
390CPU_CLR(0, &mask);
391
392if (sched_setaffinity(0, len, &mask) < 0) {
393    perror("sched_setaffinity");
394}
395if (sched_getaffinity(0, len, &mask) < 0) {
396    perror("sched_getaffinity");
397    return -1;
398    }
399printf("Modified affinity mask is:  %08lx\n", mask);
400#endif
401
402
403
404
405        if (argc != 2) {
406        printf("Usage: %s <filename>\n", argv[0]);
407                exit(-1);
408        }
409       
410        char * src_filename = argv[1];
411        char * cmdline = new char[strlen(argv[0]) + strlen(argv[1]) +1 +1]; 
412        strcat(cmdline, argv[0]);
413        strcat(cmdline," ");
414        strcat(cmdline,argv[1]);
415
416        #ifdef PAPI
417                #define NUM_EVENTS 1
418                int Events[NUM_EVENTS] = {PAPI_TOT_CYC};
419//              int Events[NUM_EVENTS] = {PAPI_TOT_CYC, PAPI_L1_DCM};
420//              int Events[NUM_EVENTS] = {PAPI_L2_DCM};
421                int cal_size = 1000;
422                code_clocker = new CC(Events,NUM_EVENTS,cal_size);
423                code_clocker->set_cmd(cmdline);
424        #endif
425
426        // Read the entire file into a memory buffer   
427        FILE * src_file;
428        struct stat fileinfo;
429        int src_filesize;
430
431        // open file and fstat 
432        src_file = fopen ( src_filename , "rb" );
433        if (!src_file) {
434                fprintf(stderr, "Cannot open '%s'. Terminating the process ...\n", src_filename); 
435                exit(-1);
436        }
437        if(fstat(fileno(src_file), &fileinfo)!=0) {
438                fprintf(stderr, "Cannot fstat '%s'. Terminating the process ...\n", src_filename); 
439                exit(-1);
440        }
441       
442        src_filesize = fileinfo.st_size;
443
444        fclose (src_file);
445       
446        for (int run = 0; run < REPEAT_RUNS; run++) {
447
448                #ifdef PAPI
449                        code_clocker->start_interval();
450                #endif
451
452                parser = Parser_Interface<UTF_8>::ParserFactory(src_filename);
453       
454               
455                if (!parser->has_ByteOrderMark()) printf("No ");
456                printf("Byte Order Mark found.\n");
457       
458                if (parser->get_version() == XML_1_0) printf("XML version 1.0 declared.\n");
459                else if (parser->get_version() == XML_1_1) printf("XML version 1.1 declared.\n");
460                else printf ("XML version 1.0 implied by default.\n");
461                if (parser->has_EncodingDecl()) {
462                        printf("XML encoding declared:  %s\n", parser->get_Encoding());
463                }
464                if (parser->standalone_status() == Standalone_yes) 
465                        printf("XML standalone = yes declared.\n");
466                else if (parser->standalone_status() == Standalone_no) 
467                        printf("XML standalone = no declared.\n");
468                else printf ("XML standalone = no by default.\n");
469               
470
471//              #ifdef PAPI
472//                      code_clocker->start_interval();
473//              #endif
474               
475                parser->Parse_Prolog();
476                parser->Parse_DocumentContent();
477
478//              #ifdef PAPI
479//                      int elems = src_filesize;
480//                      code_clocker->end_interval(elems);
481//              #endif
482
483                delete parser;
484                printf("Run %i complete.\n", run);
485
486        }
487
488        #ifdef PAPI
489       
490        #if defined(CODE_CLOCKING) and (CODE_CLOCKING == CHARSET_VALIDATION)
491                code_clocker->set_param("CODE_CLOCKING", "CHARSET_VALIDATION");
492        #endif 
493        #if defined(CODE_CLOCKING) and (CODE_CLOCKING == WS_CONTROL)
494                code_clocker->set_param("CODE_CLOCKING", "WS_CONTROL");
495        #endif
496        #if defined(CODE_CLOCKING) and (CODE_CLOCKING == MARKUP_STREAMS)
497                code_clocker->set_param("CODE_CLOCKING", "MARKUP_STREAMS");
498        #endif
499        #if defined(CODE_CLOCKING) and (CODE_CLOCKING == BITLEX_ALL)
500                code_clocker->set_param("CODE_CLOCKING", "BITLEX_ALL");
501        #endif
502        #if defined(CODE_CLOCKING) and (CODE_CLOCKING == BITPLEX)
503                code_clocker->set_param("CODE_CLOCKING", "BITPLEX");
504        #endif
505        #if defined(CODE_CLOCKING) and (CODE_CLOCKING == BYTEPLEX)
506                code_clocker->set_param("CODE_CLOCKING", "BYTEPLEX");
507        #endif
508        #if defined(CODE_CLOCKING) and (CODE_CLOCKING == ADVANCE_BUFFERS)
509                code_clocker->set_param("CODE_CLOCKING", "ADVANCE_BUFFERS");
510        #endif
511        #if defined(CODE_CLOCKING) and (CODE_CLOCKING == BUFFER_TOTAL)
512                code_clocker->set_param("CODE_CLOCKING", "BUFFER_TOTAL");
513        #endif
514        #if defined(CODE_CLOCKING) and (CODE_CLOCKING == FILE_READING)
515                code_clocker->set_param("CODE_CLOCKING", "FILE_READING");
516        #endif
517        #if defined(OMISSION) and (OMISSION == NONE)
518                code_clocker->set_param("OMISSION", "NONE");
519        #endif
520        #if defined(OMISSION) and (OMISSION == END_TAG_MATCHING)
521                code_clocker->set_param("OMISSION", "END_TAG_MATCHING");
522        #endif
523        #if defined(OMISSION) and (OMISSION == ATTRIBUTE_UNIQUENESS)
524                code_clocker->set_param("OMISSION", "ATTRIBUTE_UNIQUENESS");
525        #endif
526        #if defined(OMISSION) and (OMISSION == NAME_VALIDATION)
527                code_clocker->set_param("OMISSION", "NAME_VALIDATION");
528        #endif
529        #if defined(OMISSION) and (OMISSION == NAME_LOOKUP)
530                code_clocker->set_param("OMISSION", "NAME_LOOKUP");
531        #endif
532        #if defined(VALIDATION_MODE) and (VALIDATION_MODE == ON)
533                code_clocker->set_param("VALIDATION_MODE", "ON");
534        #endif
535        #if defined(VALIDATION_MODE) and (VALIDATION_MODE == OFF)
536                code_clocker->set_param("VALIDATION_MODE", "OFF");
537        #endif
538       
539                code_clocker->write_xml_file();
540                code_clocker->display_system_info();
541                //code_clocker->display_raw_event_data();
542                delete code_clocker;
543        #endif 
544       
545        print_stats("comment", comment_count, comment_length);
546        print_stats("CDATA section", CDATA_end_count, CDATA_length);
547        print_stats("processing instruction", PI_count, PI_length);
548        print_stats("empty element", empty_elem_count, empty_elem_length);
549        print_stats("start tag", start_tag_count, start_tag_length);
550        printf("%i total attributes\n", total_attribute_count);
551        print_stats("attribute name", total_attribute_count, total_att_name_length);
552        print_stats("attribute value", total_attribute_count, total_att_value_length);
553        print_stats("namespace name", namespace_count, total_namespace_name_length);
554        print_stats("namespace URI", namespace_count, total_namespace_URI_length);
555        print_stats("end tag", end_tag_count, end_tag_length);
556        print_stats("text item", text_item_count, text_item_length);
557        print_stats("reference", reference_count, reference_length);
558        print_stats("error item", error_item_count, error_item_length);
559        printf("Maximum nesting depth = %i\n", max_nesting_depth);
560#ifdef LEAF_COUNTING
561        printf("%i leaf nodes, %i interior nodes.\n", leaf_count, interior_count);
562#endif
563       
564        return(0);
565}
Note: See TracBrowser for help on using the repository browser.