source: branches/parabix-svgopen-2008/markup_stats.cxx @ 217

Last change on this file since 217 was 214, checked in by ksherdy, 10 years ago
File size: 15.4 KB
RevLine 
[4]1/*  markup_stats.c - parabix demo program
2    Copyright (c) 2007, Robert D. Cameron.
3    Licensed to the public under the Open Software License 3.0.
4    Licensed to International Characters, Inc., under the Academic
5    Free License 3.0.
6
7*/
8
9#include <stdio.h>
10#include <stdlib.h>
11#include <string.h>
12#include <errno.h>
13#include <sys/types.h>
14#include <sys/stat.h>
[45]15#include <string>
16#include <iostream>
17using namespace std;
[4]18
[152]19#define ON 1
20#define OFF 2
21
[183]22#define TEMPLATED_SIMD_LIB
23
[152]24#define VALIDATION_MODE OFF
25
[116]26#ifdef PAPI
27#include "../code_clocker/clocker/cc.h"
[205]28//#include "../code_clocker/clocker/cc.cxx"
[65]29
[133]30CC * code_clocker;
31
[152]32#define NONE 0
33#define END_TAG_MATCHING 1
34#define ATTRIBUTE_UNIQUENESS 2
35#define NAME_VALIDATION 3
36#define NAME_LOOKUP 4
37
[155]38#define OMISSION NONE
[152]39
40
[133]41#define CHARSET_VALIDATION 1
42#define WS_CONTROL 2
43#define MARKUP_STREAMS 3
44#define BITLEX_ALL 4
45#define BITPLEX 5
46#define BYTEPLEX 6
47#define ADVANCE_BUFFERS 7
48#define BUFFER_TOTAL 8
[152]49#define FILE_READING 9
[133]50
[160]51#define CODE_CLOCKING BUFFER_TOTAL
[133]52
[118]53#endif
[65]54
[49]55//#include "src/ilax.h"
[38]56#include "src/engine.h"
57
[57]58#ifndef REPEAT_RUNS
[120]59#define REPEAT_RUNS 1
[57]60#endif
[38]61
62/* Internals */
[74]63#include "src/xmlmodel.h"
[111]64#include "src/xml_error.h"
[74]65#include "src/bitplex.h"
66#include "src/byteplex.h"
67#include "src/xmldecl.h"
[4]68#include "src/bitlex.h"
[74]69
70
71#include "src/xmlmodel.c"
[111]72#include "src/xml_error.c"
[74]73#include "src/bitplex.c"
74#include "src/byteplex.c"
75#include "src/xmldecl.c"
[4]76#include "src/bitlex.c"
77#include "src/engine.c"
[135]78#include "src/symtab.c"
[4]79
[45]80/* Global declarations of parsing engine. */
81Parser_Interface * parser;
82
[4]83/* Global declarations for statistics. */
84
/* Per-item counters.  The *_count variables are incremented by the matching
   action routines in this file on every call.  The *_length / total_*_length
   variables are accumulated only when CALC_AVG is defined at compile time;
   otherwise they keep their initial value of 0 and the lengths/averages
   printed by main() are 0. */
[38]85int comment_count = 0;
86int comment_length = 0;
[81]87int CDATA_start_count = 0;
/* Position recorded by CDATA_start_action (AbsPos() - 9); read by
   CDATA_end_action to compute the section length. */
88int CDATA_start_pos = 0;
[38]89int CDATA_length = 0;
[81]90int CDATA_end_count = 0;
[38]91int PI_count = 0;
92int PI_length = 0;
93int empty_elem_count = 0;
94int empty_elem_length = 0;
95int start_tag_count = 0;
96int start_tag_length = 0;
97int attribute_count = 0;
98int end_tag_count = 0;
99int end_tag_length = 0;
100int reference_count = 0;
101int reference_length = 0;
102int text_item_count = 0;
103int text_item_length = 0;
104int error_item_count = 0;
105int error_item_length = 0;
/* nesting_depth is incremented by StartTag_action and decremented by
   EndTag_action; max_nesting_depth is the largest value it has reached. */
106int nesting_depth = 0;
107int max_nesting_depth = 0;
108int total_attribute_count = 0;
109int total_att_name_length = 0;
110int total_att_value_length = 0;
111int namespace_count = 0;
112int total_namespace_name_length = 0;
113int total_namespace_URI_length = 0;
[4]114
/* NOTE(review): the three variables below are not referenced by the action
   routines or main() visible in this file; presumably they are used by one of
   the included engine sources -- TODO confirm. */
[49]115int last_item_start = 0;
116int last_item_stop = 0;
117int last_buffer_rel_pos = 0;
[4]118
[74]119
/* Leaf/interior element counting, compiled in only with LEAF_COUNTING.
   at_start is set to 1 by StartTag_action and reset to 0 by EndTag_action;
   EndTag_action adds at_start to leaf_count and (1 - at_start) to
   interior_count. */
[194]120#ifdef LEAF_COUNTING
121int at_start = 1;
122int leaf_count = 0;
123int interior_count = 0;
124#endif
125
[4]126/* Action routine for an XML comment in "<!--"  "-->" brackets. */
[161]127template <class B>
128inline void ParsingEngine<B>::Comment_action(unsigned char * item, int lgth) {
[38]129        comment_count +=1;
[142]130       
131        #if defined(CALC_AVG)
132                comment_length += lgth;
133        #endif
[4]134}
135
[81]136/* Action routine called upon recognizing "<![CDATA[" to start a CDATA section. */
[161]137template <class B>
138inline void ParsingEngine<B>::CDATA_start_action(unsigned char * CDATA_ptr){
[92]139        CDATA_start_pos = AbsPos() - 9;
[81]140        CDATA_start_count +=1;
[4]141}
142
[81]143/* Action routine called upon recognizing "]]>" to end a CDATA section. */
[161]144template <class B>
145inline void ParsingEngine<B>::CDATA_end_action(unsigned char * CDATA_end_ptr) {
[81]146        CDATA_end_count +=1;
[142]147       
148        #if defined(CALC_AVG)
149                CDATA_length += AbsPos() - CDATA_start_pos;
150        #endif
[81]151}
152
[4]153/* Action routine for an XML processing instruction enclosed in "<?" and "?>" brackets. */
[161]154template <class B>
155inline void ParsingEngine<B>::PI_action(unsigned char * item, int lgth) {
[38]156        PI_count +=1;
[205]157
[142]158        #if defined(CALC_AVG)
159                PI_length += lgth;
160        #endif
[4]161}
162
163/* Action routine for an empty element enclosed in "<" and "/>" brackets. */
[161]164template <class B>
165inline void ParsingEngine<B>::EmptyElement_action(unsigned char * item, int lgth) {
[38]166        empty_elem_count +=1;
[194]167#ifdef LEAF_COUNTING
168        int at_start = 0;
169        leaf_count++;
170#endif
171
[142]172        #if defined(CALC_AVG)
173                empty_elem_length += lgth;
174        #endif
[4]175}
176
177/* Action routine for a start tag enclosed in "<" and ">" brackets. */
[161]178template <class B>
179inline void ParsingEngine<B>::StartTag_action(unsigned char * item, int lgth) {
[38]180        start_tag_count +=1;
[194]181#ifdef LEAF_COUNTING
182        at_start = 1;
183#endif
[205]184
[142]185        #if defined(CALC_AVG)
186                start_tag_length += lgth;
187        #endif
[205]188
[38]189        nesting_depth += 1;
190        if (nesting_depth > max_nesting_depth) max_nesting_depth = nesting_depth;
[101]191//      cout << string((char *) item, lgth) << endl;
[4]192}
193
194/* Action routine for an end tag enclosed in "</" and ">" brackets. */
[161]195template <class B>
196inline void ParsingEngine<B>::EndTag_action(unsigned char * item, int lgth) {
[38]197        end_tag_count +=1;
[194]198#ifdef LEAF_COUNTING
199        leaf_count += at_start;
200        interior_count += (1 - at_start);
201        at_start = 0;
202#endif
[205]203
[142]204        #if defined(CALC_AVG)
205                end_tag_length += lgth;
206        #endif
[205]207
[38]208        nesting_depth -= 1;
[4]209}
210
211/* Action routine for an error item */
[161]212template <class B>
213inline void ParsingEngine<B>::Error_action(unsigned char * item, int lgth) {
[38]214        error_item_count +=1;
[205]215
[142]216        #if defined(CALC_AVG)
217                error_item_length += lgth;
218        #endif
[205]219
[121]220        fprintf(stderr, "Error: illegal markup at positions %i of length %i.\n", AbsPos()-lgth, lgth);
[124]221        cerr << string((char *) item, lgth) << endl;
[4]222}
223
224/* Action routine for a text item */
[161]225template <class B>
[180]226inline void ParsingEngine<B>::Text_action(unsigned char * item, int lgth, bool more) {
[38]227        text_item_count +=1;
[205]228
[142]229        #if defined(CALC_AVG)
230                text_item_length += lgth;
231        #endif
[4]232}
233
[161]234template <class B>
235inline void ParsingEngine<B>::Reference_action(unsigned char * item, int lgth) {
[38]236        reference_count +=1;
[205]237
[142]238        #if defined(CALC_AVG)
239                reference_length += lgth;
240        #endif
[7]241}
[4]242
243
244
[7]245
/* Three action routines for markup components are defined as follows.
ElementName_action is the action routine called upon recognition of
an element name immediately after the opening angle bracket of a start
tag or empty element tag.  (OPEN QUESTION: or maybe this should be deferred
until after attribute processing so that namespaces are set?)
It is called with two parameters: a pointer to the expected XML name
(item) and its length (lgth).
Similarly, PI_Target_action is the action routine called upon recognition
of the XML Name that occurs immediately after the opening "<?"
delimiter of a processing instruction.

The third action routine for markup components is AttributeValue_action,
which takes four parameters rather than two: pointer and length for the
attribute name, and pointer and length for the attribute value.
(OPEN QUESTION: or possibly pass just the quote mark items, relying on the
end of the last component processed to mark the space before the attribute
name; this requires ElementName_action.)
*/
262
263/* Semantic action routines for markup components. */
[205]264/* Action routine for an element name occurring immediately after the
[4]265   opening "<" of a start tag or empty element tag. */
[161]266template <class B>
267inline void ParsingEngine<B>::ElementName_action(unsigned char * item, int lgth) {
[4]268}
269
[205]270/* Action routine for a processing instruction target name occurring immediately
[4]271   after the opening "<?" of a processing instruction. */
[161]272template <class B>
273inline void ParsingEngine<B>::PI_Target_action(unsigned char * item, int lgth) {
[4]274}
275
276/* Action routine for an individual attribute/value pair occurring in
277   a element start tag or an empty element tag. */
[161]278template <class B>
[205]279inline void ParsingEngine<B>::AttributeValue_action(unsigned char * name, int name_lgth,
[92]280                                 unsigned char * val, int val_lgth) {
[38]281        total_attribute_count+=1;
[205]282
[142]283        #if defined(CALC_AVG)
284                total_att_name_length += name_lgth;
285                total_att_value_length += val_lgth;
286        #endif
[4]287}
288
289/* Action routine for an individual attribute/value pair occurring in
290   a element start tag or an empty element tag. */
[161]291template <class B>
292inline void ParsingEngine<B>::Namespace_action(unsigned char * name, int name_lgth,
[92]293                             unsigned char * URI, int URI_lgth) {
[38]294        namespace_count+=1;
[205]295
[142]296        #if defined(CALC_AVG)
297                total_namespace_name_length += name_lgth;
298                total_namespace_URI_length += URI_lgth;
299        #endif
[4]300}
301
302
[161]303template <class B>
[178]304void ParsingEngine<B>::FinalizeBuffer_action() {
[205]305
306
[74]307#ifdef DEBUG
308        printf ("FinalizeBuffer; last 16 bytes + lookahead 16 =\n");
309        cout << string((char *) GetCodeUnitPtr(AbsPos()-16), 16) << "::" << string((char *) GetCodeUnitPtr(AbsPos()), 16) << endl;
310#endif
[133]311        #if defined(PAPI) and defined(CODE_CLOCKING) and (CODE_CLOCKING == BUFFER_TOTAL)
[185]312                code_clocker->end_interval(BUFFER_SIZE);
[133]313        #endif
314        #if defined(PAPI) and defined(CODE_CLOCKING) and (CODE_CLOCKING == BUFFER_TOTAL)
[185]315                code_clocker->start_interval();
[133]316        #endif
[205]317
[4]318}
319
320
[161]321template <class B>
322inline void ParsingEngine<B>::DocumentStart_action() {
[133]323        #if defined(PAPI) and defined(CODE_CLOCKING) and (CODE_CLOCKING == BUFFER_TOTAL)
[185]324                code_clocker->start_interval();
[133]325        #endif
[64]326}
327
[161]328template <class B>
329inline void ParsingEngine<B>::DocumentEnd_action() {
[133]330        #if defined(PAPI) and defined(CODE_CLOCKING) and (CODE_CLOCKING == BUFFER_TOTAL)
[185]331                code_clocker->end_interval(buffer_rel_pos);
[133]332        #endif
[64]333}
334
[161]335template <class B>
336inline void ParsingEngine<B>::Doctype_action(unsigned char * item, int lgth) {
[79]337#ifdef SHOW_DTD_ACTIONS
338        printf("Document Type:\n");
[92]339        cout << string((char *) item, lgth) <<endl;
[79]340#endif
341}
[45]342
[161]343template <class B>
344inline void ParsingEngine<B>::PEReference_action(unsigned char * item, int lgth) {
[79]345}
[45]346
[79]347
[161]348template <class B>
349inline void ParsingEngine<B>::ExtSubsetDecl_action(unsigned char * item, int lgth) {
[101]350#ifdef SHOW_DTD_ACTIONS
351        printf("ExtSubsetDecl:\n");
352        cout << string((char *) item, lgth) <<endl;
353#endif
354}
355
[161]356template <class B>
357inline void ParsingEngine<B>::Prolog_action(unsigned char * item, int lgth) {
[79]358#ifdef SHOW_DTD_ACTIONS
359        printf("Prolog:\n");
[92]360        cout << string((char *) item, lgth) <<endl;
[79]361#endif
362}
363
/* print_stats(stat_string, count, total_lgth)
   Prints one statistics line to stdout:
     count == 0 :  "0 <stat_string>s."
     count == 1 :  "1 <stat_string> of length <total_lgth>."
     otherwise  :  "<count> <stat_string>s of avg. lgth <total_lgth/count>."
   The average uses integer division.  The expansion is wrapped in
   do { ... } while (0) so the macro behaves as a single statement (safe in
   an unbraced if/else), and count and total_lgth are each evaluated exactly
   once. */
#define print_stats(stat_string, count, total_lgth) \
        do { \
                const int print_stats_count_ = (count); \
                const int print_stats_total_ = (total_lgth); \
                printf("%i %s", print_stats_count_, (stat_string)); \
                if (print_stats_count_ == 0) printf("s.\n"); \
                else if (print_stats_count_ == 1) printf(" of length %i.\n", print_stats_total_); \
                else printf("s of avg. lgth %i.\n", print_stats_total_ / print_stats_count_); \
        } while (0)
369
[133]370
[167]371#include <sched.h>
[133]372
[4]373int
374main(int argc, char * argv[]) {
[167]375
376#ifdef SET_AFFINITY
377cpu_set_t mask;
378
379
380unsigned int len = sizeof(mask);
381if (sched_getaffinity(0, len, &mask) < 0) {
382    perror("sched_getaffinity");
383    return -1;
384    }
385printf("Original affinity mask is: %08lx\n", mask);
386
387CPU_CLR(0, &mask);
388
389if (sched_setaffinity(0, len, &mask) < 0) {
390    perror("sched_setaffinity");
391}
392if (sched_getaffinity(0, len, &mask) < 0) {
393    perror("sched_getaffinity");
394    return -1;
395    }
396printf("Modified affinity mask is:  %08lx\n", mask);
397#endif
398
399
400
401
[38]402        if (argc != 2) {
403        printf("Usage: %s <filename>\n", argv[0]);
404                exit(-1);
405        }
[205]406
[142]407        char * src_filename = argv[1];
[205]408        char * cmdline = new char[strlen(argv[0]) + strlen(argv[1]) +1 +1];
[142]409        strcat(cmdline, argv[0]);
410        strcat(cmdline," ");
411        strcat(cmdline,argv[1]);
[52]412
[116]413        #ifdef PAPI
[160]414                #define NUM_EVENTS 2
415                int Events[NUM_EVENTS] = {PAPI_TOT_CYC, PAPI_BR_MSP};
[155]416//              int Events[NUM_EVENTS] = {PAPI_TOT_CYC, PAPI_L1_DCM};
[160]417//              int Events[NUM_EVENTS] = {PAPI_L2_DCM};
[116]418                int cal_size = 1000;
[133]419                code_clocker = new CC(Events,NUM_EVENTS,cal_size);
[185]420                code_clocker->set_cmd(cmdline);
[116]421        #endif
[65]422
[205]423        // Read the entire file into a memory buffer
[142]424        FILE * src_file;
425        struct stat fileinfo;
426        int src_filesize;
[116]427
[205]428        // open file and fstat
[142]429        src_file = fopen ( src_filename , "rb" );
[161]430        if (!src_file) {
[205]431                fprintf(stderr, "Cannot open '%s'. Terminating the process ...\n", src_filename);
[161]432                exit(-1);
433        }
[142]434        if(fstat(fileno(src_file), &fileinfo)!=0) {
[205]435                fprintf(stderr, "Cannot fstat '%s'. Terminating the process ...\n", src_filename);
[142]436                exit(-1);
[38]437        }
[205]438
[142]439        src_filesize = fileinfo.st_size;
[205]440
[142]441        // close file
442        fclose (src_file);
[205]443
[142]444        for (int run = 0; run < REPEAT_RUNS; run++) {
[101]445
[142]446                #ifdef PAPI
[185]447                        code_clocker->start_interval();
[142]448                #endif
[108]449
[142]450                parser = Parser_Interface::ParserFactory(src_filename);
[205]451
452
[142]453                if (!parser->has_ByteOrderMark()) printf("No ");
454                printf("Byte Order Mark found.\n");
[205]455
[142]456                if (parser->get_version() == XML_1_0) printf("XML version 1.0 declared.\n");
457                else if (parser->get_version() == XML_1_1) printf("XML version 1.1 declared.\n");
458                else printf ("XML version 1.0 implied by default.\n");
459                if (parser->has_EncodingDecl()) {
460                        printf("XML encoding declared:  %s\n", parser->get_Encoding());
461                }
[205]462                if (parser->standalone_status() == Standalone_yes)
[142]463                        printf("XML standalone = yes declared.\n");
[205]464                else if (parser->standalone_status() == Standalone_no)
[142]465                        printf("XML standalone = no declared.\n");
466                else printf ("XML standalone = no by default.\n");
467
[205]468
[143]469//              #ifdef PAPI
[185]470//                      code_clocker->start_interval();
[143]471//              #endif
[205]472
[142]473                parser->Parse_Prolog();
474                parser->Parse_DocumentContent();
475
[143]476//              #ifdef PAPI
477//                      int elems = src_filesize;
[185]478//                      code_clocker->end_interval(elems);
[143]479//              #endif
[142]480
481                parser->~Parser_Interface();
482                printf("Run %i complete.\n", run);
483
[52]484        }
[116]485
486        #ifdef PAPI
[205]487
[152]488        #if defined(CODE_CLOCKING) and (CODE_CLOCKING == CHARSET_VALIDATION)
[185]489                code_clocker->set_param("CODE_CLOCKING", "CHARSET_VALIDATION");
[205]490        #endif
[152]491        #if defined(CODE_CLOCKING) and (CODE_CLOCKING == WS_CONTROL)
[185]492                code_clocker->set_param("CODE_CLOCKING", "WS_CONTROL");
[152]493        #endif
494        #if defined(CODE_CLOCKING) and (CODE_CLOCKING == MARKUP_STREAMS)
[185]495                code_clocker->set_param("CODE_CLOCKING", "MARKUP_STREAMS");
[152]496        #endif
497        #if defined(CODE_CLOCKING) and (CODE_CLOCKING == BITLEX_ALL)
[185]498                code_clocker->set_param("CODE_CLOCKING", "BITLEX_ALL");
[152]499        #endif
500        #if defined(CODE_CLOCKING) and (CODE_CLOCKING == BITPLEX)
[185]501                code_clocker->set_param("CODE_CLOCKING", "BITPLEX");
[152]502        #endif
503        #if defined(CODE_CLOCKING) and (CODE_CLOCKING == BYTEPLEX)
[185]504                code_clocker->set_param("CODE_CLOCKING", "BYTEPLEX");
[152]505        #endif
506        #if defined(CODE_CLOCKING) and (CODE_CLOCKING == ADVANCE_BUFFERS)
[185]507                code_clocker->set_param("CODE_CLOCKING", "ADVANCE_BUFFERS");
[152]508        #endif
509        #if defined(CODE_CLOCKING) and (CODE_CLOCKING == BUFFER_TOTAL)
[185]510                code_clocker->set_param("CODE_CLOCKING", "BUFFER_TOTAL");
[152]511        #endif
512        #if defined(CODE_CLOCKING) and (CODE_CLOCKING == FILE_READING)
[185]513                code_clocker->set_param("CODE_CLOCKING", "FILE_READING");
[152]514        #endif
515        #if defined(OMISSION) and (OMISSION == NONE)
[185]516                code_clocker->set_param("OMISSION", "NONE");
[152]517        #endif
518        #if defined(OMISSION) and (OMISSION == END_TAG_MATCHING)
[185]519                code_clocker->set_param("OMISSION", "END_TAG_MATCHING");
[152]520        #endif
521        #if defined(OMISSION) and (OMISSION == ATTRIBUTE_UNIQUENESS)
[185]522                code_clocker->set_param("OMISSION", "ATTRIBUTE_UNIQUENESS");
[152]523        #endif
524        #if defined(OMISSION) and (OMISSION == NAME_VALIDATION)
[185]525                code_clocker->set_param("OMISSION", "NAME_VALIDATION");
[152]526        #endif
527        #if defined(OMISSION) and (OMISSION == NAME_LOOKUP)
[185]528                code_clocker->set_param("OMISSION", "NAME_LOOKUP");
[152]529        #endif
530        #if defined(VALIDATION_MODE) and (VALIDATION_MODE == ON)
[185]531                code_clocker->set_param("VALIDATION_MODE", "ON");
[152]532        #endif
533        #if defined(VALIDATION_MODE) and (VALIDATION_MODE == OFF)
[185]534                code_clocker->set_param("VALIDATION_MODE", "OFF");
[152]535        #endif
[205]536
[185]537                code_clocker->write_xml_file();
538                code_clocker->display_system_info();
539                code_clocker->display_raw_event_data();
[133]540                delete code_clocker;
[205]541        #endif
542
[38]543        print_stats("comment", comment_count, comment_length);
[81]544        print_stats("CDATA section", CDATA_end_count, CDATA_length);
[38]545        print_stats("processing instruction", PI_count, PI_length);
546        print_stats("empty element", empty_elem_count, empty_elem_length);
547        print_stats("start tag", start_tag_count, start_tag_length);
[154]548        printf("%i total attributes\n", total_attribute_count);
[38]549        print_stats("attribute name", total_attribute_count, total_att_name_length);
550        print_stats("attribute value", total_attribute_count, total_att_value_length);
551        print_stats("namespace name", namespace_count, total_namespace_name_length);
552        print_stats("namespace URI", namespace_count, total_namespace_URI_length);
553        print_stats("end tag", end_tag_count, end_tag_length);
554        print_stats("text item", text_item_count, text_item_length);
555        print_stats("reference", reference_count, reference_length);
556        print_stats("error item", error_item_count, error_item_length);
557        printf("Maximum nesting depth = %i\n", max_nesting_depth);
[194]558#ifdef LEAF_COUNTING
559        printf("%i leaf nodes, %i interior nodes.\n", leaf_count, interior_count);
560#endif
[205]561
[38]562        return(0);
[4]563}
Note: See TracBrowser for help on using the repository browser.