source: trunk/markup_stats.cxx @ 133

Last change on this file since 133 was 133, checked in by cameron, 11 years ago

PAPI instrumentation of various parabix components.

File size: 11.3 KB
Line 
1/*  markup_stats.c - parabix demo program
2    Copyright (c) 2007, Robert D. Cameron.
3    Licensed to the public under the Open Software License 3.0.
4    Licensed to International Characters, Inc., under the Academic
5    Free License 3.0.
6
7*/
8
9#include <stdio.h>
10#include <stdlib.h>
11#include <string.h>
12#include <errno.h>
13#include <sys/types.h>
14#include <sys/stat.h>
15#include <string>
16#include <iostream>
17using namespace std;
18
19#ifdef PAPI
20#include "../code_clocker/clocker/cc.h"
21#include "../code_clocker/clocker/cc.cxx"
22
23CC * code_clocker;
24
25#define CHARSET_VALIDATION 1
26#define WS_CONTROL 2
27#define MARKUP_STREAMS 3
28#define BITLEX_ALL 4
29#define BITPLEX 5
30#define BYTEPLEX 6
31#define ADVANCE_BUFFERS 7
32#define BUFFER_TOTAL 8
33
34#define CODE_CLOCKING BUFFER_TOTAL
35
36#endif
37
38//#include "src/ilax.h"
39#include "src/engine.h"
40
41#ifndef REPEAT_RUNS
42#define REPEAT_RUNS 1
43#endif
44
45/* Internals */
46#include "src/xmlmodel.h"
47#include "src/xml_error.h"
48#include "src/bitplex.h"
49#include "src/byteplex.h"
50#include "src/xmldecl.h"
51#include "src/bitlex.h"
52
53
54#include "src/xmlmodel.c"
55#include "src/xml_error.c"
56#include "src/bitplex.c"
57#include "src/byteplex.c"
58#include "src/xmldecl.c"
59#include "src/bitlex.c"
60#include "src/engine.c"
61
62/* Global declarations of parsing engine. */
63Parser_Interface * parser;
64
/* Global statistics accumulators.  Each *_count is bumped and each
   *_length is increased by the action routine of the same name below;
   main() prints them once all runs have finished. */

/* Comments. */
int comment_count = 0;
int comment_length = 0;

/* CDATA sections: start position of the section currently open, plus
   separate tallies for section starts and section ends. */
int CDATA_start_count = 0;
int CDATA_start_pos = 0;
int CDATA_length = 0;
int CDATA_end_count = 0;

/* Processing instructions. */
int PI_count = 0;
int PI_length = 0;

/* Empty element tags. */
int empty_elem_count = 0;
int empty_elem_length = 0;

/* Start tags. */
int start_tag_count = 0;
int start_tag_length = 0;

/* Not modified by any action routine in this file. */
int attribute_count = 0;

/* End tags. */
int end_tag_count = 0;
int end_tag_length = 0;

/* References. */
int reference_count = 0;
int reference_length = 0;

/* Text items. */
int text_item_count = 0;
int text_item_length = 0;

/* Error items. */
int error_item_count = 0;
int error_item_length = 0;

/* Element nesting: current depth and the deepest level reached. */
int nesting_depth = 0;
int max_nesting_depth = 0;

/* Attribute name/value pairs. */
int total_attribute_count = 0;
int total_att_name_length = 0;
int total_att_value_length = 0;

/* Namespace name/URI pairs. */
int namespace_count = 0;
int total_namespace_name_length = 0;
int total_namespace_URI_length = 0;

/* Position bookkeeping; not referenced by any routine in this file. */
int last_item_start = 0;
int last_item_stop = 0;
int last_buffer_rel_pos = 0;
100
101
102/* Action routine for an XML comment in "<!--"  "-->" brackets. */
103template <CodeUnit_Base C>
104inline void ParsingEngine<C>::Comment_action(unsigned char * item, int lgth) {
105        comment_count +=1;
106        comment_length += lgth;
107}
108
109/* Action routine called upon recognizing "<![CDATA[" to start a CDATA section. */
110template <CodeUnit_Base C>
111inline void ParsingEngine<C>::CDATA_start_action(unsigned char * CDATA_ptr){
112        CDATA_start_pos = AbsPos() - 9;
113        CDATA_start_count +=1;
114}
115
116/* Action routine called upon recognizing "]]>" to end a CDATA section. */
117template <CodeUnit_Base C>
118inline void ParsingEngine<C>::CDATA_end_action(unsigned char * CDATA_end_ptr) {
119        CDATA_end_count +=1;
120        CDATA_length += AbsPos() - CDATA_start_pos;
121}
122
123/* Action routine for an XML processing instruction enclosed in "<?" and "?>" brackets. */
124template <CodeUnit_Base C>
125inline void ParsingEngine<C>::PI_action(unsigned char * item, int lgth) {
126        PI_count +=1;
127        PI_length += lgth;
128}
129
130/* Action routine for an empty element enclosed in "<" and "/>" brackets. */
131template <CodeUnit_Base C>
132inline void ParsingEngine<C>::EmptyElement_action(unsigned char * item, int lgth) {
133        empty_elem_count +=1;
134        empty_elem_length += lgth;
135}
136
137/* Action routine for a start tag enclosed in "<" and ">" brackets. */
138template <CodeUnit_Base C>
139inline void ParsingEngine<C>::StartTag_action(unsigned char * item, int lgth) {
140        start_tag_count +=1;
141        start_tag_length += lgth;
142        nesting_depth += 1;
143        if (nesting_depth > max_nesting_depth) max_nesting_depth = nesting_depth;
144//      cout << string((char *) item, lgth) << endl;
145}
146
147/* Action routine for an end tag enclosed in "</" and ">" brackets. */
148template <CodeUnit_Base C>
149inline void ParsingEngine<C>::EndTag_action(unsigned char * item, int lgth) {
150        end_tag_count +=1;
151        end_tag_length += lgth;
152        nesting_depth -= 1;
153}
154
155/* Action routine for an error item */
156template <CodeUnit_Base C>
157inline void ParsingEngine<C>::Error_action(unsigned char * item, int lgth) {
158        error_item_count +=1;
159        error_item_length += lgth;
160        fprintf(stderr, "Error: illegal markup at positions %i of length %i.\n", AbsPos()-lgth, lgth);
161        cerr << string((char *) item, lgth) << endl;
162}
163
164/* Action routine for a text item */
165template <CodeUnit_Base C>
166inline void ParsingEngine<C>::Text_action(unsigned char * item, int lgth) {
167        text_item_count +=1;
168        text_item_length += lgth;
169}
170
171template <CodeUnit_Base C>
172inline void ParsingEngine<C>::Reference_action(unsigned char * item, int lgth) {
173        reference_count +=1;
174        reference_length += lgth;
175}
176
177
178
179
180/* Three action routines for markup components are defined as follows.
181|ElementName_action| is the action routine called upon recognition of
182an element name immediately after the opening angle bracket of a start
183tag or empty element tag.  {\bf OR MAYBE THIS SHOULD BE DEFERRED UNTIL
184AFTER ATTRIBUTE PROCESSING SO THAT NAMESPACES ARE SET?}
185It is called with two parameters identifying the
186first and last character positions of the expected XML_name.
187Similarly, |PI_Target_action| is the action routine called upon recognition
188of the XML Name that occurs immediately after the opening "<?"
189delimiter of a processing instruction.
190
191 The third action routine for markup components is Attribute_Value_action,
192which takes three parameters rather than two.  {\bf OR POSSIBLY JUST
193THE QUOTE MARK ITEMS, RELYING ON THE END OF THE LAST COMPONENT PROCESSED
194TO MARK THE SPACE BEFORE THE ATT NAME.- REQUIRES ELEMENT_NAME_ACTION}
195*/
196
197/* Semantic action routines for markup components. */
198/* Action routine for an element name occurring immediately after the
199   opening "<" of a start tag or empty element tag. */
200template <CodeUnit_Base C>
201inline void ParsingEngine<C>::ElementName_action(unsigned char * item, int lgth) {
202}
203
204/* Action routine for a processing instruction target name occurring immediately
205   after the opening "<?" of a processing instruction. */
206template <CodeUnit_Base C>
207inline void ParsingEngine<C>::PI_Target_action(unsigned char * item, int lgth) {
208}
209
210/* Action routine for an individual attribute/value pair occurring in
211   a element start tag or an empty element tag. */
212template <CodeUnit_Base C>
213inline void ParsingEngine<C>::AttributeValue_action(unsigned char * name, int name_lgth, 
214                                 unsigned char * val, int val_lgth) {
215        total_attribute_count+=1;
216        total_att_name_length += name_lgth;
217        total_att_value_length += val_lgth;
218}
219
220/* Action routine for an individual attribute/value pair occurring in
221   a element start tag or an empty element tag. */
222template <CodeUnit_Base C>
223inline void ParsingEngine<C>::Namespace_action(unsigned char * name, int name_lgth,
224                             unsigned char * URI, int URI_lgth) {
225        namespace_count+=1;
226        total_namespace_name_length += name_lgth;
227        total_namespace_URI_length += URI_lgth;
228}
229
230
231template <CodeUnit_Base C>
232inline void ParsingEngine<C>::FinalizeBuffer_action(int& preserve_pos) {
233       
234       
235#ifdef DEBUG
236        printf ("FinalizeBuffer; last 16 bytes + lookahead 16 =\n");
237        cout << string((char *) GetCodeUnitPtr(AbsPos()-16), 16) << "::" << string((char *) GetCodeUnitPtr(AbsPos()), 16) << endl;
238#endif
239        #if defined(PAPI) and defined(CODE_CLOCKING) and (CODE_CLOCKING == BUFFER_TOTAL)
240                code_clocker->cc_end_interval(BUFFER_SIZE);
241        #endif
242        preserve_pos = AbsPos();
243       
244        #if defined(PAPI) and defined(CODE_CLOCKING) and (CODE_CLOCKING == BUFFER_TOTAL)
245                code_clocker->cc_start_interval();
246        #endif
247       
248}
249
250
251template <CodeUnit_Base C>
252inline void ParsingEngine<C>::DocumentStart_action() {
253        #if defined(PAPI) and defined(CODE_CLOCKING) and (CODE_CLOCKING == BUFFER_TOTAL)
254                code_clocker->cc_start_interval();
255        #endif
256}
257
258template <CodeUnit_Base C>
259inline void ParsingEngine<C>::DocumentEnd_action() {
260        #if defined(PAPI) and defined(CODE_CLOCKING) and (CODE_CLOCKING == BUFFER_TOTAL)
261                code_clocker->cc_end_interval(buffer_rel_pos);
262        #endif
263}
264
265template <CodeUnit_Base C>
266inline void ParsingEngine<C>::Doctype_action(unsigned char * item, int lgth) {
267#ifdef SHOW_DTD_ACTIONS
268        printf("Document Type:\n");
269        cout << string((char *) item, lgth) <<endl;
270#endif
271}
272
273template <CodeUnit_Base C>
274inline void ParsingEngine<C>::PEReference_action(unsigned char * item, int lgth) {
275}
276
277
278template <CodeUnit_Base C>
279inline void ParsingEngine<C>::ExtSubsetDecl_action(unsigned char * item, int lgth) {
280#ifdef SHOW_DTD_ACTIONS
281        printf("ExtSubsetDecl:\n");
282        cout << string((char *) item, lgth) <<endl;
283#endif
284}
285
286template <CodeUnit_Base C>
287inline void ParsingEngine<C>::Prolog_action(unsigned char * item, int lgth) {
288#ifdef SHOW_DTD_ACTIONS
289        printf("Prolog:\n");
290        cout << string((char *) item, lgth) <<endl;
291#endif
292}
293
/* print_stats(stat_string, count, total_lgth)
   Print one line of statistics for a category of markup item:
     count == 0 : "0 <stat_string>s."
     count == 1 : "1 <stat_string> of length <total_lgth>."
     otherwise  : "<count> <stat_string>s of avg. lgth <total_lgth/count>."
   The average is an integer division.

   The body is wrapped in do { ... } while (0) so that an invocation
   followed by ';' is exactly one statement and can be used safely in an
   unbraced if/else.  Arguments are parenthesized but may be evaluated
   more than once, so pass expressions without side effects. */
#define print_stats(stat_string, count, total_lgth) \
        do { \
                printf("%i %s", (count), (stat_string)); \
                if ((count) == 0) printf("s.\n"); \
                else if ((count) == 1) printf(" of length %i.\n", (total_lgth)); \
                else printf("s of avg. lgth %i.\n", (total_lgth)/(count)); \
        } while (0)
299
300
301
302
303int
304main(int argc, char * argv[]) {
305        if (argc != 2) {
306        printf("Usage: %s <filename>\n", argv[0]);
307                exit(-1);
308        }
309        char * filename = argv[1];
310
311        #ifdef PAPI
312                #define NUM_EVENTS 2
313                int Events[NUM_EVENTS] = {PAPI_TOT_CYC, PAPI_BR_MSP};
314                int cal_size = 1000;
315                code_clocker = new CC(Events,NUM_EVENTS,cal_size);
316                code_clocker->cc_set_cmd(argv[0]);
317                code_clocker->cc_set_param("Mhz","2127.997");
318        #endif
319
320        for (int run = 0; run < REPEAT_RUNS; run++) {
321//      #ifdef PAPI
322//              code_clocker->cc_start_interval();
323//      #endif
324
325        parser = Parser_Interface::ParserFactory(filename);
326       
327        /*             
328        if (!parser->has_ByteOrderMark()) printf("No ");
329        printf("Byte Order Mark found.\n");
330
331        if (parser->get_version() == XML_1_0) printf("XML version 1.0 declared.\n");
332        else if (parser->get_version() == XML_1_1) printf("XML version 1.1 declared.\n");
333        else printf ("XML version 1.0 implied by default.\n");
334        if (parser->has_EncodingDecl()) {
335                printf("XML encoding declared:  %s\n", parser->get_Encoding());
336        }
337        if (parser->standalone_status() == Standalone_yes)
338                printf("XML standalone = yes declared.\n");
339        else if (parser->standalone_status() == Standalone_no)
340                printf("XML standalone = no declared.\n");
341        else printf ("XML standalone = no by default.\n");
342        */
343       
344        parser->Parse_Prolog();
345
346        parser->Parse_DocumentContent();
347
348        parser->~Parser_Interface();
349        printf("Run %i complete.\n", run);
350//      #ifdef PAPI
351//              int elems = 0;
352//              code_clocker->cc_end_interval(100);
353//      #endif
354       
355       
356        }
357
358        #ifdef PAPI
359                code_clocker->cc_display();
360                code_clocker->cc_write_xml_file();
361                code_clocker->cc_write_csv_file();
362                delete code_clocker;
363        #endif 
364       
365        print_stats("comment", comment_count, comment_length);
366        print_stats("CDATA section", CDATA_end_count, CDATA_length);
367        print_stats("processing instruction", PI_count, PI_length);
368        print_stats("empty element", empty_elem_count, empty_elem_length);
369        print_stats("start tag", start_tag_count, start_tag_length);
370        printf("%i total attributes\n", attribute_count);
371        print_stats("attribute name", total_attribute_count, total_att_name_length);
372        print_stats("attribute value", total_attribute_count, total_att_value_length);
373        print_stats("namespace name", namespace_count, total_namespace_name_length);
374        print_stats("namespace URI", namespace_count, total_namespace_URI_length);
375        print_stats("end tag", end_tag_count, end_tag_length);
376        print_stats("text item", text_item_count, text_item_length);
377        print_stats("reference", reference_count, reference_length);
378        print_stats("error item", error_item_count, error_item_length);
379        printf("Maximum nesting depth = %i\n", max_nesting_depth);
380       
381        return(0);
382}
Note: See TracBrowser for help on using the repository browser.