source: trunk/markup_stats.cxx @ 143

Last change on this file since 143 was 143, checked in by cameron, 11 years ago

Instrumentation with 100K buffer size.

File size: 12.2 KB
Line 
1/*  markup_stats.c - parabix demo program
2    Copyright (c) 2007, Robert D. Cameron.
3    Licensed to the public under the Open Software License 3.0.
4    Licensed to International Characters, Inc., under the Academic
5    Free License 3.0.
6
7*/
8
9#include <stdio.h>
10#include <stdlib.h>
11#include <string.h>
12#include <errno.h>
13#include <sys/types.h>
14#include <sys/stat.h>
15#include <string>
16#include <iostream>
17using namespace std;
18
19#ifdef PAPI
20#include "../code_clocker/clocker/cc.h"
21#include "../code_clocker/clocker/cc.cxx"
22
23CC * code_clocker;
24
25#define CHARSET_VALIDATION 1
26#define WS_CONTROL 2
27#define MARKUP_STREAMS 3
28#define BITLEX_ALL 4
29#define BITPLEX 5
30#define BYTEPLEX 6
31#define ADVANCE_BUFFERS 7
32#define BUFFER_TOTAL 8
33
34#define CODE_CLOCKING BUFFER_TOTAL
35
36#endif
37
38//#include "src/ilax.h"
39#include "src/engine.h"
40
41#ifndef REPEAT_RUNS
42#define REPEAT_RUNS 1
43#endif
44
45/* Internals */
46#include "src/xmlmodel.h"
47#include "src/xml_error.h"
48#include "src/bitplex.h"
49#include "src/byteplex.h"
50#include "src/xmldecl.h"
51#include "src/bitlex.h"
52
53
54#include "src/xmlmodel.c"
55#include "src/xml_error.c"
56#include "src/bitplex.c"
57#include "src/byteplex.c"
58#include "src/xmldecl.c"
59#include "src/bitlex.c"
60#include "src/engine.c"
61#include "src/symtab.c"
62
63/* Global declarations of parsing engine. */
64Parser_Interface * parser;
65
/* Global counters for the statistics report.
   The *_count variables are bumped by the action routines below; the
   *_length totals are only accumulated when CALC_AVG is defined. */

/* Comments, CDATA sections and processing instructions. */
int comment_count = 0, comment_length = 0;
int CDATA_start_count = 0, CDATA_start_pos = 0;
int CDATA_length = 0, CDATA_end_count = 0;
int PI_count = 0, PI_length = 0;

/* Tags. */
int empty_elem_count = 0, empty_elem_length = 0;
int start_tag_count = 0, start_tag_length = 0;
int attribute_count = 0;
int end_tag_count = 0, end_tag_length = 0;

/* References, text and error items. */
int reference_count = 0, reference_length = 0;
int text_item_count = 0, text_item_length = 0;
int error_item_count = 0, error_item_length = 0;

/* Element nesting: current depth and the deepest level seen so far. */
int nesting_depth = 0, max_nesting_depth = 0;

/* Attributes and namespace declarations. */
int total_attribute_count = 0;
int total_att_name_length = 0, total_att_value_length = 0;
int namespace_count = 0;
int total_namespace_name_length = 0, total_namespace_URI_length = 0;

/* Position bookkeeping (not referenced by the routines in this file). */
int last_item_start = 0, last_item_stop = 0;
int last_buffer_rel_pos = 0;
101
102
103/* Action routine for an XML comment in "<!--"  "-->" brackets. */
104template <CodeUnit_Base C>
105inline void ParsingEngine<C>::Comment_action(unsigned char * item, int lgth) {
106        comment_count +=1;
107       
108        #if defined(CALC_AVG)
109                comment_length += lgth;
110        #endif
111}
112
113/* Action routine called upon recognizing "<![CDATA[" to start a CDATA section. */
114template <CodeUnit_Base C>
115inline void ParsingEngine<C>::CDATA_start_action(unsigned char * CDATA_ptr){
116        CDATA_start_pos = AbsPos() - 9;
117        CDATA_start_count +=1;
118}
119
120/* Action routine called upon recognizing "]]>" to end a CDATA section. */
121template <CodeUnit_Base C>
122inline void ParsingEngine<C>::CDATA_end_action(unsigned char * CDATA_end_ptr) {
123        CDATA_end_count +=1;
124       
125        #if defined(CALC_AVG)
126                CDATA_length += AbsPos() - CDATA_start_pos;
127        #endif
128}
129
130/* Action routine for an XML processing instruction enclosed in "<?" and "?>" brackets. */
131template <CodeUnit_Base C>
132inline void ParsingEngine<C>::PI_action(unsigned char * item, int lgth) {
133        PI_count +=1;
134       
135        #if defined(CALC_AVG)
136                PI_length += lgth;
137        #endif
138}
139
140/* Action routine for an empty element enclosed in "<" and "/>" brackets. */
141template <CodeUnit_Base C>
142inline void ParsingEngine<C>::EmptyElement_action(unsigned char * item, int lgth) {
143        empty_elem_count +=1;
144       
145        #if defined(CALC_AVG)
146                empty_elem_length += lgth;
147        #endif
148}
149
150/* Action routine for a start tag enclosed in "<" and ">" brackets. */
151template <CodeUnit_Base C>
152inline void ParsingEngine<C>::StartTag_action(unsigned char * item, int lgth) {
153        start_tag_count +=1;
154       
155        #if defined(CALC_AVG)
156                start_tag_length += lgth;
157        #endif
158       
159        nesting_depth += 1;
160        if (nesting_depth > max_nesting_depth) max_nesting_depth = nesting_depth;
161//      cout << string((char *) item, lgth) << endl;
162}
163
164/* Action routine for an end tag enclosed in "</" and ">" brackets. */
165template <CodeUnit_Base C>
166inline void ParsingEngine<C>::EndTag_action(unsigned char * item, int lgth) {
167        end_tag_count +=1;
168       
169        #if defined(CALC_AVG)
170                end_tag_length += lgth;
171        #endif
172       
173        nesting_depth -= 1;
174}
175
176/* Action routine for an error item */
177template <CodeUnit_Base C>
178inline void ParsingEngine<C>::Error_action(unsigned char * item, int lgth) {
179        error_item_count +=1;
180       
181        #if defined(CALC_AVG)
182                error_item_length += lgth;
183        #endif
184       
185        fprintf(stderr, "Error: illegal markup at positions %i of length %i.\n", AbsPos()-lgth, lgth);
186        cerr << string((char *) item, lgth) << endl;
187}
188
189/* Action routine for a text item */
190template <CodeUnit_Base C>
191inline void ParsingEngine<C>::Text_action(unsigned char * item, int lgth) {
192        text_item_count +=1;
193       
194        #if defined(CALC_AVG)
195                text_item_length += lgth;
196        #endif
197}
198
199template <CodeUnit_Base C>
200inline void ParsingEngine<C>::Reference_action(unsigned char * item, int lgth) {
201        reference_count +=1;
202       
203        #if defined(CALC_AVG)
204                reference_length += lgth;
205        #endif
206}
207
208
209
210
/* Three action routines for markup components are defined as follows.
|ElementName_action| is the action routine called upon recognition of
an element name immediately after the opening angle bracket of a start
tag or empty element tag.  {\bf OR MAYBE THIS SHOULD BE DEFERRED UNTIL
AFTER ATTRIBUTE PROCESSING SO THAT NAMESPACES ARE SET?}
It is called with two parameters: a pointer to the XML name (item)
and the length of that name (lgth).
Similarly, |PI_Target_action| is the action routine called upon recognition
of the XML name that occurs immediately after the opening "<?"
delimiter of a processing instruction.

 The third action routine for markup components is |AttributeValue_action|,
which takes four parameters rather than two: a pointer/length pair for the
attribute name and a pointer/length pair for the attribute value.
{\bf OR POSSIBLY JUST
THE QUOTE MARK ITEMS, RELYING ON THE END OF THE LAST COMPONENT PROCESSED
TO MARK THE SPACE BEFORE THE ATT NAME.- REQUIRES ELEMENT_NAME_ACTION}
*/
227
228/* Semantic action routines for markup components. */
229/* Action routine for an element name occurring immediately after the
230   opening "<" of a start tag or empty element tag. */
231template <CodeUnit_Base C>
232inline void ParsingEngine<C>::ElementName_action(unsigned char * item, int lgth) {
233}
234
235/* Action routine for a processing instruction target name occurring immediately
236   after the opening "<?" of a processing instruction. */
237template <CodeUnit_Base C>
238inline void ParsingEngine<C>::PI_Target_action(unsigned char * item, int lgth) {
239}
240
241/* Action routine for an individual attribute/value pair occurring in
242   a element start tag or an empty element tag. */
243template <CodeUnit_Base C>
244inline void ParsingEngine<C>::AttributeValue_action(unsigned char * name, int name_lgth, 
245                                 unsigned char * val, int val_lgth) {
246        total_attribute_count+=1;
247       
248        #if defined(CALC_AVG)
249                total_att_name_length += name_lgth;
250                total_att_value_length += val_lgth;
251        #endif
252}
253
254/* Action routine for an individual attribute/value pair occurring in
255   a element start tag or an empty element tag. */
256template <CodeUnit_Base C>
257inline void ParsingEngine<C>::Namespace_action(unsigned char * name, int name_lgth,
258                             unsigned char * URI, int URI_lgth) {
259        namespace_count+=1;
260       
261        #if defined(CALC_AVG)
262                total_namespace_name_length += name_lgth;
263                total_namespace_URI_length += URI_lgth;
264        #endif
265}
266
267
268template <CodeUnit_Base C>
269inline void ParsingEngine<C>::FinalizeBuffer_action() {
270       
271       
272#ifdef DEBUG
273        printf ("FinalizeBuffer; last 16 bytes + lookahead 16 =\n");
274        cout << string((char *) GetCodeUnitPtr(AbsPos()-16), 16) << "::" << string((char *) GetCodeUnitPtr(AbsPos()), 16) << endl;
275#endif
276        #if defined(PAPI) and defined(CODE_CLOCKING) and (CODE_CLOCKING == BUFFER_TOTAL)
277                code_clocker->cc_end_interval(BUFFER_SIZE);
278        #endif
279        #if defined(PAPI) and defined(CODE_CLOCKING) and (CODE_CLOCKING == BUFFER_TOTAL)
280                code_clocker->cc_start_interval();
281        #endif
282       
283}
284
285
286template <CodeUnit_Base C>
287inline void ParsingEngine<C>::DocumentStart_action() {
288        #if defined(PAPI) and defined(CODE_CLOCKING) and (CODE_CLOCKING == BUFFER_TOTAL)
289                code_clocker->cc_start_interval();
290        #endif
291}
292
293template <CodeUnit_Base C>
294inline void ParsingEngine<C>::DocumentEnd_action() {
295        #if defined(PAPI) and defined(CODE_CLOCKING) and (CODE_CLOCKING == BUFFER_TOTAL)
296                code_clocker->cc_end_interval(buffer_rel_pos);
297        #endif
298}
299
300template <CodeUnit_Base C>
301inline void ParsingEngine<C>::Doctype_action(unsigned char * item, int lgth) {
302#ifdef SHOW_DTD_ACTIONS
303        printf("Document Type:\n");
304        cout << string((char *) item, lgth) <<endl;
305#endif
306}
307
308template <CodeUnit_Base C>
309inline void ParsingEngine<C>::PEReference_action(unsigned char * item, int lgth) {
310}
311
312
313template <CodeUnit_Base C>
314inline void ParsingEngine<C>::ExtSubsetDecl_action(unsigned char * item, int lgth) {
315#ifdef SHOW_DTD_ACTIONS
316        printf("ExtSubsetDecl:\n");
317        cout << string((char *) item, lgth) <<endl;
318#endif
319}
320
321template <CodeUnit_Base C>
322inline void ParsingEngine<C>::Prolog_action(unsigned char * item, int lgth) {
323#ifdef SHOW_DTD_ACTIONS
324        printf("Prolog:\n");
325        cout << string((char *) item, lgth) <<endl;
326#endif
327}
328
/* print_stats(stat_string, count, total_lgth)
   Prints one line of the statistics report to stdout:
     count == 0:  "0 <stat_string>s."
     count == 1:  "1 <stat_string> of length <total_lgth>."
     otherwise:   "<count> <stat_string>s of avg. lgth <total_lgth/count>."
   The average uses integer division.

   The body is wrapped in do { } while (0) so that an invocation followed
   by ';' is exactly one statement (safe inside an unbraced if/else), and
   count and total_lgth are copied into locals so that each argument is
   evaluated only once. */
#define print_stats(stat_string, count, total_lgth) \
        do { \
                const int print_stats_count_ = (count); \
                const int print_stats_total_ = (total_lgth); \
                printf("%i %s", print_stats_count_, (stat_string)); \
                if (print_stats_count_ == 0) printf("s.\n"); \
                else if (print_stats_count_ == 1) printf(" of length %i.\n", print_stats_total_); \
                else printf("s of avg. lgth %i.\n", print_stats_total_ / print_stats_count_); \
        } while (0)
334
335
336
337
338int
339main(int argc, char * argv[]) {
340        if (argc != 2) {
341        printf("Usage: %s <filename>\n", argv[0]);
342                exit(-1);
343        }
344       
345        char * src_filename = argv[1];
346        char * cmdline = new char[strlen(argv[0]) + strlen(argv[1]) +1 +1]; 
347        strcat(cmdline, argv[0]);
348        strcat(cmdline," ");
349        strcat(cmdline,argv[1]);
350
351        #ifdef PAPI
352                #define NUM_EVENTS 2
353                int Events[NUM_EVENTS] = {PAPI_TOT_CYC, PAPI_BR_MSP};
354                int cal_size = 1000;
355                code_clocker = new CC(Events,NUM_EVENTS,cal_size);
356                code_clocker->cc_set_cmd(cmdline);
357        #endif
358
359        // Read the entire file into a memory buffer   
360        FILE * src_file;
361        struct stat fileinfo;
362        int src_filesize;
363
364        // open file and fstat 
365        src_file = fopen ( src_filename , "rb" );
366        if(fstat(fileno(src_file), &fileinfo)!=0) {
367                fprintf(stderr, "Cannot fstat '%s'. Terminating the process ...\n", src_filename); 
368                exit(-1);
369        }
370       
371        src_filesize = fileinfo.st_size;
372       
373        // close file
374        fclose (src_file);
375       
376        for (int run = 0; run < REPEAT_RUNS; run++) {
377
378                #ifdef PAPI
379                        code_clocker->cc_start_interval();
380                #endif
381
382                parser = Parser_Interface::ParserFactory(src_filename);
383       
384                /*             
385                if (!parser->has_ByteOrderMark()) printf("No ");
386                printf("Byte Order Mark found.\n");
387       
388                if (parser->get_version() == XML_1_0) printf("XML version 1.0 declared.\n");
389                else if (parser->get_version() == XML_1_1) printf("XML version 1.1 declared.\n");
390                else printf ("XML version 1.0 implied by default.\n");
391                if (parser->has_EncodingDecl()) {
392                        printf("XML encoding declared:  %s\n", parser->get_Encoding());
393                }
394                if (parser->standalone_status() == Standalone_yes)
395                        printf("XML standalone = yes declared.\n");
396                else if (parser->standalone_status() == Standalone_no)
397                        printf("XML standalone = no declared.\n");
398                else printf ("XML standalone = no by default.\n");
399                */
400
401//              #ifdef PAPI
402//                      code_clocker->cc_start_interval();
403//              #endif
404               
405                parser->Parse_Prolog();
406                parser->Parse_DocumentContent();
407
408//              #ifdef PAPI
409//                      int elems = src_filesize;
410//                      code_clocker->cc_end_interval(elems);
411//              #endif
412
413                parser->~Parser_Interface();
414                printf("Run %i complete.\n", run);
415
416        }
417
418        #ifdef PAPI
419                code_clocker->cc_write_xml_file();
420                code_clocker->cc_display();
421                delete code_clocker;
422        #endif 
423       
424        print_stats("comment", comment_count, comment_length);
425        print_stats("CDATA section", CDATA_end_count, CDATA_length);
426        print_stats("processing instruction", PI_count, PI_length);
427        print_stats("empty element", empty_elem_count, empty_elem_length);
428        print_stats("start tag", start_tag_count, start_tag_length);
429        printf("%i total attributes\n", attribute_count);
430        print_stats("attribute name", total_attribute_count, total_att_name_length);
431        print_stats("attribute value", total_attribute_count, total_att_value_length);
432        print_stats("namespace name", namespace_count, total_namespace_name_length);
433        print_stats("namespace URI", namespace_count, total_namespace_URI_length);
434        print_stats("end tag", end_tag_count, end_tag_length);
435        print_stats("text item", text_item_count, text_item_length);
436        print_stats("reference", reference_count, reference_length);
437        print_stats("error item", error_item_count, error_item_length);
438        printf("Maximum nesting depth = %i\n", max_nesting_depth);
439       
440        return(0);
441}
Note: See TracBrowser for help on using the repository browser.