source: trunk/markup_stats.cxx @ 111

Last change on this file since 111 was 111, checked in by cameron, 11 years ago

Consolidation of error handling: xml_error.c

File size: 11.8 KB
Line 
1/*  markup_stats.c - parabix demo program
2    Copyright (c) 2007, Robert D. Cameron.
3    Licensed to the public under the Open Software License 3.0.
4    Licensed to International Characters, Inc., under the Academic
5    Free License 3.0.
6
7*/
8
9#include <stdio.h>
10#include <stdlib.h>
11#include <string.h>
12#include <errno.h>
13#include <sys/types.h>
14#include <sys/stat.h>
15#include <string>
16#include <iostream>
17using namespace std;
18
19#ifdef CODE_CLOCKING
20#include "codeclocker/clocker/code_clocker_session.h"
21#include "codeclocker/clocker/code_clocker_session.c"
22Code_Clocker * total_clocker;
23#endif
24
25
26
27//#include "src/ilax.h"
28#include "src/engine.h"
29
30#ifndef REPEAT_RUNS
31#define REPEAT_RUNS 1
32#endif
33
34/* Internals */
35#include "src/xmlmodel.h"
36#include "src/xml_error.h"
37#include "src/bitplex.h"
38#include "src/byteplex.h"
39#include "src/xmldecl.h"
40#include "src/bitlex.h"
41
42
43#include "src/xmlmodel.c"
44#include "src/xml_error.c"
45#include "src/bitplex.c"
46#include "src/byteplex.c"
47#include "src/xmldecl.c"
48#include "src/bitlex.c"
49#include "src/engine.c"
50
51/* Global declarations of parsing engine. */
52Parser_Interface * parser;
53
/* Global statistics: written by the action routines below and reported by
   main().  All counters start at zero. */

/* Comments. */
int comment_count = 0;
int comment_length = 0;

/* CDATA sections.  CDATA_start_pos remembers where the most recent section
   began so that the end action can compute the section length. */
int CDATA_start_count = 0;
int CDATA_start_pos = 0;
int CDATA_length = 0;
int CDATA_end_count = 0;

/* Processing instructions. */
int PI_count = 0;
int PI_length = 0;

/* Empty element tags and start tags. */
int empty_elem_count = 0;
int empty_elem_length = 0;
int start_tag_count = 0;
int start_tag_length = 0;

/* NOTE(review): no action routine in this file updates attribute_count;
   attributes are tallied in total_attribute_count further down. */
int attribute_count = 0;

/* End tags. */
int end_tag_count = 0;
int end_tag_length = 0;

/* References, text items and error items. */
int reference_count = 0;
int reference_length = 0;
int text_item_count = 0;
int text_item_length = 0;
int error_item_count = 0;
int error_item_length = 0;

/* Element nesting: current depth and the deepest level reached so far. */
int nesting_depth = 0;
int max_nesting_depth = 0;

/* Attribute name/value pairs. */
int total_attribute_count = 0;
int total_att_name_length = 0;
int total_att_value_length = 0;

/* Namespace declarations. */
int namespace_count = 0;
int total_namespace_name_length = 0;
int total_namespace_URI_length = 0;

/* Position bookkeeping; not touched by the action routines in this file. */
int last_item_start = 0;
int last_item_stop = 0;
int last_buffer_rel_pos = 0;
89
90
91/* Action routine for an XML comment in "<!--"  "-->" brackets. */
92template <CodeUnit_Base C>
93inline void ParsingEngine<C>::Comment_action(unsigned char * item, int lgth) {
94        comment_count +=1;
95        comment_length += lgth;
96}
97
98/* Action routine called upon recognizing "<![CDATA[" to start a CDATA section. */
99template <CodeUnit_Base C>
100inline void ParsingEngine<C>::CDATA_start_action(unsigned char * CDATA_ptr){
101        CDATA_start_pos = AbsPos() - 9;
102        CDATA_start_count +=1;
103}
104
105/* Action routine called upon recognizing "]]>" to end a CDATA section. */
106template <CodeUnit_Base C>
107inline void ParsingEngine<C>::CDATA_end_action(unsigned char * CDATA_end_ptr) {
108        CDATA_end_count +=1;
109        CDATA_length += AbsPos() - CDATA_start_pos;
110}
111
112/* Action routine for an XML processing instruction enclosed in "<?" and "?>" brackets. */
113template <CodeUnit_Base C>
114inline void ParsingEngine<C>::PI_action(unsigned char * item, int lgth) {
115        PI_count +=1;
116        PI_length += lgth;
117}
118
119/* Action routine for an empty element enclosed in "<" and "/>" brackets. */
120template <CodeUnit_Base C>
121inline void ParsingEngine<C>::EmptyElement_action(unsigned char * item, int lgth) {
122        empty_elem_count +=1;
123        empty_elem_length += lgth;
124}
125
126/* Action routine for a start tag enclosed in "<" and ">" brackets. */
127template <CodeUnit_Base C>
128inline void ParsingEngine<C>::StartTag_action(unsigned char * item, int lgth) {
129        start_tag_count +=1;
130        start_tag_length += lgth;
131        nesting_depth += 1;
132        if (nesting_depth > max_nesting_depth) max_nesting_depth = nesting_depth;
133//      cout << string((char *) item, lgth) << endl;
134}
135
136/* Action routine for an end tag enclosed in "</" and ">" brackets. */
137template <CodeUnit_Base C>
138inline void ParsingEngine<C>::EndTag_action(unsigned char * item, int lgth) {
139        end_tag_count +=1;
140        end_tag_length += lgth;
141        nesting_depth -= 1;
142}
143
144/* Action routine for an error item */
145template <CodeUnit_Base C>
146inline void ParsingEngine<C>::Error_action(unsigned char * item, int lgth) {
147        error_item_count +=1;
148        error_item_length += lgth;
149        printf("Error: illegal markup at positions %i of length %i.\n", AbsPos()-lgth, lgth);
150        cout << string((char *) item, lgth) << endl;
151}
152
153/* Action routine for a text item */
154template <CodeUnit_Base C>
155inline void ParsingEngine<C>::Text_action(unsigned char * item, int lgth) {
156        text_item_count +=1;
157        text_item_length += lgth;
158}
159
160template <CodeUnit_Base C>
161inline void ParsingEngine<C>::Reference_action(unsigned char * item, int lgth) {
162        reference_count +=1;
163        reference_length += lgth;
164}
165
166
167
168
/* Three action routines for markup components are defined as follows.
ElementName_action is the action routine called upon recognition of
an element name immediately after the opening angle bracket of a start
tag or empty element tag.  (Open question: or maybe this should be deferred
until after attribute processing so that namespaces are set?)
It is called with two parameters: a pointer to the start of the expected
XML name and the length of that name.
Similarly, PI_Target_action is the action routine called upon recognition
of the XML Name that occurs immediately after the opening "<?"
delimiter of a processing instruction.

The third action routine for markup components is AttributeValue_action,
which takes four parameters rather than two: the attribute name and its
length, followed by the attribute value and its length.  (Open question: or
possibly just the quote mark items, relying on the end of the last component
processed to mark the space before the attribute name; this requires
ElementName_action.)
*/
185
186/* Semantic action routines for markup components. */
187/* Action routine for an element name occurring immediately after the
188   opening "<" of a start tag or empty element tag. */
189template <CodeUnit_Base C>
190inline void ParsingEngine<C>::ElementName_action(unsigned char * item, int lgth) {
191}
192
193/* Action routine for a processing instruction target name occurring immediately
194   after the opening "<?" of a processing instruction. */
195template <CodeUnit_Base C>
196inline void ParsingEngine<C>::PI_Target_action(unsigned char * item, int lgth) {
197}
198
199/* Action routine for an individual attribute/value pair occurring in
200   a element start tag or an empty element tag. */
201template <CodeUnit_Base C>
202inline void ParsingEngine<C>::AttributeValue_action(unsigned char * name, int name_lgth, 
203                                 unsigned char * val, int val_lgth) {
204        total_attribute_count+=1;
205        total_att_name_length += name_lgth;
206        total_att_value_length += val_lgth;
207}
208
209/* Action routine for an individual attribute/value pair occurring in
210   a element start tag or an empty element tag. */
211template <CodeUnit_Base C>
212inline void ParsingEngine<C>::Namespace_action(unsigned char * name, int name_lgth,
213                             unsigned char * URI, int URI_lgth) {
214        namespace_count+=1;
215        total_namespace_name_length += name_lgth;
216        total_namespace_URI_length += URI_lgth;
217}
218
219
220template <CodeUnit_Base C>
221inline void ParsingEngine<C>::FinalizeBuffer_action(int& preserve_pos) {
222#ifdef DEBUG
223        printf ("FinalizeBuffer; last 16 bytes + lookahead 16 =\n");
224        cout << string((char *) GetCodeUnitPtr(AbsPos()-16), 16) << "::" << string((char *) GetCodeUnitPtr(AbsPos()), 16) << endl;
225#endif
226
227        preserve_pos = AbsPos();
228#ifdef CODE_CLOCKING
229end_Interval (total_clocker, buffer_rel_pos);
230start_Interval (total_clocker);
231#endif
232}
233
234
235template <CodeUnit_Base C>
236inline void ParsingEngine<C>::DocumentStart_action() {
237#ifdef CODE_CLOCKING
238start_Interval (total_clocker);
239#endif
240}
241
242template <CodeUnit_Base C>
243inline void ParsingEngine<C>::DocumentEnd_action() {
244#ifdef CODE_CLOCKING
245end_Interval (total_clocker, buffer_rel_pos);
246#endif
247}
248
249template <CodeUnit_Base C>
250inline void ParsingEngine<C>::Doctype_action(unsigned char * item, int lgth) {
251#ifdef SHOW_DTD_ACTIONS
252        printf("Document Type:\n");
253        cout << string((char *) item, lgth) <<endl;
254#endif
255}
256
257template <CodeUnit_Base C>
258inline void ParsingEngine<C>::PEReference_action(unsigned char * item, int lgth) {
259}
260
261template <CodeUnit_Base C>
262inline void ParsingEngine<C>::AttlistDecl_action(unsigned char * item, int lgth) {
263#ifdef SHOW_DTD_ACTIONS
264        printf("AttlistDecl:\n");
265        cout << string((char *) item, lgth) <<endl;
266#endif
267}
268
269template <CodeUnit_Base C>
270inline void ParsingEngine<C>::Entitydecl_action(unsigned char * entity_name, int entity_name_lgth, unsigned char * item, int lgth) {
271#ifdef SHOW_DTD_ACTIONS
272        printf("Entitydecl:\n");
273        cout << string((char *) item, lgth) <<endl;
274#endif
275}
276
277template <CodeUnit_Base C>
278inline void ParsingEngine<C>::Notationdecl_action(unsigned char * item, int lgth) {
279#ifdef SHOW_DTD_ACTIONS
280        printf("Notationdecl:\n");
281        cout << string((char *) item, lgth) <<endl;
282#endif
283}
284
285template <CodeUnit_Base C>
286inline void ParsingEngine<C>::ExtSubsetDecl_action(unsigned char * item, int lgth) {
287#ifdef SHOW_DTD_ACTIONS
288        printf("ExtSubsetDecl:\n");
289        cout << string((char *) item, lgth) <<endl;
290#endif
291        printf("Finish parsing ExtSubsetDecl!\n");
292}
293
294template <CodeUnit_Base C>
295inline void ParsingEngine<C>::Prolog_action(unsigned char * item, int lgth) {
296#ifdef SHOW_DTD_ACTIONS
297        printf("Prolog:\n");
298        cout << string((char *) item, lgth) <<endl;
299#endif
300}
301
/* print_stats(stat_string, count, total_lgth)
   Prints one statistics line on standard output:
     count == 0  ->  "0 <stat_string>s."
     count == 1  ->  "1 <stat_string> of length <total_lgth>."
     otherwise   ->  "<count> <stat_string>s of avg. lgth <total_lgth/count>."
   The average uses integer division.
   The body is wrapped in do { } while (0) so that the macro expands to a
   single statement and is safe inside an unbraced if/else; each argument is
   evaluated exactly once. */
#define print_stats(stat_string, count, total_lgth) \
        do { \
                const int print_stats_count_ = (count); \
                const int print_stats_lgth_ = (total_lgth); \
                printf("%i %s", print_stats_count_, (stat_string)); \
                if (print_stats_count_ == 0) printf("s.\n"); \
                else if (print_stats_count_ == 1) printf(" of length %i.\n", print_stats_lgth_); \
                else printf("s of avg. lgth %i.\n", print_stats_lgth_ / print_stats_count_); \
        } while (0)
307
308int
309main(int argc, char * argv[]) {
310        if (argc != 2) {
311        printf("Usage: %s <filename>\n", argv[0]);
312                exit(-1);
313        }
314        char * filename = argv[1];
315#ifdef CODE_CLOCKING
316
317init_Code_Clocker_Session(8, // num code clockers
318                         64096, // max individual timestamp measurements
319                         4ul, // num session parameters
320                         65536, // num timestamp calibration measurements,
321                            65534,// num constant time loop executions
322                         8 // num constant time loop iterations
323                         );
324set_Session_Cmd_Line_Args(argv[0]);
325total_clocker = register_Code_Clocker("Markup_stats_total", "Total markup_stats time\n");
326#endif
327
328        for (int run = 0; run < REPEAT_RUNS; run++) {
329       
330        parser = Parser_Interface::ParserFactory(filename);
331       
332       
333        if (!parser->has_ByteOrderMark()) printf("No ");
334        printf("Byte Order Mark found.\n");
335
336        if (parser->get_version() == XML_1_0) printf("XML version 1.0 declared.\n");
337        else if (parser->get_version() == XML_1_1) printf("XML version 1.1 declared.\n");
338        else printf ("XML version 1.0 implied by default.\n");
339        if (parser->has_EncodingDecl()) {
340                printf("XML encoding declared:  %s\n", parser->get_Encoding());
341        }
342        if (parser->standalone_status() == Standalone_yes) 
343                printf("XML standalone = yes declared.\n");
344        else if (parser->standalone_status() == Standalone_no) 
345                printf("XML standalone = no declared.\n");
346        else printf ("XML standalone = no by default.\n");
347       
348        parser->Parse_Prolog();
349//      parser->Parse_ExtSubsetDecl();
350
351//#define VALIDATION
352#ifdef VALIDATION
353        parser->Parse_DocumentContent();
354#endif
355
356#ifndef VALIDATION
357        parser->ParseContent();
358#endif
359
360        parser->~Parser_Interface();
361        printf("Run %i complete.\n", run);
362       
363       
364        }
365       
366        print_stats("comment", comment_count, comment_length);
367        print_stats("CDATA section", CDATA_end_count, CDATA_length);
368        print_stats("processing instruction", PI_count, PI_length);
369        print_stats("empty element", empty_elem_count, empty_elem_length);
370        print_stats("start tag", start_tag_count, start_tag_length);
371        printf("%i total attributes\n", attribute_count);
372        print_stats("attribute name", total_attribute_count, total_att_name_length);
373        print_stats("attribute value", total_attribute_count, total_att_value_length);
374        print_stats("namespace name", namespace_count, total_namespace_name_length);
375        print_stats("namespace URI", namespace_count, total_namespace_URI_length);
376        print_stats("end tag", end_tag_count, end_tag_length);
377        print_stats("text item", text_item_count, text_item_length);
378        print_stats("reference", reference_count, reference_length);
379        print_stats("error item", error_item_count, error_item_length);
380        printf("Maximum nesting depth = %i\n", max_nesting_depth);
381#ifdef CODE_CLOCKING
382        write_XML_File();
383#endif
384       
385        return(0);
386}
Note: See TracBrowser for help on using the repository browser.