source: trunk/markup_stats.cxx @ 108

Last change on this file since 108 was 108, checked in by lindanl, 11 years ago
File size: 11.7 KB
Line 
1/*  markup_stats.c - parabix demo program
2    Copyright (c) 2007, Robert D. Cameron.
3    Licensed to the public under the Open Software License 3.0.
4    Licensed to International Characters, Inc., under the Academic
5    Free License 3.0.
6
7*/
8
9#include <stdio.h>
10#include <stdlib.h>
11#include <string.h>
12#include <errno.h>
13#include <sys/types.h>
14#include <sys/stat.h>
15#include <string>
16#include <iostream>
17using namespace std;
18
19#ifdef CODE_CLOCKING
20#include "codeclocker/clocker/code_clocker_session.h"
21#include "codeclocker/clocker/code_clocker_session.c"
22Code_Clocker * total_clocker;
23#endif
24
25
26
27//#include "src/ilax.h"
28#include "src/engine.h"
29
30#ifndef REPEAT_RUNS
31#define REPEAT_RUNS 1
32#endif
33
34/* Internals */
35#include "src/xmlmodel.h"
36#include "src/bitplex.h"
37#include "src/byteplex.h"
38#include "src/xmldecl.h"
39#include "src/bitlex.h"
40
41
42#include "src/xmlmodel.c"
43#include "src/bitplex.c"
44#include "src/byteplex.c"
45#include "src/xmldecl.c"
46#include "src/bitlex.c"
47#include "src/engine.c"
48
49/* Global declarations of parsing engine. */
50Parser_Interface * parser;
51
/* Global statistics counters.  The action routines below update them
   and main() reports them once parsing is finished. */

/* Comments. */
int comment_count = 0;                  /* bumped by Comment_action */
int comment_length = 0;                 /* sum of lgth over all comments */

/* CDATA sections. */
int CDATA_start_count = 0;              /* bumped by CDATA_start_action */
int CDATA_start_pos = 0;                /* position recorded by CDATA_start_action */
int CDATA_length = 0;                   /* accumulated by CDATA_end_action */
int CDATA_end_count = 0;                /* bumped by CDATA_end_action */

/* Processing instructions. */
int PI_count = 0;
int PI_length = 0;

/* Empty element tags. */
int empty_elem_count = 0;
int empty_elem_length = 0;

/* Start tags. */
int start_tag_count = 0;
int start_tag_length = 0;
int attribute_count = 0;                /* NOTE: no action routine in this file updates it */

/* End tags. */
int end_tag_count = 0;
int end_tag_length = 0;

/* References. */
int reference_count = 0;
int reference_length = 0;

/* Text items. */
int text_item_count = 0;
int text_item_length = 0;

/* Error items. */
int error_item_count = 0;
int error_item_length = 0;

/* Element nesting: current depth and the deepest level seen so far. */
int nesting_depth = 0;
int max_nesting_depth = 0;

/* Attributes (updated by AttributeValue_action). */
int total_attribute_count = 0;
int total_att_name_length = 0;
int total_att_value_length = 0;

/* Namespaces (updated by Namespace_action). */
int namespace_count = 0;
int total_namespace_name_length = 0;
int total_namespace_URI_length = 0;

/* Position bookkeeping; not referenced by any routine in this file. */
int last_item_start = 0;
int last_item_stop = 0;
int last_buffer_rel_pos = 0;
87
88
89/* Action routine for an XML comment in "<!--"  "-->" brackets. */
90template <CodeUnit_Base C>
91inline void ParsingEngine<C>::Comment_action(unsigned char * item, int lgth) {
92        comment_count +=1;
93        comment_length += lgth;
94}
95
96/* Action routine called upon recognizing "<![CDATA[" to start a CDATA section. */
97template <CodeUnit_Base C>
98inline void ParsingEngine<C>::CDATA_start_action(unsigned char * CDATA_ptr){
99        CDATA_start_pos = AbsPos() - 9;
100        CDATA_start_count +=1;
101}
102
103/* Action routine called upon recognizing "]]>" to end a CDATA section. */
104template <CodeUnit_Base C>
105inline void ParsingEngine<C>::CDATA_end_action(unsigned char * CDATA_end_ptr) {
106        CDATA_end_count +=1;
107        CDATA_length += AbsPos() - CDATA_start_pos;
108}
109
110/* Action routine for an XML processing instruction enclosed in "<?" and "?>" brackets. */
111template <CodeUnit_Base C>
112inline void ParsingEngine<C>::PI_action(unsigned char * item, int lgth) {
113        PI_count +=1;
114        PI_length += lgth;
115}
116
117/* Action routine for an empty element enclosed in "<" and "/>" brackets. */
118template <CodeUnit_Base C>
119inline void ParsingEngine<C>::EmptyElement_action(unsigned char * item, int lgth) {
120        empty_elem_count +=1;
121        empty_elem_length += lgth;
122}
123
124/* Action routine for a start tag enclosed in "<" and ">" brackets. */
125template <CodeUnit_Base C>
126inline void ParsingEngine<C>::StartTag_action(unsigned char * item, int lgth) {
127        start_tag_count +=1;
128        start_tag_length += lgth;
129        nesting_depth += 1;
130        if (nesting_depth > max_nesting_depth) max_nesting_depth = nesting_depth;
131//      cout << string((char *) item, lgth) << endl;
132}
133
134/* Action routine for an end tag enclosed in "</" and ">" brackets. */
135template <CodeUnit_Base C>
136inline void ParsingEngine<C>::EndTag_action(unsigned char * item, int lgth) {
137        end_tag_count +=1;
138        end_tag_length += lgth;
139        nesting_depth -= 1;
140}
141
142/* Action routine for an error item */
143template <CodeUnit_Base C>
144inline void ParsingEngine<C>::Error_action(unsigned char * item, int lgth) {
145        error_item_count +=1;
146        error_item_length += lgth;
147        printf("Error: illegal markup at positions %i of length %i.\n", AbsPos()-lgth, lgth);
148        cout << string((char *) item, lgth) << endl;
149}
150
151/* Action routine for a text item */
152template <CodeUnit_Base C>
153inline void ParsingEngine<C>::Text_action(unsigned char * item, int lgth) {
154        text_item_count +=1;
155        text_item_length += lgth;
156}
157
158template <CodeUnit_Base C>
159inline void ParsingEngine<C>::Reference_action(unsigned char * item, int lgth) {
160        reference_count +=1;
161        reference_length += lgth;
162}
163
164
165
166
167/* Three action routines for markup components are defined as follows.
168|ElementName_action| is the action routine called upon recognition of
169an element name immediately after the opening angle bracket of a start
170tag or empty element tag.  {\bf OR MAYBE THIS SHOULD BE DEFERRED UNTIL
171AFTER ATTRIBUTE PROCESSING SO THAT NAMESPACES ARE SET?}
172It is called with two parameters identifying the
173first and last character positions of the expected XML_name.
174Similarly, |PI_Target_action| is the action routine called upon recognition
175of the XML Name that occurs immediately after the opening "<?"
176delimiter of a processing instruction.
177
178 The third action routine for markup components is Attribute_Value_action,
179which takes three parameters rather than two.  {\bf OR POSSIBLY JUST
180THE QUOTE MARK ITEMS, RELYING ON THE END OF THE LAST COMPONENT PROCESSED
181TO MARK THE SPACE BEFORE THE ATT NAME.- REQUIRES ELEMENT_NAME_ACTION}
182*/
183
184/* Semantic action routines for markup components. */
185/* Action routine for an element name occurring immediately after the
186   opening "<" of a start tag or empty element tag. */
187template <CodeUnit_Base C>
188inline void ParsingEngine<C>::ElementName_action(unsigned char * item, int lgth) {
189}
190
191/* Action routine for a processing instruction target name occurring immediately
192   after the opening "<?" of a processing instruction. */
193template <CodeUnit_Base C>
194inline void ParsingEngine<C>::PI_Target_action(unsigned char * item, int lgth) {
195}
196
197/* Action routine for an individual attribute/value pair occurring in
198   a element start tag or an empty element tag. */
199template <CodeUnit_Base C>
200inline void ParsingEngine<C>::AttributeValue_action(unsigned char * name, int name_lgth, 
201                                 unsigned char * val, int val_lgth) {
202        total_attribute_count+=1;
203        total_att_name_length += name_lgth;
204        total_att_value_length += val_lgth;
205}
206
207/* Action routine for an individual attribute/value pair occurring in
208   a element start tag or an empty element tag. */
209template <CodeUnit_Base C>
210inline void ParsingEngine<C>::Namespace_action(unsigned char * name, int name_lgth,
211                             unsigned char * URI, int URI_lgth) {
212        namespace_count+=1;
213        total_namespace_name_length += name_lgth;
214        total_namespace_URI_length += URI_lgth;
215}
216
217
218template <CodeUnit_Base C>
219inline void ParsingEngine<C>::FinalizeBuffer_action(int& preserve_pos) {
220#ifdef DEBUG
221        printf ("FinalizeBuffer; last 16 bytes + lookahead 16 =\n");
222        cout << string((char *) GetCodeUnitPtr(AbsPos()-16), 16) << "::" << string((char *) GetCodeUnitPtr(AbsPos()), 16) << endl;
223#endif
224
225        preserve_pos = AbsPos();
226#ifdef CODE_CLOCKING
227end_Interval (total_clocker, buffer_rel_pos);
228start_Interval (total_clocker);
229#endif
230}
231
232
233template <CodeUnit_Base C>
234inline void ParsingEngine<C>::DocumentStart_action() {
235#ifdef CODE_CLOCKING
236start_Interval (total_clocker);
237#endif
238}
239
240template <CodeUnit_Base C>
241inline void ParsingEngine<C>::DocumentEnd_action() {
242#ifdef CODE_CLOCKING
243end_Interval (total_clocker, buffer_rel_pos);
244#endif
245}
246
247template <CodeUnit_Base C>
248inline void ParsingEngine<C>::Doctype_action(unsigned char * item, int lgth) {
249#ifdef SHOW_DTD_ACTIONS
250        printf("Document Type:\n");
251        cout << string((char *) item, lgth) <<endl;
252#endif
253}
254
255template <CodeUnit_Base C>
256inline void ParsingEngine<C>::PEReference_action(unsigned char * item, int lgth) {
257}
258
259template <CodeUnit_Base C>
260inline void ParsingEngine<C>::AttlistDecl_action(unsigned char * item, int lgth) {
261#ifdef SHOW_DTD_ACTIONS
262        printf("AttlistDecl:\n");
263        cout << string((char *) item, lgth) <<endl;
264#endif
265}
266
267template <CodeUnit_Base C>
268inline void ParsingEngine<C>::Entitydecl_action(unsigned char * entity_name, int entity_name_lgth, unsigned char * item, int lgth) {
269#ifdef SHOW_DTD_ACTIONS
270        printf("Entitydecl:\n");
271        cout << string((char *) item, lgth) <<endl;
272#endif
273}
274
275template <CodeUnit_Base C>
276inline void ParsingEngine<C>::Notationdecl_action(unsigned char * item, int lgth) {
277#ifdef SHOW_DTD_ACTIONS
278        printf("Notationdecl:\n");
279        cout << string((char *) item, lgth) <<endl;
280#endif
281}
282
283template <CodeUnit_Base C>
284inline void ParsingEngine<C>::ExtSubsetDecl_action(unsigned char * item, int lgth) {
285#ifdef SHOW_DTD_ACTIONS
286        printf("ExtSubsetDecl:\n");
287        cout << string((char *) item, lgth) <<endl;
288#endif
289        printf("Finish parsing ExtSubsetDecl!\n");
290}
291
292template <CodeUnit_Base C>
293inline void ParsingEngine<C>::Prolog_action(unsigned char * item, int lgth) {
294#ifdef SHOW_DTD_ACTIONS
295        printf("Prolog:\n");
296        cout << string((char *) item, lgth) <<endl;
297#endif
298}
299
/* print_stats(stat_string, count, total_lgth)
   Prints a one-line summary for one kind of item:
     count == 0 :  "0 <stat_string>s."
     count == 1 :  "1 <stat_string> of length <total_lgth>."
     otherwise  :  "<count> <stat_string>s of avg. lgth <total_lgth/count>."
   The average uses integer division.
   The body is wrapped in do { } while (0) so that the macro expands to a
   single statement and is safe under an unbraced if/else; invoke it with
   a trailing semicolon.  count and total_lgth are captured in locals so
   each argument is evaluated exactly once. */
#define print_stats(stat_string, count, total_lgth) \
        do { \
                const int print_stats_count_ = (count); \
                const int print_stats_total_ = (total_lgth); \
                printf("%i %s", print_stats_count_, (stat_string)); \
                if (print_stats_count_ == 0) printf("s.\n"); \
                else if (print_stats_count_ == 1) printf(" of length %i.\n", print_stats_total_); \
                else printf("s of avg. lgth %i.\n", print_stats_total_ / print_stats_count_); \
        } while (0)
305
306int
307main(int argc, char * argv[]) {
308        if (argc != 2) {
309        printf("Usage: %s <filename>\n", argv[0]);
310                exit(-1);
311        }
312        char * filename = argv[1];
313#ifdef CODE_CLOCKING
314
315init_Code_Clocker_Session(8, // num code clockers
316                         64096, // max individual timestamp measurements
317                         4ul, // num session parameters
318                         65536, // num timestamp calibration measurements,
319                            65534,// num constant time loop executions
320                         8 // num constant time loop iterations
321                         );
322set_Session_Cmd_Line_Args(argv[0]);
323total_clocker = register_Code_Clocker("Markup_stats_total", "Total markup_stats time\n");
324#endif
325
326        for (int run = 0; run < REPEAT_RUNS; run++) {
327       
328        parser = Parser_Interface::ParserFactory(filename);
329       
330       
331        if (!parser->has_ByteOrderMark()) printf("No ");
332        printf("Byte Order Mark found.\n");
333
334        if (parser->get_version() == XML_1_0) printf("XML version 1.0 declared.\n");
335        else if (parser->get_version() == XML_1_1) printf("XML version 1.1 declared.\n");
336        else printf ("XML version 1.0 implied by default.\n");
337        if (parser->has_EncodingDecl()) {
338                printf("XML encoding declared:  %s\n", parser->get_Encoding());
339        }
340        if (parser->standalone_status() == Standalone_yes) 
341                printf("XML standalone = yes declared.\n");
342        else if (parser->standalone_status() == Standalone_no) 
343                printf("XML standalone = no declared.\n");
344        else printf ("XML standalone = no by default.\n");
345       
346        parser->Parse_Prolog();
347//      parser->Parse_ExtSubsetDecl();
348
349//#define VALIDATION
350#ifdef VALIDATION
351        parser->Parse_DocumentContent();
352#endif
353
354#ifndef VALIDATION
355        parser->ParseContent();
356#endif
357
358        parser->~Parser_Interface();
359        printf("Run %i complete.\n", run);
360       
361       
362        }
363       
364        print_stats("comment", comment_count, comment_length);
365        print_stats("CDATA section", CDATA_end_count, CDATA_length);
366        print_stats("processing instruction", PI_count, PI_length);
367        print_stats("empty element", empty_elem_count, empty_elem_length);
368        print_stats("start tag", start_tag_count, start_tag_length);
369        printf("%i total attributes\n", attribute_count);
370        print_stats("attribute name", total_attribute_count, total_att_name_length);
371        print_stats("attribute value", total_attribute_count, total_att_value_length);
372        print_stats("namespace name", namespace_count, total_namespace_name_length);
373        print_stats("namespace URI", namespace_count, total_namespace_URI_length);
374        print_stats("end tag", end_tag_count, end_tag_length);
375        print_stats("text item", text_item_count, text_item_length);
376        print_stats("reference", reference_count, reference_length);
377        print_stats("error item", error_item_count, error_item_length);
378        printf("Maximum nesting depth = %i\n", max_nesting_depth);
379#ifdef CODE_CLOCKING
380        write_XML_File();
381#endif
382       
383        return(0);
384}
Note: See TracBrowser for help on using the repository browser.