source: trunk/markup_stats.cxx @ 79

Last change on this file since 79 was 79, checked in by lindanl, 11 years ago

DTD actions.

File size: 12.5 KB
Line 
1/*  markup_stats.c - parabix demo program
2    Copyright (c) 2007, Robert D. Cameron.
3    Licensed to the public under the Open Software License 3.0.
4    Licensed to International Characters, Inc., under the Academic
5    Free License 3.0.
6
7*/
8
9#include <stdio.h>
10#include <stdlib.h>
11#include <string.h>
12#include <errno.h>
13#include <sys/types.h>
14#include <sys/stat.h>
15#include <unistd.h>
16#include <string>
17#include <iostream>
18using namespace std;
19
20#ifdef CODE_CLOCKING
21#include "codeclocker/clocker/code_clocker_session.h"
22#include "codeclocker/clocker/code_clocker_session.c"
23Code_Clocker * total_clocker;
24#endif
25
26
27
28//#include "src/ilax.h"
29#include "src/engine.h"
30
31#ifndef REPEAT_RUNS
32#define REPEAT_RUNS 1
33#endif
34
35/* Internals */
36#include "src/xmlmodel.h"
37#include "src/bitplex.h"
38#include "src/byteplex.h"
39#include "src/xmldecl.h"
40#include "src/bitlex.h"
41
42
43#include "src/xmlmodel.c"
44#include "src/bitplex.c"
45#include "src/byteplex.c"
46#include "src/xmldecl.c"
47#include "src/bitlex.c"
48#include "src/engine.c"
49
/* Global declarations of parsing engine. */
/* Parser used for the current run; main() assigns it from
   Parser_Interface::ParserFactory(filename). */
Parser_Interface * parser;

/* Global declarations for statistics. */
/* Each *_count below is the number of items reported to the matching
   action routine; each *_length is the running sum of
   (end_pos - start_pos) over those items.  All counters start at zero
   and are only ever modified by the action routines in this file. */

int comment_count = 0;
int comment_length = 0;
int CDATA_count = 0;
int CDATA_length = 0;
int PI_count = 0;
int PI_length = 0;
int empty_elem_count = 0;
int empty_elem_length = 0;
int start_tag_count = 0;
int start_tag_length = 0;
/* Not updated by any action routine in this file; attributes are
   tallied in total_attribute_count instead. */
int attribute_count = 0;
int end_tag_count = 0;
int end_tag_length = 0;
int reference_count = 0;
int reference_length = 0;
int text_item_count = 0;
int text_item_length = 0;
int error_item_count = 0;
int error_item_length = 0;
/* Incremented by StartTag_action and decremented by EndTag_action;
   max_nesting_depth keeps the largest value nesting_depth has reached. */
int nesting_depth = 0;
int max_nesting_depth = 0;
/* Totals accumulated by AttributeValue_action. */
int total_attribute_count = 0;
int total_att_name_length = 0;
int total_att_value_length = 0;
/* Totals accumulated by Namespace_action. */
int namespace_count = 0;
int total_namespace_name_length = 0;
int total_namespace_URI_length = 0;
82
/* Bookkeeping for the most recently recorded markup item: its start and
   end positions and the buffer-relative position in effect at the time. */
int last_item_start = 0;
int last_item_stop = 0;
int last_buffer_rel_pos = 0;

/* Remember the extent of the item just processed so that later
   diagnostics (see Error_action) can report what preceded an error.
   In DEBUG builds, an item that starts before the previous item ended
   is reported as an anomaly before the record is overwritten. */
void LastItemAction(int start_pos, int end_pos, int buffer_rel_pos) {
#ifdef DEBUG
        const bool starts_before_previous_end = start_pos < last_item_stop;
        if (starts_before_previous_end) {
                printf("Anomaly start_pos %i < last_item_stop_pos %i \n", start_pos, last_item_stop);
                printf("rel_positions: last = %i, this = %i\n", last_buffer_rel_pos, buffer_rel_pos);
        }
#endif
        last_buffer_rel_pos = buffer_rel_pos;
        last_item_stop = end_pos;
        last_item_start = start_pos;
}
98
99/* Action routine for an XML comment in "<!--"  "-->" brackets. */
100template <CodeUnit_Base C>
101inline void ParsingEngine<C>::Comment_action(int start_pos, int end_pos) {
102        comment_count +=1;
103        comment_length += end_pos - start_pos;
104        LastItemAction(start_pos, end_pos, buffer_rel_pos);
105}
106
107/* Action routine for a CDATA section enclosed in "<![CDATA[" and "]]>" brackets. */
108template <CodeUnit_Base C>
109inline void ParsingEngine<C>::CDATA_action(int start_pos, int end_pos) {
110        CDATA_count +=1;
111        CDATA_length += end_pos - start_pos;
112        LastItemAction(start_pos, end_pos, buffer_rel_pos);
113}
114
115/* Action routine for an XML processing instruction enclosed in "<?" and "?>" brackets. */
116template <CodeUnit_Base C>
117inline void ParsingEngine<C>::PI_action(int start_pos, int end_pos) {
118        PI_count +=1;
119        PI_length += end_pos - start_pos;
120        LastItemAction(start_pos, end_pos, buffer_rel_pos);
121}
122
123/* Action routine for an empty element enclosed in "<" and "/>" brackets. */
124template <CodeUnit_Base C>
125inline void ParsingEngine<C>::EmptyElement_action(int start_pos, int end_pos) {
126        empty_elem_count +=1;
127        empty_elem_length += end_pos - start_pos;
128        LastItemAction(start_pos, end_pos, buffer_rel_pos);
129}
130
131/* Action routine for a start tag enclosed in "<" and ">" brackets. */
132template <CodeUnit_Base C>
133inline void ParsingEngine<C>::StartTag_action(int start_pos, int end_pos) {
134        start_tag_count +=1;
135        start_tag_length += end_pos - start_pos;
136        nesting_depth += 1;
137        if (nesting_depth > max_nesting_depth) max_nesting_depth = nesting_depth;
138        LastItemAction(start_pos, end_pos, buffer_rel_pos);
139}
140
141/* Action routine for an end tag enclosed in "</" and ">" brackets. */
142template <CodeUnit_Base C>
143inline void ParsingEngine<C>::EndTag_action(int start_pos, int end_pos) {
144        end_tag_count +=1;
145        end_tag_length += end_pos - start_pos;
146        nesting_depth -= 1;
147        LastItemAction(start_pos, end_pos, buffer_rel_pos);
148}
149
150/* Action routine for an error item */
151template <CodeUnit_Base C>
152inline void ParsingEngine<C>::Error_action(int start_pos, int end_pos) {
153        error_item_count +=1;
154        error_item_length += end_pos - start_pos;
155        printf("Error: illegal markup at positions %i through %i.\n", start_pos, end_pos);
156        printf("length = %i; buffer_rel_pos = %i\n", end_pos - start_pos, buffer_rel_pos);
157        printf("last_item from %i to %i (rel_pos = %i)\n", 
158               last_item_start, last_item_stop, last_buffer_rel_pos);
159
160        cout << string((char *) GetCodeUnitPtr(start_pos), end_pos+1 - start_pos) << endl;
161
162
163
164}
165
166/* Action routine for a text item */
167template <CodeUnit_Base C>
168inline void ParsingEngine<C>::Text_action(int start_pos, int end_pos) {
169        text_item_count +=1;
170        text_item_length += end_pos - start_pos;
171        LastItemAction(start_pos, end_pos, buffer_rel_pos);
172}
173
174template <CodeUnit_Base C>
175inline void ParsingEngine<C>::Reference_action(int start_pos, int end_pos) {
176        reference_count +=1;
177        reference_length += end_pos - start_pos;
178        LastItemAction(start_pos, end_pos, buffer_rel_pos);
179}
180
181
182
183
/* Three action routines for markup components are defined as follows.
|ElementName_action| is the action routine called upon recognition of
an element name immediately after the opening angle bracket of a start
tag or empty element tag.  {\bf OR MAYBE THIS SHOULD BE DEFERRED UNTIL
AFTER ATTRIBUTE PROCESSING SO THAT NAMESPACES ARE SET?}
It is called with two parameters identifying the
first and last character positions of the expected XML name.
Similarly, |PI_Target_action| is the action routine called upon recognition
of the XML name that occurs immediately after the opening "<?"
delimiter of a processing instruction.

The third action routine for markup components is |AttributeValue_action|,
which takes four parameters rather than two: the start and end positions
of the attribute name, followed by the start and end positions of the
attribute value.  {\bf OR POSSIBLY JUST
THE QUOTE MARK ITEMS, RELYING ON THE END OF THE LAST COMPONENT PROCESSED
TO MARK THE SPACE BEFORE THE ATT NAME - REQUIRES ELEMENT_NAME_ACTION}
*/
200
201/* Semantic action routines for markup components. */
202/* Action routine for an element name occurring immediately after the
203   opening "<" of a start tag or empty element tag. */
204template <CodeUnit_Base C>
205inline void ParsingEngine<C>::ElementName_action(int start_pos, int end_pos) {
206}
207
208/* Action routine for a processing instruction target name occurring immediately
209   after the opening "<?" of a processing instruction. */
210template <CodeUnit_Base C>
211inline void ParsingEngine<C>::PI_Target_action(int start_pos, int end_pos) {
212}
213
214/* Action routine for an individual attribute/value pair occurring in
215   a element start tag or an empty element tag. */
216template <CodeUnit_Base C>
217inline void ParsingEngine<C>::AttributeValue_action(int name_start, int name_end, 
218                                  int val_start, int val_end) {
219        total_attribute_count+=1;
220        total_att_name_length += name_end - name_start;
221        total_att_value_length += val_end - val_start;
222}
223
224/* Action routine for an individual attribute/value pair occurring in
225   a element start tag or an empty element tag. */
226template <CodeUnit_Base C>
227inline void ParsingEngine<C>::Namespace_action(int name_start, int name_end, 
228                             int URI_start, int URI_end) {
229        namespace_count+=1;
230        total_namespace_name_length += name_end - name_start;
231        total_namespace_URI_length += URI_end - URI_start;
232}
233
234
235template <CodeUnit_Base C>
236inline void ParsingEngine<C>::FinalizeBuffer_action(int& preserve_pos) {
237#ifdef DEBUG
238        printf ("FinalizeBuffer; last 16 bytes + lookahead 16 =\n");
239        cout << string((char *) GetCodeUnitPtr(AbsPos()-16), 16) << "::" << string((char *) GetCodeUnitPtr(AbsPos()), 16) << endl;
240#endif
241
242        preserve_pos = AbsPos();
243#ifdef CODE_CLOCKING
244end_Interval (total_clocker, buffer_rel_pos);
245start_Interval (total_clocker);
246#endif
247}
248
249
250template <CodeUnit_Base C>
251inline void ParsingEngine<C>::DocumentStart_action() {
252#ifdef CODE_CLOCKING
253start_Interval (total_clocker);
254#endif
255}
256
257template <CodeUnit_Base C>
258inline void ParsingEngine<C>::DocumentEnd_action() {
259#ifdef CODE_CLOCKING
260end_Interval (total_clocker, buffer_rel_pos);
261#endif
262}
263
/* print_stats(stat_string, count, total_lgth)
   Prints one statistics line for an item category:
     count == 0:  "<count> <stat_string>s."
     count == 1:  "<count> <stat_string> of length <total_lgth>."
     otherwise:   "<count> <stat_string>s of avg. lgth <total_lgth/count>."
   The average uses integer division.

   The body is wrapped in do { ... } while (0) so that an invocation
   followed by ';' is a single statement and is safe inside an unbraced
   if/else.  Arguments are parenthesized but are still evaluated more
   than once, so do not pass expressions with side effects. */
#define print_stats(stat_string, count, total_lgth) \
        do { \
                printf("%i %s", (count), (stat_string)); \
                if ((count) == 0) printf("s.\n"); \
                else if ((count) == 1) printf(" of length %i.\n", (total_lgth)); \
                else printf("s of avg. lgth %i.\n", (total_lgth)/(count)); \
        } while (0)
269
270
271template <CodeUnit_Base C>
272inline void ParsingEngine<C>::Doctype_action(int start_pos, int end_pos) {
273#ifdef SHOW_DTD_ACTIONS
274        printf("Document Type:\n");
275        cout << string((char *) GetCodeUnitPtr(start_pos), end_pos-start_pos) <<endl;
276#endif
277}
278
279template <CodeUnit_Base C>
280inline void ParsingEngine<C>::PEReference_action(int start_pos, int end_pos) {
281}
282
283template <CodeUnit_Base C>
284inline void ParsingEngine<C>::Elementdecl_action(int start_pos, int end_pos) {
285#ifdef SHOW_DTD_ACTIONS
286        printf("Elementdecl:\n");
287        cout << string((char *) GetCodeUnitPtr(start_pos), end_pos-start_pos) <<endl;
288#endif
289}
290
291template <CodeUnit_Base C>
292inline void ParsingEngine<C>::AttlistDecl_action(int start_pos, int end_pos) {
293#ifdef SHOW_DTD_ACTIONS
294        printf("AttlistDecl:\n");
295        cout << string((char *) GetCodeUnitPtr(start_pos), end_pos-start_pos) <<endl;
296#endif
297}
298
299template <CodeUnit_Base C>
300inline void ParsingEngine<C>::Entitydecl_action(int start_pos, int end_pos) {
301#ifdef SHOW_DTD_ACTIONS
302        printf("Entitydecl:\n");
303        cout << string((char *) GetCodeUnitPtr(start_pos), end_pos-start_pos) <<endl;
304#endif
305}
306
307template <CodeUnit_Base C>
308inline void ParsingEngine<C>::Notationdecl_action(int start_pos, int end_pos) {
309#ifdef SHOW_DTD_ACTIONS
310        printf("Notationdecl:\n");
311        cout << string((char *) GetCodeUnitPtr(start_pos), end_pos-start_pos) <<endl;
312#endif
313}
314
315template <CodeUnit_Base C>
316inline void ParsingEngine<C>::Prolog_action(int start_pos, int end_pos) {
317#ifdef SHOW_DTD_ACTIONS
318        printf("Prolog:\n");
319        cout << string((char *) GetCodeUnitPtr(start_pos), end_pos-start_pos) <<endl;
320#endif
321}
322
323int
324main(int argc, char * argv[]) {
325        if (argc != 2) {
326        printf("Usage: %s <filename>\n", argv[0]);
327                exit(-1);
328        }
329        char * filename = argv[1];
330#ifdef CODE_CLOCKING
331
332init_Code_Clocker_Session(8, // num code clockers
333                         64096, // max individual timestamp measurements
334                         4ul, // num session parameters
335                         65536, // num timestamp calibration measurements,
336                            65534,// num constant time loop executions
337                         8 // num constant time loop iterations
338                         );
339set_Session_Cmd_Line_Args(argv[0]);
340total_clocker = register_Code_Clocker("Markup_stats_total", "Total markup_stats time\n");
341#endif
342
343        for (int run = 0; run < REPEAT_RUNS; run++) {
344       
345        parser = Parser_Interface::ParserFactory(filename);
346       
347       
348        if (!parser->has_ByteOrderMark()) printf("No ");
349        printf("Byte Order Mark found.\n");
350
351        if (parser->get_version() == XML_1_0) printf("XML version 1.0 declared.\n");
352        else if (parser->get_version() == XML_1_1) printf("XML version 1.1 declared.\n");
353        else printf ("XML version 1.0 implied by default.\n");
354        if (parser->has_EncodingDecl()) {
355                printf("XML encoding declared:  %s\n", parser->get_Encoding());
356        }
357        if (parser->standalone_status() == Standalone_yes) 
358                printf("XML standalone = yes declared.\n");
359        else if (parser->standalone_status() == Standalone_no) 
360                printf("XML standalone = no declared.\n");
361        else printf ("XML standalone = no by default.\n");
362       
363        parser->Parse_Prolog();
364        parser->ParseContent();
365        parser->~Parser_Interface();
366        printf("Run %i complete.\n", run);
367        }
368       
369        print_stats("comment", comment_count, comment_length);
370        print_stats("CDATA section", CDATA_count, CDATA_length);
371        print_stats("processing instruction", PI_count, PI_length);
372        print_stats("empty element", empty_elem_count, empty_elem_length);
373        print_stats("start tag", start_tag_count, start_tag_length);
374        printf("%i total attributes\n", attribute_count);
375        print_stats("attribute name", total_attribute_count, total_att_name_length);
376        print_stats("attribute value", total_attribute_count, total_att_value_length);
377        print_stats("namespace name", namespace_count, total_namespace_name_length);
378        print_stats("namespace URI", namespace_count, total_namespace_URI_length);
379        print_stats("end tag", end_tag_count, end_tag_length);
380        print_stats("text item", text_item_count, text_item_length);
381        print_stats("reference", reference_count, reference_length);
382        print_stats("error item", error_item_count, error_item_length);
383        printf("Maximum nesting depth = %i\n", max_nesting_depth);
384#ifdef CODE_CLOCKING
385        write_XML_File();
386#endif
387       
388        return(0);
389}
Note: See TracBrowser for help on using the repository browser.