source: trunk/markup_stats.cxx @ 64

Last change on this file since 64 was 64, checked in by cameron, 11 years ago

Document_Start/End actions; OpenMP version

File size: 9.4 KB
Line 
1/*  markup_stats.c - parabix demo program
2    Copyright (c) 2007, Robert D. Cameron.
3    Licensed to the public under the Open Software License 3.0.
4    Licensed to International Characters, Inc., under the Academic
5    Free License 3.0.
6
7*/
8
9#include <stdio.h>
10#include <stdlib.h>
11#include <string.h>
12#include <errno.h>
13#include <sys/types.h>
14#include <sys/stat.h>
15#include <unistd.h>
16#include <string>
17#include <iostream>
18using namespace std;
19
20//#include "src/ilax.h"
21#include "src/engine.h"
22
23#ifndef REPEAT_RUNS
24#define REPEAT_RUNS 1
25#endif
26
27/* Internals */
28#include "src/xmlbuffer.h"
29#include "src/xmlbuffer.c"
30#include "src/bitlex.h"
31#include "src/bitlex.c"
32#include "src/engine.c"
33
34/* Global declarations of parsing engine. */
35Parser_Interface * parser;
36
/* Global statistics.  Each *_count tallies how many items of a kind were
   reported to the corresponding action routine and each *_length sums
   (end - start) over those items.  Nothing in this file resets them, so
   they accumulate over every run performed by main(). */

/* Comments, CDATA sections and processing instructions. */
int comment_count = 0, comment_length = 0;
int CDATA_count = 0, CDATA_length = 0;
int PI_count = 0, PI_length = 0;

/* Element tags. */
int empty_elem_count = 0, empty_elem_length = 0;
int start_tag_count = 0, start_tag_length = 0;
/* NOTE(review): attribute_count is not updated by any action routine in
   this file; total_attribute_count is the counter that is maintained. */
int attribute_count = 0;
int end_tag_count = 0, end_tag_length = 0;

/* References, text items and error items. */
int reference_count = 0, reference_length = 0;
int text_item_count = 0, text_item_length = 0;
int error_item_count = 0, error_item_length = 0;

/* Current element nesting depth and the largest depth reached. */
int nesting_depth = 0, max_nesting_depth = 0;

/* Attribute and namespace totals. */
int total_attribute_count = 0;
int total_att_name_length = 0, total_att_value_length = 0;
int namespace_count = 0;
int total_namespace_name_length = 0, total_namespace_URI_length = 0;

/* Extent and buffer-relative position of the most recently recorded
   markup item; printed by Error_action to give context for an error. */
int last_item_start = 0, last_item_stop = 0;
int last_buffer_rel_pos = 0;
70
71/* Action routine for an XML comment in "<!--"  "-->" brackets. */
72template <CodeUnit_Base C>
73inline void ParsingEngine<C>::Comment_action(int start_pos, int end_pos) {
74        comment_count +=1;
75        comment_length += end_pos - start_pos;
76        last_item_start = start_pos;
77        last_item_stop = end_pos;
78        last_buffer_rel_pos = buffer_rel_pos;
79}
80
81/* Action routine for a CDATA section enclosed in "<![CDATA[" and "]]>" brackets. */
82template <CodeUnit_Base C>
83inline void ParsingEngine<C>::CDATA_action(int start_pos, int end_pos) {
84        CDATA_count +=1;
85        CDATA_length += end_pos - start_pos;
86        last_item_start = start_pos;
87        last_item_stop = end_pos;
88        last_buffer_rel_pos = buffer_rel_pos;
89}
90
91/* Action routine for an XML processing instruction enclosed in "<?" and "?>" brackets. */
92template <CodeUnit_Base C>
93inline void ParsingEngine<C>::PI_action(int start_pos, int end_pos) {
94        PI_count +=1;
95        PI_length += end_pos - start_pos;
96        last_item_start = start_pos;
97        last_item_stop = end_pos;
98        last_buffer_rel_pos = buffer_rel_pos;
99}
100
101/* Action routine for an empty element enclosed in "<" and "/>" brackets. */
102template <CodeUnit_Base C>
103inline void ParsingEngine<C>::EmptyElement_action(int start_pos, int end_pos) {
104        empty_elem_count +=1;
105        empty_elem_length += end_pos - start_pos;
106        last_item_start = start_pos;
107        last_item_stop = end_pos;
108        last_buffer_rel_pos = buffer_rel_pos;
109}
110
111/* Action routine for a start tag enclosed in "<" and ">" brackets. */
112template <CodeUnit_Base C>
113inline void ParsingEngine<C>::StartTag_action(int start_pos, int end_pos) {
114        start_tag_count +=1;
115        start_tag_length += end_pos - start_pos;
116        nesting_depth += 1;
117        if (nesting_depth > max_nesting_depth) max_nesting_depth = nesting_depth;
118        last_item_start = start_pos;
119        last_item_stop = end_pos;
120        last_buffer_rel_pos = buffer_rel_pos;
121}
122
123/* Action routine for an end tag enclosed in "</" and ">" brackets. */
124template <CodeUnit_Base C>
125inline void ParsingEngine<C>::EndTag_action(int start_pos, int end_pos) {
126        end_tag_count +=1;
127        end_tag_length += end_pos - start_pos;
128        nesting_depth -= 1;
129        last_item_start = start_pos;
130        last_item_stop = end_pos;
131        last_buffer_rel_pos = buffer_rel_pos;
132}
133
134/* Action routine for an error item */
135template <CodeUnit_Base C>
136inline void ParsingEngine<C>::Error_action(int start_pos, int end_pos) {
137        error_item_count +=1;
138        error_item_length += end_pos - start_pos;
139        printf("Error: illegal markup at positions %i through %i.\n", start_pos, end_pos);
140        printf("length = %i; buffer_rel_pos = %i\n", end_pos - start_pos, buffer_rel_pos);
141        printf("last_item from %i to %i (rel_pos = %i)\n", 
142               last_item_start, last_item_stop, last_buffer_rel_pos);
143
144        cout << string((char *) GetCodeUnitPtr(start_pos), end_pos+1 - start_pos) << endl;
145
146
147
148}
149
150/* Action routine for a text item */
151template <CodeUnit_Base C>
152inline void ParsingEngine<C>::Text_action(int start_pos, int end_pos) {
153        text_item_count +=1;
154        text_item_length += end_pos - start_pos;
155}
156
157template <CodeUnit_Base C>
158inline void ParsingEngine<C>::Reference_action(int start_pos, int end_pos) {
159        reference_count +=1;
160        reference_length += end_pos - start_pos;
161        last_item_start = start_pos;
162        last_item_stop = end_pos;
163        last_buffer_rel_pos = buffer_rel_pos;
164}
165
166
167
168
/* Action routines for markup components are defined as follows.
ElementName_action is the action routine called upon recognition of
an element name immediately after the opening angle bracket of a start
tag or empty element tag.  (Open question: maybe this should be deferred
until after attribute processing so that namespaces are set?)
It is called with two parameters identifying the
first and last character positions of the expected XML name.
Similarly, PI_Target_action is the action routine called upon recognition
of the XML name that occurs immediately after the opening "<?"
delimiter of a processing instruction.

 The action routines for attributes, AttributeValue_action and
Namespace_action, take four parameters rather than two: the start and end
positions of the name followed by the start and end positions of the value.
(Open question: possibly pass just the quote mark positions, relying on the
end of the last component processed to mark the space before the attribute
name; this requires ElementName_action.)
*/
185
186/* Semantic action routines for markup components. */
187/* Action routine for an element name occurring immediately after the
188   opening "<" of a start tag or empty element tag. */
189template <CodeUnit_Base C>
190inline void ParsingEngine<C>::ElementName_action(int start_pos, int end_pos) {
191}
192
193/* Action routine for a processing instruction target name occurring immediately
194   after the opening "<?" of a processing instruction. */
195template <CodeUnit_Base C>
196inline void ParsingEngine<C>::PI_Target_action(int start_pos, int end_pos) {
197}
198
199/* Action routine for an individual attribute/value pair occurring in
200   a element start tag or an empty element tag. */
201template <CodeUnit_Base C>
202inline void ParsingEngine<C>::AttributeValue_action(int name_start, int name_end, 
203                                  int val_start, int val_end) {
204        total_attribute_count+=1;
205        total_att_name_length += name_end - name_start;
206        total_att_value_length += val_end - val_start;
207}
208
209/* Action routine for an individual attribute/value pair occurring in
210   a element start tag or an empty element tag. */
211template <CodeUnit_Base C>
212inline void ParsingEngine<C>::Namespace_action(int name_start, int name_end, 
213                             int URI_start, int URI_end) {
214        namespace_count+=1;
215        total_namespace_name_length += name_end - name_start;
216        total_namespace_URI_length += URI_end - URI_start;
217}
218
219
220template <CodeUnit_Base C>
221inline void ParsingEngine<C>::FinalizeBuffer_action() {
222}
223
224
225template <CodeUnit_Base C>
226inline void ParsingEngine<C>::DocumentStart_action() {
227}
228
229template <CodeUnit_Base C>
230inline void ParsingEngine<C>::DocumentEnd_action() {
231}
232
/* print_stats(stat_string, count, total_lgth)
   Prints one summary line for a category of items:
     count == 0  ->  "0 <stat_string>s."
     count == 1  ->  "1 <stat_string> of length <total_lgth>."
     otherwise   ->  "<count> <stat_string>s of avg. lgth <total_lgth/count>."
   The average uses integer division.

   The body is wrapped in do { ... } while (0) so that an invocation followed
   by ';' is exactly one statement; it can therefore be used safely as the
   body of an unbraced if/else or loop.  count and total_lgth are each
   evaluated exactly once and are expected to be int-compatible. */
#define print_stats(stat_string, count, total_lgth) \
        do { \
                const int print_stats_count_ = (count); \
                const int print_stats_total_ = (total_lgth); \
                printf("%i %s", print_stats_count_, (stat_string)); \
                if (print_stats_count_ == 0) printf("s.\n"); \
                else if (print_stats_count_ == 1) printf(" of length %i.\n", print_stats_total_); \
                else printf("s of avg. lgth %i.\n", print_stats_total_ / print_stats_count_); \
        } while (0)
238
239
240
241
242int
243main(int argc, char * argv[]) {
244        if (argc != 2) {
245        printf("Usage: %s <filename>\n", argv[0]);
246                exit(-1);
247        }
248        char * filename = argv[1];
249
250        for (int run = 0; run < REPEAT_RUNS; run++) {
251       
252        parser = Parser_Interface::ParserFactory(filename);
253       
254       
255        if (!parser->has_ByteOrderMark()) printf("No ");
256        printf("Byte Order Mark found.\n");
257        if (parser->get_version() == XML_1_0) printf("XML version 1.0 declared.\n");
258        else if (parser->get_version() == XML_1_1) printf("XML version 1.1 declared.\n");
259        else printf ("XML version 1.0 implied by default.\n");
260        if (parser->has_EncodingDecl()) {
261        printf("XML encoding named at positions %i of length %i\n", 
262                parser->get_Encoding_pos(), parser->get_Encoding_lgth());
263        }
264        if (parser->standalone_status() == Standalone_yes) 
265                printf("XML standalone = yes declared.\n");
266        else if (parser->standalone_status() == Standalone_no) 
267                printf("XML standalone = no declared.\n");
268        else printf ("XML standalone = no by default.\n");
269       
270        parser->ParseContent();
271        parser->~Parser_Interface();
272        printf("Run %i complete.\n", run);
273        }
274       
275        print_stats("comment", comment_count, comment_length);
276        print_stats("CDATA section", CDATA_count, CDATA_length);
277        print_stats("processing instruction", PI_count, PI_length);
278        print_stats("empty element", empty_elem_count, empty_elem_length);
279        print_stats("start tag", start_tag_count, start_tag_length);
280        printf("%i total attributes\n", attribute_count);
281        print_stats("attribute name", total_attribute_count, total_att_name_length);
282        print_stats("attribute value", total_attribute_count, total_att_value_length);
283        print_stats("namespace name", namespace_count, total_namespace_name_length);
284        print_stats("namespace URI", namespace_count, total_namespace_URI_length);
285        print_stats("end tag", end_tag_count, end_tag_length);
286        print_stats("text item", text_item_count, text_item_length);
287        print_stats("reference", reference_count, reference_length);
288        print_stats("error item", error_item_count, error_item_length);
289        printf("Maximum nesting depth = %i\n", max_nesting_depth);
290       
291        return(0);
292}
Note: See TracBrowser for help on using the repository browser.