source: tags/parabix-0.40/markup_stats.cxx @ 4027

Last change on this file since 4027 was 45, checked in by cameron, 12 years ago

Error strings

File size: 8.0 KB
Line 
1/*  markup_stats.c - parabix demo program
2    Copyright (c) 2007, Robert D. Cameron.
3    Licensed to the public under the Open Software License 3.0.
4    Licensed to International Characters, Inc., under the Academic
5    Free License 3.0.
6
7*/
8
9#include <stdio.h>
10#include <stdlib.h>
11#include <string.h>
12#include <errno.h>
13#include <sys/types.h>
14#include <sys/stat.h>
15#include <unistd.h>
16#include <string>
17#include <iostream>
18using namespace std;
19
20#include "src/ilax.h"
21#include "src/engine.h"
22
23
24/* Internals */
25#include "src/xmlbuffer.h"
26#include "src/xmlbuffer.c"
27#include "src/bitlex.h"
28#include "src/bitlex.c"
29#include "src/engine.c"
30
31/* Global declarations of parsing engine. */
32Parser_Interface * parser;
33
/* Global declarations for statistics.
   Each *_count is the number of items seen and each *_length is the running
   sum of (end_pos - start_pos) over those items, as accumulated by the
   corresponding action routine. */

/* Per-item-kind counters and accumulated lengths. */
int comment_count = 0,    comment_length = 0;
int CDATA_count = 0,      CDATA_length = 0;
int PI_count = 0,         PI_length = 0;
int empty_elem_count = 0, empty_elem_length = 0;
int start_tag_count = 0,  start_tag_length = 0;
/* Reported by main(); no action routine in this file updates it. */
int attribute_count = 0;
int end_tag_count = 0,    end_tag_length = 0;
int reference_count = 0,  reference_length = 0;
int text_item_count = 0,  text_item_length = 0;
int error_item_count = 0, error_item_length = 0;

/* Current element nesting depth (start tags minus end tags) and its peak. */
int nesting_depth = 0,    max_nesting_depth = 0;

/* Totals accumulated by AttributeValue_action. */
int total_attribute_count = 0;
int total_att_name_length = 0;
int total_att_value_length = 0;

/* Totals accumulated by Namespace_action. */
int namespace_count = 0;
int total_namespace_name_length = 0;
int total_namespace_URI_length = 0;
63
64
65/* Action routine for an XML comment in "<!--"  "-->" brackets. */
66//template <CodeUnit_Base C>
67static inline void Comment_action(int start_pos, int end_pos) {
68        comment_count +=1;
69        comment_length += end_pos - start_pos;
70}
71
72/* Action routine for a CDATA section enclosed in "<![CDATA[" and "]]>" brackets. */
73//template <CodeUnit_Base C>
74static inline void CDATA_action(int start_pos, int end_pos) {
75        CDATA_count +=1;
76        CDATA_length += end_pos - start_pos;
77}
78
79/* Action routine for an XML processing instruction enclosed in "<?" and "?>" brackets. */
80//template <CodeUnit_Base C>
81static inline void PI_action(int start_pos, int end_pos) {
82        PI_count +=1;
83        PI_length += end_pos - start_pos;
84}
85
86/* Action routine for an empty element enclosed in "<" and "/>" brackets. */
87//template <CodeUnit_Base C>
88static inline void EmptyElement_action(int start_pos, int end_pos) {
89        empty_elem_count +=1;
90        empty_elem_length += end_pos - start_pos;
91}
92
93/* Action routine for a start tag enclosed in "<" and ">" brackets. */
94//template <CodeUnit_Base C>
95static inline void StartTag_action(int start_pos, int end_pos) {
96        start_tag_count +=1;
97        start_tag_length += end_pos - start_pos;
98        nesting_depth += 1;
99        if (nesting_depth > max_nesting_depth) max_nesting_depth = nesting_depth;
100}
101
102/* Action routine for an end tag enclosed in "</" and ">" brackets. */
103//template <CodeUnit_Base C>
104static inline void EndTag_action(int start_pos, int end_pos) {
105        end_tag_count +=1;
106        end_tag_length += end_pos - start_pos;
107        nesting_depth -= 1;
108}
109
110/* Action routine for an error item */
111//template <CodeUnit_Base C>
112static inline void Error_action(int start_pos, int end_pos) {
113        error_item_count +=1;
114        error_item_length += end_pos - start_pos;
115        printf("Error: illegal markup at positions %i through %i.\n", start_pos, end_pos);
116
117        cout << string((char *) parser->GetCodeUnitPtr(start_pos), end_pos - start_pos) << endl;
118
119
120
121}
122
123/* Action routine for a text item */
124//template <CodeUnit_Base C>
125static inline void Text_action(int start_pos, int end_pos) {
126        text_item_count +=1;
127        text_item_length += end_pos - start_pos;
128}
129
130//template <CodeUnit_Base C>
131static inline void Reference_action(int start_pos, int end_pos) {
132        reference_count +=1;
133        reference_length += end_pos - start_pos;
134}
135
136
137
138
139/* Three action routines for markup components are defined as follows.
140|ElementName_action| is the action routine called upon recognition of
141an element name immediately after the opening angle bracket of a start
142tag or empty element tag.  {\bf OR MAYBE THIS SHOULD BE DEFERRED UNTIL
143AFTER ATTRIBUTE PROCESSING SO THAT NAMESPACES ARE SET?}
144It is called with two parameters identifying the
145first and last character positions of the expected XML_name.
146Similarly, |PI_Target_action| is the action routine called upon recognition
147of the XML Name that occurs immediately after the opening "<?"
148delimiter of a processing instruction.
149
 The third action routine for markup components is AttributeValue_action,
which takes four parameters (the start and end positions of the attribute
name and of the attribute value) rather than two.  {\bf OR POSSIBLY JUST
THE QUOTE MARK ITEMS, RELYING ON THE END OF THE LAST COMPONENT PROCESSED
TO MARK THE SPACE BEFORE THE ATT NAME.- REQUIRES ELEMENT_NAME_ACTION}
154*/
155
156/* Semantic action routines for markup components. */
/* Action routine for an element name occurring immediately after the
   opening "<" of a start tag or empty element tag.
   This program gathers no statistics on element names, so the routine
   does nothing. */
//template <CodeUnit_Base C>
static inline void ElementName_action(int start_pos, int end_pos) {
        /* Deliberately unused. */
        (void) start_pos;
        (void) end_pos;
}
162
/* Action routine for a processing instruction target name occurring immediately
   after the opening "<?" of a processing instruction.
   This program gathers no statistics on PI targets, so the routine
   does nothing. */
//template <CodeUnit_Base C>
static inline void PI_Target_action(int start_pos, int end_pos) {
        /* Deliberately unused. */
        (void) start_pos;
        (void) end_pos;
}
168
169/* Action routine for an individual attribute/value pair occurring in
170   a element start tag or an empty element tag. */
171//template <CodeUnit_Base C>
172static inline void AttributeValue_action(int name_start, int name_end, 
173                                  int val_start, int val_end) {
174        total_attribute_count+=1;
175        total_att_name_length += name_end - name_start;
176        total_att_value_length += val_end - val_start;
177}
178
179/* Action routine for an individual attribute/value pair occurring in
180   a element start tag or an empty element tag. */
181//template <CodeUnit_Base C>
182static inline void Namespace_action(int name_start, int name_end, 
183                             int URI_start, int URI_end) {
184        namespace_count+=1;
185        total_namespace_name_length += name_end - name_start;
186        total_namespace_URI_length += URI_end - URI_start;
187}
188
189
/* Action routine taking no arguments; this program has no per-buffer
   work to do, so the routine is empty. */
//template <CodeUnit_Base C>
static inline void FinalizeBuffer_action() {
        /* Nothing to finalize. */
}
193
194
/* Print one summary line for a kind of item.
     stat_string - singular name of the item kind (e.g. "comment")
     count       - number of items seen
     total_lgth  - accumulated length of those items
   Output forms:
     count == 0  ->  "0 <name>s."
     count == 1  ->  "1 <name> of length <total_lgth>."
     otherwise   ->  "<count> <name>s of avg. lgth <total_lgth/count>."
   The average uses integer division.  The body is wrapped in
   do { ... } while (0) so that the macro behaves as a single statement
   (safe inside an unbraced if/else), and each argument is evaluated
   exactly once. */
#define print_stats(stat_string, count, total_lgth) \
        do { \
                const int print_stats_count_ = (count); \
                const int print_stats_total_ = (total_lgth); \
                printf("%i %s", print_stats_count_, (stat_string)); \
                if (print_stats_count_ == 0) printf("s.\n"); \
                else if (print_stats_count_ == 1) printf(" of length %i.\n", print_stats_total_); \
                else printf("s of avg. lgth %i.\n", print_stats_total_ / print_stats_count_); \
        } while (0)
200
201
202
203
204int
205main(int argc, char * argv[]) {
206        if (argc != 2) {
207        printf("Usage: %s <filename>\n", argv[0]);
208                exit(-1);
209        }
210        char * filename = argv[1];
211       
212       
213        parser = Parser_Interface::ParserFactory(filename);
214       
215       
216        if (!parser->has_ByteOrderMark()) printf("No ");
217        printf("Byte Order Mark found.\n");
218        if (parser->get_version() == XML_1_0) printf("XML version 1.0 declared.\n");
219        else if (parser->get_version() == XML_1_1) printf("XML version 1.1 declared.\n");
220        else printf ("XML version 1.0 implied by default.\n");
221        if (parser->has_EncodingDecl()) {
222        printf("XML encoding named at positions %i of length %i\n", 
223                parser->get_Encoding_pos(), parser->get_Encoding_lgth());
224        }
225        if (parser->standalone_status() == Standalone_yes) 
226                printf("XML standalone = yes declared.\n");
227        else if (parser->standalone_status() == Standalone_no) 
228                printf("XML standalone = no declared.\n");
229        else printf ("XML standalone = no by default.\n");
230       
231        parser->ParseContent();
232       
233        print_stats("comment", comment_count, comment_length);
234        print_stats("CDATA section", CDATA_count, CDATA_length);
235        print_stats("processing instruction", PI_count, PI_length);
236        print_stats("empty element", empty_elem_count, empty_elem_length);
237        print_stats("start tag", start_tag_count, start_tag_length);
238        printf("%i total attributes\n", attribute_count);
239        print_stats("attribute name", total_attribute_count, total_att_name_length);
240        print_stats("attribute value", total_attribute_count, total_att_value_length);
241        print_stats("namespace name", namespace_count, total_namespace_name_length);
242        print_stats("namespace URI", namespace_count, total_namespace_URI_length);
243        print_stats("end tag", end_tag_count, end_tag_length);
244        print_stats("text item", text_item_count, text_item_length);
245        print_stats("reference", reference_count, reference_length);
246        print_stats("error item", error_item_count, error_item_length);
247        printf("Maximum nesting depth = %i\n", max_nesting_depth);
248       
249        return(0);
250}
Note: See TracBrowser for help on using the repository browser.