source: trunk/markup_stats.cxx @ 38

Last change on this file since 38 was 38, checked in by cameron, 11 years ago

parabix-0.40 (prelim)

File size: 7.8 KB
Line 
/*  markup_stats.cxx - parabix demo program
    Copyright (c) 2007, Robert D. Cameron.
    Licensed to the public under the Open Software License 3.0.
    Licensed to International Characters, Inc., under the Academic
    Free License 3.0.

*/
8
9#include <stdio.h>
10#include <stdlib.h>
11#include <string.h>
12#include <errno.h>
13#include <sys/types.h>
14#include <sys/stat.h>
15#include <unistd.h>
16
17#include "src/ilax.h"
18#include "src/engine.h"
19
20
21/* Internals */
22#include "src/xmlbuffer.h"
23#include "src/xmlbuffer.c"
24#include "src/bitlex.h"
25#include "src/bitlex.c"
26#include "src/engine.c"
27
28/* Global declarations for statistics. */
29
30int comment_count = 0;
31int comment_length = 0;
32int CDATA_count = 0;
33int CDATA_length = 0;
34int PI_count = 0;
35int PI_length = 0;
36int empty_elem_count = 0;
37int empty_elem_length = 0;
38int start_tag_count = 0;
39int start_tag_length = 0;
40int attribute_count = 0;
41int end_tag_count = 0;
42int end_tag_length = 0;
43int reference_count = 0;
44int reference_length = 0;
45int text_item_count = 0;
46int text_item_length = 0;
47int error_item_count = 0;
48int error_item_length = 0;
49int nesting_depth = 0;
50int max_nesting_depth = 0;
51int total_attribute_count = 0;
52int total_att_name_length = 0;
53int total_att_value_length = 0;
54int namespace_count = 0;
55int total_namespace_name_length = 0;
56int total_namespace_URI_length = 0;
57
58
59/* Action routine for an XML comment in "<!--"  "-->" brackets. */
60//template <CodeUnit_Base C>
61static inline void Comment_action(int start_pos, int end_pos) {
62        comment_count +=1;
63        comment_length += end_pos - start_pos;
64}
65
66/* Action routine for a CDATA section enclosed in "<![CDATA[" and "]]>" brackets. */
67//template <CodeUnit_Base C>
68static inline void CDATA_action(int start_pos, int end_pos) {
69        CDATA_count +=1;
70        CDATA_length += end_pos - start_pos;
71}
72
73/* Action routine for an XML processing instruction enclosed in "<?" and "?>" brackets. */
74//template <CodeUnit_Base C>
75static inline void PI_action(int start_pos, int end_pos) {
76        PI_count +=1;
77        PI_length += end_pos - start_pos;
78}
79
80/* Action routine for an empty element enclosed in "<" and "/>" brackets. */
81//template <CodeUnit_Base C>
82static inline void EmptyElement_action(int start_pos, int end_pos) {
83        empty_elem_count +=1;
84        empty_elem_length += end_pos - start_pos;
85}
86
87/* Action routine for a start tag enclosed in "<" and ">" brackets. */
88//template <CodeUnit_Base C>
89static inline void StartTag_action(int start_pos, int end_pos) {
90        start_tag_count +=1;
91        start_tag_length += end_pos - start_pos;
92        nesting_depth += 1;
93        if (nesting_depth > max_nesting_depth) max_nesting_depth = nesting_depth;
94}
95
96/* Action routine for an end tag enclosed in "</" and ">" brackets. */
97//template <CodeUnit_Base C>
98static inline void EndTag_action(int start_pos, int end_pos) {
99        end_tag_count +=1;
100        end_tag_length += end_pos - start_pos;
101        nesting_depth -= 1;
102}
103
104/* Action routine for an error item */
105//template <CodeUnit_Base C>
106static inline void Error_action(int start_pos, int end_pos) {
107        error_item_count +=1;
108        error_item_length += end_pos - start_pos;
109        printf("Error: illegal markup at positions %i through %i.\n", start_pos, end_pos);
110}
111
112/* Action routine for a text item */
113//template <CodeUnit_Base C>
114static inline void Text_action(int start_pos, int end_pos) {
115        text_item_count +=1;
116        text_item_length += end_pos - start_pos;
117}
118
119//template <CodeUnit_Base C>
120static inline void Reference_action(int start_pos, int end_pos) {
121        reference_count +=1;
122        reference_length += end_pos - start_pos;
123}
124
125
126
127
/* Three action routines for markup components are defined as follows.
ElementName_action is the action routine called upon recognition of
an element name immediately after the opening angle bracket of a start
tag or empty element tag.  (OPEN QUESTION: should this be deferred until
after attribute processing so that namespaces are set?)
It is called with two parameters identifying the
first and last character positions of the expected XML name.
Similarly, PI_Target_action is the action routine called upon recognition
of the XML Name that occurs immediately after the opening "<?"
delimiter of a processing instruction.

 The third action routine for markup components is AttributeValue_action,
which takes four parameters rather than two: the start and end positions
of the attribute name and of the attribute value.  (OPEN QUESTION: or
possibly just the quote mark items, relying on the end of the last
component processed to mark the space before the attribute name; this
requires ElementName_action.)
*/
144
145/* Semantic action routines for markup components. */
/* Action routine for an element name occurring immediately after the
   opening "<" of a start tag or empty element tag.
   Intentionally a no-op: this program gathers no per-name statistics. */
//template <CodeUnit_Base C>
static inline void ElementName_action(int start_pos, int end_pos) {
        /* Positions are deliberately unused. */
        (void) start_pos;
        (void) end_pos;
}
151
/* Action routine for a processing instruction target name occurring
   immediately after the opening "<?" of a processing instruction.
   Intentionally a no-op: this program gathers no per-target statistics. */
//template <CodeUnit_Base C>
static inline void PI_Target_action(int start_pos, int end_pos) {
        /* Positions are deliberately unused. */
        (void) start_pos;
        (void) end_pos;
}
157
158/* Action routine for an individual attribute/value pair occurring in
159   a element start tag or an empty element tag. */
160//template <CodeUnit_Base C>
161static inline void AttributeValue_action(int name_start, int name_end, 
162                                  int val_start, int val_end) {
163        total_attribute_count+=1;
164        total_att_name_length += name_end - name_start;
165        total_att_value_length += val_end - val_start;
166}
167
168/* Action routine for an individual attribute/value pair occurring in
169   a element start tag or an empty element tag. */
170//template <CodeUnit_Base C>
171static inline void Namespace_action(int name_start, int name_end, 
172                             int URI_start, int URI_end) {
173        namespace_count+=1;
174        total_namespace_name_length += name_end - name_start;
175        total_namespace_URI_length += URI_end - URI_start;
176}
177
178
/* Action routine with no parameters; intentionally a no-op in this
   statistics program. */
//template <CodeUnit_Base C>
static inline void FinalizeBuffer_action() {
        /* Nothing to finalize. */
        return;
}
182
183
/* Print one statistics line to standard output.
     stat_string - singular noun naming the item (e.g. "comment")
     count       - number of items seen
     total_lgth  - summed length of those items
   Output forms:
     count == 0 : "0 <noun>s."
     count == 1 : "1 <noun> of length <total_lgth>."
     otherwise  : "<count> <noun>s of avg. lgth <total_lgth/count>."
   The body is wrapped in do { ... } while (0) so the macro expands to a
   single statement and is safe as the body of an unbraced if/else.
   Arguments are evaluated more than once; pass side-effect-free
   expressions only. */
#define print_stats(stat_string, count, total_lgth) \
        do { \
                printf("%i %s", (count), (stat_string)); \
                if ((count) == 0) printf("s.\n"); \
                else if ((count) == 1) printf(" of length %i.\n", (total_lgth)); \
                else printf("s of avg. lgth %i.\n", (total_lgth) / (count)); \
        } while (0)
189
190
191int
192main(int argc, char * argv[]) {
193        if (argc != 2) {
194        printf("Usage: %s <filename>\n", argv[0]);
195                exit(-1);
196        }
197        char * filename = argv[1];
198       
199       
200        Parser_Interface * parser = Parser_Interface::ParserFactory(filename);
201       
202       
203        if (!parser->has_ByteOrderMark()) printf("No ");
204        printf("Byte Order Mark found.\n");
205        if (parser->get_version() == XML_1_0) printf("XML version 1.0 declared.\n");
206        else if (parser->get_version() == XML_1_1) printf("XML version 1.1 declared.\n");
207        else printf ("XML version 1.0 implied by default.\n");
208        if (parser->has_EncodingDecl()) {
209        printf("XML encoding named at positions %i of length %i\n", 
210                parser->get_Encoding_pos(), parser->get_Encoding_lgth());
211        }
212        if (parser->standalone_status() == Standalone_yes) 
213                printf("XML standalone = yes declared.\n");
214        else if (parser->standalone_status() == Standalone_no) 
215                printf("XML standalone = no declared.\n");
216        else printf ("XML standalone = no by default.\n");
217       
218        parser->ParseContent();
219       
220        print_stats("comment", comment_count, comment_length);
221        print_stats("CDATA section", CDATA_count, CDATA_length);
222        print_stats("processing instruction", PI_count, PI_length);
223        print_stats("empty element", empty_elem_count, empty_elem_length);
224        print_stats("start tag", start_tag_count, start_tag_length);
225        printf("%i total attributes\n", attribute_count);
226        print_stats("attribute name", total_attribute_count, total_att_name_length);
227        print_stats("attribute value", total_attribute_count, total_att_value_length);
228        print_stats("namespace name", namespace_count, total_namespace_name_length);
229        print_stats("namespace URI", namespace_count, total_namespace_URI_length);
230        print_stats("end tag", end_tag_count, end_tag_length);
231        print_stats("text item", text_item_count, text_item_length);
232        print_stats("reference", reference_count, reference_length);
233        print_stats("error item", error_item_count, error_item_length);
234        printf("Maximum nesting depth = %i\n", max_nesting_depth);
235       
236        return(0);
237}
Note: See TracBrowser for help on using the repository browser.