source: tags/parabix-0.39/markup_stats.cxx @ 4027

Last change on this file since 4027 was 29, checked in by cameron, 11 years ago

Parameterizing ParsingEngine? for ASCII/EBCDIC

File size: 7.6 KB
Line 
1/*  markup_stats.c - parabix demo program
2    Copyright (c) 2007, Robert D. Cameron.
3    Licensed to the public under the Open Software License 3.0.
4    Licensed to International Characters, Inc., under the Academic
5    Free License 3.0.
6
7*/
8
9#include <stdio.h>
10#include <stdlib.h>
11#include <string.h>
12#include <errno.h>
13#include <sys/types.h>
14#include <sys/stat.h>
15#include <unistd.h>
16
17#include "src/xmlbuffer.h"
18#include "src/xmlbuffer.c"
19
20#include "src/charsets/charset_family.h"
21#include "src/bitlex.h"
22#include "src/bitlex.c"
23#include "src/charsets/ext_ascii_8.h"
24#include "src/charsets/ext_ascii_8.c"
25#include "src/charsets/ext_ascii_16.h"
26#include "src/charsets/ext_ascii_16.c"
27
28#include "src/ilax.h"
29#include "src/engine.h"
30#include "src/engine.c"
31
/* Global statistics accumulators.  Every counter starts at zero; the
   action routines in this file add to them and main() reports them. */

/* Per-item tallies: number of items seen and the sum of their
   (end_pos - start_pos) spans. */
int comment_count = 0,    comment_length = 0;
int CDATA_count = 0,      CDATA_length = 0;
int PI_count = 0,         PI_length = 0;
int empty_elem_count = 0, empty_elem_length = 0;
int start_tag_count = 0,  start_tag_length = 0;
int end_tag_count = 0,    end_tag_length = 0;
int reference_count = 0,  reference_length = 0;
int text_item_count = 0,  text_item_length = 0;
int error_item_count = 0, error_item_length = 0;

/* Reported by main(); no action routine in this file modifies it. */
int attribute_count = 0;

/* Element nesting: current depth and the largest depth reached. */
int nesting_depth = 0,    max_nesting_depth = 0;

/* Attribute tallies: pair count plus summed name and value spans. */
int total_attribute_count = 0;
int total_att_name_length = 0;
int total_att_value_length = 0;

/* Namespace tallies: count plus summed name and URI spans. */
int namespace_count = 0;
int total_namespace_name_length = 0;
int total_namespace_URI_length = 0;
61
62
63/* Action routine for an XML comment in "<!--"  "-->" brackets. */
64static inline void Comment_action(int start_pos, int end_pos) {
65  comment_count +=1;
66  comment_length += end_pos - start_pos;
67}
68
69/* Action routine for a CDATA section enclosed in "<![CDATA[" and "]]>" brackets. */
70static inline void CDATA_action(int start_pos, int end_pos) {
71  CDATA_count +=1;
72  CDATA_length += end_pos - start_pos;
73}
74
75/* Action routine for an XML processing instruction enclosed in "<?" and "?>" brackets. */
76static inline void PI_action(int start_pos, int end_pos) {
77  PI_count +=1;
78  PI_length += end_pos - start_pos;
79}
80
81/* Action routine for an empty element enclosed in "<" and "/>" brackets. */
82static inline void EmptyElement_action(int start_pos, int end_pos) {
83  empty_elem_count +=1;
84  empty_elem_length += end_pos - start_pos;
85}
86
87/* Action routine for a start tag enclosed in "<" and ">" brackets. */
88static inline void StartTag_action(int start_pos, int end_pos) {
89  start_tag_count +=1;
90  start_tag_length += end_pos - start_pos;
91  nesting_depth += 1;
92  if (nesting_depth > max_nesting_depth) max_nesting_depth = nesting_depth;
93}
94
95/* Action routine for an end tag enclosed in "</" and ">" brackets. */
96static inline void EndTag_action(int start_pos, int end_pos) {
97  end_tag_count +=1;
98  end_tag_length += end_pos - start_pos;
99  nesting_depth -= 1;
100}
101
102/* Action routine for an error item */
103static inline void Error_action(int start_pos, int end_pos) {
104  error_item_count +=1;
105  error_item_length += end_pos - start_pos;
106  printf("Error: illegal markup at positions %i through %i.\n", start_pos, end_pos);
107}
108
109/* Action routine for a text item */
110static inline void Text_action(int start_pos, int end_pos) {
111  text_item_count +=1;
112  text_item_length += end_pos - start_pos;
113}
114
115static inline void Reference_action(int start_pos, int end_pos) {
116  reference_count +=1;
117  reference_length += end_pos - start_pos;
118}
119
120
121
122
/* Three action routines for markup components are defined as follows.
   ElementName_action is the action routine called upon recognition of
   an element name immediately after the opening angle bracket of a start
   tag or empty element tag.  (OPEN QUESTION: should this be deferred
   until after attribute processing, so that namespaces are already set?)
   It is called with two parameters identifying the first and last
   character positions of the expected XML name.
   Similarly, PI_Target_action is the action routine called upon
   recognition of the XML name that occurs immediately after the opening
   "<?" delimiter of a processing instruction.

   The third action routine for markup components is
   AttributeValue_action, which takes four parameters rather than two:
   the start and end positions of the attribute name, followed by the
   start and end positions of the attribute value.  (OPEN QUESTION:
   possibly it should receive just the quote mark items, relying on the
   end of the last component processed to mark the space before the
   attribute name; that would require ElementName_action.)
*/
139
140/* Semantic action routines for markup components. */
/* Action routine for an element name occurring immediately after the
   opening "<" of a start tag or empty element tag.
   This program gathers no statistics on element names, so the routine
   does nothing. */
static inline void ElementName_action(int start_pos, int end_pos) {
  (void) start_pos;  /* unused */
  (void) end_pos;    /* unused */
}
145
/* Action routine for a processing instruction target name occurring
   immediately after the opening "<?" of a processing instruction.
   This program gathers no statistics on target names, so the routine
   does nothing. */
static inline void PI_Target_action(int start_pos, int end_pos) {
  (void) start_pos;  /* unused */
  (void) end_pos;    /* unused */
}
150
151/* Action routine for an individual attribute/value pair occurring in
152   a element start tag or an empty element tag. */
153static inline void AttributeValue_action(int name_start, int name_end, 
154                                  int val_start, int val_end) {
155  total_attribute_count+=1;
156  total_att_name_length += name_end - name_start;
157  total_att_value_length += val_end - val_start;
158}
159
160/* Action routine for an individual attribute/value pair occurring in
161   a element start tag or an empty element tag. */
162static inline void Namespace_action(int name_start, int name_end, 
163                             int URI_start, int URI_end) {
164  namespace_count+=1;
165  total_namespace_name_length += name_end - name_start;
166  total_namespace_URI_length += URI_end - URI_start;
167}
168
169
/* Buffer-finalization action routine.  This program has nothing to do
   at that point, so the routine is a no-op. */
static inline void FinalizeBuffer_action() {
  return;
}
172
173
/* print_stats(stat_string, count, total_lgth)
   Prints a one-line summary for one kind of item:
     count == 0:  "<count> <stat_string>s."
     count == 1:  "<count> <stat_string> of length <total_lgth>."
     otherwise:   "<count> <stat_string>s of avg. lgth <total_lgth/count>."
   The average uses integer division.  count and total_lgth are taken
   as int and each argument is evaluated exactly once.  The expansion is
   a single do { } while (0) statement, so the macro can be used anywhere
   a statement can, including as the body of an unbraced if/else. */
#define print_stats(stat_string, count, total_lgth) \
  do { \
    const int print_stats_count_ = (count); \
    const int print_stats_total_ = (total_lgth); \
    printf("%i %s", print_stats_count_, (stat_string)); \
    if (print_stats_count_ == 0) printf("s.\n"); \
    else if (print_stats_count_ == 1) \
      printf(" of length %i.\n", print_stats_total_); \
    else \
      printf("s of avg. lgth %i.\n", \
             print_stats_total_ / print_stats_count_); \
  } while (0)
179
180
181int
182main(int argc, char * argv[]) {
183  if (argc != 2) {
184    printf("Usage: %s <filename>\n", argv[0]);
185          exit(-1);
186  }
187  char * filename = argv[1];
188
189  Entity_Declaration_Info xml_info;
190
191  ParsingEngine<ASCII> parser = ParsingEngine<ASCII>(filename);
192  parser.InitLexer();
193
194  parser.ReadXmlInfo(xml_info);
195  printf("has_ByteOrderMark = %i\n", xml_info.has_ByteOrderMark);
196  if (xml_info.has_version_decl) {
197    printf("XML version 1.%i declared.\n", xml_info.version);
198  }
199  else printf ("XML version 1.0 implied by default.\n");
200  if (xml_info.has_encoding_decl) {
201    printf("XML encoding named at positions %i through %i\n", 
202           xml_info.encoding_start_pos, xml_info.encoding_end_pos);
203  }
204  if (xml_info.has_standalone_decl) {
205    printf("XML standalone = %i declared.\n", xml_info.standalone);
206  }
207  else printf ("XML standalone = 0 by default.\n");
208
209  parser.ParseContent();
210
211  print_stats("comment", comment_count, comment_length);
212  print_stats("CDATA section", CDATA_count, CDATA_length);
213  print_stats("processing instruction", PI_count, PI_length);
214  print_stats("empty element", empty_elem_count, empty_elem_length);
215  print_stats("start tag", start_tag_count, start_tag_length);
216  printf("%i total attributes\n", attribute_count);
217  print_stats("attribute name", total_attribute_count, total_att_name_length);
218  print_stats("attribute value", total_attribute_count, total_att_value_length);
219  print_stats("namespace name", namespace_count, total_namespace_name_length);
220  print_stats("namespace URI", namespace_count, total_namespace_URI_length);
221  print_stats("end tag", end_tag_count, end_tag_length);
222  print_stats("text item", text_item_count, text_item_length);
223  print_stats("reference", reference_count, reference_length);
224  print_stats("error item", error_item_count, error_item_length);
225  printf("Maximum nesting depth = %i\n", max_nesting_depth);
226
227  return(0);
228}
Note: See TracBrowser for help on using the repository browser.