source: trunk/markup_stats.cxx @ 75

Last change on this file since 75 was 74, checked in by cameron, 11 years ago

markup_stats.cxx updates reflecting reorganization

File size: 10.9 KB
Line 
1/*  markup_stats.c - parabix demo program
2    Copyright (c) 2007, Robert D. Cameron.
3    Licensed to the public under the Open Software License 3.0.
4    Licensed to International Characters, Inc., under the Academic
5    Free License 3.0.
6
7*/
8
9#include <stdio.h>
10#include <stdlib.h>
11#include <string.h>
12#include <errno.h>
13#include <sys/types.h>
14#include <sys/stat.h>
15#include <unistd.h>
16#include <string>
17#include <iostream>
18using namespace std;
19
20#ifdef CODE_CLOCKING
21#include "codeclocker/clocker/code_clocker_session.h"
22#include "codeclocker/clocker/code_clocker_session.c"
23Code_Clocker * total_clocker;
24#endif
25
26
27
28//#include "src/ilax.h"
29#include "src/engine.h"
30
31#ifndef REPEAT_RUNS
32#define REPEAT_RUNS 1
33#endif
34
35/* Internals */
36#include "src/xmlmodel.h"
37#include "src/bitplex.h"
38#include "src/byteplex.h"
39#include "src/xmldecl.h"
40#include "src/bitlex.h"
41
42
43#include "src/xmlmodel.c"
44#include "src/bitplex.c"
45#include "src/byteplex.c"
46#include "src/xmldecl.c"
47#include "src/bitlex.c"
48#include "src/engine.c"
49
50/* Global declarations of parsing engine. */
51Parser_Interface * parser;
52
/* Global statistics accumulated by the action routines below.
   Each *_count is the number of items of that kind seen so far; each
   *_length is the running sum of (end_pos - start_pos) over those items. */

/* Comments, CDATA sections and processing instructions. */
int comment_count = 0;
int comment_length = 0;
int CDATA_count = 0;
int CDATA_length = 0;
int PI_count = 0;
int PI_length = 0;

/* Element tags. */
int empty_elem_count = 0;
int empty_elem_length = 0;
int start_tag_count = 0;
int start_tag_length = 0;
int end_tag_count = 0;
int end_tag_length = 0;

/* Element nesting: current depth and the largest depth reached. */
int nesting_depth = 0;
int max_nesting_depth = 0;

/* Attributes.  Note: nothing in this file modifies attribute_count;
   AttributeValue_action updates the total_* counters instead. */
int attribute_count = 0;
int total_attribute_count = 0;
int total_att_name_length = 0;
int total_att_value_length = 0;

/* Namespace declarations. */
int namespace_count = 0;
int total_namespace_name_length = 0;
int total_namespace_URI_length = 0;

/* References, text items and error items. */
int reference_count = 0;
int reference_length = 0;
int text_item_count = 0;
int text_item_length = 0;
int error_item_count = 0;
int error_item_length = 0;
82
/* Record of the most recently processed item: its start and end positions
   and the buffer_rel_pos value that was passed along with it. */
int last_item_start = 0;
int last_item_stop = 0;
int last_buffer_rel_pos = 0;

/* Remember the given item as the last one processed.  In DEBUG builds,
   first report an anomaly if the item starts before the previously
   recorded item ended. */
void LastItemAction(int start_pos, int end_pos, int buffer_rel_pos) {
#ifdef DEBUG
        const bool starts_before_previous_end = start_pos < last_item_stop;
        if (starts_before_previous_end) {
                printf("Anomaly start_pos %i < last_item_stop_pos %i \n", start_pos, last_item_stop);
                printf("rel_positions: last = %i, this = %i\n", last_buffer_rel_pos, buffer_rel_pos);
        }
#endif
        last_buffer_rel_pos = buffer_rel_pos;
        last_item_stop = end_pos;
        last_item_start = start_pos;
}
98
99/* Action routine for an XML comment in "<!--"  "-->" brackets. */
100template <CodeUnit_Base C>
101inline void ParsingEngine<C>::Comment_action(int start_pos, int end_pos) {
102        comment_count +=1;
103        comment_length += end_pos - start_pos;
104        LastItemAction(start_pos, end_pos, buffer_rel_pos);
105}
106
107/* Action routine for a CDATA section enclosed in "<![CDATA[" and "]]>" brackets. */
108template <CodeUnit_Base C>
109inline void ParsingEngine<C>::CDATA_action(int start_pos, int end_pos) {
110        CDATA_count +=1;
111        CDATA_length += end_pos - start_pos;
112        LastItemAction(start_pos, end_pos, buffer_rel_pos);
113}
114
115/* Action routine for an XML processing instruction enclosed in "<?" and "?>" brackets. */
116template <CodeUnit_Base C>
117inline void ParsingEngine<C>::PI_action(int start_pos, int end_pos) {
118        PI_count +=1;
119        PI_length += end_pos - start_pos;
120        LastItemAction(start_pos, end_pos, buffer_rel_pos);
121}
122
123/* Action routine for an empty element enclosed in "<" and "/>" brackets. */
124template <CodeUnit_Base C>
125inline void ParsingEngine<C>::EmptyElement_action(int start_pos, int end_pos) {
126        empty_elem_count +=1;
127        empty_elem_length += end_pos - start_pos;
128        LastItemAction(start_pos, end_pos, buffer_rel_pos);
129}
130
131/* Action routine for a start tag enclosed in "<" and ">" brackets. */
132template <CodeUnit_Base C>
133inline void ParsingEngine<C>::StartTag_action(int start_pos, int end_pos) {
134        start_tag_count +=1;
135        start_tag_length += end_pos - start_pos;
136        nesting_depth += 1;
137        if (nesting_depth > max_nesting_depth) max_nesting_depth = nesting_depth;
138        LastItemAction(start_pos, end_pos, buffer_rel_pos);
139}
140
141/* Action routine for an end tag enclosed in "</" and ">" brackets. */
142template <CodeUnit_Base C>
143inline void ParsingEngine<C>::EndTag_action(int start_pos, int end_pos) {
144        end_tag_count +=1;
145        end_tag_length += end_pos - start_pos;
146        nesting_depth -= 1;
147        LastItemAction(start_pos, end_pos, buffer_rel_pos);
148}
149
150/* Action routine for an error item */
151template <CodeUnit_Base C>
152inline void ParsingEngine<C>::Error_action(int start_pos, int end_pos) {
153        error_item_count +=1;
154        error_item_length += end_pos - start_pos;
155        printf("Error: illegal markup at positions %i through %i.\n", start_pos, end_pos);
156        printf("length = %i; buffer_rel_pos = %i\n", end_pos - start_pos, buffer_rel_pos);
157        printf("last_item from %i to %i (rel_pos = %i)\n", 
158               last_item_start, last_item_stop, last_buffer_rel_pos);
159
160        cout << string((char *) GetCodeUnitPtr(start_pos), end_pos+1 - start_pos) << endl;
161
162
163
164}
165
166/* Action routine for a text item */
167template <CodeUnit_Base C>
168inline void ParsingEngine<C>::Text_action(int start_pos, int end_pos) {
169        text_item_count +=1;
170        text_item_length += end_pos - start_pos;
171        LastItemAction(start_pos, end_pos, buffer_rel_pos);
172}
173
174template <CodeUnit_Base C>
175inline void ParsingEngine<C>::Reference_action(int start_pos, int end_pos) {
176        reference_count +=1;
177        reference_length += end_pos - start_pos;
178        LastItemAction(start_pos, end_pos, buffer_rel_pos);
179}
180
181
182
183
184/* Three action routines for markup components are defined as follows.
185|ElementName_action| is the action routine called upon recognition of
186an element name immediately after the opening angle bracket of a start
187tag or empty element tag.  {\bf OR MAYBE THIS SHOULD BE DEFERRED UNTIL
188AFTER ATTRIBUTE PROCESSING SO THAT NAMESPACES ARE SET?}
189It is called with two parameters identifying the
190first and last character positions of the expected XML_name.
191Similarly, |PI_Target_action| is the action routine called upon recognition
192of the XML Name that occurs immediately after the opening "<?"
193delimiter of a processing instruction.
194
 The third action routine for markup components is |AttributeValue_action|,
which takes four parameters rather than two: the start and end positions
of the attribute name and of the attribute value.  {\bf OR POSSIBLY JUST
THE QUOTE MARK ITEMS, RELYING ON THE END OF THE LAST COMPONENT PROCESSED
TO MARK THE SPACE BEFORE THE ATT NAME - REQUIRES ELEMENT_NAME_ACTION}
199*/
200
201/* Semantic action routines for markup components. */
202/* Action routine for an element name occurring immediately after the
203   opening "<" of a start tag or empty element tag. */
204template <CodeUnit_Base C>
205inline void ParsingEngine<C>::ElementName_action(int start_pos, int end_pos) {
206}
207
208/* Action routine for a processing instruction target name occurring immediately
209   after the opening "<?" of a processing instruction. */
210template <CodeUnit_Base C>
211inline void ParsingEngine<C>::PI_Target_action(int start_pos, int end_pos) {
212}
213
214/* Action routine for an individual attribute/value pair occurring in
215   a element start tag or an empty element tag. */
216template <CodeUnit_Base C>
217inline void ParsingEngine<C>::AttributeValue_action(int name_start, int name_end, 
218                                  int val_start, int val_end) {
219        total_attribute_count+=1;
220        total_att_name_length += name_end - name_start;
221        total_att_value_length += val_end - val_start;
222}
223
224/* Action routine for an individual attribute/value pair occurring in
225   a element start tag or an empty element tag. */
226template <CodeUnit_Base C>
227inline void ParsingEngine<C>::Namespace_action(int name_start, int name_end, 
228                             int URI_start, int URI_end) {
229        namespace_count+=1;
230        total_namespace_name_length += name_end - name_start;
231        total_namespace_URI_length += URI_end - URI_start;
232}
233
234
235template <CodeUnit_Base C>
236inline void ParsingEngine<C>::FinalizeBuffer_action(int& preserve_pos) {
237#ifdef DEBUG
238        printf ("FinalizeBuffer; last 16 bytes + lookahead 16 =\n");
239        cout << string((char *) GetCodeUnitPtr(AbsPos()-16), 16) << "::" << string((char *) GetCodeUnitPtr(AbsPos()), 16) << endl;
240#endif
241
242        preserve_pos = AbsPos();
243#ifdef CODE_CLOCKING
244end_Interval (total_clocker, buffer_rel_pos);
245start_Interval (total_clocker);
246#endif
247}
248
249
250template <CodeUnit_Base C>
251inline void ParsingEngine<C>::DocumentStart_action() {
252#ifdef CODE_CLOCKING
253start_Interval (total_clocker);
254#endif
255}
256
257template <CodeUnit_Base C>
258inline void ParsingEngine<C>::DocumentEnd_action() {
259#ifdef CODE_CLOCKING
260end_Interval (total_clocker, buffer_rel_pos);
261#endif
262}
263
/* Print one line of statistics for a kind of item.
     stat_string - singular name of the item kind (e.g. "comment")
     count       - number of items seen
     total_lgth  - summed length of those items
   Output forms:
     count == 0 :  "0 <name>s."
     count == 1 :  "1 <name> of length <total_lgth>."
     otherwise  :  "<count> <name>s of avg. lgth <total_lgth/count>."
   (integer division, and only performed when count is neither 0 nor 1).

   The body is wrapped in do { ... } while (0) so that the macro expands to
   a single statement and is safe inside an unbraced if/else; invoke it
   with a trailing semicolon.  count and total_lgth are each evaluated
   exactly once. */
#define print_stats(stat_string, count, total_lgth) \
        do { \
                const int print_stats_count_ = (count); \
                const int print_stats_total_ = (total_lgth); \
                printf("%i %s", print_stats_count_, (stat_string)); \
                if (print_stats_count_ == 0) printf("s.\n"); \
                else if (print_stats_count_ == 1) printf(" of length %i.\n", print_stats_total_); \
                else printf("s of avg. lgth %i.\n", print_stats_total_ / print_stats_count_); \
        } while (0)
269
270
271
272
273int
274main(int argc, char * argv[]) {
275        if (argc != 2) {
276        printf("Usage: %s <filename>\n", argv[0]);
277                exit(-1);
278        }
279        char * filename = argv[1];
280#ifdef CODE_CLOCKING
281
282init_Code_Clocker_Session(8, // num code clockers
283                         64096, // max individual timestamp measurements
284                         4ul, // num session parameters
285                         65536, // num timestamp calibration measurements,
286                            65534,// num constant time loop executions
287                         8 // num constant time loop iterations
288                         );
289set_Session_Cmd_Line_Args(argv[0]);
290total_clocker = register_Code_Clocker("Markup_stats_total", "Total markup_stats time\n");
291#endif
292
293        for (int run = 0; run < REPEAT_RUNS; run++) {
294       
295        parser = Parser_Interface::ParserFactory(filename);
296       
297       
298        if (!parser->has_ByteOrderMark()) printf("No ");
299        printf("Byte Order Mark found.\n");
300
301        if (parser->get_version() == XML_1_0) printf("XML version 1.0 declared.\n");
302        else if (parser->get_version() == XML_1_1) printf("XML version 1.1 declared.\n");
303        else printf ("XML version 1.0 implied by default.\n");
304        if (parser->has_EncodingDecl()) {
305                printf("XML encoding declared:  %s\n", parser->get_Encoding());
306        }
307        if (parser->standalone_status() == Standalone_yes) 
308                printf("XML standalone = yes declared.\n");
309        else if (parser->standalone_status() == Standalone_no) 
310                printf("XML standalone = no declared.\n");
311        else printf ("XML standalone = no by default.\n");
312       
313        parser->ParseContent();
314        parser->~Parser_Interface();
315        printf("Run %i complete.\n", run);
316        }
317       
318        print_stats("comment", comment_count, comment_length);
319        print_stats("CDATA section", CDATA_count, CDATA_length);
320        print_stats("processing instruction", PI_count, PI_length);
321        print_stats("empty element", empty_elem_count, empty_elem_length);
322        print_stats("start tag", start_tag_count, start_tag_length);
323        printf("%i total attributes\n", attribute_count);
324        print_stats("attribute name", total_attribute_count, total_att_name_length);
325        print_stats("attribute value", total_attribute_count, total_att_value_length);
326        print_stats("namespace name", namespace_count, total_namespace_name_length);
327        print_stats("namespace URI", namespace_count, total_namespace_URI_length);
328        print_stats("end tag", end_tag_count, end_tag_length);
329        print_stats("text item", text_item_count, text_item_length);
330        print_stats("reference", reference_count, reference_length);
331        print_stats("error item", error_item_count, error_item_length);
332        printf("Maximum nesting depth = %i\n", max_nesting_depth);
333#ifdef CODE_CLOCKING
334        write_XML_File();
335#endif
336       
337        return(0);
338}
Note: See TracBrowser for help on using the repository browser.