source: trunk/markup_stats.cxx @ 91

Last change on this file since 91 was 91, checked in by cameron, 11 years ago

ATTLIST semantics

File size: 12.8 KB
Line 
1/*  markup_stats.c - parabix demo program
2    Copyright (c) 2007, Robert D. Cameron.
3    Licensed to the public under the Open Software License 3.0.
4    Licensed to International Characters, Inc., under the Academic
5    Free License 3.0.
6
7*/
8
9#include <stdio.h>
10#include <stdlib.h>
11#include <string.h>
12#include <errno.h>
13#include <sys/types.h>
14#include <sys/stat.h>
15#include <string>
16#include <iostream>
17using namespace std;
18
19#ifdef CODE_CLOCKING
20#include "codeclocker/clocker/code_clocker_session.h"
21#include "codeclocker/clocker/code_clocker_session.c"
22Code_Clocker * total_clocker;
23#endif
24
25
26
27//#include "src/ilax.h"
28#include "src/engine.h"
29
30#ifndef REPEAT_RUNS
31#define REPEAT_RUNS 1
32#endif
33
34/* Internals */
35#include "src/xmlmodel.h"
36#include "src/bitplex.h"
37#include "src/byteplex.h"
38#include "src/xmldecl.h"
39#include "src/bitlex.h"
40
41
42#include "src/xmlmodel.c"
43#include "src/bitplex.c"
44#include "src/byteplex.c"
45#include "src/xmldecl.c"
46#include "src/bitlex.c"
47#include "src/engine.c"
48
49/* Global declarations of parsing engine. */
50Parser_Interface * parser;
51
/* Global statistics, all starting at zero.  The semantic action routines
   below update them and main() prints them once parsing is finished.
   "*_count" variables tally occurrences; "*_length" variables accumulate
   (end position - start position) spans. */

/* Comments. */
int comment_count = 0, comment_length = 0;

/* CDATA sections.  CDATA_start_pos is written by CDATA_start_action and read
   back by CDATA_end_action to compute the section length. */
int CDATA_start_count = 0, CDATA_start_pos = 0;
int CDATA_length = 0, CDATA_end_count = 0;

/* Processing instructions. */
int PI_count = 0, PI_length = 0;

/* Empty-element tags, start tags and end tags. */
int empty_elem_count = 0, empty_elem_length = 0;
int start_tag_count = 0, start_tag_length = 0;
int attribute_count = 0;   /* not updated by any routine visible in this file */
int end_tag_count = 0, end_tag_length = 0;

/* References, text items and error items. */
int reference_count = 0, reference_length = 0;
int text_item_count = 0, text_item_length = 0;
int error_item_count = 0, error_item_length = 0;

/* Element nesting: current depth and the deepest level reached. */
int nesting_depth = 0, max_nesting_depth = 0;

/* Attribute totals maintained by AttributeValue_action. */
int total_attribute_count = 0;
int total_att_name_length = 0, total_att_value_length = 0;

/* Namespace totals maintained by Namespace_action. */
int namespace_count = 0;
int total_namespace_name_length = 0, total_namespace_URI_length = 0;
83
/* Span and buffer-relative position of the most recently recorded markup
   item.  Written by LastItemAction; Error_action prints them as context. */
int last_item_start = 0;
int last_item_stop = 0;
int last_buffer_rel_pos = 0;

/* Record the item just processed as the "last item".
   start_pos, end_pos  - span of the item.
   buffer_rel_pos      - buffer-relative position supplied by the caller.
   In DEBUG builds, an item that starts before the previous item stopped is
   reported on stdout before the record is overwritten. */
void LastItemAction(int start_pos, int end_pos, int buffer_rel_pos) {
#ifdef DEBUG
    const bool starts_before_previous_stop = start_pos < last_item_stop;
    if (starts_before_previous_stop) {
        printf("Anomaly start_pos %i < last_item_stop_pos %i \n", start_pos, last_item_stop);
        printf("rel_positions: last = %i, this = %i\n", last_buffer_rel_pos, buffer_rel_pos);
    }
#endif
    last_buffer_rel_pos = buffer_rel_pos;
    last_item_stop = end_pos;
    last_item_start = start_pos;
}
99
100/* Action routine for an XML comment in "<!--"  "-->" brackets. */
101template <CodeUnit_Base C>
102inline void ParsingEngine<C>::Comment_action(int start_pos, int end_pos) {
103        comment_count +=1;
104        comment_length += end_pos - start_pos;
105        LastItemAction(start_pos, end_pos, buffer_rel_pos);
106}
107
108/* Action routine called upon recognizing "<![CDATA[" to start a CDATA section. */
109template <CodeUnit_Base C>
110inline void ParsingEngine<C>::CDATA_start_action(int pos) {
111        CDATA_start_pos = pos - 9;
112        CDATA_start_count +=1;
113        LastItemAction(pos-9, pos, buffer_rel_pos);
114}
115
116/* Action routine called upon recognizing "]]>" to end a CDATA section. */
117template <CodeUnit_Base C>
118inline void ParsingEngine<C>::CDATA_end_action(int pos) {
119        CDATA_end_count +=1;
120        CDATA_length += pos - CDATA_start_pos;
121        LastItemAction(pos-3, pos, buffer_rel_pos);
122}
123
124/* Action routine for an XML processing instruction enclosed in "<?" and "?>" brackets. */
125template <CodeUnit_Base C>
126inline void ParsingEngine<C>::PI_action(int start_pos, int end_pos) {
127        PI_count +=1;
128        PI_length += end_pos - start_pos;
129        LastItemAction(start_pos, end_pos, buffer_rel_pos);
130}
131
132/* Action routine for an empty element enclosed in "<" and "/>" brackets. */
133template <CodeUnit_Base C>
134inline void ParsingEngine<C>::EmptyElement_action(int start_pos, int end_pos) {
135        empty_elem_count +=1;
136        empty_elem_length += end_pos - start_pos;
137        LastItemAction(start_pos, end_pos, buffer_rel_pos);
138}
139
140/* Action routine for a start tag enclosed in "<" and ">" brackets. */
141template <CodeUnit_Base C>
142inline void ParsingEngine<C>::StartTag_action(int start_pos, int end_pos) {
143        start_tag_count +=1;
144        start_tag_length += end_pos - start_pos;
145        nesting_depth += 1;
146        if (nesting_depth > max_nesting_depth) max_nesting_depth = nesting_depth;
147        LastItemAction(start_pos, end_pos, buffer_rel_pos);
148}
149
150/* Action routine for an end tag enclosed in "</" and ">" brackets. */
151template <CodeUnit_Base C>
152inline void ParsingEngine<C>::EndTag_action(int start_pos, int end_pos) {
153        end_tag_count +=1;
154        end_tag_length += end_pos - start_pos;
155        nesting_depth -= 1;
156        LastItemAction(start_pos, end_pos, buffer_rel_pos);
157}
158
159/* Action routine for an error item */
160template <CodeUnit_Base C>
161inline void ParsingEngine<C>::Error_action(int start_pos, int end_pos) {
162        error_item_count +=1;
163        error_item_length += end_pos - start_pos;
164        printf("Error: illegal markup at positions %i through %i.\n", start_pos, end_pos);
165        printf("length = %i; buffer_rel_pos = %i\n", end_pos - start_pos, buffer_rel_pos);
166        printf("last_item from %i to %i (rel_pos = %i)\n", 
167               last_item_start, last_item_stop, last_buffer_rel_pos);
168
169        cout << string((char *) GetCodeUnitPtr(start_pos), end_pos+1 - start_pos) << endl;
170
171
172
173}
174
175/* Action routine for a text item */
176template <CodeUnit_Base C>
177inline void ParsingEngine<C>::Text_action(int start_pos, int end_pos) {
178        text_item_count +=1;
179        text_item_length += end_pos - start_pos;
180        LastItemAction(start_pos, end_pos, buffer_rel_pos);
181}
182
183template <CodeUnit_Base C>
184inline void ParsingEngine<C>::Reference_action(int start_pos, int end_pos) {
185        reference_count +=1;
186        reference_length += end_pos - start_pos;
187        LastItemAction(start_pos, end_pos, buffer_rel_pos);
188}
189
190
191
192
/* Three action routines for markup components are defined as follows.
|ElementName_action| is the action routine called upon recognition of
an element name immediately after the opening angle bracket of a start
tag or empty element tag.  (Open question: should this be deferred until
after attribute processing, so that namespaces are already set?)
It is called with two parameters identifying the
first and last character positions of the expected XML name.
Similarly, |PI_Target_action| is the action routine called upon recognition
of the XML name that occurs immediately after the opening "<?"
delimiter of a processing instruction.

The third action routine for markup components is |AttributeValue_action|,
which takes four parameters rather than two: the start and end positions of
the attribute name, followed by the start and end positions of its value.
(Open question: possibly pass just the quote-mark positions, relying on the
end of the last component processed to mark the space before the attribute
name; this would require |ElementName_action|.)
*/
209
210/* Semantic action routines for markup components. */
211/* Action routine for an element name occurring immediately after the
212   opening "<" of a start tag or empty element tag. */
213template <CodeUnit_Base C>
214inline void ParsingEngine<C>::ElementName_action(int start_pos, int end_pos) {
215}
216
217/* Action routine for a processing instruction target name occurring immediately
218   after the opening "<?" of a processing instruction. */
219template <CodeUnit_Base C>
220inline void ParsingEngine<C>::PI_Target_action(int start_pos, int end_pos) {
221}
222
223/* Action routine for an individual attribute/value pair occurring in
224   a element start tag or an empty element tag. */
225template <CodeUnit_Base C>
226inline void ParsingEngine<C>::AttributeValue_action(int name_start, int name_end, 
227                                  int val_start, int val_end) {
228        total_attribute_count+=1;
229        total_att_name_length += name_end - name_start;
230        total_att_value_length += val_end - val_start;
231}
232
233/* Action routine for an individual attribute/value pair occurring in
234   a element start tag or an empty element tag. */
235template <CodeUnit_Base C>
236inline void ParsingEngine<C>::Namespace_action(int name_start, int name_end, 
237                             int URI_start, int URI_end) {
238        namespace_count+=1;
239        total_namespace_name_length += name_end - name_start;
240        total_namespace_URI_length += URI_end - URI_start;
241}
242
243
244template <CodeUnit_Base C>
245inline void ParsingEngine<C>::FinalizeBuffer_action(int& preserve_pos) {
246#ifdef DEBUG
247        printf ("FinalizeBuffer; last 16 bytes + lookahead 16 =\n");
248        cout << string((char *) GetCodeUnitPtr(AbsPos()-16), 16) << "::" << string((char *) GetCodeUnitPtr(AbsPos()), 16) << endl;
249#endif
250
251        preserve_pos = AbsPos();
252#ifdef CODE_CLOCKING
253end_Interval (total_clocker, buffer_rel_pos);
254start_Interval (total_clocker);
255#endif
256}
257
258
259template <CodeUnit_Base C>
260inline void ParsingEngine<C>::DocumentStart_action() {
261#ifdef CODE_CLOCKING
262start_Interval (total_clocker);
263#endif
264}
265
266template <CodeUnit_Base C>
267inline void ParsingEngine<C>::DocumentEnd_action() {
268#ifdef CODE_CLOCKING
269end_Interval (total_clocker, buffer_rel_pos);
270#endif
271}
272
/* print_stats(stat_string, count, total_lgth)
   Print one statistics line to stdout:
     count == 0 :  "0 <stat_string>s."
     count == 1 :  "1 <stat_string> of length <total_lgth>."
     otherwise  :  "<count> <stat_string>s of avg. lgth <total_lgth/count>."
   (the average uses integer division).
   The body is wrapped in do { } while (0) so the macro expands to a single
   statement and is safe inside an unbraced if/else.  Each numeric argument is
   evaluated exactly once into a local, so expression arguments are grouped
   correctly. */
#define print_stats(stat_string, count, total_lgth) \
        do { \
                const int print_stats_count_ = (count); \
                const int print_stats_total_ = (total_lgth); \
                printf("%i %s", print_stats_count_, (stat_string)); \
                if (print_stats_count_ == 0) printf("s.\n"); \
                else if (print_stats_count_ == 1) printf(" of length %i.\n", print_stats_total_); \
                else printf("s of avg. lgth %i.\n", print_stats_total_ / print_stats_count_); \
        } while (0)
278
279
280template <CodeUnit_Base C>
281inline void ParsingEngine<C>::Doctype_action(int start_pos, int end_pos) {
282#ifdef SHOW_DTD_ACTIONS
283        printf("Document Type:\n");
284        cout << string((char *) GetCodeUnitPtr(start_pos), end_pos-start_pos) <<endl;
285#endif
286}
287
288template <CodeUnit_Base C>
289inline void ParsingEngine<C>::PEReference_action(int start_pos, int end_pos) {
290}
291
292template <CodeUnit_Base C>
293inline void ParsingEngine<C>::Elementdecl_action(int start_pos, int end_pos) {
294#ifdef SHOW_DTD_ACTIONS
295        printf("Elementdecl:\n");
296        cout << string((char *) GetCodeUnitPtr(start_pos), end_pos-start_pos) <<endl;
297#endif
298}
299
300template <CodeUnit_Base C>
301inline void ParsingEngine<C>::AttlistDecl_action(int start_pos, int end_pos) {
302#ifdef SHOW_DTD_ACTIONS
303        printf("AttlistDecl:\n");
304        cout << string((char *) GetCodeUnitPtr(start_pos), end_pos-start_pos) <<endl;
305#endif
306}
307
308template <CodeUnit_Base C>
309inline void ParsingEngine<C>::Entitydecl_action(int entity_name_start, int entity_name_end, int start_pos, int end_pos) {
310#ifdef SHOW_DTD_ACTIONS
311        printf("Entitydecl:\n");
312        cout << string((char *) GetCodeUnitPtr(start_pos), end_pos-start_pos) <<endl;
313#endif
314}
315
316template <CodeUnit_Base C>
317inline void ParsingEngine<C>::Notationdecl_action(int start_pos, int end_pos) {
318#ifdef SHOW_DTD_ACTIONS
319        printf("Notationdecl:\n");
320        cout << string((char *) GetCodeUnitPtr(start_pos), end_pos-start_pos) <<endl;
321#endif
322}
323
324template <CodeUnit_Base C>
325inline void ParsingEngine<C>::Prolog_action(int start_pos, int end_pos) {
326#ifdef SHOW_DTD_ACTIONS
327        printf("Prolog:\n");
328        cout << string((char *) GetCodeUnitPtr(start_pos), end_pos-start_pos) <<endl;
329#endif
330}
331
332int
333main(int argc, char * argv[]) {
334        if (argc != 2) {
335        printf("Usage: %s <filename>\n", argv[0]);
336                exit(-1);
337        }
338        char * filename = argv[1];
339#ifdef CODE_CLOCKING
340
341init_Code_Clocker_Session(8, // num code clockers
342                         64096, // max individual timestamp measurements
343                         4ul, // num session parameters
344                         65536, // num timestamp calibration measurements,
345                            65534,// num constant time loop executions
346                         8 // num constant time loop iterations
347                         );
348set_Session_Cmd_Line_Args(argv[0]);
349total_clocker = register_Code_Clocker("Markup_stats_total", "Total markup_stats time\n");
350#endif
351
352        for (int run = 0; run < REPEAT_RUNS; run++) {
353       
354        parser = Parser_Interface::ParserFactory(filename);
355       
356       
357        if (!parser->has_ByteOrderMark()) printf("No ");
358        printf("Byte Order Mark found.\n");
359
360        if (parser->get_version() == XML_1_0) printf("XML version 1.0 declared.\n");
361        else if (parser->get_version() == XML_1_1) printf("XML version 1.1 declared.\n");
362        else printf ("XML version 1.0 implied by default.\n");
363        if (parser->has_EncodingDecl()) {
364                printf("XML encoding declared:  %s\n", parser->get_Encoding());
365        }
366        if (parser->standalone_status() == Standalone_yes) 
367                printf("XML standalone = yes declared.\n");
368        else if (parser->standalone_status() == Standalone_no) 
369                printf("XML standalone = no declared.\n");
370        else printf ("XML standalone = no by default.\n");
371       
372        parser->Parse_Prolog();
373        parser->ParseContent();
374        parser->~Parser_Interface();
375        printf("Run %i complete.\n", run);
376        }
377       
378        print_stats("comment", comment_count, comment_length);
379        print_stats("CDATA section", CDATA_end_count, CDATA_length);
380        print_stats("processing instruction", PI_count, PI_length);
381        print_stats("empty element", empty_elem_count, empty_elem_length);
382        print_stats("start tag", start_tag_count, start_tag_length);
383        printf("%i total attributes\n", attribute_count);
384        print_stats("attribute name", total_attribute_count, total_att_name_length);
385        print_stats("attribute value", total_attribute_count, total_att_value_length);
386        print_stats("namespace name", namespace_count, total_namespace_name_length);
387        print_stats("namespace URI", namespace_count, total_namespace_URI_length);
388        print_stats("end tag", end_tag_count, end_tag_length);
389        print_stats("text item", text_item_count, text_item_length);
390        print_stats("reference", reference_count, reference_length);
391        print_stats("error item", error_item_count, error_item_length);
392        printf("Maximum nesting depth = %i\n", max_nesting_depth);
393#ifdef CODE_CLOCKING
394        write_XML_File();
395#endif
396       
397        return(0);
398}
Note: See TracBrowser for help on using the repository browser.