source: trunk/src/engine.h @ 73

Last change on this file since 73 was 73, checked in by cameron, 11 years ago

Refactored Parsing Engine

File size: 4.0 KB
Line 
1/*  engine.h - parabix parsing engine
2    Copyright (c) 2007, 2008 Robert D. Cameron
3    Licensed to the public under the Open Software License 3.0.
4    Licensed to International Characters, Inc., under the Academic
5    Free License 3.0.
6
7*/
8#ifndef ENGINE_H
9#define ENGINE_H
10
11#include "xmlmodel.h"
12#include "xmldecl.h"
13#include "byteplex.h"
14#include "bitlex.h"
15
16#define min(x,y) ((x) <(y) ?(x) :(y) )
17/* A ParsingEngine is the principal class for parsing XML
18data.  */
19
20class Parser_Interface {
21public:
22        ~Parser_Interface();
23        static Parser_Interface * ParserFactory(char * filename);
24        virtual void ParseContent() = 0;
25        unsigned char * GetCodeUnitPtr(int pos);
26        bool has_ByteOrderMark();
27        XML_version get_version();
28        XML_standalone standalone_status();
29        bool has_EncodingDecl();
30        unsigned char * get_Encoding();
31protected:
32        /* Co-classes */
33        Model_Info * model_info;
34        Byteplex * byteplex;   
35        Bitplex * bitplex;
36        Lexer_Interface * lexer;
37        /* Parallel data streams for current buffer full of XML data. */
38        BytePack * x8data;
39        LexicalStreamSet * buf;
40               
41        int buffer_base_pos;
42        int buffer_rel_pos;
43        int buffer_limit_pos;
44};
45
46template <CodeUnit_Base C>
47class ParsingEngine : public Parser_Interface {
48public:
49        ParsingEngine(Model_Info * m, Byteplex * b);
50        ~ParsingEngine();
51        void ParseContent();
52protected:
53
54        XML_Decl_Parser<C> * decl_parser;
55       
56        int text_or_markup_start;
57        /* Getters for current point/position information. */
58        int AbsPos() const;
59        int BufferRelPos() const;
60        unsigned char * cur() const;
61
62        bool at_EOF () const;
63        /*Internal helper for text action*/
64        void text_if_nonnull_action();
65        /* Mutators that advance the input. */
66        void Advance(int n);
67        void ScanTo(int lex_item);
68
69        void AdvanceBuffers(int preserve_pos);
70        /* Parsing routines. */
71
72        void Parse_Markup ();
73        void Parse_Comment ();
74        void Parse_StartTag ();
75        void Parse_EndTag ();
76        void Parse_CDATA ();
77        void Parse_PI ();
78        void Parse_Reference ();       
79       
80        /* Action routine for document start. */
81        void DocumentStart_action();   
82       
83        /* Action routine for document end. */
84        void DocumentEnd_action();
85       
86        /* Action routine for an XML comment in "<!--"  "-->" brackets. */
87        void Comment_action(int start_pos, int end_pos);
88       
89        /* Action routine for a CDATA section enclosed in "<![CDATA[" and "]]>" brackets. */
90        void CDATA_action(int start_pos, int end_pos);
91       
92        /* Action routine for an XML processing instruction enclosed in "<?" and "?>" brackets. */
93        void PI_action(int start_pos, int end_pos);
94       
95        /* Action routine for an empty element enclosed in "<" and "/>" brackets. */
96        void EmptyElement_action(int start_pos, int end_pos);
97       
98        /* Action routine for a start tag enclosed in "<" and ">" brackets. */
99        void StartTag_action(int start_pos, int end_pos);
100       
101        /* Action routine for an end tag enclosed in "</" and ">" brackets. */
102        void EndTag_action(int start_pos, int end_pos);
103       
104        /* Action routine for an error item */
105        void Error_action(int start_pos, int end_pos);
106       
107        /* Action routine for a text item */
108        void Text_action(int start_pos, int end_pos);
109       
110        /* Action routine for a character or entity reference.*/
111        void Reference_action(int start_pos, int end_pos);
112       
113        /* Action routine for an element name occurring immediately after the
114        opening "<" of a start tag or empty element tag. */
115        void ElementName_action(int start_pos, int end_pos);
116       
117        /* Action routine for a processing instruction target name occurring immediately
118        after the opening "<?" of a processing instruction. */
119        void PI_Target_action(int start_pos, int end_pos);
120       
121        /* Action routine for an individual attribute/value pair occurring in
122        a element start tag or an empty element tag. */
123        void AttributeValue_action(int name_start, int name_end, 
124                                        int val_start, int val_end);
125       
126        /* Action routine for an individual namespace binding occurring in
127        a element start tag or an empty element tag. */
128        void Namespace_action(int name_start, int name_end, 
129                                int URI_start, int URI_end);
130       
131        /*Action routine for end of buffer events.
132         The preserve_pos should be set to indicate the position
133         of data that must be copied into the new buffer.*/
134        void FinalizeBuffer_action(int& preserve_pos);
135
136};
137
138
139#endif
Note: See TracBrowser for help on using the repository browser.