source: trunk/src/engine.h @ 4225

Last change on this file since 4225 was 386, checked in by ksherdy, 9 years ago

Add commented out interface method to support GML2SVG demo.

File size: 7.4 KB
RevLine 
[4]1/*  engine.h - parabix parsing engine
[78]2    Copyright (c) 2007, 2008 Robert D. Cameron and Dan Lin
[4]3    Licensed to the public under the Open Software License 3.0.
4    Licensed to International Characters, Inc., under the Academic
5    Free License 3.0.
6
7*/
8#ifndef ENGINE_H
9#define ENGINE_H
10
[267]11/*  Parabix provides strings to the application using the WorkingCharacterSet.
[197]12    This parameter is set by template instantiation; normally only one
13    WorkingCharacterSet parameter is chosen.  However, it is possible
14    to compile different parsers having different WorkingCharacterSet
15    parameters within one run-time code base.
16*/
17
/* Selects the working character set.  Used in this header as the non-type
   template parameter W of Parser_Interface and ParsingEngine. */
18enum WorkingCharacterSet {UTF_8, UTF_16, UTF_32};
19
[73]20#include "xmlmodel.h"
21#include "xmldecl.h"
22#include "byteplex.h"
[4]23#include "bitlex.h"
[109]24#include "xml_error.h"
[106]25#include "contentmodel.h"
[124]26#include "symtab.h"
27
/* Function-like macro yielding the smaller of x and y.  Each argument
   appears twice in the replacement text, so an argument with side effects
   (e.g. min(i++, n)) may be evaluated twice.
   NOTE(review): because this is a macro named `min`, it also rewrites any
   later `min(...)` call in code that includes this header (std::min
   included) -- confirm that no includer relies on <algorithm>'s min. */
[73]28#define min(x,y) ((x) <(y) ?(x) :(y) )
[4]29/* A ParsingEngine is the principal class for parsing XML
30data.  */
31
/* Abstract parser interface, parameterized by the working character set W.
   The pure virtual members below mean this class cannot be instantiated on
   its own; the static ParserFactory overloads return a Parser_Interface
   pointer.
   NOTE(review): ownership of the returned pointer is not stated in this
   header -- confirm whether the caller is expected to delete it. */
[197]32template <WorkingCharacterSet W>
[37]33class Parser_Interface {
34public:
[263]35        virtual ~Parser_Interface();
        /* Factory overloads taking a filename, optionally with an existing
           Model_Info. */
[267]36        static Parser_Interface * ParserFactory(const char * filename);
37        static Parser_Interface * ParserFactory(const char * filename, Model_Info * m);
[99]38        // Constructor for a subsidiary parser for internal entities.
[267]39        static Parser_Interface * ParserFactory(const char * byte_buffer, int byte_count, Entity_Info * e, Model_Info * m);
        /* Parsing entry points.  All are pure virtual here; members with the
           same names and signatures are declared in ParsingEngine below. */
[37]40        virtual void ParseContent() = 0;
[108]41        virtual void Parse_DocumentContent() = 0;
[124]42        virtual void Parse_WF_Content() = 0;
[175]43        virtual void Parse_AnyContent() = 0;
44        virtual void Parse_MixedContent(symbol_set_t elems) = 0;
[156]45        virtual void Parse_ValidContent(CM_RegExp * cre, int & cur_state) = 0;
[124]46        virtual bool at_EOF() const = 0;
[97]47        virtual void Parse_ExtSubsetDecl() = 0;
[78]48        virtual void Parse_Prolog() = 0;
        /* Non-virtual queries; their definitions are not in this header.
           NOTE(review): presumably these report what was detected from the
           byte order mark / XML declaration -- confirm. */
[37]49        bool has_ByteOrderMark();
50        XML_version get_version();
51        XML_standalone standalone_status();
52        bool has_EncodingDecl();
[73]53        unsigned char * get_Encoding();
        /* Public raw-pointer data members.
           NOTE(review): ownership and lifetime are not stated here -- confirm. */
[99]54        Model_Info * model_info;
[100]55        Entity_Info * entity_Info;
[91]56
[197]57
        /* The *_action members below are declared non-virtual and are not
           defined in this header.
           NOTE(review): presumably these are the hooks an application
           supplies for each instantiation of W -- confirm. */
58        /* Action routine for document start. */
[267]59        void DocumentStart_action();
60
[197]61        /* Action routine for document end. */
62        void DocumentEnd_action();
[267]63
[197]64        /* Action routine for an XML comment in "<!--"  "-->" brackets. */
65        void Comment_action(unsigned char * item, int lgth);
[267]66
[197]67        /* Action routine called upon recognizing "<![CDATA[" to start a CDATA section. */
68        void CDATA_start_action(unsigned char * CDATA_ptr);
69
70        /* Action routine called upon recognizing "]]>" to end a CDATA section. */
71        void CDATA_end_action(unsigned char * CDATA_end_ptr);
[267]72
[197]73        /* Action routine for an XML processing instruction enclosed in "<?" and "?>" brackets. */
74        void PI_action(unsigned char * item, int lgth);
[267]75
[197]76        /* Action routine for an empty element enclosed in "<" and "/>" brackets. */
77        void EmptyElement_action(unsigned char * item, int lgth);
[267]78
[197]79        /* Action routine for a start tag enclosed in "<" and ">" brackets. */
80        void StartTag_action(unsigned char * item, int lgth);
[267]81
[197]82        /* Action routine for an end tag enclosed in "</" and ">" brackets. */
83        void EndTag_action(unsigned char * item, int lgth);
[267]84
[197]85        /* Action routine for an error item */
86        void Error_action(unsigned char * item, int lgth);
[267]87
[197]88        /* Action routine for a text item */
89        void Text_action(unsigned char * item, int lgth, bool more);
[267]90
[197]91        /* Action routine for a character or entity reference.*/
92        void Reference_action(unsigned char * item, int lgth);
[267]93
94        /* Action routine for an element name occurring immediately after the
[197]95        opening "<" of a start tag or empty element tag. */
96        void ElementName_action(unsigned char * item, int lgth);
[267]97
98        /* Action routine for a processing instruction target name occurring immediately
[197]99        after the opening "<?" of a processing instruction. */
100        void PI_Target_action(unsigned char * item, int lgth);
[267]101
[197]102        /* Action routine for an individual attribute/value pair occurring in
103        an element start tag or an empty element tag. */
[267]104        void AttributeValue_action(unsigned char * name, int name_lgth,
[197]105                                   unsigned char * val, int val_lgth);
[267]106
[197]107        /* Action routine for an individual namespace binding occurring in
108        an element start tag or an empty element tag. */
        /* NOTE(review): the int parameters here are named name_end / URI_end,
           unlike the *_lgth parameters of AttributeValue_action -- confirm
           whether they carry end positions or lengths. */
[267]109        void Namespace_action(unsigned char * name, int name_end,
[197]110                              unsigned char * URI, int URI_end);
[267]111
[197]112        /*Action routine for end of buffer events. */
113        void FinalizeBuffer_action();
[267]114
[197]115        /*Document Type actions.*/
116        void Doctype_action(unsigned char * item, int lgth);
117        void PEReference_action(unsigned char * item, int lgth);
[267]118
[197]119        void Prolog_action(unsigned char * item, int lgth);
[267]120
[197]121        void ExtSubsetDecl_action(unsigned char * item, int lgth);
[386]122       
123        // GML2SVG Demo
124        // void ilax_coords_to_path(char ts, char cs, char decimal);
[267]125
[37]126};
[4]127
/* Parser implementation deriving from Parser_Interface<W>.
   Template parameter B must provide a member `Base`, which is used below as
   the template argument of XML_Decl_Parser; W is passed on to the interface.
   NOTE(review): B presumably identifies the source code-unit/encoding
   class -- confirm against the byteplex/bitlex headers. */
[197]128template <class B, WorkingCharacterSet W>
129class ParsingEngine : public Parser_Interface<W> {
[37]130public:
        /* NOTE(review): parameter roles are not documented here; is_external
           presumably marks an external entity -- confirm. */
[100]131        ParsingEngine(Entity_Info * e, Model_Info * m, Byteplex * b, bool is_external);
[263]132        virtual ~ParsingEngine();
[37]133        void ParseContent();
        /* NOTE: several members declared in this protected section (at_EOF,
           Parse_Prolog, Parse_DocumentContent, Parse_WF_Content,
           Parse_ValidContent, Parse_AnyContent, Parse_MixedContent,
           Parse_ExtSubsetDecl) have the same names and signatures as pure
           virtuals that are public in Parser_Interface. */
134protected:
[93]135        bool StrictWellFormedness;
136
        /* `vector` is used unqualified and <vector> is not included directly
           by this header; it relies on the headers included above
           (TODO confirm which one provides it). */
[91]137        vector<int> LastAttOccurrence;
[160]138        XML_Decl_Parser<B::Base> * decl_parser;
[267]139
[66]140        int text_or_markup_start;
[37]141        /* Getters for current point/position information. */
142        int AbsPos() const;
[92]143        int LengthFrom(int start_pos) const;
[37]144        int BufferRelPos() const;
145        unsigned char * cur() const;
[160]146        unsigned char * GetCodeUnitPtr(int pos);
[4]147
[37]148        bool at_EOF () const;
[66]149        /*Internal helper for text action*/
[180]150        void text_if_nonnull_action(bool more);
[37]151        /* Mutators that advance the input. */
152        void Advance(int n);
153        void ScanTo(int lex_item);
[188]154        void ScanTextTo(int lex_item);  // Specialized version.
[170]155        void AdjustBufferEndForIncompleteSequences();
[134]156        void AdvanceBuffers();
[37]157        /* Parsing routines. */
[4]158
        /* Error reporting, one routine per kind of error code. */
[110]159        void WF_Error (XML_Constraint errCode);
160        void Validity_Error (XML_Constraint errCode);
[97]161        void Syntax_Error (XML_NonTerminal errNT);
[267]162
        /* Parsing routines for individual markup items. */
[37]163        void Parse_Comment ();
164        void Parse_StartTag ();
165        void Parse_EndTag ();
166        void Parse_CDATA ();
167        void Parse_PI ();
[267]168        void Parse_CharRef ();
[175]169        void Parse_EntityRef ();
170        void Parse_EntityRef_inMixed(symbol_set_t elems);
171        void Parse_EntityRef_inAnyContent();
[267]172
[78]173        /* Parsing routine for Document Type*/
174        void Parse_DocType ();
[115]175        void Parse_ExternalID (char *& SystemLiteral, char *& PubidLiteral);
176        void Parse_SystemLiteral ();
177        void Parse_PubidLiteral ();
[78]178        void Parse_IntSubset ();
179        void Parse_PEReference ();
180        void Parse_Elementdecl ();
[106]181        ContentModel * Parse_RemainingMixed ();
182        Content_RE * Parse_RemainingChildren ();
183        Content_RE * Parse_Cp();
[78]184        void Parse_AttlistDecl ();
[94]185        void Parse_Notation (ATT_info * this_info);
[91]186        void Parse_Enumeration (ATT_info * this_info);
187        void Parse_DefaultDecl (ATT_info * this_info);
[78]188        void Parse_Entitydecl ();
189        void Parse_Notationdecl ();
190        void requireWS ();
191        void Parse_AttValue ();
[100]192        void Parse_GEntityValue(GEntity_info * this_info);
193        void Parse_PEntityValue(PEntity_info * this_info);
        /* NOTE(review): ownership of the returned char * buffers is not
           stated in this header -- confirm who frees them. */
194        char * Replace_EntityRef(bool& is_simple);
195        char * Replace_CharRef();
[78]196        void Parse_Prolog();
[108]197        void Parse_DocumentContent();
[267]198
        /* Routines named for well-formedness (WF) parsing. */
[124]199        void Parse_WF_Element();
200        void Parse_WF_Content();
201        int Parse_WF_StartTag(bool& is_empty);
[160]202        void Parse_WF_EndTag(int nameID);
[124]203
        /* Routines named for validating parsing; cre/cur_state have the same
           types as in Parser_Interface::Parse_ValidContent. */
[156]204        void Parse_ValidEntityRef(CM_RegExp * cre, int & cur_state);
[108]205        int Parse_ValidElement();
[156]206        void Parse_ValidContent(CM_RegExp * cre, int & cur_state);
[108]207        void Parse_AnyContent();
208        void Parse_MixedContent(symbol_set_t elems);
[267]209
[108]210        int Parse_ValidStartTag(bool& is_empty);
[267]211
        /* NOTE(review): the meaning of the int results is not stated here;
           presumably a symbol-table ID -- confirm against symtab.h. */
[174]212        int Parse_Nmtoken();
[267]213        int Parse_Name();
214
[97]215        /*Parsing routine for external entities*/
216        void Parse_ExtSubsetDecl ();
[267]217
        /* Repeated access label: the members above are already protected. */
[197]218protected:
219        /* Co-classes */
[81]220
[267]221        Byteplex * byteplex;
[197]222        Bitplex * bitplex;
223        Lexer_Interface * lexer;
224        /* Parallel data streams for current buffer full of XML data. */
225        BytePack * x8data;
226        LexicalStreamSet * buf;
[267]227
        /* Buffer position bookkeeping.
           NOTE(review): units (bytes vs. code units) are not stated here --
           confirm. */
[197]228        int buffer_base_pos;
229        int buffer_rel_pos;
230        int buffer_limit_pos;
[37]231};
[4]232
233
234#endif
Note: See TracBrowser for help on using the repository browser.