source: trunk/src/contentmodel.h @ 3977

Last change on this file since 3977 was 219, checked in by cameron, 11 years ago

Content model notes.

File size: 3.9 KB
Line 
1/*  contentmodel.h - Content Models from !ELEMENT declarations.
2    Copyright (c) 2008, Robert D. Cameron and Dan Lin.
3    Licensed to the public under the Open Software License 3.0.
4    Licensed to International Characters, Inc., under the Academic
5    Free License 3.0.
6*/
7
8#ifndef CONTENTMODEL_H_
9#define CONTENTMODEL_H_
10
#include <iostream>
#include <iterator>
#include <string>
#include <vector>
14
/* NOTE(review): a using-directive at header scope is visible to every file
   that includes this header.  The declarations below rely on it (they name
   vector without qualification), so it cannot simply be dropped. */
using namespace std;

/* Hash table from int to int.  In this header it is the type of
   CM_Mixed::elements, of Content_RE::first_map / follow_map, and of the
   transition map handed to Set_Follow_Map.
   NOTE(review): hash_map is a non-standard library extension and none of the
   includes above declares it; presumably an earlier include in each
   translation unit provides it -- confirm. */
typedef hash_map<int, int> symbol_set_t;
18
19/* 
20  Content Models describe the expected structure of content between
21  the start and end tags of an XML element.   The following data
22  declarations represent content model information derived from
23  the XML DTD.   Content models from XML Schemas or other specifications
24  are not described here.
25
26  The content models are declared using !ELEMENT declarations.
27  http://www.w3.org/TR/xml/#NT-elementdecl
28  [45] elementdecl ::= '<!ELEMENT' S  Name  S  contentspec  S? '>'
29  [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
30
31*/
32
/* Tag naming the four alternatives of the contentspec production:
   'EMPTY', 'ANY', Mixed, or a children regular expression. */
enum ContentModel_t {
        cm_Empty  = 0,
        cm_Any    = 1,
        cm_Mixed  = 2,
        cm_RegExp = 3
};
35
36class ContentModel {
37        public:
38        ContentModel_t cm_type;
39};
40
41class CM_Any: public ContentModel {
42public:
43        CM_Any();
44};
45
46class CM_Empty: public ContentModel {
47public:
48        CM_Empty();
49};
50
51class CM_Mixed: public ContentModel {
52public:
53        CM_Mixed();
54        symbol_set_t elements;
55};
56
57/*
58  Class Content_RE is used for content models described using regular
59  expressions in the DTD.
60
61  [47] children ::=  (choice | seq) ('?' | '*' | '+')?
62  [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
63  [49] choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
64  [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
65
66*/
67
68class Content_RE{
69public:
70        bool matches_empty;
71        symbol_set_t first_map;
72        symbol_set_t follow_map;
73        /* Set_IDs recursively sets sequential numeric identifiers for each Name in a content model. */
74        virtual int Set_IDs(int base_ID) = 0;
75        /* Build the map of the symbols that may occur first for this content_RE. */
76        virtual void Set_First_Map() = 0;
77        /* Build the map of the symbols that may follow this content_RE. */
78        virtual void Set_Follow_Map(symbol_set_t * transition_map) = 0;
79};
80
81/* Models constructed according to the [50] seq production. 
82   subCMs are the models for each cp in '(' S? cp ( S? ',' S? cp )* S? ')' */
83class CRE_Seq : public Content_RE {
84public:
85        CRE_Seq();
86        vector<Content_RE *> subCMs;
87        int Set_IDs(int base_ID);
88        void Set_First_Map();
89        void Compile();
90        void Set_Follow_Map(symbol_set_t * transition_map);
91};
92
93/* Models constructed according to the [49] choice production.
94   subCMs are the models for each cp in '(' S? cp ( S? '|' S? cp )+ S? ')' */
95class CRE_Choice : public Content_RE {
96public:
97        CRE_Choice();
98        vector<Content_RE *> subCMs;
99        int Set_IDs(int base_ID);
100        void Set_First_Map();
101        void Compile();
102        void Set_Follow_Map(symbol_set_t * transition_map);
103};
104
105/* Models constructed when '*' (Star), '+' (Plus) or '?' (Opt) is used.
106   subCM is the model for the Name, choice or seq in
107   (Name | choice | seq) ('?' | '*' | '+')   */
108class CRE_Star : public Content_RE {
109public:
110        CRE_Star(Content_RE * s);
111        Content_RE * subCM;
112        int Set_IDs(int base_ID);
113        void Set_First_Map();
114        void Set_Follow_Map(symbol_set_t * transition_map);
115};
116
117class CRE_Plus : public Content_RE {
118public:
119        CRE_Plus(Content_RE * s);
120        Content_RE * subCM;
121        int Set_IDs(int base_ID);
122        void Set_First_Map();
123        void Set_Follow_Map(symbol_set_t * transition_map);
124};
125
126class CRE_Opt : public Content_RE {
127public:
128        CRE_Opt(Content_RE * s);
129        Content_RE * subCM;
130        int Set_IDs(int base_ID);
131        void Set_First_Map();
132        void Set_Follow_Map(symbol_set_t * transition_map);
133};
134
135class CRE_Name : public Content_RE {
136public:
137        CRE_Name(int id);
138        int elemID;
139        int stateID;
140        int Set_IDs(int base_ID);
141        void Set_First_Map();
142        void Set_Follow_Map(symbol_set_t * transition_map);
143};
144
145class CM_RegExp: public ContentModel {
146public:
147        CM_RegExp();
148        symbol_set_t  * transition_map;
149        Content_RE * content_re;
150};
151
152#endif /*CONTENTMODEL_H_*/
Note: See TracBrowser for help on using the repository browser.