[2720] | 1 | /* |
---|
| 2 | * Unless required by applicable law or agreed to in writing, software |
---|
| 3 | * distributed under the License is distributed on an "AS IS" BASIS, |
---|
| 4 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
---|
| 5 | * See the License for the specific language governing permissions and |
---|
| 6 | * limitations under the License. |
---|
| 7 | */ |
---|
| 8 | |
---|
| 9 | /* |
---|
| 10 | * $Id: DFAContentModel.hpp 677705 2008-07-17 20:15:32Z amassari $ |
---|
| 11 | */ |
---|
| 12 | |
---|
| 13 | #if !defined(XERCESC_INCLUDE_GUARD_DFACONTENTMODEL_HPP) |
---|
| 14 | #define XERCESC_INCLUDE_GUARD_DFACONTENTMODEL_HPP |
---|
| 15 | |
---|
| 16 | #include <xercesc/util/XercesDefs.hpp> |
---|
| 17 | #include <xercesc/util/ArrayIndexOutOfBoundsException.hpp> |
---|
| 18 | #include <xercesc/framework/XMLContentModel.hpp> |
---|
| 19 | #include <xercesc/validators/common/ContentLeafNameTypeVector.hpp> |
---|
| 20 | |
---|
| 21 | XERCES_CPP_NAMESPACE_BEGIN |
---|
| 22 | |
---|
| 23 | class ContentSpecNode; |
---|
| 24 | class CMLeaf; |
---|
| 25 | class CMRepeatingLeaf; |
---|
| 26 | class CMNode; |
---|
| 27 | class CMStateSet; |
---|
| 28 | |
---|
| 29 | // |
---|
| 30 | // DFAContentModel is the heavy weight derivative of ContentModel that does |
---|
| 31 | // all of the non-trivial element content validation. This guy does the full |
---|
| 32 | // bore regular expression to DFA conversion to create a DFA that it then |
---|
| 33 | // uses in its validation algorithm. |
---|
| 34 | // |
---|
| 35 | // NOTE: Upstream work ensures that this guy will never see a content model |
---|
| 36 | // with PCDATA in it. Any model with PCDATA is 'mixed' and is handled |
---|
| 37 | // via the MixedContentModel class, since mixed models are very |
---|
| 38 | // constrained in form and easily handled via a special case. This |
---|
| 39 | // also makes our life much easier here. |
---|
| 40 | // |
---|
| 41 | class DFAContentModel : public XMLContentModel |
---|
| 42 | { |
---|
| 43 | public: |
---|
| 44 | // ----------------------------------------------------------------------- |
---|
| 45 | // Constructors and Destructor |
---|
| 46 | // ----------------------------------------------------------------------- |
---|
| 47 | DFAContentModel |
---|
| 48 | ( |
---|
| 49 | const bool dtd |
---|
| 50 | , ContentSpecNode* const elemContentSpec |
---|
| 51 | , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager |
---|
| 52 | ); |
---|
| 53 | DFAContentModel |
---|
| 54 | ( |
---|
| 55 | const bool dtd |
---|
| 56 | , ContentSpecNode* const elemContentSpec |
---|
| 57 | , const bool isMixed |
---|
| 58 | , MemoryManager* const manager |
---|
| 59 | ); |
---|
| 60 | |
---|
| 61 | virtual ~DFAContentModel(); |
---|
| 62 | |
---|
| 63 | |
---|
| 64 | // ----------------------------------------------------------------------- |
---|
| 65 | // Implementation of the virtual content model interface |
---|
| 66 | // ----------------------------------------------------------------------- |
---|
| 67 | |
---|
| 68 | virtual bool validateContent |
---|
| 69 | ( |
---|
| 70 | #ifdef STORE_CHILDREN_INFORMATION_IN_PARSER |
---|
| 71 | XMLElementDecl ** const children |
---|
| 72 | #else |
---|
| 73 | QName** const children |
---|
| 74 | #endif |
---|
| 75 | , XMLSize_t childCount |
---|
| 76 | , XMLSize_t* indexFailingChild |
---|
| 77 | , MemoryManager* const manager |
---|
| 78 | ) const; |
---|
| 79 | |
---|
| 80 | virtual bool validateContentSpecial |
---|
| 81 | ( |
---|
| 82 | #ifdef STORE_CHILDREN_INFORMATION_IN_PARSER |
---|
| 83 | XMLElementDecl ** const children |
---|
| 84 | #else |
---|
| 85 | QName** const children |
---|
| 86 | #endif |
---|
| 87 | , XMLSize_t childCount |
---|
| 88 | , GrammarResolver* const pGrammarResolver |
---|
| 89 | , XMLNamespaceResolver* const pUriResolver |
---|
| 90 | , XMLSize_t* indexFailingChild |
---|
| 91 | , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager |
---|
| 92 | ) const; |
---|
| 93 | |
---|
| 94 | virtual void checkUniqueParticleAttribution |
---|
| 95 | ( |
---|
| 96 | SchemaGrammar* const pGrammar |
---|
| 97 | , GrammarResolver* const pGrammarResolver |
---|
| 98 | , XMLNamespaceResolver* const pUriResolver |
---|
| 99 | , XMLValidator* const pValidator |
---|
| 100 | , unsigned int* const pContentSpecOrgURI |
---|
| 101 | , const XMLCh* pComplexTypeName = 0 |
---|
| 102 | ); |
---|
| 103 | |
---|
| 104 | /** @@@ DEPRECATED @@@ **/ |
---|
| 105 | virtual bool validateContent |
---|
| 106 | ( |
---|
| 107 | QName** const children |
---|
| 108 | , XMLSize_t childCount |
---|
| 109 | , unsigned int emptyNamespaceId |
---|
| 110 | , XMLSize_t* indexFailingChild |
---|
| 111 | , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager |
---|
| 112 | ) const; |
---|
| 113 | |
---|
| 114 | /** @@@ DEPRECATED @@@ **/ |
---|
| 115 | virtual bool validateContentSpecial |
---|
| 116 | ( |
---|
| 117 | QName** const children |
---|
| 118 | , XMLSize_t childCount |
---|
| 119 | , unsigned int emptyNamespaceId |
---|
| 120 | , GrammarResolver* const pGrammarResolver |
---|
| 121 | , XMLStringPool* const pStringPool |
---|
| 122 | , XMLSize_t* indexFailingChild |
---|
| 123 | , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager |
---|
| 124 | ) const; |
---|
| 125 | |
---|
| 126 | /** @@@ DEPRECATED @@@ **/ |
---|
| 127 | virtual void checkUniqueParticleAttribution |
---|
| 128 | ( |
---|
| 129 | SchemaGrammar* const pGrammar |
---|
| 130 | , GrammarResolver* const pGrammarResolver |
---|
| 131 | , XMLStringPool* const pStringPool |
---|
| 132 | , XMLValidator* const pValidator |
---|
| 133 | , unsigned int* const pContentSpecOrgURI |
---|
| 134 | , const XMLCh* pComplexTypeName = 0 |
---|
| 135 | ); |
---|
| 136 | |
---|
| 137 | virtual ContentLeafNameTypeVector* getContentLeafNameTypeVector() const ; |
---|
| 138 | |
---|
| 139 | virtual unsigned int getNextState(unsigned int currentState, |
---|
| 140 | XMLSize_t elementIndex) const; |
---|
| 141 | |
---|
| 142 | virtual bool handleRepetitions( const QName* const curElem, |
---|
| 143 | unsigned int curState, |
---|
| 144 | unsigned int currentLoop, |
---|
| 145 | unsigned int& nextState, |
---|
| 146 | unsigned int& nextLoop, |
---|
| 147 | XMLSize_t elementIndex, |
---|
| 148 | SubstitutionGroupComparator * comparator) const; |
---|
| 149 | |
---|
| 150 | #ifdef PRINT_DEBUG_MESSAGE |
---|
| 151 | virtual void debug_out(std::ostream & out) const |
---|
| 152 | { |
---|
| 153 | out << "(DFAContentModel: "; |
---|
| 154 | if (fElemMapSize) |
---|
| 155 | { |
---|
| 156 | char leadingChar = '{'; |
---|
| 157 | for (unsigned int i = 0; i < fElemMapSize; i++) |
---|
| 158 | { |
---|
| 159 | out << leadingChar << fElemMap[i] << ':' << fElemMapType[i]; |
---|
| 160 | leadingChar = ','; |
---|
| 161 | } |
---|
| 162 | out << '}'; |
---|
| 163 | } |
---|
| 164 | out << ',' << fEmptyOk |
---|
| 165 | << ',' << fEOCPos |
---|
| 166 | << ',' << fDTD |
---|
| 167 | << ',' << fIsMixed |
---|
| 168 | << ')'; |
---|
| 169 | |
---|
| 170 | // bool* fFinalStateFlags; |
---|
| 171 | // CMStateSet** fFollowList; |
---|
| 172 | // CMNode* fHeadNode; |
---|
| 173 | // unsigned int fLeafCount; |
---|
| 174 | // CMLeaf** fLeafList; |
---|
| 175 | // ContentSpecNode::NodeTypes* fLeafListType; |
---|
| 176 | // unsigned int** fTransTable; |
---|
| 177 | // unsigned int fTransTableSize; |
---|
| 178 | // Occurence** fCountingStates; |
---|
| 179 | // ContentLeafNameTypeVector * fLeafNameTypeVector; |
---|
| 180 | |
---|
| 181 | } |
---|
| 182 | #endif |
---|
| 183 | |
---|
| 184 | private : |
---|
| 185 | // ----------------------------------------------------------------------- |
---|
| 186 | // Unimplemented constructors and operators |
---|
| 187 | // ----------------------------------------------------------------------- |
---|
| 188 | DFAContentModel(); |
---|
| 189 | DFAContentModel(const DFAContentModel&); |
---|
| 190 | DFAContentModel& operator=(const DFAContentModel&); |
---|
| 191 | |
---|
| 192 | // ----------------------------------------------------------------------- |
---|
| 193 | // Private helper methods |
---|
| 194 | // ----------------------------------------------------------------------- |
---|
| 195 | void buildDFA(ContentSpecNode* const curNode); |
---|
| 196 | CMNode* buildSyntaxTree(ContentSpecNode* const curNode, unsigned int& curIndex); |
---|
| 197 | unsigned int* makeDefStateList() const; |
---|
| 198 | unsigned int countLeafNodes(ContentSpecNode* const curNode); |
---|
| 199 | |
---|
| 200 | class Occurence : public XMemory |
---|
| 201 | { |
---|
| 202 | public: |
---|
| 203 | Occurence(int minOcc, int maxOcc, int eltIndex); |
---|
| 204 | |
---|
| 205 | int minOccurs; |
---|
| 206 | int maxOccurs; |
---|
| 207 | int elemIndex; |
---|
| 208 | }; |
---|
| 209 | |
---|
| 210 | // ----------------------------------------------------------------------- |
---|
| 211 | // Private data members |
---|
| 212 | // |
---|
| 213 | // fElemMap |
---|
| 214 | // fElemMapSize |
---|
| 215 | // This is the map of unique input symbol elements to indices into |
---|
| 216 | // each state's per-input symbol transition table entry. This is part |
---|
| 217 | // of the built DFA information that must be kept around to do the |
---|
| 218 | // actual validation. |
---|
| 219 | // |
---|
| 220 | // fElemMapType |
---|
| 221 | // This is a map of whether the element map contains information |
---|
| 222 | // related to ANY models. |
---|
| 223 | // |
---|
| 224 | // fEmptyOk |
---|
| 225 | // This is an optimization. While building the transition table we |
---|
| 226 | // can see whether this content model would approve of an empty |
---|
| 227 | // content (which could happen if everything was optional.) So we |
---|
| 228 | // set this flag and short circuit that check, which would otherwise |
---|
| 229 | // be ugly and time consuming if we tried to determine it at each |
---|
| 230 | // validation call. |
---|
| 231 | // |
---|
| 232 | // fEOCPos |
---|
| 233 | // The NFA position of the special EOC (end of content) node. This |
---|
| 234 | // is saved away since its used during the DFA build. |
---|
| 235 | // |
---|
| 236 | // fFinalStateFlags |
---|
| 237 | // This is an array of booleans, one per state (there are |
---|
| 238 | // fTransTableSize states in the DFA) that indicates whether that |
---|
| 239 | // state is a final state. |
---|
| 240 | // |
---|
| 241 | // fFollowList |
---|
| 242 | // The list of follow positions for each NFA position (i.e. for each |
---|
| 243 | // non-epsilon leaf node.) This is only used during the building of |
---|
| 244 | // the DFA, and is let go afterwards. |
---|
| 245 | // |
---|
| 246 | // fHeadNode |
---|
| 247 | // This is the head node of our intermediate representation. It is |
---|
| 248 | // only non-null during the building of the DFA (just so that it |
---|
| 249 | // does not have to be passed all around.) Once the DFA is built, |
---|
| 250 | // this is no longer required so its deleted. |
---|
| 251 | // |
---|
| 252 | // fLeafCount |
---|
| 253 | // The count of leaf nodes. This is an important number that set some |
---|
| 254 | // limits on the sizes of data structures in the DFA process. |
---|
| 255 | // |
---|
| 256 | // fLeafList |
---|
| 257 | // An array of non-epsilon leaf nodes, which is used during the DFA |
---|
| 258 | // build operation, then dropped. These are just references to nodes |
---|
| 259 | // pointed to by fHeadNode, so we don't have to clean them up, just |
---|
| 260 | // the actually leaf list array itself needs cleanup. |
---|
| 261 | // |
---|
| 262 | // fLeafListType |
---|
| 263 | // Array mapping ANY types to the leaf list. |
---|
| 264 | // |
---|
| 265 | // fTransTable |
---|
| 266 | // fTransTableSize |
---|
| 267 | // This is the transition table that is the main by product of all |
---|
| 268 | // of the effort here. It is an array of arrays of ints. The first |
---|
| 269 | // dimension is the number of states we end up with in the DFA. The |
---|
| 270 | // second dimensions is the number of unique elements in the content |
---|
| 271 | // model (fElemMapSize). Each entry in the second dimension indicates |
---|
| 272 | // the new state given that input for the first dimension's start |
---|
| 273 | // state. |
---|
| 274 | // |
---|
| 275 | // The fElemMap array handles mapping from element indexes to |
---|
| 276 | // positions in the second dimension of the transition table. |
---|
| 277 | // |
---|
| 278 | // fTransTableSize is the number of valid entries in the transition |
---|
| 279 | // table, and in the other related tables such as fFinalStateFlags. |
---|
| 280 | // |
---|
| 281 | // fCountingStates |
---|
| 282 | // This is the table holding the minOccurs/maxOccurs for elements |
---|
| 283 | // that can be repeated a finite number of times. |
---|
| 284 | // |
---|
| 285 | // fDTD |
---|
| 286 | // Boolean to allow DTDs to validate even with namespace support. |
---|
| 287 | // |
---|
| 288 | // fIsMixed |
---|
| 289 | // DFA ContentModel with mixed PCDATA. |
---|
| 290 | // ----------------------------------------------------------------------- |
---|
| 291 | QName** fElemMap; |
---|
| 292 | ContentSpecNode::NodeTypes* fElemMapType; |
---|
| 293 | unsigned int fElemMapSize; |
---|
| 294 | bool fEmptyOk; |
---|
| 295 | unsigned int fEOCPos; |
---|
| 296 | bool* fFinalStateFlags; |
---|
| 297 | CMStateSet** fFollowList; |
---|
| 298 | CMNode* fHeadNode; |
---|
| 299 | unsigned int fLeafCount; |
---|
| 300 | CMLeaf** fLeafList; |
---|
| 301 | ContentSpecNode::NodeTypes* fLeafListType; |
---|
| 302 | unsigned int** fTransTable; |
---|
| 303 | unsigned int fTransTableSize; |
---|
| 304 | Occurence** fCountingStates; |
---|
| 305 | bool fDTD; |
---|
| 306 | bool fIsMixed; |
---|
| 307 | ContentLeafNameTypeVector * fLeafNameTypeVector; |
---|
| 308 | MemoryManager* fMemoryManager; |
---|
| 309 | }; |
---|
| 310 | |
---|
| 311 | |
---|
| 312 | inline unsigned int |
---|
| 313 | DFAContentModel::getNextState(unsigned int currentState, |
---|
| 314 | XMLSize_t elementIndex) const { |
---|
| 315 | |
---|
| 316 | if (currentState == XMLContentModel::gInvalidTrans) { |
---|
| 317 | return XMLContentModel::gInvalidTrans; |
---|
| 318 | } |
---|
| 319 | |
---|
| 320 | if (currentState >= fTransTableSize || elementIndex >= fElemMapSize) { |
---|
| 321 | ThrowXMLwithMemMgr(ArrayIndexOutOfBoundsException, XMLExcepts::Array_BadIndex, fMemoryManager); |
---|
| 322 | } |
---|
| 323 | |
---|
| 324 | return fTransTable[currentState][elementIndex]; |
---|
| 325 | } |
---|
| 326 | |
---|
| 327 | inline |
---|
| 328 | DFAContentModel::Occurence::Occurence(int minOcc, int maxOcc, int eltIndex) |
---|
| 329 | { |
---|
| 330 | minOccurs = minOcc; |
---|
| 331 | maxOccurs = maxOcc; |
---|
| 332 | elemIndex = eltIndex; |
---|
| 333 | } |
---|
| 334 | |
---|
| 335 | XERCES_CPP_NAMESPACE_END |
---|
| 336 | |
---|
| 337 | #endif |
---|
| 338 | |
---|