source: icXML/icXML-devel/src/xercesc/parsers/XercesDOMParser.hpp

Last change on this file was 3565, checked in by cameron, 6 years ago

Updates to xercesc sources

File size: 23.7 KB
Line 
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements.  See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License.  You may obtain a copy of the License at
8 *
9 *      http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18/*
19 * $Id: XercesDOMParser.hpp 932887 2010-04-11 13:04:59Z borisk $
20 */
21
22#if !defined(XERCESC_INCLUDE_GUARD_XERCESDOMPARSER_HPP)
23#define XERCESC_INCLUDE_GUARD_XERCESDOMPARSER_HPP
24
25
26#include <icxercesc/parsers/AbstractDOMParser.hpp>
27#include <icxercesc/validators/common/Grammar.hpp>
28
29XERCES_CPP_NAMESPACE_BEGIN
30
31
32class EntityResolver;         // SAX entity resolver; referenced only by pointer in this header
33class ErrorHandler;           // SAX error handler; referenced only by pointer in this header
34class XMLEntityResolver;      // alternative entity resolver; referenced only by pointer in this header
35class XMLResourceIdentifier;  // passed by pointer to XercesDOMParser::resolveEntity()
36
37 /**
38  * This class implements the Document Object Model (DOM) interface.
39  * It should be used by applications which choose to parse and
40  * process the XML document using the DOM APIs. This implementation
41  * also allows the applications to install an error and an entity
42  * handler (useful extensions to the DOM specification).
43  *
44  * <p>It can be used to instantiate a validating or non-validating
45  * parser, by setting a member flag.</p>
46  */
47class PARSERS_EXPORT XercesDOMParser : public AbstractDOMParser
48{
49public :
50        // -----------------------------------------------------------------------
51        //  Constructors and Destructor
52        // -----------------------------------------------------------------------
53
54        /** @name Constructors and Destructor */
55        //@{
56        /** Construct a XercesDOMParser, with an optional validator
57          *
58          * Constructor with an instance of validator class to use for
59          * validation. If you don't provide a validator, a default one will
60          * be created for you in the scanner.
61          *
62          * @param valToAdopt Pointer to the validator instance to use. The
63          *                   parser is responsible for freeing the memory.
64          *
65          * @param manager    Pointer to the memory manager to be used to
66          *                   allocate objects.
67          * @param gramPool   Pointer to the grammar pool instance from
68          *                   external application.
69          *                   The parser does NOT own it.
70          */
71        XercesDOMParser
72        (
73                  XMLValidator* const   valToAdopt = 0
74                , MemoryManager* const  manager = XMLPlatformUtils::fgMemoryManager
75                , XMLGrammarPool* const gramPool = 0
76        );
77
78        /**
79          * Destructor
80          */
81        virtual ~XercesDOMParser();
82
83        //@}
84
85
86        // -----------------------------------------------------------------------
87        //  Getter methods
88        // -----------------------------------------------------------------------
89
90        /** @name Getter methods */
91        //@{
92
93        /** Get a pointer to the error handler
94          *
95          * This method returns the installed error handler. If no handler
96          * has been installed, then it will be a zero pointer.
97          *
98          * @return The pointer to the installed error handler object.
99          */
100        ErrorHandler* getErrorHandler();
101
102        /** Get a const pointer to the error handler
103          *
104          * This method returns the installed error handler.  If no handler
105          * has been installed, then it will be a zero pointer.
106          *
107          * @return A const pointer to the installed error handler object.
108          */
109        const ErrorHandler* getErrorHandler() const;
110
111        /** Get a pointer to the entity resolver
112          *
113          * This method returns the installed entity resolver.  If no resolver
114          * has been installed, then it will be a zero pointer.
115          *
116          * @return The pointer to the installed entity resolver object.
117          */
118        EntityResolver* getEntityResolver();
119
120        /** Get a const pointer to the entity resolver
121          *
122          * This method returns the installed entity resolver. If no resolver
123          * has been installed, then it will be a zero pointer.
124          *
125          * @return A const pointer to the installed entity resolver object.
126          */
127        const EntityResolver* getEntityResolver() const;
128
129        /**
130          * Get a pointer to the XML entity resolver
131          *
132          * This method returns the installed XMLEntityResolver.  If no resolver
133          * has been installed, then it will be a zero pointer.
134          *
135          * @return The pointer to the installed XMLEntityResolver object.
136          */
137        XMLEntityResolver* getXMLEntityResolver();
138
139        /**
140          * Get a const pointer to the XML entity resolver
141          *
142          * This method returns the installed XMLEntityResolver. If no resolver
143          * has been installed, then it will be a zero pointer.
144          *
145          * @return A const pointer to the installed XMLEntityResolver object.
146          */
147        const XMLEntityResolver* getXMLEntityResolver() const;
148
149        /** Get the 'Grammar caching' flag
150          *
151          * This method returns the state of the parser's grammar caching when
152          * parsing an XML document.
153          *
154          * @return true, if the parser is currently configured to
155          *         cache grammars, false otherwise.
156          *
157          * @see #cacheGrammarFromParse
158          */
159        bool isCachingGrammarFromParse() const;
160
161        /** Get the 'Use cached grammar' flag
162          *
163          * This method returns the state of the parser's use of cached grammar
164          * when parsing an XML document.
165          *
166          * @return true, if the parser is currently configured to
167          *         use cached grammars, false otherwise.
168          *
169          * @see #useCachedGrammarInParse
170          */
171        bool isUsingCachedGrammarInParse() const;
172
173        /**
174         * Retrieve the grammar that is associated with the specified namespace key
175         *
176         * @param  nameSpaceKey Namespace key
177         * @return Grammar associated with the Namespace key.
178         */
179        Grammar* getGrammar(const XMLCh* const nameSpaceKey);
180
181        /**
182         * Retrieve the grammar where the root element is declared.
183         *
184         * @return Grammar where the root element is declared
185         */
186        Grammar* getRootGrammar();
187
188        /**
189         * Returns the string corresponding to a URI id from the URI string pool.
190         *
191         * @param uriId id of the string in the URI string pool.
192         * @return URI string corresponding to the URI id.
193         */
194        const XMLCh* getURIText(unsigned int uriId) const;
195
196        /**
197         * Returns the current src offset within the input source.
198         * To be used only while parsing is in progress.
199         *
200         * @return offset within the input source
201         */
202        XMLFilePos getSrcOffset() const;
203
204        /** Get the 'ignore cached DTD grammar' flag
205          *
206          * @return true, if the parser is currently configured to
207          *         ignore cached DTD, false otherwise.
208          *
209          * @see #setIgnoreCachedDTD
210          */
211        bool getIgnoreCachedDTD() const;
212
213        //@}
214
215
216        // -----------------------------------------------------------------------
217        //  Setter methods
218        // -----------------------------------------------------------------------
219
220        /** @name Setter methods */
221        //@{
222
223        /** Set the error handler
224          *
225          * This method allows applications to install their own error handler
226          * to trap error and warning messages.
227          *
228          * <i>Any previously set handler is merely dropped, since the parser
229          * does not own them.</i>
230          *
231          * @param handler  A const pointer to the user supplied error
232          *                 handler.
233          *
234          * @see #getErrorHandler
235          */
236        void setErrorHandler(ErrorHandler* const handler);
237
238        /** Set the entity resolver
239          *
240          * This method allows applications to install their own entity
241          * resolver. By installing an entity resolver, the applications
242          * can trap and potentially redirect references to external
243          * entities.
244          *
245          * <i>Any previously set entity resolver is merely dropped, since the parser
246          * does not own them.  If both setEntityResolver and setXMLEntityResolver
247          * are called, then the last one set is used.</i>
248          *
249          * @param handler  A const pointer to the user supplied entity
250          *                 resolver.
251          *
252          * @see #getEntityResolver
253          */
254        void setEntityResolver(EntityResolver* const handler);
255
256        /**
257          * Set the XML entity resolver
258          *
259          * This method allows applications to install their own XML entity
260          * resolver. By installing an entity resolver, the applications
261          * can trap and potentially redirect references to external
262          * entities.
263          *
264          * <i>Any previously set entity resolver is merely dropped, since the parser
265          * does not own them.  If both setEntityResolver and setXMLEntityResolver
266          * are called, then the last one set is used.</i>
267          *
268          * @param handler  A const pointer to the user supplied XML entity
269          *                 resolver.
270          *
271          * @see #getXMLEntityResolver
272          */
273        void setXMLEntityResolver(XMLEntityResolver* const handler);
274
275        /** Set the 'Grammar caching' flag
276          *
277          * This method allows users to enable or disable caching of grammar when
278          * parsing XML documents. When set to true, the parser will cache the
279          * resulting grammar for use in subsequent parses.
280          *
281          * If the flag is set to true, the 'Use cached grammar' flag will also be
282          * set to true.
283          *
284          * The parser's default state is: false.
285          *
286          * @param newState The value specifying whether we should cache grammars
287          *                 or not.
288          *
289          * @see #isCachingGrammarFromParse
290          * @see #useCachedGrammarInParse
291          */
292        void cacheGrammarFromParse(const bool newState);
293
294        /** Set the 'Use cached grammar' flag
295          *
296          * This method allows users to enable or disable the use of cached
297          * grammars.  When set to true, the parser will use the cached grammar,
298          * instead of building the grammar from scratch, to validate XML
299          * documents.
300          *
301          * If the 'Grammar caching' flag is set to true, this method ignores the
302          * value passed in.
303          *
304          * The parser's default state is: false.
305          *
306          * @param newState The value specifying whether we should use the cached
307          *                 grammar or not.
308          *
309          * @see #isUsingCachedGrammarInParse
310          * @see #cacheGrammarFromParse
311          */
312        void useCachedGrammarInParse(const bool newState);
313
314        /** Set the 'ignore cached DTD grammar' flag
315          *
316          * This method gives users the option to ignore a cached DTD grammar, when
317          * an XML document contains both an internal and external DTD, and the use
318          * cached grammar from parse option is enabled. Currently, we do not allow
319          * using cached DTD grammar when an internal subset is present in the
320          * document. This option will only affect the behavior of the parser when
321          * an internal and external DTD both exist in a document (i.e. no effect
322          * if document has no internal subset).
323          *
324          * The parser's default state is false
325          *
326          * @param newValue The state to set
327          */
328        void setIgnoreCachedDTD(const bool newValue);
329
330        //@}
331
332        // -----------------------------------------------------------------------
333        //  Utility methods
334        // -----------------------------------------------------------------------
335
336        /** @name Utility methods */
337        //@{
338        /** Reset the documents vector pool and release all the associated memory
339          * back to the system.
340          *
341          * When parsing a document using a DOM parser, all memory allocated
342          * for a DOM tree is associated to the DOM document.
343          *
344          * If you do multiple parses using the same DOM parser instance, then
345          * multiple DOM documents will be generated and saved in a vector pool.
346          * All these documents (and thus all the allocated memory)
347          * won't be deleted until the parser instance is destroyed.
348          *
349          * If you don't need these DOM documents anymore and don't want to
350          * destroy the DOM parser instance at this moment, then you can call this method
351          * to reset the document vector pool and release all the allocated memory
352          * back to the system.
353          *
354          * It is an error to call this method if you are in the middle of a
355          * parse (e.g. in the middle of a progressive parse).
356          *
357          * @exception IOException An exception from the parser if this function
358          *            is called when a parse is in progress.
359          *
360          */
361        void resetDocumentPool();
362
363        //@}
364
365        // -----------------------------------------------------------------------
366        //  Implementation of the XMLErrorReporter interface.
367        // -----------------------------------------------------------------------
368
369        /** @name Implementation of the XMLErrorReporter interface. */
370        //@{
371
372        /** Handle errors reported from the parser
373          *
374          * This method is used to report back errors found while parsing the
375          * XML file. This method is also borrowed from the SAX specification.
376          * It calls the corresponding user installed Error Handler method:
377          * 'fatal', 'error', 'warning' depending on the severity of the error.
378          * This classification is defined by the XML specification.
379          *
380          * @param errCode An integer code for the error.
381          * @param msgDomain A const pointer to a Unicode string representing
382          *                  the message domain to use.
383          * @param errType An enumeration classifying the severity of the error.
384          * @param errorText A const pointer to a Unicode string representing
385          *                  the text of the error message.
386          * @param systemId  A const pointer to a Unicode string representing
387          *                  the system id of the XML file where this error
388          *                  was discovered.
389          * @param publicId  A const pointer to a Unicode string representing
390          *                  the public id of the XML file where this error
391          *                  was discovered.
392          * @param lineNum   The line number where the error occurred.
393          * @param colNum    The column number where the error occurred.
394          * @see ErrorHandler
395          */
396        virtual void error
397        (
398                const   unsigned int                errCode
399                , const XMLCh* const                msgDomain
400                , const XMLErrorReporter::ErrTypes  errType
401                , const XMLCh* const                errorText
402                , const XMLCh* const                systemId
403                , const XMLCh* const                publicId
404                , const XMLFileLoc                  lineNum
405                , const XMLFileLoc                  colNum
406        );
407
408        /** Reset any error data before a new parse
409          *
410          * This method allows the user installed Error Handler callback to
411          * 'reset' itself.
412          *
413          * <b>This method is a no-op for this DOM
414          * implementation.</b>
415          */
416        virtual void resetErrors();
417        //@}
418
419
420        // -----------------------------------------------------------------------
421        //  Implementation of the XMLEntityHandler interface.
422        // -----------------------------------------------------------------------
423
424        /** @name Implementation of the XMLEntityHandler interface. */
425        //@{
426
427        /** Handle an end of input source event
428          *
429          * This method is used to indicate the end of parsing of an external
430          * entity file.
431          *
432          * <b>This method is a no-op for this DOM
433          * implementation.</b>
434          *
435          * @param inputSource A const reference to the InputSource object
436          *                    which points to the XML file being parsed.
437          * @see InputSource
438          */
439        virtual void endInputSource(const InputSource& inputSource);
440
441        /** Expand a system id
442          *
443          * This method allows an installed XMLEntityHandler to further
444          * process any system id's of external entities encountered in
445          * the XML file being parsed, such as redirection etc.
446          *
447          * <b>This method always returns 'false'
448          * for this DOM implementation.</b>
449          *
450          * @param systemId  A const pointer to a Unicode string representing
451          *                  the system id scanned by the parser.
452          * @param toFill    A pointer to a buffer in which the application
453          *                  processed system id is stored.
454          * @return 'true', if any processing is done, 'false' otherwise.
455          */
456        virtual bool expandSystemId
457        (
458                const   XMLCh* const    systemId
459                ,       XMLBuffer&      toFill
460        );
461
462        /** Reset any entity handler information
463          *
464          * This method allows the installed XMLEntityHandler to reset
465          * itself.
466          *
467          * <b>This method is a no-op for this DOM
468          * implementation.</b>
469          */
470        virtual void resetEntities();
471
472        /** Resolve a public/system id
473          *
474          * This method allows a user installed entity handler to further
475          * process any pointers to external entities. The applications can
476          * implement 'redirection' via this callback.
477          *
478          * @param resourceIdentifier An object containing the type of
479          *        resource to be resolved and the associated data members
480          *        corresponding to this type.
481          * @return The value returned by the user installed resolveEntity
482          *         method or NULL otherwise to indicate no processing was done.
483          *         The returned InputSource is owned by the parser which is
484          *         responsible to clean up the memory.
485          * @see XMLEntityHandler
486          * @see XMLEntityResolver
487          */
488        virtual InputSource* resolveEntity
489        (
490                XMLResourceIdentifier* resourceIdentifier
491        );
492
493        /** Handle a 'start input source' event
494          *
495          * This method is used to indicate the start of parsing an external
496          * entity file.
497          *
498          * <b>This method is a no-op for this DOM
499          * implementation.</b>
500          *
501          * @param inputSource A const reference to the InputSource object
502          *                    which points to the external entity
503          *                    being parsed.
504          */
505        virtual void startInputSource(const InputSource& inputSource);
506
507        //@}
508
509        // -----------------------------------------------------------------------
510        //  Grammar preparsing interface
511        // -----------------------------------------------------------------------
512
513        /** @name Implementation of the grammar preparsing interface. */
514        //@{
515        /**
516          * Preparse schema grammar (XML Schema, DTD, etc.) via an input source
517          * object.
518          *
519          * This method invokes the preparsing process on a schema grammar XML
520          * file specified by the SAX InputSource parameter. If the 'toCache' flag
521          * is enabled, the parser will cache the grammars for re-use. If a grammar
522          * key is found in the pool, no caching of any grammar will take place.
523          *
524          *
525          * @param source A const reference to the SAX InputSource object which
526          *               points to the schema grammar file to be preparsed.
527          * @param grammarType The grammar type (Schema or DTD).
528          * @param toCache If <code>true</code>, we cache the preparsed grammar,
529          *                otherwise, no caching. Default is <code>false</code>.
530          * @return The preparsed schema grammar object (SchemaGrammar or
531          *         DTDGrammar). That grammar object is owned by the parser.
532          *
533          * @exception SAXException Any SAX exception, possibly
534          *            wrapping another exception.
535          * @exception XMLException An exception from the parser or client
536          *            handler code.
537          * @exception DOMException A DOM exception as per DOM spec.
538          *
539          * @see InputSource#InputSource
540          */
541        Grammar* loadGrammar(const InputSource& source,
542                                                 const Grammar::GrammarType grammarType,
543                                                 const bool toCache = false);
544
545        /**
546          * Preparse schema grammar (XML Schema, DTD, etc.) via a file path or URL
547          *
548          * This method invokes the preparsing process on a schema grammar XML
549          * file specified by the file path parameter. If the 'toCache' flag
550          * is enabled, the parser will cache the grammars for re-use. If a grammar
551          * key is found in the pool, no caching of any grammar will take place.
552          *
553          *
554          * @param systemId A const XMLCh pointer to the Unicode string which
555          *                 contains the path to the XML grammar file to be
556          *                 preparsed.
557          * @param grammarType The grammar type (Schema or DTD).
558          * @param toCache If <code>true</code>, we cache the preparsed grammar,
559          *                otherwise, no caching. Default is <code>false</code>.
560          * @return The preparsed schema grammar object (SchemaGrammar or
561          *         DTDGrammar). That grammar object is owned by the parser.
562          *
563          * @exception SAXException Any SAX exception, possibly
564          *            wrapping another exception.
565          * @exception XMLException An exception from the parser or client
566          *            handler code.
567          * @exception DOMException A DOM exception as per DOM spec.
568          */
569        Grammar* loadGrammar(const XMLCh* const systemId,
570                                                 const Grammar::GrammarType grammarType,
571                                                 const bool toCache = false);
572
573        /**
574          * Preparse schema grammar (XML Schema, DTD, etc.) via a file path or URL
575          *
576          * This method invokes the preparsing process on a schema grammar XML
577          * file specified by the file path parameter. If the 'toCache' flag
578          * is enabled, the parser will cache the grammars for re-use. If a grammar
579          * key is found in the pool, no caching of any grammar will take place.
580          *
581          *
582          * @param systemId A const char pointer to a native string which contains
583          *                 the path to the XML grammar file to be preparsed.
584          * @param grammarType The grammar type (Schema or DTD).
585          * @param toCache If <code>true</code>, we cache the preparsed grammar,
586          *                otherwise, no caching. Default is <code>false</code>.
587          * @return The preparsed schema grammar object (SchemaGrammar or
588          *         DTDGrammar). That grammar object is owned by the parser.
589          *
590          * @exception SAXException Any SAX exception, possibly
591          *            wrapping another exception.
592          * @exception XMLException An exception from the parser or client
593          *            handler code.
594          * @exception DOMException A DOM exception as per DOM spec.
595          */
596        Grammar* loadGrammar(const char* const systemId,
597                                                 const Grammar::GrammarType grammarType,
598                                                 const bool toCache = false);
599
600        /**
601          * This method allows the user to reset the pool of cached grammars.
602          */
603        void resetCachedGrammarPool();
604
605        //@}
606
607    /** Called by the XMLParserImpl. This is a factory method to provide it with a templated version
608      * of the XMLDocumentDisseminator; each of which is optimized for SAX, SAX2 or DOM, depending
609      * on the parser being used.
610      *
611      */
612    virtual XMLDocumentDisseminator * createDocumentDisseminator();
613
614private :
615        // -----------------------------------------------------------------------
616        //  Initialize/Cleanup methods
617        // -----------------------------------------------------------------------
618        void resetParse();
619
620        // -----------------------------------------------------------------------
621        //  Unimplemented constructors and operators
622        // -----------------------------------------------------------------------
623        XercesDOMParser(const XercesDOMParser&);
624        XercesDOMParser& operator=(const XercesDOMParser&);
625
626        // -----------------------------------------------------------------------
627        //  Private data members
628        //  fEntityResolver
629        //      The installed SAX entity resolver, if any. Null if none.
630        //  fXMLEntityResolver
631        //      The installed XMLEntityResolver, if any. Null if none.
632        //  fErrorHandler
633        //      The installed SAX error handler, if any. Null if none.
634        //-----------------------------------------------------------------------
635        EntityResolver*          fEntityResolver;
636        XMLEntityResolver*       fXMLEntityResolver;
637        ErrorHandler*            fErrorHandler;
638};
639
640
641
642// ---------------------------------------------------------------------------
643//  XercesDOMParser: Handlers for the XMLEntityHandler interface
644// ---------------------------------------------------------------------------
645inline void XercesDOMParser::endInputSource(const InputSource& inputSource)
646{
647        static_cast<void>(inputSource);   // deliberate no-op: nothing is done when an input source ends
648}
649
650inline bool XercesDOMParser::expandSystemId(const XMLCh* const systemId, XMLBuffer& toFill)
651{
652        static_cast<void>(systemId); static_cast<void>(toFill);   // no expansion happens; toFill is not written
653        return false;                                             // always reports "no processing done"
654}
655
656inline void XercesDOMParser::resetEntities()
657{
658        // Deliberate no-op: this function has an empty body and resets nothing.
659}
660
661inline void XercesDOMParser::startInputSource(const InputSource& inputSource)
662{
663        static_cast<void>(inputSource);   // deliberate no-op: nothing is done when an input source starts
664}
665
666
667// ---------------------------------------------------------------------------
668//  XercesDOMParser: Getter methods
669// ---------------------------------------------------------------------------
670inline ErrorHandler* XercesDOMParser::getErrorHandler()
671{
672        return this->fErrorHandler;   // stored handler pointer as-is; zero when none has been installed
673}
674
675inline const ErrorHandler* XercesDOMParser::getErrorHandler() const
676{
677        return this->fErrorHandler;   // read-only view of the stored handler pointer; zero when none is installed
678}
679
680inline EntityResolver* XercesDOMParser::getEntityResolver()
681{
682        return this->fEntityResolver;   // stored SAX resolver pointer as-is; zero when none has been installed
683}
684
685inline const EntityResolver* XercesDOMParser::getEntityResolver() const
686{
687        return this->fEntityResolver;   // read-only view of the stored SAX resolver pointer; zero when none is installed
688}
689
690inline XMLEntityResolver* XercesDOMParser::getXMLEntityResolver()
691{
692        return this->fXMLEntityResolver;   // stored XMLEntityResolver pointer as-is; zero when none has been installed
693}
694
695inline const XMLEntityResolver* XercesDOMParser::getXMLEntityResolver() const
696{
697        return this->fXMLEntityResolver;   // read-only view of the stored XMLEntityResolver pointer; zero when none is installed
698}
699
700XERCES_CPP_NAMESPACE_END
701
702#endif
Note: See TracBrowser for help on using the repository browser.