source: icXML/icXML-devel/src/xercesc/parsers/XercesDOMParser.hpp @ 2736

Last change on this file since 2736 was 2722, checked in by cameron, 7 years ago

Original Xerces files with import mods for icxercesc

File size: 25.1 KB
Line 
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements.  See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License.  You may obtain a copy of the License at
8 *
9 *      http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18/*
19 * $Id: XercesDOMParser.hpp 932887 2010-04-11 13:04:59Z borisk $
20 */
21
22#if !defined(XERCESC_INCLUDE_GUARD_XERCESDOMPARSER_HPP)
23#define XERCESC_INCLUDE_GUARD_XERCESDOMPARSER_HPP
24
25
26#include <icxercesc/parsers/AbstractDOMParser.hpp>
27#include <xercesc/validators/common/Grammar.hpp>
28
29XERCES_CPP_NAMESPACE_BEGIN
30
31
// Forward declarations. In this header these types appear only as pointer
// types in member declarations, so their full definitions are not required.
class EntityResolver;
class ErrorHandler;
class XMLEntityResolver;
class XMLResourceIdentifier;
36
37 /**
38  * This class implements the Document Object Model (DOM) interface.
39  * It should be used by applications which choose to parse and
40  * process the XML document using the DOM api's. This implementation
41  * also allows the applications to install an error and an entity
42  * handler (useful extensions to the DOM specification).
43  *
44  * <p>It can be used to instantiate a validating or non-validating
45  * parser, by setting a member flag.</p>
46  */
47class PARSERS_EXPORT XercesDOMParser : public AbstractDOMParser
48{
49public :
50    // -----------------------------------------------------------------------
51    //  Constructors and Destructor
52    // -----------------------------------------------------------------------
53
54    /** @name Constructors and Destructor */
55    //@{
56    /** Construct a XercesDOMParser, with an optional validator
57      *
58      * Constructor with an instance of validator class to use for
59      * validation. If you don't provide a validator, a default one will
60      * be created for you in the scanner.
61      *
62      * @param gramPool   Pointer to the grammar pool instance from
63      *                   external application.
64      *                   The parser does NOT own it.
65      *
66      * @param valToAdopt Pointer to the validator instance to use. The
67      *                   parser is responsible for freeing the memory.
68      * @param  manager   Pointer to the memory manager to be used to
69      *                   allocate objects.
70      */
71    XercesDOMParser
72    (
73          XMLValidator* const   valToAdopt = 0
74        , MemoryManager* const  manager = XMLPlatformUtils::fgMemoryManager
75        , XMLGrammarPool* const gramPool = 0
76    );
77
78    /**
79      * Destructor
80      */
81    virtual ~XercesDOMParser();
82
83    //@}
84
85
86    // -----------------------------------------------------------------------
87    //  Getter methods
88    // -----------------------------------------------------------------------
89
90    /** @name Getter methods */
91    //@{
92
93    /** Get a pointer to the error handler
94      *
95      * This method returns the installed error handler. If no handler
96      * has been installed, then it will be a zero pointer.
97      *
98      * @return The pointer to the installed error handler object.
99      */
100    ErrorHandler* getErrorHandler();
101
102    /** Get a const pointer to the error handler
103      *
104      * This method returns the installed error handler.  If no handler
105      * has been installed, then it will be a zero pointer.
106      *
107      * @return A const pointer to the installed error handler object.
108      */
109    const ErrorHandler* getErrorHandler() const;
110
111    /** Get a pointer to the entity resolver
112      *
113      * This method returns the installed entity resolver.  If no resolver
114      * has been installed, then it will be a zero pointer.
115      *
116      * @return The pointer to the installed entity resolver object.
117      */
118    EntityResolver* getEntityResolver();
119
120    /** Get a const pointer to the entity resolver
121      *
122      * This method returns the installed entity resolver. If no resolver
123      * has been installed, then it will be a zero pointer.
124      *
125      * @return A const pointer to the installed entity resolver object.
126      */
127    const EntityResolver* getEntityResolver() const;
128
129    /**
130      * Get a pointer to the entity resolver
131      *
132      * This method returns the installed entity resolver.  If no resolver
133      * has been installed, then it will be a zero pointer.
134      *
135      * @return The pointer to the installed entity resolver object.
136      */
137    XMLEntityResolver* getXMLEntityResolver();
138
139    /**
140      * Get a const pointer to the entity resolver
141      *
142      * This method returns the installed entity resolver. If no resolver
143      * has been installed, then it will be a zero pointer.
144      *
145      * @return A const pointer to the installed entity resolver object.
146      */
147    const XMLEntityResolver* getXMLEntityResolver() const;
148
149    /** Get the 'Grammar caching' flag
150      *
151      * This method returns the state of the parser's grammar caching when
152      * parsing an XML document.
153      *
154      * @return true, if the parser is currently configured to
155      *         cache grammars, false otherwise.
156      *
157      * @see #cacheGrammarFromParse
158      */
159    bool isCachingGrammarFromParse() const;
160
161    /** Get the 'Use cached grammar' flag
162      *
163      * This method returns the state of the parser's use of cached grammar
164      * when parsing an XML document.
165      *
166      * @return true, if the parser is currently configured to
167      *         use cached grammars, false otherwise.
168      *
169      * @see #useCachedGrammarInParse
170      */
171    bool isUsingCachedGrammarInParse() const;
172
173    /**
174     * Retrieve the grammar that is associated with the specified namespace key
175     *
176     * @param  nameSpaceKey Namespace key
177     * @return Grammar associated with the Namespace key.
178     */
179    Grammar* getGrammar(const XMLCh* const nameSpaceKey);
180
181    /**
182     * Retrieve the grammar where the root element is declared.
183     *
184     * @return Grammar where root element declared
185     */
186    Grammar* getRootGrammar();
187
188    /**
189     * Returns the string corresponding to a URI id from the URI string pool.
190     *
191     * @param uriId id of the string in the URI string pool.
192     * @return URI string corresponding to the URI id.
193     */
194    const XMLCh* getURIText(unsigned int uriId) const;
195
196    /**
197     * Returns the current src offset within the input source.
198     * To be used only while parsing is in progress.
199     *
200     * @return offset within the input source
201     */
202    XMLFilePos getSrcOffset() const;
203
204    /** Get the 'ignore cached DTD grammar' flag
205      *
206      * @return true, if the parser is currently configured to
207      *         ignore cached DTD, false otherwise.
208      *
209      * @see #setIgnoreCachedDTD
210      */
211    bool getIgnoreCachedDTD() const;
212
213    //@}
214
215
216    // -----------------------------------------------------------------------
217    //  Setter methods
218    // -----------------------------------------------------------------------
219
220    /** @name Setter methods */
221    //@{
222
223    /** Set the error handler
224      *
225      * This method allows applications to install their own error handler
226      * to trap error and warning messages.
227      *
228      * <i>Any previously set handler is merely dropped, since the parser
229      * does not own them.</i>
230      *
231      * @param handler  A const pointer to the user supplied error
232      *                 handler.
233      *
234      * @see #getErrorHandler
235      */
236    void setErrorHandler(ErrorHandler* const handler);
237
238    /** Set the entity resolver
239      *
240      * This method allows applications to install their own entity
241      * resolver. By installing an entity resolver, the applications
242      * can trap and potentially redirect references to external
243      * entities.
244      *
245      * <i>Any previously set entity resolver is merely dropped, since the parser
246      * does not own them.  If both setEntityResolver and setXMLEntityResolver
247      * are called, then the last one is used.</i>
248      *
249      * @param handler  A const pointer to the user supplied entity
250      *                 resolver.
251      *
252      * @see #getEntityResolver
253      */
254    void setEntityResolver(EntityResolver* const handler);
255
256    /**
257      * Set the entity resolver
258      *
259      * This method allows applications to install their own entity
260      * resolver. By installing an entity resolver, the applications
261      * can trap and potentially redirect references to external
262      * entities.
263      *
264      * <i>Any previously set entity resolver is merely dropped, since the parser
265      * does not own them.  If both setEntityResolver and setXMLEntityResolver
266      * are called, then the last one set is used.</i>
267      *
268      * @param handler  A const pointer to the user supplied entity
269      *                 resolver.
270      *
271      * @see #getXMLEntityResolver
272      */
273    void setXMLEntityResolver(XMLEntityResolver* const handler);
274
275    /** Set the 'Grammar caching' flag
276      *
277      * This method allows users to enable or disable caching of grammar when
278      * parsing XML documents. When set to true, the parser will cache the
279      * resulting grammar for use in subsequent parses.
280      *
281      * If the flag is set to true, the 'Use cached grammar' flag will also be
282      * set to true.
283      *
284      * The parser's default state is: false.
285      *
286      * @param newState The value specifying whether we should cache grammars
287      *                 or not.
288      *
289      * @see #isCachingGrammarFromParse
290      * @see #useCachedGrammarInParse
291      */
292    void cacheGrammarFromParse(const bool newState);
293
294    /** Set the 'Use cached grammar' flag
295      *
296      * This method allows users to enable or disable the use of cached
297      * grammars.  When set to true, the parser will use the cached grammar,
298      * instead of building the grammar from scratch, to validate XML
299      * documents.
300      *
301      * If the 'Grammar caching' flag is set to true, this method ignore the
302      * value passed in.
303      *
304      * The parser's default state is: false.
305      *
306      * @param newState The value specifying whether we should use the cached
307      *                 grammar or not.
308      *
309      * @see #isUsingCachedGrammarInParse
310      * @see #cacheGrammarFromParse
311      */
312    void useCachedGrammarInParse(const bool newState);
313
314    /** Set the 'ignore cached DTD grammar' flag
315      *
316      * This method gives users the option to ignore a cached DTD grammar, when
317      * an XML document contains both an internal and external DTD, and the use
318      * cached grammar from parse option is enabled. Currently, we do not allow
319      * using cached DTD grammar when an internal subset is present in the
320      * document. This option will only affect the behavior of the parser when
321      * an internal and external DTD both exist in a document (i.e. no effect
322      * if document has no internal subset).
323      *
324      * The parser's default state is false
325      *
326      * @param newValue The state to set
327      */
328    void setIgnoreCachedDTD(const bool newValue);
329
330    //@}
331
332    // -----------------------------------------------------------------------
333    //  Utility methods
334    // -----------------------------------------------------------------------
335
336    /** @name Utility methods */
337    //@{
338    /** Reset the documents vector pool and release all the associated memory
339      * back to the system.
340      *
341      * When parsing a document using a DOM parser, all memory allocated
342      * for a DOM tree is associated to the DOM document.
343      *
344      * If you do multiple parse using the same DOM parser instance, then
345      * multiple DOM documents will be generated and saved in a vector pool.
346      * All these documents (and thus all the allocated memory)
347      * won't be deleted until the parser instance is destroyed.
348      *
349      * If you don't need these DOM documents anymore and don't want to
350      * destroy the DOM parser instance at this moment, then you can call this method
351      * to reset the document vector pool and release all the allocated memory
352      * back to the system.
353      *
354      * It is an error to call this method if you are in the middle of a
355      * parse (e.g. in the mid of a progressive parse).
356      *
357      * @exception IOException An exception from the parser if this function
358      *            is called when a parse is in progress.
359      *
360      */
361    void resetDocumentPool();
362
363    //@}
364
365    // -----------------------------------------------------------------------
366    //  Implementation of the XMLErrorReporter interface.
367    // -----------------------------------------------------------------------
368
369    /** @name Implementation of the XMLErrorReporter interface. */
370    //@{
371
372    /** Handle errors reported from the parser
373      *
374      * This method is used to report back errors found while parsing the
375      * XML file. This method is also borrowed from the SAX specification.
376      * It calls the corresponding user installed Error Handler method:
377      * 'fatal', 'error', 'warning' depending on the severity of the error.
378      * This classification is defined by the XML specification.
379      *
380      * @param errCode An integer code for the error.
381      * @param msgDomain A const pointer to an Unicode string representing
382      *                  the message domain to use.
383      * @param errType An enumeration classifying the severity of the error.
384      * @param errorText A const pointer to an Unicode string representing
385      *                  the text of the error message.
386      * @param systemId  A const pointer to an Unicode string representing
387      *                  the system id of the XML file where this error
388      *                  was discovered.
389      * @param publicId  A const pointer to an Unicode string representing
390      *                  the public id of the XML file where this error
391      *                  was discovered.
392      * @param lineNum   The line number where the error occurred.
393      * @param colNum    The column number where the error occurred.
394      * @see ErrorHandler
395      */
396    virtual void error
397    (
398        const   unsigned int                errCode
399        , const XMLCh* const                msgDomain
400        , const XMLErrorReporter::ErrTypes  errType
401        , const XMLCh* const                errorText
402        , const XMLCh* const                systemId
403        , const XMLCh* const                publicId
404        , const XMLFileLoc                  lineNum
405        , const XMLFileLoc                  colNum
406    );
407
408    /** Reset any error data before a new parse
409     *
410      * This method allows the user installed Error Handler callback to
411      * 'reset' itself.
412      *
413      * <b>This method is a no-op for this DOM
414      * implementation.</b>
415      */
416    virtual void resetErrors();
417    //@}
418
419
420    // -----------------------------------------------------------------------
421    //  Implementation of the XMLEntityHandler interface.
422    // -----------------------------------------------------------------------
423
424    /** @name Implementation of the XMLEntityHandler interface. */
425    //@{
426
427    /** Handle an end of input source event
428      *
429      * This method is used to indicate the end of parsing of an external
430      * entity file.
431      *
432      * <b>This method is a no-op for this DOM
433      * implementation.</b>
434      *
435      * @param inputSource A const reference to the InputSource object
436      *                    which points to the XML file being parsed.
437      * @see InputSource
438      */
439    virtual void endInputSource(const InputSource& inputSource);
440
441    /** Expand a system id
442      *
443      * This method allows an installed XMLEntityHandler to further
444      * process any system id's of external entities encountered in
445      * the XML file being parsed, such as redirection etc.
446      *
447      * <b>This method always returns 'false'
448      * for this DOM implementation.</b>
449      *
450      * @param systemId  A const pointer to an Unicode string representing
451      *                  the system id scanned by the parser.
452      * @param toFill    A pointer to a buffer in which the application
453      *                  processed system id is stored.
454      * @return 'true', if any processing is done, 'false' otherwise.
455      */
456    virtual bool expandSystemId
457    (
458        const   XMLCh* const    systemId
459        ,       XMLBuffer&      toFill
460    );
461
462    /** Reset any entity handler information
463      *
464      * This method allows the installed XMLEntityHandler to reset
465      * itself.
466      *
467      * <b>This method is a no-op for this DOM
468      * implementation.</b>
469      */
470    virtual void resetEntities();
471
472    /** Resolve a public/system id
473      *
474      * This method allows a user installed entity handler to further
475      * process any pointers to external entities. The applications can
476      * implement 'redirection' via this callback.
477      *
478      * @param resourceIdentifier An object containing the type of
479      *        resource to be resolved and the associated data members
480      *        corresponding to this type.
481      * @return The value returned by the user installed resolveEntity
482      *         method or NULL otherwise to indicate no processing was done.
483      *         The returned InputSource is owned by the parser which is
484      *         responsible to clean up the memory.
485      * @see XMLEntityHandler
486      * @see XMLEntityResolver
487      */
488    virtual InputSource* resolveEntity
489    (
490        XMLResourceIdentifier* resourceIdentifier
491    );
492
493    /** Handle a 'start input source' event
494      *
495      * This method is used to indicate the start of parsing an external
496      * entity file.
497      *
498      * <b>This method is a no-op for this DOM parse
499      * implementation.</b>
500      *
501      * @param inputSource A const reference to the InputSource object
502      *                    which points to the external entity
503      *                    being parsed.
504      */
505    virtual void startInputSource(const InputSource& inputSource);
506
507    //@}
508
509    // -----------------------------------------------------------------------
510    //  Grammar preparsing interface
511    // -----------------------------------------------------------------------
512
513    /** @name Implementation of Grammar preparsing interface's. */
514    //@{
515    /**
516      * Preparse schema grammar (XML Schema, DTD, etc.) via an input source
517      * object.
518      *
519      * This method invokes the preparsing process on a schema grammar XML
520      * file specified by the SAX InputSource parameter. If the 'toCache' flag
521      * is enabled, the parser will cache the grammars for re-use. If a grammar
522      * key is found in the pool, no caching of any grammar will take place.
523      *
524      *
525      * @param source A const reference to the SAX InputSource object which
526      *               points to the schema grammar file to be preparsed.
527      * @param grammarType The grammar type (Schema or DTD).
528      * @param toCache If <code>true</code>, we cache the preparsed grammar,
529      *                otherwise, no caching. Default is <code>false</code>.
530      * @return The preparsed schema grammar object (SchemaGrammar or
531      *         DTDGrammar). That grammar object is owned by the parser.
532      *
533      * @exception SAXException Any SAX exception, possibly
534      *            wrapping another exception.
535      * @exception XMLException An exception from the parser or client
536      *            handler code.
537      * @exception DOMException A DOM exception as per DOM spec.
538      *
539      * @see InputSource#InputSource
540      */
541    Grammar* loadGrammar(const InputSource& source,
542                         const Grammar::GrammarType grammarType,
543                         const bool toCache = false);
544
545    /**
546      * Preparse schema grammar (XML Schema, DTD, etc.) via a file path or URL
547      *
548      * This method invokes the preparsing process on a schema grammar XML
549      * file specified by the file path parameter. If the 'toCache' flag
550      * is enabled, the parser will cache the grammars for re-use. If a grammar
551      * key is found in the pool, no caching of any grammar will take place.
552      *
553      *
554      * @param systemId A const XMLCh pointer to the Unicode string which
555      *                 contains the path to the XML grammar file to be
556      *                 preparsed.
557      * @param grammarType The grammar type (Schema or DTD).
558      * @param toCache If <code>true</code>, we cache the preparsed grammar,
559      *                otherwise, no caching. Default is <code>false</code>.
560      * @return The preparsed schema grammar object (SchemaGrammar or
561      *         DTDGrammar). That grammar object is owned by the parser.
562      *
563      * @exception SAXException Any SAX exception, possibly
564      *            wrapping another exception.
565      * @exception XMLException An exception from the parser or client
566      *            handler code.
567      * @exception DOMException A DOM exception as per DOM spec.
568      */
569    Grammar* loadGrammar(const XMLCh* const systemId,
570                         const Grammar::GrammarType grammarType,
571                         const bool toCache = false);
572
573    /**
574      * Preparse schema grammar (XML Schema, DTD, etc.) via a file path or URL
575      *
576      * This method invokes the preparsing process on a schema grammar XML
577      * file specified by the file path parameter. If the 'toCache' flag
578      * is enabled, the parser will cache the grammars for re-use. If a grammar
579      * key is found in the pool, no caching of any grammar will take place.
580      *
581      *
582      * @param systemId A const char pointer to a native string which contains
583      *                 the path to the XML grammar file to be preparsed.
584      * @param grammarType The grammar type (Schema or DTD).
585      * @param toCache If <code>true</code>, we cache the preparsed grammar,
586      *                otherwise, no caching. Default is <code>false</code>.
587      * @return The preparsed schema grammar object (SchemaGrammar or
588      *         DTDGrammar). That grammar object is owned by the parser.
589      *
590      * @exception SAXException Any SAX exception, possibly
591      *            wrapping another exception.
592      * @exception XMLException An exception from the parser or client
593      *            handler code.
594      * @exception DOMException A DOM exception as per DOM spec.
595      */
596    Grammar* loadGrammar(const char* const systemId,
597                         const Grammar::GrammarType grammarType,
598                         const bool toCache = false);
599
600    /**
601      * This method allows the user to reset the pool of cached grammars.
602      */
603    void resetCachedGrammarPool();
604
605    //@}
606
607
608private :
609    // -----------------------------------------------------------------------
610    //  Initialize/Cleanup methods
611    // -----------------------------------------------------------------------
612    void resetParse();
613
614    // -----------------------------------------------------------------------
615    //  Unimplemented constructors and operators
616    // -----------------------------------------------------------------------
617    XercesDOMParser(const XercesDOMParser&);
618    XercesDOMParser& operator=(const XercesDOMParser&);
619
620    // -----------------------------------------------------------------------
621    //  Private data members
622    //
623    //  fEntityResolver
624    //      The installed SAX entity resolver, if any. Null if none.
625    //
626    //  fErrorHandler
627    //      The installed SAX error handler, if any. Null if none.
628    //-----------------------------------------------------------------------
629    EntityResolver*          fEntityResolver;
630    XMLEntityResolver*       fXMLEntityResolver;
631    ErrorHandler*            fErrorHandler;
632};
633
634
635
636// ---------------------------------------------------------------------------
637//  XercesDOMParser: Handlers for the XMLEntityHandler interface
638// ---------------------------------------------------------------------------
639inline void XercesDOMParser::endInputSource(const InputSource&)
640{
641    // The DOM entity resolver doesn't handle this
642}
643
644inline bool XercesDOMParser::expandSystemId(const XMLCh* const, XMLBuffer&)
645{
646    // The DOM entity resolver doesn't handle this
647    return false;
648}
649
650inline void XercesDOMParser::resetEntities()
651{
652    // Nothing to do on this one
653}
654
655inline void XercesDOMParser::startInputSource(const InputSource&)
656{
657    // The DOM entity resolver doesn't handle this
658}
659
660
661// ---------------------------------------------------------------------------
662//  XercesDOMParser: Getter methods
663// ---------------------------------------------------------------------------
664inline ErrorHandler* XercesDOMParser::getErrorHandler()
665{
666    return fErrorHandler;
667}
668
669inline const ErrorHandler* XercesDOMParser::getErrorHandler() const
670{
671    return fErrorHandler;
672}
673
674inline EntityResolver* XercesDOMParser::getEntityResolver()
675{
676    return fEntityResolver;
677}
678
679inline const EntityResolver* XercesDOMParser::getEntityResolver() const
680{
681    return fEntityResolver;
682}
683
684inline XMLEntityResolver* XercesDOMParser::getXMLEntityResolver()
685{
686    return fXMLEntityResolver;
687}
688
689inline const XMLEntityResolver* XercesDOMParser::getXMLEntityResolver() const
690{
691    return fXMLEntityResolver;
692}
693
694XERCES_CPP_NAMESPACE_END
695
696#endif
Note: See TracBrowser for help on using the repository browser.