// Source: icXML/icXML-devel/src/xercesc/framework/XMLFormatter.hpp @ 2722
//
// Last change on this file since 2722 was 2722, checked in by cameron, 6 years ago:
//
//   Original Xerces files with import mods for icxercesc
//
// File size: 16.8 KB
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * $Id: XMLFormatter.hpp 932887 2010-04-11 13:04:59Z borisk $
 */
21
22#if !defined(XERCESC_INCLUDE_GUARD_XMLFORMATTER_HPP)
23#define XERCESC_INCLUDE_GUARD_XMLFORMATTER_HPP
24
25#include <icxercesc/util/PlatformUtils.hpp>
26
27XERCES_CPP_NAMESPACE_BEGIN
28
29class XMLFormatTarget;
30class XMLTranscoder;
31
32/**
33 *  This class provides the basic formatting capabilities that are required
34 *  to turn the Unicode based XML data from the parsers into a form that can
35 *  be used on non-Unicode based systems, that is, into local or generic text
36 *  encodings.
37 *
38 *  A number of flags are provided to control whether various optional
39 *  formatting operations are performed.
40 */
41class XMLPARSER_EXPORT XMLFormatter : public XMemory
42{
43public:
44    // -----------------------------------------------------------------------
45    //  Class types
46    // -----------------------------------------------------------------------
47    /** @name Public Constants */
48    //@{
49    /**
50     * EscapeFlags - Different styles of escape flags to control various formatting.
51     *
52     * <p><code>NoEscapes:</code>
53     * No character needs to be escaped.   Just write them out as is.</p>
54     * <p><code>StdEscapes:</code>
55     * The following characters need to be escaped:</p>
56     * <table border='1'>
57     * <tr>
58     * <td>character</td>
59     * <td>should be escaped and written as</td>
60     * </tr>
61     * <tr>
62     * <td valign='top' rowspan='1' colspan='1'>&amp;</td>
63     * <td valign='top' rowspan='1' colspan='1'>&amp;amp;</td>
64     * </tr>
65     * <tr>
66     * <td valign='top' rowspan='1' colspan='1'>&gt;</td>
67     * <td valign='top' rowspan='1' colspan='1'>&amp;gt;</td>
68     * </tr>
69     * <tr>
70     * <td valign='top' rowspan='1' colspan='1'>&quot;</td>
71     * <td valign='top' rowspan='1' colspan='1'>&amp;quot;</td>
72     * </tr>
73     * <tr>
74     * <td valign='top' rowspan='1' colspan='1'>&lt;</td>
75     * <td valign='top' rowspan='1' colspan='1'>&amp;lt;</td>
76     * </tr>
77     * <tr>
78     * <td valign='top' rowspan='1' colspan='1'>&apos;</td>
79     * <td valign='top' rowspan='1' colspan='1'>&amp;apos;</td>
80     * </tr>
81     * </table>
82     * <p><code>AttrEscapes:</code>
83     * The following characters need to be escaped:</p>
84     * <table border='1'>
85     * <tr>
86     * <td>character</td>
87     * <td>should be escaped and written as</td>
88     * </tr>
89     * <tr>
90     * <td valign='top' rowspan='1' colspan='1'>&amp;</td>
91     * <td valign='top' rowspan='1' colspan='1'>&amp;amp;</td>
92     * </tr>
93     * <tr>
94     * <td valign='top' rowspan='1' colspan='1'>&gt;</td>
95     * <td valign='top' rowspan='1' colspan='1'>&amp;gt;</td>
96     * </tr>
97     * <tr>
98     * <td valign='top' rowspan='1' colspan='1'>&quot;</td>
99     * <td valign='top' rowspan='1' colspan='1'>&amp;quot;</td>
100     * </tr>
101     * </table>
102     * <p><code>CharEscapes:</code>
103     * The following characters need to be escaped:</p>
104     * <table border='1'>
105     * <tr>
106     * <td>character</td>
107     * <td>should be escaped and written as</td>
108     * </tr>
109     * <tr>
110     * <td valign='top' rowspan='1' colspan='1'>&amp;</td>
111     * <td valign='top' rowspan='1' colspan='1'>&amp;amp;</td>
112     * </tr>
113     * <tr>
114     * <td valign='top' rowspan='1' colspan='1'>&gt;</td>
115     * <td valign='top' rowspan='1' colspan='1'>&amp;gt;</td>
116     * </tr>
117     * </table>
118     * <p><code>EscapeFlags_Count:</code>
119     * Special value, do not use directly.</p>
120     * <p><code>DefaultEscape:</code>
121     * Special value, do not use directly.</p>
122     *
123     */
124    enum EscapeFlags
125    {
126        NoEscapes
127        , StdEscapes
128        , AttrEscapes
129        , CharEscapes
130
131        // Special values, don't use directly
132        , EscapeFlags_Count
133        , DefaultEscape     = 999
134    };
135
136    /**
137     * UnRepFlags
138     *
139     * The unrepresentable flags that indicate how to react when a
140     * character cannot be represented in the target encoding.
141     *
142     * <p><code>UnRep_Fail:</code>
143     * Fail the operation.</p>
144     * <p><code>UnRep_CharRef:</code>
145     * Display the unrepresented character as reference.</p>
146     * <p><code>UnRep_Replace:</code>
147     * Replace the unrepresented character with the replacement character.</p>
148     * <p><code>DefaultUnRep:</code>
149     * Special value, do not use directly.</p>
150     *
151     */
152    enum UnRepFlags
153    {
154        UnRep_Fail
155        , UnRep_CharRef
156        , UnRep_Replace
157
158        , DefaultUnRep      = 999
159    };
160    //@}
161
162
163    // -----------------------------------------------------------------------
164    //  Constructors and Destructor
165    // -----------------------------------------------------------------------
166    /** @name Constructor and Destructor */
167    //@{
168    /**
169     * @param outEncoding the encoding for the formatted content.
170     * @param docVersion  the document version.
171     * @param target      the formatTarget where the formatted content is written to.
172     * @param escapeFlags the escape style for certain character.
173     * @param unrepFlags  the reaction to unrepresentable character.
174     * @param manager     Pointer to the memory manager to be used to
175     *                    allocate objects.
176     */
177    XMLFormatter
178    (
179        const   XMLCh* const            outEncoding
180        , const XMLCh* const            docVersion
181        ,       XMLFormatTarget* const  target
182        , const EscapeFlags             escapeFlags = NoEscapes
183        , const UnRepFlags              unrepFlags = UnRep_Fail
184        ,       MemoryManager* const    manager = XMLPlatformUtils::fgMemoryManager
185    );
186
187    XMLFormatter
188    (
189        const   char* const             outEncoding
190        , const char* const             docVersion
191        ,       XMLFormatTarget* const  target
192        , const EscapeFlags             escapeFlags = NoEscapes
193        , const UnRepFlags              unrepFlags = UnRep_Fail
194        ,       MemoryManager* const    manager = XMLPlatformUtils::fgMemoryManager
195    );
196
197    XMLFormatter
198    (
199        const   XMLCh* const            outEncoding
200        ,       XMLFormatTarget* const  target
201        , const EscapeFlags             escapeFlags = NoEscapes
202        , const UnRepFlags              unrepFlags = UnRep_Fail
203        ,       MemoryManager* const    manager = XMLPlatformUtils::fgMemoryManager
204    );
205
206    XMLFormatter
207    (
208        const   char* const             outEncoding
209        ,       XMLFormatTarget* const  target
210        , const EscapeFlags             escapeFlags = NoEscapes
211        , const UnRepFlags              unrepFlags = UnRep_Fail
212        ,       MemoryManager* const    manager = XMLPlatformUtils::fgMemoryManager
213    );
214
215    ~XMLFormatter();
216    //@}
217
218
219    // -----------------------------------------------------------------------
220    //  Formatting methods
221    // -----------------------------------------------------------------------
222    /** @name Formatting methods */
223    //@{
224    /**
225     * @param toFormat the string to be formatted
226     * @param count    length of the string
227     * @param escapeFlags the escape style for formatting toFormat
228     * @param unrepFlags the reaction for any unrepresentable character in toFormat
229     *
230     */
231    void formatBuf
232    (
233        const   XMLCh* const    toFormat
234        , const XMLSize_t       count
235        , const EscapeFlags     escapeFlags = DefaultEscape
236        , const UnRepFlags      unrepFlags = DefaultUnRep
237    );
238
239    /**
240     * @see formatBuf
241     */
242    XMLFormatter& operator<<
243    (
244        const   XMLCh* const    toFormat
245    );
246
247    XMLFormatter& operator<<
248    (
249        const   XMLCh           toFormat
250    );
251
252    void writeBOM(const XMLByte* const toFormat
253                , const XMLSize_t      count);
254
255    //@}
256
257    // -----------------------------------------------------------------------
258    //  Getter methods
259    // -----------------------------------------------------------------------
260    /** @name Getter methods */
261    //@{
262    /**
263     * @return return the encoding set for the formatted content
264     */
265
266    const XMLCh* getEncodingName() const;
267
268    /**
269     * @return return constant transcoder used internally for transcoding the formatter conent
270     */
271    inline const XMLTranscoder*   getTranscoder() const;
272
273    /**
274     * @return return the transcoder used internally for transcoding the formatter content
275     */
276    inline XMLTranscoder*   getTranscoder();
277
278   //@}
279
280    // -----------------------------------------------------------------------
281    //  Setter methods
282    // -----------------------------------------------------------------------
283    /** @name Setter methods */
284    //@{
285    /**
286     * @param newFlags set the escape style for the follow-on formatted content
287     */
288    void setEscapeFlags
289    (
290        const   EscapeFlags     newFlags
291    );
292
293    /**
294     * @param newFlags set the reaction for unrepresentable character
295     */
296    void setUnRepFlags
297    (
298        const   UnRepFlags      newFlags
299    );
300
301    /**
302     * @param newFlags set the escape style for the follow-on formatted content
303     * @see setEscapeFlags
304     */
305    XMLFormatter& operator<<
306    (
307        const   EscapeFlags     newFlags
308    );
309
310    /**
311     * @param newFlags set the reaction for unrepresentable character
312     * @see setUnRepFlags
313     */
314    XMLFormatter& operator<<
315    (
316        const   UnRepFlags      newFlags
317    );
318    //@}
319
320    // -----------------------------------------------------------------------
321    //  Getter methods
322    // -----------------------------------------------------------------------
323    /** @name Setter methods */
324    //@{
325    /**
326     * @return return the escape style for the formatted content
327     */
328    EscapeFlags getEscapeFlags() const;
329
330    /**
331     * @return return the reaction for unrepresentable character
332     */
333    UnRepFlags getUnRepFlags() const;
334    //@}
335
336private :
337    // -----------------------------------------------------------------------
338    //  Unimplemented constructors and operators
339    // -----------------------------------------------------------------------
340    XMLFormatter();
341    XMLFormatter(const XMLFormatter&);
342    XMLFormatter& operator=(const XMLFormatter&);
343
344
345    // -----------------------------------------------------------------------
346    //  Private class constants
347    // -----------------------------------------------------------------------
348    enum Constants
349    {
350        kTmpBufSize     = 16 * 1024
351    };
352
353
354    // -----------------------------------------------------------------------
355    //  Private helper methods
356    // -----------------------------------------------------------------------
357    const XMLByte* getCharRef(XMLSize_t     &count,
358                              XMLByte*      &ref,
359                              const XMLCh *  stdRef);
360
361    void writeCharRef(const XMLCh &toWrite);
362    void writeCharRef(XMLSize_t toWrite);
363
364    bool inEscapeList(const XMLFormatter::EscapeFlags escStyle
365                    , const XMLCh                     toCheck);
366
367
368    XMLSize_t handleUnEscapedChars(const XMLCh *      srcPtr,
369                                   const XMLSize_t    count,
370                                   const UnRepFlags   unrepFlags);
371
372    void specialFormat
373    (
374        const   XMLCh* const    toFormat
375        , const XMLSize_t       count
376        , const EscapeFlags     escapeFlags
377    );
378
379
380    // -----------------------------------------------------------------------
381    //  Private, non-virtual methods
382    //
383    //  fEscapeFlags
384    //      The escape flags we were told to use in formatting. These are
385    //      defaults set in the ctor, which can be overridden on a particular
386    //      call.
387    //
388    //  fOutEncoding
389    //      This the name of the output encoding. Saved mainly for meaningful
390    //      error messages.
391    //
392    //  fTarget
393    //      This is the target object for the formatting operation.
394    //
395    //  fUnRepFlags
396    //      The unrepresentable flags that indicate how to react when a
397    //      character cannot be represented in the target encoding.
398    //
399    //  fXCoder
400    //      This the transcoder that we will use. It is created using the
401    //      encoding name we were told to use.
402    //
403    //  fTmpBuf
404    //      An output buffer that we use to transcode chars into before we
405    //      send them off to be output.
406    //
407    //  fAposRef
408    //  fAmpRef
409    //  fGTRef
410    //  fLTRef
411    //  fQuoteRef
412    //      These are character refs for the standard char refs, in the
413    //      output encoding. They are faulted in as required, by transcoding
414    //      them from fixed Unicode versions.
415    //
416    //  fIsXML11
417    //      for performance reason, we do not store the actual version string
418    //      and do the string comparison again and again.
419    //
420    // -----------------------------------------------------------------------
421    EscapeFlags                 fEscapeFlags;
422    XMLCh*                      fOutEncoding;
423    XMLFormatTarget*            fTarget;
424    UnRepFlags                  fUnRepFlags;
425    XMLTranscoder*              fXCoder;
426    XMLByte                     fTmpBuf[kTmpBufSize + 4];
427    XMLByte*                    fAposRef;
428    XMLSize_t                   fAposLen;
429    XMLByte*                    fAmpRef;
430    XMLSize_t                   fAmpLen;
431    XMLByte*                    fGTRef;
432    XMLSize_t                   fGTLen;
433    XMLByte*                    fLTRef;
434    XMLSize_t                   fLTLen;
435    XMLByte*                    fQuoteRef;
436    XMLSize_t                   fQuoteLen;
437    bool                        fIsXML11;
438    MemoryManager*              fMemoryManager;
439};
440
441
442class XMLPARSER_EXPORT XMLFormatTarget : public XMemory
443{
444public:
445    // -----------------------------------------------------------------------
446    //  Constructors and Destructor
447    // -----------------------------------------------------------------------
448    virtual ~XMLFormatTarget() {}
449
450
451    // -----------------------------------------------------------------------
452    //  Virtual interface
453    // -----------------------------------------------------------------------
454    virtual void writeChars
455    (
456          const XMLByte* const      toWrite
457        , const XMLSize_t           count
458        ,       XMLFormatter* const formatter
459    ) = 0;
460
461    virtual void flush() {};
462
463
464protected :
465    // -----------------------------------------------------------------------
466    //  Hidden constructors and operators
467    // -----------------------------------------------------------------------
468    XMLFormatTarget() {};
469
470private:
471    // -----------------------------------------------------------------------
472    //  Unimplemented constructors and operators
473    // -----------------------------------------------------------------------
474    XMLFormatTarget(const XMLFormatTarget&);
475    XMLFormatTarget& operator=(const XMLFormatTarget&);
476};
477
478
479// ---------------------------------------------------------------------------
480//  XMLFormatter: Getter methods
481// ---------------------------------------------------------------------------
482inline const XMLCh* XMLFormatter::getEncodingName() const
483{
484    return fOutEncoding;
485}
486
487inline const XMLTranscoder* XMLFormatter::getTranscoder() const
488{
489    return fXCoder;
490}
491
492inline XMLTranscoder* XMLFormatter::getTranscoder()
493{
494    return fXCoder;
495}
496
497// ---------------------------------------------------------------------------
498//  XMLFormatter: Setter methods
499// ---------------------------------------------------------------------------
500inline void XMLFormatter::setEscapeFlags(const EscapeFlags newFlags)
501{
502    fEscapeFlags = newFlags;
503}
504
505inline void XMLFormatter::setUnRepFlags(const UnRepFlags newFlags)
506{
507    fUnRepFlags = newFlags;
508}
509
510
511inline XMLFormatter& XMLFormatter::operator<<(const EscapeFlags newFlags)
512{
513    fEscapeFlags = newFlags;
514    return *this;
515}
516
517inline XMLFormatter& XMLFormatter::operator<<(const UnRepFlags newFlags)
518{
519    fUnRepFlags = newFlags;
520    return *this;
521}
522
523// ---------------------------------------------------------------------------
524//  XMLFormatter: Getter methods
525// ---------------------------------------------------------------------------
526inline XMLFormatter::EscapeFlags XMLFormatter::getEscapeFlags() const
527{
528    return fEscapeFlags;
529}
530
531inline XMLFormatter::UnRepFlags XMLFormatter::getUnRepFlags() const
532{
533    return fUnRepFlags;
534}
535
536XERCES_CPP_NAMESPACE_END
537
538#endif
// Note: See TracBrowser for help on using the repository browser.