source: icXML/icXML-devel/src/icxercesc/framework/XMLRecognizer.hpp @ 2720

Last change on this file since 2720 was 2720, checked in by cameron, 6 years ago

Initial check-in of icXML 0.8 source files

File size: 5.5 KB
Line 
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements.  See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License.  You may obtain a copy of the License at
8 *
9 *      http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18/*
19 *  $Id: XMLRecognizer.hpp 555320 2007-07-11 16:05:13Z amassari $
20 */
21
22#if !defined(XERCESC_INCLUDE_GUARD_XMLRECOGNIZER_HPP)
23#define XERCESC_INCLUDE_GUARD_XMLRECOGNIZER_HPP
24
25#include <xercesc/util/XercesDefs.hpp>
26#include <xercesc/util/PlatformUtils.hpp>
27
28XERCES_CPP_NAMESPACE_BEGIN
29
30/**
31 *  This class provides some simple code to recognize the encodings of
32 *  XML files. This recognition only does very basic sensing of the encoding
33 *  in a broad sense. Basically its just enough to let us get started and
34 *  read the XMLDecl line. The scanner, once it reads the XMLDecl, will
35 *  tell the reader any actual encoding string it found and the reader can
36 *  update itself to be more specific at that point.
37 */
38class XMLPARSER_EXPORT XMLRecognizer
39{
40public :
41    // -----------------------------------------------------------------------
42    //  Class types
43    //
44    //  This enum represents the various encoding families that we have to
45    //  deal with individually at the scanner level. This does not indicate
46    //  the exact encoding, just the rough family that would let us scan
47    //  the XML/TextDecl to find the encoding string.
48    //
49    //  The 'L's and 'B's stand for little or big endian.
50    //
51    //  OtherEncoding means that its some transcoder based encoding, i.e. not
52    //  one of the ones that we do internally. Its a special case and should
53    //  never be used directly outside of the reader.
54    //
55    //  NOTE: Keep this in sync with the name map array in the Cpp file!!
56    // -----------------------------------------------------------------------
57    enum Encodings
58    {
59        EBCDIC          = 0
60        , UCS_4B        = 1
61        , UCS_4L        = 2
62        , US_ASCII      = 3
63        , UTF_8         = 4
64        , UTF_16B       = 5
65        , UTF_16L       = 6
66        , XERCES_XMLCH  = 7
67
68        , Encodings_Count
69        , Encodings_Min = EBCDIC
70        , Encodings_Max = XERCES_XMLCH
71
72        , OtherEncoding = 999
73    };
74
75    // -----------------------------------------------------------------------
76    //  Public, const static data
77    //
78    //  These are the byte sequences for each of the encodings that we can
79    //  auto sense, and their lengths.
80    // -----------------------------------------------------------------------
81    static const char           fgASCIIPre[];
82    static const XMLSize_t      fgASCIIPreLen;
83    static const XMLByte        fgEBCDICPre[];
84    static const XMLSize_t      fgEBCDICPreLen;
85    static const XMLByte        fgUTF16BPre[];
86    static const XMLByte        fgUTF16LPre[];
87    static const XMLSize_t      fgUTF16PreLen;
88    static const XMLByte        fgUCS4BPre[];
89    static const XMLByte        fgUCS4LPre[];
90    static const XMLSize_t      fgUCS4PreLen;
91    static const char           fgUTF8BOM[];
92    static const XMLSize_t      fgUTF8BOMLen;
93
94
95    // -----------------------------------------------------------------------
96    //  Encoding recognition methods
97    // -----------------------------------------------------------------------
98    static Encodings basicEncodingProbe
99    (
100        const   XMLByte* const      rawBuffer
101        , const XMLSize_t           rawByteCount
102    );
103
104    static Encodings encodingForName
105    (
106        const   XMLCh* const    theEncName
107    );
108
109    static const XMLCh* nameForEncoding(const Encodings theEncoding
110        , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
111
112
113    static inline bool isSwapped(const XMLRecognizer::Encodings fEncoding);
114
115protected :
116    // -----------------------------------------------------------------------
117    //  Unimplemented constructors, operators, and destructor
118    //
119    //  This class is effectively being used as a namespace for some static
120    //  methods.
121    //
122    //   (these functions are protected rather than private only to get rid of
123    //    some annoying compiler warnings.)
124    //
125    // -----------------------------------------------------------------------
126    XMLRecognizer();
127    ~XMLRecognizer();
128
129private:
130    // -----------------------------------------------------------------------
131    //  Unimplemented constructors and operators
132    // -----------------------------------------------------------------------
133    XMLRecognizer(const XMLRecognizer&);   
134    XMLRecognizer& operator=(const XMLRecognizer&);
135};
136
137//
138//  This is called when the encoding flag is set and just sets the fSwapped
139//  flag appropriately.
140//
141bool
142XMLRecognizer::isSwapped(const XMLRecognizer::Encodings fEncoding)
143{
144    if (XMLPlatformUtils::fgXMLChBigEndian)
145    {
146        return ((fEncoding == XMLRecognizer::UTF_16L) || (fEncoding == XMLRecognizer::UCS_4L));
147    }
148    else
149    {
150        return ((fEncoding == XMLRecognizer::UTF_16B) || (fEncoding == XMLRecognizer::UCS_4B));
151    }
152}
153
154
155XERCES_CPP_NAMESPACE_END
156
157#endif
Note: See TracBrowser for help on using the repository browser.