source: icXML/icXML-devel/src/xercesc/util/regx/RegxUtil.hpp @ 2722

Last change on this file since 2722 was 2722, checked in by cameron, 6 years ago

Original Xerces files with import mods for icxercesc

File size: 3.5 KB
Line 
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements.  See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License.  You may obtain a copy of the License at
8 *
9 *      http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18/*
19 * $Id: RegxUtil.hpp 678879 2008-07-22 20:05:05Z amassari $
20 */
21
22#if !defined(XERCESC_INCLUDE_GUARD_REGXUTIL_HPP)
23#define XERCESC_INCLUDE_GUARD_REGXUTIL_HPP
24
25// ---------------------------------------------------------------------------
26//  Includes
27// ---------------------------------------------------------------------------
28#include <xercesc/util/XMLUniDefs.hpp>
29
30
31XERCES_CPP_NAMESPACE_BEGIN
32
33class MemoryManager;
34
35class XMLUTIL_EXPORT RegxUtil {
36public:
37
38    // -----------------------------------------------------------------------
39    //  Constructors and destructors
40    // -----------------------------------------------------------------------
41    ~RegxUtil() {}
42
43    static XMLInt32 composeFromSurrogate(const XMLCh high, const XMLCh low);
44    static bool isEOLChar(const XMLCh);
45    static bool isWordChar(const XMLCh);
46    static bool isLowSurrogate(const XMLCh ch);
47    static bool isHighSurrogate(const XMLCh ch);
48    static void decomposeToSurrogates(XMLInt32 ch, XMLCh& high, XMLCh& low);
49
50    static XMLCh* decomposeToSurrogates(XMLInt32 ch,
51                                        MemoryManager* const manager);
52    static XMLCh* stripExtendedComment(const XMLCh* const expression,
53                                       MemoryManager* const manager = 0);
54
55private:
56    // -----------------------------------------------------------------------
57    //  Unimplemented constructors and operators
58    // -----------------------------------------------------------------------
59    RegxUtil();
60};
61
62
63inline bool RegxUtil::isEOLChar(const XMLCh ch) {
64
65    return (ch == chLF || ch == chCR || ch == chLineSeparator
66           || ch == chParagraphSeparator);
67}
68
69inline XMLInt32 RegxUtil::composeFromSurrogate(const XMLCh high, const XMLCh low) {
70    // see http://unicode.org/unicode/faq/utf_bom.html#35
71    const XMLInt32 SURROGATE_OFFSET = 0x10000 - (0xD800 << 10) - 0xDC00;
72    return (high << 10) + low + SURROGATE_OFFSET;
73}
74
75inline bool RegxUtil::isLowSurrogate(const XMLCh ch) {
76
77    return (ch & 0xFC00) == 0xDC00;
78}
79
80inline bool RegxUtil::isHighSurrogate(const XMLCh ch) {
81
82    return (ch & 0xFC00) == 0xD800;
83}
84
85inline void RegxUtil::decomposeToSurrogates(XMLInt32 ch, XMLCh& high, XMLCh& low) {
86    // see http://unicode.org/unicode/faq/utf_bom.html#35
87    const XMLInt32 LEAD_OFFSET = 0xD800 - (0x10000 >> 10);
88    high = XMLCh(LEAD_OFFSET + (ch >> 10));
89    low = XMLCh(0xDC00 + (ch & 0x3FF));
90}
91
92inline bool RegxUtil::isWordChar(const XMLCh ch) {
93
94    if ((ch == chUnderscore)
95        || (ch >= chDigit_0 && ch <= chDigit_9)
96        || (ch >= chLatin_A && ch <= chLatin_Z)
97        || (ch >= chLatin_a && ch <= chLatin_z))
98        return true;
99
100    return false;
101}
102
103XERCES_CPP_NAMESPACE_END
104
105#endif
106
107/**
108  * End of file RegxUtil.hpp
109  */
Note: See TracBrowser for help on using the repository browser.