source: icXML/icXML-devel/src/xercesc/util/regx/XMLRangeFactory.cpp @ 2722

Last change on this file since 2722 was 2722, checked in by cameron, 6 years ago

Original Xerces files with import mods for icxercesc

File size: 8.9 KB
Line 
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements.  See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License.  You may obtain a copy of the License at
8 *
9 *      http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18/*
19 * $Id: XMLRangeFactory.cpp 678879 2008-07-22 20:05:05Z amassari $
20 */
21
22// ---------------------------------------------------------------------------
23//  Includes
24// ---------------------------------------------------------------------------
25#include <xercesc/util/regx/XMLRangeFactory.hpp>
26#include <xercesc/internal/CharTypeTables.hpp>
27#include <xercesc/util/regx/RegxDefs.hpp>
28#include <xercesc/util/regx/TokenFactory.hpp>
29#include <xercesc/util/regx/RangeToken.hpp>
30#include <xercesc/util/regx/RangeTokenMap.hpp>
31#include <xercesc/util/regx/UnicodeRangeFactory.hpp>
32#include <xercesc/util/Janitor.hpp>
33#include <string.h>
34
35XERCES_CPP_NAMESPACE_BEGIN
36
37// ---------------------------------------------------------------------------
38//  Local static functions
39// ---------------------------------------------------------------------------
40static void setupRange(XMLInt32* const rangeMap,
41                       const XMLCh* const theTable,
42                       unsigned int startingIndex) {
43
44    const XMLCh* pchCur = theTable;
45
46    // Do the ranges first
47    while (*pchCur)
48    {
49        rangeMap[startingIndex++] = *pchCur++;
50    }
51
52    // Skip the range terminator
53    pchCur++;
54
55    // And then the singles until we hit its terminator
56    while (*pchCur) {
57
58        const XMLCh chSingle = *pchCur++;
59        rangeMap[startingIndex++] = chSingle;
60        rangeMap[startingIndex++] = chSingle;
61    }
62}
63
64static unsigned int getTableLen(const XMLCh* const theTable) {
65
66    XMLSize_t rangeLen = XMLString::stringLen(theTable);
67
68    return (unsigned int)(rangeLen + 2*XMLString::stringLen(theTable + rangeLen + 1));
69}
70
71// ---------------------------------------------------------------------------
72//  XMLRangeFactory: Constructors and Destructor
73// ---------------------------------------------------------------------------
74XMLRangeFactory::XMLRangeFactory()
75{
76
77}
78
79XMLRangeFactory::~XMLRangeFactory() {
80
81}
82
83// ---------------------------------------------------------------------------
84//  XMLRangeFactory: Range creation methods
85// ---------------------------------------------------------------------------
86void XMLRangeFactory::buildRanges(RangeTokenMap *rangeTokMap) {
87
88    if (fRangesCreated)
89        return;
90
91    if (!fKeywordsInitialized) {
92        initializeKeywordMap(rangeTokMap);
93    }
94
95    TokenFactory* tokFactory = rangeTokMap->getTokenFactory();
96
97    // Create space ranges
98    unsigned int wsTblLen = getTableLen(gWhitespaceChars);
99    RangeToken* tok = tokFactory->createRange();
100    XMLInt32* wsRange = (XMLInt32*) XMLPlatformUtils::fgMemoryManager->allocate
101    (
102        wsTblLen * sizeof(XMLInt32)
103    );//new XMLInt32[wsTblLen];
104
105    tok->setRangeValues(wsRange, wsTblLen);
106    setupRange(wsRange, gWhitespaceChars, 0);
107    // Build the internal map.
108    tok->createMap();
109    rangeTokMap->setRangeToken(fgXMLSpace, tok);
110
111    tok = RangeToken::complementRanges(tok, tokFactory);
112    // Build the internal map.
113    tok->createMap();
114    rangeTokMap->setRangeToken(fgXMLSpace, tok , true);
115
116    // Create digits ranges
117    tok = tokFactory->createRange();
118    unsigned int digitTblLen = getTableLen(gDigitChars);
119    XMLInt32* digitRange = (XMLInt32*) XMLPlatformUtils::fgMemoryManager->allocate
120    (
121        digitTblLen * sizeof(XMLInt32)
122    );//new XMLInt32[digitTblLen];
123
124    tok->setRangeValues(digitRange, digitTblLen);
125    setupRange(digitRange, gDigitChars, 0);
126    // Build the internal map.
127    tok->createMap();
128    rangeTokMap->setRangeToken(fgXMLDigit, tok);
129
130    tok = RangeToken::complementRanges(tok, tokFactory);
131    // Build the internal map.
132    tok->createMap();
133    rangeTokMap->setRangeToken(fgXMLDigit, tok , true);
134
135    // Build word ranges
136    unsigned int baseTblLen = getTableLen(gBaseChars);
137    unsigned int ideoTblLen = getTableLen(gIdeographicChars);
138    unsigned int wordRangeLen = baseTblLen + ideoTblLen + digitTblLen;
139    XMLInt32* wordRange = (XMLInt32*) XMLPlatformUtils::fgMemoryManager->allocate
140    (
141        wordRangeLen * sizeof(XMLInt32)
142    );//new XMLInt32[wordRangeLen];
143    ArrayJanitor<XMLInt32> janWordRange(wordRange, XMLPlatformUtils::fgMemoryManager);
144
145    setupRange(wordRange, gBaseChars, 0);
146    setupRange(wordRange, gIdeographicChars, baseTblLen);
147    memcpy(wordRange + baseTblLen + ideoTblLen, digitRange, digitTblLen * sizeof(XMLInt32));
148
149    // Create NameChar ranges
150    tok = tokFactory->createRange();
151    unsigned int combTblLen = getTableLen(gCombiningChars);
152    unsigned int extTblLen = getTableLen(gExtenderChars);
153    unsigned int nameTblLen = wordRangeLen + combTblLen + extTblLen;
154    XMLInt32* nameRange = (XMLInt32*) XMLPlatformUtils::fgMemoryManager->allocate
155    (
156        (nameTblLen + 8) * sizeof(XMLInt32)
157    );//new XMLInt32[nameTblLen + 8];
158
159    tok->setRangeValues(nameRange, nameTblLen + 8);
160    memcpy(nameRange, wordRange, wordRangeLen * sizeof(XMLInt32));
161    setupRange(nameRange, gCombiningChars, wordRangeLen);
162    setupRange(nameRange, gExtenderChars, wordRangeLen + combTblLen);
163    nameRange[nameTblLen++] = chDash;
164    nameRange[nameTblLen++] = chDash;
165    nameRange[nameTblLen++] = chColon;
166    nameRange[nameTblLen++] = chColon;
167    nameRange[nameTblLen++] = chPeriod;
168    nameRange[nameTblLen++] = chPeriod;
169    nameRange[nameTblLen++] = chUnderscore;
170    nameRange[nameTblLen++] = chUnderscore;
171    tok->sortRanges();
172    tok->compactRanges();
173    // Build the internal map.
174    tok->createMap();
175    rangeTokMap->setRangeToken(fgXMLNameChar, tok);
176
177    tok = RangeToken::complementRanges(tok, tokFactory);
178    // Build the internal map.
179    tok->createMap();
180    rangeTokMap->setRangeToken(fgXMLNameChar, tok , true);
181
182    // Create initialNameChar ranges
183    tok = tokFactory->createRange();
184    unsigned int initialNameTblLen = baseTblLen + ideoTblLen;
185    XMLInt32* initialNameRange = (XMLInt32*) XMLPlatformUtils::fgMemoryManager->allocate
186    (
187        (initialNameTblLen + 4) * sizeof(XMLInt32)
188    );//new XMLInt32[initialNameTblLen + 4];
189
190    tok->setRangeValues(initialNameRange, initialNameTblLen + 4);
191    memcpy(initialNameRange, wordRange, initialNameTblLen * sizeof(XMLInt32));
192    initialNameRange[initialNameTblLen++] = chColon;
193    initialNameRange[initialNameTblLen++] = chColon;
194    initialNameRange[initialNameTblLen++] = chUnderscore;
195    initialNameRange[initialNameTblLen++] = chUnderscore;
196    tok->sortRanges();
197    tok->compactRanges();
198    // Build the internal map.
199    tok->createMap();
200    rangeTokMap->setRangeToken(fgXMLInitialNameChar, tok);
201
202    tok = RangeToken::complementRanges(tok, tokFactory);
203    // Build the internal map.
204    tok->createMap();
205    rangeTokMap->setRangeToken(fgXMLInitialNameChar, tok , true);
206
207    // Create word range
208    // \w = [#x0000-#x10FFFF]-[\p{P}\p{Z}\p{C}] (all characters except the set of "punctuation", "separator" and "other" characters)
209    tok = tokFactory->createRange();
210    for(int i=0; i<=0xFFFF; i++)
211    {
212        unsigned short chType=UnicodeRangeFactory::getUniCategory(XMLUniCharacter::getType(i));
213        if(chType == UnicodeRangeFactory::CHAR_PUNCTUATION || 
214           chType == UnicodeRangeFactory::CHAR_SEPARATOR || 
215           chType == UnicodeRangeFactory::CHAR_OTHER)
216            tok->addRange(i, i);
217    }
218    tok->sortRanges();
219    tok->compactRanges();
220    // Build the internal map.
221    tok->createMap();
222    rangeTokMap->setRangeToken(fgXMLWord, tok , true);
223
224    tok = RangeToken::complementRanges(tok, tokFactory);
225    // Build the internal map.
226    tok->createMap();
227    rangeTokMap->setRangeToken(fgXMLWord, tok);
228
229
230    fRangesCreated = true;
231}
232
233// ---------------------------------------------------------------------------
//  XMLRangeFactory: Keyword map initialization
235// ---------------------------------------------------------------------------
236void XMLRangeFactory::initializeKeywordMap(RangeTokenMap *rangeTokMap) {
237
238    if (fKeywordsInitialized)
239        return;
240
241    rangeTokMap->addKeywordMap(fgXMLSpace, fgXMLCategory);
242    rangeTokMap->addKeywordMap(fgXMLDigit, fgXMLCategory);
243    rangeTokMap->addKeywordMap(fgXMLWord, fgXMLCategory);
244    rangeTokMap->addKeywordMap(fgXMLNameChar, fgXMLCategory);
245    rangeTokMap->addKeywordMap(fgXMLInitialNameChar, fgXMLCategory);
246
247    fKeywordsInitialized = true;
248}
249
250XERCES_CPP_NAMESPACE_END
251
252/**
253  * End of file XMLRangeFactory.cpp
254  */
Note: See TracBrowser for help on using the repository browser.