source: icXML/icXML-devel/src/icxercesc/util/XMLUCS4Transcoder.cpp @ 2720

Last change on this file since 2720 was 2720, checked in by cameron, 6 years ago

Initial check-in of icXML 0.8 source files

File size: 10.0 KB
Line 
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements.  See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License.  You may obtain a copy of the License at
8 *
9 *      http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18
19// ---------------------------------------------------------------------------
20//  Includes
21// ---------------------------------------------------------------------------
22#include <xercesc/util/BitOps.hpp>
23#include <xercesc/util/XMLUCS4Transcoder.hpp>
24#include <xercesc/util/TranscodingException.hpp>
25#include <string.h>
26
27XERCES_CPP_NAMESPACE_BEGIN
28
29// ---------------------------------------------------------------------------
30//  XMLUCS4Transcoder: Constructors and Destructor
31// ---------------------------------------------------------------------------
32XMLUCS4Transcoder::XMLUCS4Transcoder(const  XMLCh* const    encodingName
33                                    , const XMLSize_t       blockSize
34                                    , const bool            swapped
35                                    , MemoryManager* const manager) :
36
37    XMLTranscoder(encodingName, blockSize, manager)
38    , fSwapped(swapped)
39{
40}
41
42
43XMLUCS4Transcoder::~XMLUCS4Transcoder()
44{
45}
46
47
48// ---------------------------------------------------------------------------
49//  XMLUCS4Transcoder: Implementation of the transcoder API
50// ---------------------------------------------------------------------------
51XMLSize_t
52XMLUCS4Transcoder::transcodeFrom(const  XMLByte* const          srcData
53                                , const XMLSize_t               srcCount
54                                ,       XMLCh* const            toFill
55                                , const XMLSize_t               maxChars
56                                ,       XMLSize_t&              bytesEaten
57                                ,       unsigned char* const    charSizes)
58{
59    //
60    //  Get pointers to the start and end of the source buffer in terms of
61    //  UCS-4 characters.
62    //
63    const UCS4Ch*   srcPtr = (const UCS4Ch*)srcData;
64    const UCS4Ch*   srcEnd = srcPtr + (srcCount / sizeof(UCS4Ch));
65
66    //
67    //  Get pointers to the start and end of the target buffer, which is
68    //  in terms of the XMLCh chars we output.
69    //
70    XMLCh*  outPtr = toFill;
71    XMLCh*  outEnd = toFill + maxChars;
72
73    //
74    //  And get a pointer into the char sizes buffer. We will run this
75    //  up as we put chars into the output buffer.
76    //
77    unsigned char* sizePtr = charSizes;
78
79    //
80    //  Now process chars until we either use up all our source or all of
81    //  our output space.
82    //
83    while ((outPtr < outEnd) && (srcPtr < srcEnd))
84    {
85        //
86        //  Get the next UCS char out of the buffer. Don't bump the ptr
87        //  yet since we might not have enough storage for it in the target
88        //  (if its causes a surrogate pair to be created.
89        //
90        UCS4Ch nextVal = *srcPtr;
91
92        // If it needs to be swapped, then do it
93        if (fSwapped)
94            nextVal = BitOps::swapBytes(nextVal);
95
96        // Handle a surrogate pair if needed
97        if (nextVal & 0xFFFF0000)
98        {
99            //
100            //  If we don't have room for both of the chars, then we
101            //  bail out now.
102            //
103            if (outPtr + 1 == outEnd)
104                break;
105
106            const XMLInt32 LEAD_OFFSET = 0xD800 - (0x10000 >> 10);
107                const XMLCh ch1 = XMLCh(LEAD_OFFSET + (nextVal >> 10));
108                const XMLCh ch2 = XMLCh(0xDC00 + (nextVal & 0x3FF));
109
110            //
111            //  We have room so store them both. But note that the
112            //  second one took up no source bytes!
113            //
114            *sizePtr++ = sizeof(UCS4Ch);
115            *outPtr++ = ch1;
116            *sizePtr++ = 0;
117            *outPtr++ = ch2;
118        }
119         else
120        {
121            //
122            //  No surrogate, so just store it and bump the count of chars
123            //  read. Update the char sizes buffer for this char's entry.
124            //
125            *sizePtr++ = sizeof(UCS4Ch);
126            *outPtr++ = XMLCh(nextVal);
127        }
128
129        // Indicate that we ate another UCS char's worth of bytes
130        srcPtr++;
131    }
132
133    // Set the bytes eaten parameter
134    bytesEaten = ((const XMLByte*)srcPtr) - srcData;
135
136    // And return the chars written into the output buffer
137    return outPtr - toFill;
138}
139
140
141XMLSize_t
142XMLUCS4Transcoder::transcodeFrom
143(
144      const XMLByte* const          srcData
145    , const XMLSize_t               srcCount
146    ,       XMLBuffer &             toFill
147)
148{
149    toFill.ensureCapacity((srcCount * 2) / sizeof(XMLCh));
150
151    //
152    //  Get pointers to the start and end of the source buffer in terms of
153    //  UCS-4 characters.
154    //
155    const UCS4Ch*   srcPtr = (const UCS4Ch*)srcData;
156    const UCS4Ch*   srcEnd = srcPtr + (srcCount / sizeof(UCS4Ch));
157
158    //
159    //  Get pointers to the start and end of the target buffer, which is
160    //  in terms of the XMLCh chars we output.
161    //
162    XMLCh*  outPtr = toFill.getRawBuffer();
163
164    //
165    //  Now process chars until we either use up all our source or all of
166    //  our output space.
167    //
168    while (srcPtr < srcEnd)
169    {
170        //
171        //  Get the next UCS char out of the buffer. Don't bump the ptr
172        //  yet since we might not have enough storage for it in the target
173        //  (if its causes a surrogate pair to be created.
174        //
175        UCS4Ch nextVal = *srcPtr;
176
177        // If it needs to be swapped, then do it
178        if (fSwapped)
179            nextVal = BitOps::swapBytes(nextVal);
180
181        // Handle a surrogate pair if needed
182        if (nextVal & 0xFFFF0000)
183        {
184            const XMLInt32 LEAD_OFFSET = 0xD800 - (0x10000 >> 10);
185            const XMLCh ch1 = XMLCh(LEAD_OFFSET + (nextVal >> 10));
186            const XMLCh ch2 = XMLCh(0xDC00 + (nextVal & 0x3FF));
187
188            *outPtr++ = ch1;
189            *outPtr++ = ch2;
190        }
191         else
192        {
193            //
194            //  No surrogate, so just store it and bump the count of chars
195            //  read. Update the char sizes buffer for this char's entry.
196            //
197            *outPtr++ = XMLCh(nextVal);
198        }
199
200        // Indicate that we ate another UCS char's worth of bytes
201        srcPtr++;
202    }
203
204    const XMLSize_t outLen = outPtr - toFill.getRawBuffer();
205
206    toFill.setLen(outLen);
207
208    // And return the chars written into the output buffer
209    return outLen;
210
211}
212
213
214XMLSize_t
215XMLUCS4Transcoder::transcodeTo( const   XMLCh* const    srcData
216                                , const XMLSize_t       srcCount
217                                ,       XMLByte* const  toFill
218                                , const XMLSize_t       maxBytes
219                                ,       XMLSize_t&      charsEaten
220                                , const UnRepOpts)
221{
222    //
223    //  Get pointers to the start and end of the source buffer, which
224    //  is in terms of XMLCh chars.
225    //
226    const XMLCh*  srcPtr = srcData;
227    const XMLCh*  srcEnd = srcData + srcCount;
228
229    //
230    //  Get pointers to the start and end of the target buffer, in terms
231    //  of UCS-4 chars.
232    //
233    UCS4Ch*   outPtr = (UCS4Ch*)toFill;
234    UCS4Ch*   outEnd = outPtr + (maxBytes / sizeof(UCS4Ch));
235
236    //
237    //  Now loop until we either run out of source characters or we
238    //  fill up our output buffer.
239    //
240    XMLCh trailCh;
241    while ((outPtr < outEnd) && (srcPtr < srcEnd))
242    {
243        //
244        //  Get out an XMLCh char from the source. Don't bump up the
245        //  pointer yet, since it might be a leading for which we don't
246        //  have the trailing.
247        //
248        const XMLCh curCh = *srcPtr;
249
250        //
251        //  If its a leading char of a surrogate pair handle it one way,
252        //  else just cast it over into the target.
253        //
254        if ((curCh >= 0xD800) && (curCh <= 0xDBFF))
255        {
256            //
257            //  Ok, we have to have another source char available or we
258            //  just give up without eating the leading char.
259            //
260            if (srcPtr + 1 == srcEnd)
261                break;
262
263            //
264            //  We have the trailing char, so eat the first char and the
265            //  trailing char from the source.
266            //
267            srcPtr++;
268            trailCh = *srcPtr++;
269
270            //
271            //  Then make sure its a legal trailing char. If not, throw
272            //  an exception.
273            //
274            if ( !( (trailCh >= 0xDC00) && (trailCh <= 0xDFFF) ) )
275                ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadTrailingSurrogate, getMemoryManager());
276
277            // And now combine the two into a single output char
278            const XMLInt32 SURROGATE_OFFSET = 0x10000 - (0xD800 << 10) - 0xDC00;
279            *outPtr++ = (curCh << 10) + trailCh + SURROGATE_OFFSET;
280        }
281         else
282        {
283            //
284            //  Its just a char, so we can take it as is. If we need to
285            //  swap it, then swap it. Because of flakey compilers, use
286            //  a temp first.
287            //
288            const UCS4Ch tmpCh = UCS4Ch(curCh);
289            if (fSwapped)
290                *outPtr++ = BitOps::swapBytes(tmpCh);
291            else
292                *outPtr++ = tmpCh;
293
294            // Bump the source pointer
295            srcPtr++;
296        }
297    }
298
299    // Set the chars we ate from the source
300    charsEaten = srcPtr - srcData;
301
302    // Return the bytes we wrote to the output
303    return ((XMLByte*)outPtr) - toFill;
304}
305
306
307bool XMLUCS4Transcoder::canTranscodeTo(const unsigned int)
308{
309    // We can handle anything
310    return true;
311}
312
313XERCES_CPP_NAMESPACE_END
Note: See TracBrowser for help on using the repository browser.