source: icXML/icXML-devel/src/icxercesc/util/XMLUCS4Transcoder.cpp @ 6297

Last change on this file since 6297 was 3103, checked in by cameron, 6 years ago

Initial imports for icXML v0.9

File size: 12.6 KB
Line 
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements.  See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License.  You may obtain a copy of the License at
8 *
9 *      http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18
19// ---------------------------------------------------------------------------
20//  Includes
21// ---------------------------------------------------------------------------
22#include <xercesc/util/BitOps.hpp>
23#include <icxercesc/util/XMLUCS4Transcoder.hpp>
24#include <xercesc/util/TranscodingException.hpp>
25#include <string.h>
26
27XERCES_CPP_NAMESPACE_BEGIN
28
29// ---------------------------------------------------------------------------
30//  XMLUCS4Transcoder: Constructors and Destructor
31// ---------------------------------------------------------------------------
32XMLUCS4Transcoder::XMLUCS4Transcoder(const  XMLCh* const    encodingName
33                                    , const XMLSize_t       blockSize
34                                    , const bool            swapped
35                                    , MemoryManager* const manager) :
36
37    XMLTranscoder(encodingName, blockSize, manager)
38    , fSwapped(swapped)
39{
40}
41
42
43XMLUCS4Transcoder::~XMLUCS4Transcoder()
44{
45}
46
47
48// ---------------------------------------------------------------------------
49//  XMLUCS4Transcoder: Implementation of the transcoder API
50// ---------------------------------------------------------------------------
51XMLSize_t
52XMLUCS4Transcoder::transcodeFrom(const  XMLByte* const          srcData
53                                , const XMLSize_t               srcCount
54                                ,       XMLCh* const            toFill
55                                , const XMLSize_t               maxChars
56                                ,       XMLSize_t&              bytesEaten
57                                ,       unsigned char* const    charSizes)
58{
59    //
60    //  Get pointers to the start and end of the source buffer in terms of
61    //  UCS-4 characters.
62    //
63    const UCS4Ch*   srcPtr = (const UCS4Ch*)srcData;
64    const UCS4Ch*   srcEnd = srcPtr + (srcCount / sizeof(UCS4Ch));
65
66    //
67    //  Get pointers to the start and end of the target buffer, which is
68    //  in terms of the XMLCh chars we output.
69    //
70    XMLCh*  outPtr = toFill;
71    XMLCh*  outEnd = toFill + maxChars;
72
73    //
74    //  And get a pointer into the char sizes buffer. We will run this
75    //  up as we put chars into the output buffer.
76    //
77    unsigned char* sizePtr = charSizes;
78
79    //
80    //  Now process chars until we either use up all our source or all of
81    //  our output space.
82    //
83    while ((outPtr < outEnd) && (srcPtr < srcEnd))
84    {
85        //
86        //  Get the next UCS char out of the buffer. Don't bump the ptr
87        //  yet since we might not have enough storage for it in the target
88        //  (if its causes a surrogate pair to be created.
89        //
90        UCS4Ch nextVal = *srcPtr;
91
92        // If it needs to be swapped, then do it
93        if (fSwapped)
94            nextVal = BitOps::swapBytes(nextVal);
95
96        // Handle a surrogate pair if needed
97        if (nextVal & 0xFFFF0000)
98        {
99            //
100            //  If we don't have room for both of the chars, then we
101            //  bail out now.
102            //
103            if (outPtr + 1 == outEnd)
104                break;
105
106            const XMLInt32 LEAD_OFFSET = 0xD800 - (0x10000 >> 10);
107                const XMLCh ch1 = XMLCh(LEAD_OFFSET + (nextVal >> 10));
108                const XMLCh ch2 = XMLCh(0xDC00 + (nextVal & 0x3FF));
109
110            //
111            //  We have room so store them both. But note that the
112            //  second one took up no source bytes!
113            //
114            *sizePtr++ = sizeof(UCS4Ch);
115            *outPtr++ = ch1;
116            *sizePtr++ = 0;
117            *outPtr++ = ch2;
118        }
119         else
120        {
121            //
122            //  No surrogate, so just store it and bump the count of chars
123            //  read. Update the char sizes buffer for this char's entry.
124            //
125            *sizePtr++ = sizeof(UCS4Ch);
126            *outPtr++ = XMLCh(nextVal);
127        }
128
129        // Indicate that we ate another UCS char's worth of bytes
130        srcPtr++;
131    }
132
133    // Set the bytes eaten parameter
134    bytesEaten = ((const XMLByte*)srcPtr) - srcData;
135
136    // And return the chars written into the output buffer
137    return outPtr - toFill;
138}
139
140XMLSize_t
141XMLUCS4Transcoder::transcodeFrom(const  XMLByte* const          srcData
142                                , const XMLSize_t               srcCount
143                                ,       XMLCh* const            toFill
144                                , const XMLSize_t               maxChars
145                                ,       XMLSize_t&              bytesEaten)
146{
147    //
148    //  Get pointers to the start and end of the source buffer in terms of
149    //  UCS-4 characters.
150    //
151    const UCS4Ch*   srcPtr = (const UCS4Ch*)srcData;
152    const UCS4Ch*   srcEnd = srcPtr + (srcCount / sizeof(UCS4Ch));
153
154    //
155    //  Get pointers to the start and end of the target buffer, which is
156    //  in terms of the XMLCh chars we output.
157    //
158    XMLCh*  outPtr = toFill;
159    XMLCh*  outEnd = toFill + maxChars;
160
161    //
162    //  Now process chars until we either use up all our source or all of
163    //  our output space.
164    //
165    while ((outPtr < outEnd) && (srcPtr < srcEnd))
166    {
167        //
168        //  Get the next UCS char out of the buffer. Don't bump the ptr
169        //  yet since we might not have enough storage for it in the target
170        //  (if its causes a surrogate pair to be created.
171        //
172        UCS4Ch nextVal = *srcPtr;
173
174        // If it needs to be swapped, then do it
175        if (fSwapped)
176            nextVal = BitOps::swapBytes(nextVal);
177
178        // Handle a surrogate pair if needed
179        if (nextVal & 0xFFFF0000)
180        {
181            //
182            //  If we don't have room for both of the chars, then we
183            //  bail out now.
184            //
185            if (outPtr + 1 == outEnd)
186                break;
187
188            const XMLInt32 LEAD_OFFSET = 0xD800 - (0x10000 >> 10);
189            const XMLCh ch1 = XMLCh(LEAD_OFFSET + (nextVal >> 10));
190            const XMLCh ch2 = XMLCh(0xDC00 + (nextVal & 0x3FF));
191
192            //
193            //  We have room so store them both. But note that the
194            //  second one took up no source bytes!
195            //
196            *outPtr++ = ch1;
197            *outPtr++ = ch2;
198        }
199         else
200        {
201            //
202            //  No surrogate, so just store it and bump the count of chars
203            //  read. Update the char sizes buffer for this char's entry.
204            //
205            *outPtr++ = XMLCh(nextVal);
206        }
207
208        // Indicate that we ate another UCS char's worth of bytes
209        srcPtr++;
210    }
211
212    // Set the bytes eaten parameter
213    bytesEaten = ((const XMLByte*)srcPtr) - srcData;
214
215    // And return the chars written into the output buffer
216    return outPtr - toFill;
217}
218
219XMLSize_t
220XMLUCS4Transcoder::transcodeFrom
221(
222      const XMLByte* const          srcData
223    , const XMLSize_t               srcCount
224    ,       XMLBuffer &             toFill
225)
226{
227    toFill.ensureCapacity((srcCount * 2) / sizeof(XMLCh));
228
229    //
230    //  Get pointers to the start and end of the source buffer in terms of
231    //  UCS-4 characters.
232    //
233    const UCS4Ch*   srcPtr = (const UCS4Ch*)srcData;
234    const UCS4Ch*   srcEnd = srcPtr + (srcCount / sizeof(UCS4Ch));
235
236    //
237    //  Get pointers to the start and end of the target buffer, which is
238    //  in terms of the XMLCh chars we output.
239    //
240    XMLCh*  outPtr = toFill.getRawBuffer();
241
242    //
243    //  Now process chars until we either use up all our source or all of
244    //  our output space.
245    //
246    while (srcPtr < srcEnd)
247    {
248        //
249        //  Get the next UCS char out of the buffer. Don't bump the ptr
250        //  yet since we might not have enough storage for it in the target
251        //  (if its causes a surrogate pair to be created.
252        //
253        UCS4Ch nextVal = *srcPtr;
254
255        // If it needs to be swapped, then do it
256        if (fSwapped)
257            nextVal = BitOps::swapBytes(nextVal);
258
259        // Handle a surrogate pair if needed
260        if (nextVal & 0xFFFF0000)
261        {
262            const XMLInt32 LEAD_OFFSET = 0xD800 - (0x10000 >> 10);
263            const XMLCh ch1 = XMLCh(LEAD_OFFSET + (nextVal >> 10));
264            const XMLCh ch2 = XMLCh(0xDC00 + (nextVal & 0x3FF));
265
266            *outPtr++ = ch1;
267            *outPtr++ = ch2;
268        }
269         else
270        {
271            //
272            //  No surrogate, so just store it and bump the count of chars
273            //  read. Update the char sizes buffer for this char's entry.
274            //
275            *outPtr++ = XMLCh(nextVal);
276        }
277
278        // Indicate that we ate another UCS char's worth of bytes
279        srcPtr++;
280    }
281
282    const XMLSize_t outLen = outPtr - toFill.getRawBuffer();
283
284    toFill.setLen(outLen);
285
286    // And return the chars written into the output buffer
287    return outLen;
288
289}
290
291
292XMLSize_t
293XMLUCS4Transcoder::transcodeTo( const   XMLCh* const    srcData
294                                , const XMLSize_t       srcCount
295                                ,       XMLByte* const  toFill
296                                , const XMLSize_t       maxBytes
297                                ,       XMLSize_t&      charsEaten
298                                , const UnRepOpts)
299{
300    //
301    //  Get pointers to the start and end of the source buffer, which
302    //  is in terms of XMLCh chars.
303    //
304    const XMLCh*  srcPtr = srcData;
305    const XMLCh*  srcEnd = srcData + srcCount;
306
307    //
308    //  Get pointers to the start and end of the target buffer, in terms
309    //  of UCS-4 chars.
310    //
311    UCS4Ch*   outPtr = (UCS4Ch*)toFill;
312    UCS4Ch*   outEnd = outPtr + (maxBytes / sizeof(UCS4Ch));
313
314    //
315    //  Now loop until we either run out of source characters or we
316    //  fill up our output buffer.
317    //
318    XMLCh trailCh;
319    while ((outPtr < outEnd) && (srcPtr < srcEnd))
320    {
321        //
322        //  Get out an XMLCh char from the source. Don't bump up the
323        //  pointer yet, since it might be a leading for which we don't
324        //  have the trailing.
325        //
326        const XMLCh curCh = *srcPtr;
327
328        //
329        //  If its a leading char of a surrogate pair handle it one way,
330        //  else just cast it over into the target.
331        //
332        if ((curCh >= 0xD800) && (curCh <= 0xDBFF))
333        {
334            //
335            //  Ok, we have to have another source char available or we
336            //  just give up without eating the leading char.
337            //
338            if (srcPtr + 1 == srcEnd)
339                break;
340
341            //
342            //  We have the trailing char, so eat the first char and the
343            //  trailing char from the source.
344            //
345            srcPtr++;
346            trailCh = *srcPtr++;
347
348            //
349            //  Then make sure its a legal trailing char. If not, throw
350            //  an exception.
351            //
352            if ( !( (trailCh >= 0xDC00) && (trailCh <= 0xDFFF) ) )
353                ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadTrailingSurrogate, getMemoryManager());
354
355            // And now combine the two into a single output char
356            const XMLInt32 SURROGATE_OFFSET = 0x10000 - (0xD800 << 10) - 0xDC00;
357            *outPtr++ = (curCh << 10) + trailCh + SURROGATE_OFFSET;
358        }
359         else
360        {
361            //
362            //  Its just a char, so we can take it as is. If we need to
363            //  swap it, then swap it. Because of flakey compilers, use
364            //  a temp first.
365            //
366            const UCS4Ch tmpCh = UCS4Ch(curCh);
367            if (fSwapped)
368                *outPtr++ = BitOps::swapBytes(tmpCh);
369            else
370                *outPtr++ = tmpCh;
371
372            // Bump the source pointer
373            srcPtr++;
374        }
375    }
376
377    // Set the chars we ate from the source
378    charsEaten = srcPtr - srcData;
379
380    // Return the bytes we wrote to the output
381    return ((XMLByte*)outPtr) - toFill;
382}
383
384
385bool XMLUCS4Transcoder::canTranscodeTo(const unsigned int)
386{
387    // We can handle anything
388    return true;
389}
390
391bool XMLUCS4Transcoder::isSwapped() const
392{
393    return fSwapped;
394}
395
396
397XERCES_CPP_NAMESPACE_END
Note: See TracBrowser for help on using the repository browser.