source: icXML/icXML-devel/src/xercesc/util/Transcoders/ICU/ICUTransService.cpp @ 2722

Last change on this file since 2722 was 2722, checked in by cameron, 6 years ago

Original Xerces files with import mods for icxercesc

File size: 35.0 KB
Line 
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements.  See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License.  You may obtain a copy of the License at
8 *
9 *      http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18/*
19 * $Id: ICUTransService.cpp 901107 2010-01-20 08:45:02Z borisk $
20 */
21
22
23// ---------------------------------------------------------------------------
24//  Includes
25// ---------------------------------------------------------------------------
26#include <icxercesc/util/PlatformUtils.hpp>
27#include <xercesc/util/Janitor.hpp>
28#include <xercesc/util/TranscodingException.hpp>
29#include <icxercesc/util/XMLString.hpp>
30#include <xercesc/util/XMLUniDefs.hpp>
31#include "ICUTransService.hpp"
32#include <string.h>
33#include <unicode/uloc.h>
34#include <unicode/uchar.h>
35#include <unicode/ucnv.h>
36#include <unicode/ucnv_err.h>
37#include <unicode/ustring.h>
38#include <unicode/udata.h>
39#if (U_ICU_VERSION_MAJOR_NUM >= 2)
40    #include <unicode/uclean.h>
41#endif
42
43#if !defined(XML_OS390) && !defined(XML_AS400) && !defined(XML_HPUX) && !defined(XML_PTX)
44// Forward reference the symbol which points to the ICU converter data.
45#if (U_ICU_VERSION_MAJOR_NUM < 2)
46extern "C" const uint8_t U_IMPORT icudata_dat[];
47#endif
48#endif
49
//
//  Compatibility shims. The code below is written against the U16_* and
//  U_IS_BMP macro names. When a name is missing but its UTF16_* counterpart
//  is available, define the missing name in terms of that counterpart.
//
#if defined(UTF16_NEXT_CHAR_UNSAFE) && !defined(U16_NEXT_UNSAFE)
    #define U16_NEXT_UNSAFE UTF16_NEXT_CHAR_UNSAFE
#endif

#if defined(UTF16_APPEND_CHAR_UNSAFE) && !defined(U16_APPEND_UNSAFE)
    #define U16_APPEND_UNSAFE UTF16_APPEND_CHAR_UNSAFE
#endif

// A code point is treated as BMP when it occupies a single UTF-16 unit.
#if defined(UTF16_CHAR_LENGTH) && !defined(U_IS_BMP)
    #define U_IS_BMP(c) (UTF16_CHAR_LENGTH(c)==1)
#endif
61
62
63XERCES_CPP_NAMESPACE_BEGIN
64
65// ---------------------------------------------------------------------------
66//  Local, const data
67// ---------------------------------------------------------------------------
68static const XMLCh gMyServiceId[] =
69{
70    chLatin_I, chLatin_C, chLatin_U, chNull
71};
72
73static const XMLCh gS390Id[] =
74{
75    chLatin_S, chDigit_3, chDigit_9, chDigit_0, chNull
76};
77
78static const XMLCh gs390Id[] =
79{
80    chLatin_s, chDigit_3, chDigit_9, chDigit_0, chNull
81};
82
83static const XMLCh gswaplfnlId[] =
84{
85    chComma, chLatin_s, chLatin_w, chLatin_a, chLatin_p,
86    chLatin_l, chLatin_f, chLatin_n, chLatin_l, chNull
87};
88// ---------------------------------------------------------------------------
89//  Local functions
90// ---------------------------------------------------------------------------
91
92//
93//  When XMLCh and ICU's UChar are not the same size, we have to do a temp
94//  conversion of all strings. These local helper methods make that easier.
95//
96static UChar* convertToUChar( const XMLCh* const   toConvert
97                            , const XMLSize_t      srcLen = 0
98                            , MemoryManager* const manager = 0)
99{
100    const XMLSize_t actualLen = srcLen
101                                   ? srcLen : XMLString::stringLen(toConvert);
102
103    UChar* tmpBuf = (manager)
104        ? (UChar*) manager->allocate((actualLen + 1) * sizeof(UChar))
105                : new UChar[actualLen + 1];
106    const XMLCh* srcPtr = toConvert;
107    UChar* outPtr = tmpBuf;
108    while (*srcPtr)
109        *outPtr++ = UChar(*srcPtr++);
110    *outPtr = 0;
111
112    return tmpBuf;
113}
114
115
116static XMLCh* convertToXMLCh( const UChar* const toConvert,
117                            MemoryManager* const manager = 0)
118{
119    const unsigned int srcLen = u_strlen(toConvert);
120    XMLCh* retBuf = (manager)
121        ? (XMLCh*) manager->allocate((srcLen+1) * sizeof(XMLCh))
122        : new XMLCh[srcLen + 1];
123
124    XMLCh* outPtr = retBuf;
125    const UChar* srcPtr = toConvert;
126    while (*srcPtr)
127        *outPtr++ = XMLCh(*srcPtr++);
128    *outPtr = 0;
129
130    return retBuf;
131}
132
133
134
135
136// ---------------------------------------------------------------------------
137//  ICUTransService: Constructors and Destructor
138// ---------------------------------------------------------------------------
139ICUTransService::ICUTransService(MemoryManager*)
140{
141  // Starting with ICU 3.4 we don't need to call init anymore.
142  //
143#if (U_ICU_VERSION_MAJOR_NUM > 2 || (U_ICU_VERSION_MAJOR_NUM == 2 && U_ICU_VERSION_MINOR_NUM >= 6)) && \
144  (U_ICU_VERSION_MAJOR_NUM < 3 || (U_ICU_VERSION_MAJOR_NUM == 3 && U_ICU_VERSION_MINOR_NUM < 4))
145    UErrorCode errorCode=U_ZERO_ERROR;
146    u_init(&errorCode);
147    if(U_FAILURE(errorCode)) {
148        XMLPlatformUtils::panic(PanicHandler::Panic_NoTransService);
149    }
150#endif
151
152#if !defined(XML_OS390) && !defined(XML_AS400) && !defined(XML_HPUX) && !defined(XML_PTX)
153#if (U_ICU_VERSION_MAJOR_NUM < 2)
154    // Starting with ICU 2.0, ICU itself includes a static reference to the data
155    // entrypoint symbol.
156    //
157    // ICU 1.8 (and previous) did not include a static reference, but would
158    // dynamically load the data dll when it was first needed, however this dynamic
159    // loading proved unreliable in some of the odd environments that Xerces needed
160    // to run in.  Hence, the static reference.
161
162    // Pass the location of the converter data to ICU. By doing so, we are
163    // forcing the load of ICU converter data DLL, after the Xerces-C DLL is
164    // loaded. This implies that Xerces-C, now has to explicitly link with the
165    // ICU converter dll. However, the advantage is that we no longer depend
166    // on the code which does demand dynamic loading of DLL's. The demand
167    // loading is highly system dependent and was a constant source of support
168    // calls.
169    UErrorCode uerr = U_ZERO_ERROR;
170    udata_setCommonData((void *) icudata_dat, &uerr);
171#endif
172#endif
173}
174
175ICUTransService::~ICUTransService()
176{
177    /*
178     * commented out the following clean up code
179     * in case users use ICU outside of the parser
180     * if we clean up here, users' code may crash
181     *
182    #if (U_ICU_VERSION_MAJOR_NUM >= 2)
183        // release all lazily allocated data
184        u_cleanup();
185    #endif
186    */
187}
188
189
190// ---------------------------------------------------------------------------
191//  ICUTransService: The virtual transcoding service API
192// ---------------------------------------------------------------------------
193int ICUTransService::compareIString(const   XMLCh* const    comp1
194                                    , const XMLCh* const    comp2)
195{
196    size_t  i = 0;
197    size_t  j = 0;
198
199    for(;;)
200    {
201        UChar32 ch1;
202        UChar32 ch2;
203
204        U16_NEXT_UNSAFE(comp1, i, ch1);
205        U16_NEXT_UNSAFE(comp2, j, ch2);
206
207        const UChar32   folded1 =
208            u_foldCase(ch1, U_FOLD_CASE_DEFAULT);
209
210        const UChar32   folded2 =
211            u_foldCase(ch2, U_FOLD_CASE_DEFAULT);
212
213        if (folded1 !=
214            folded2)
215        {
216            return folded1 - folded2;
217        }
218        else if (ch1 == 0)
219        {
220            // If ch1 is 0, the ch2 must also be
221            // 0.  Otherwise, the previous if
222            // would have failed.
223            break;
224        }
225    }
226
227    return 0;
228}
229
230
231int ICUTransService::compareNIString(const  XMLCh* const    comp1
232                                    , const XMLCh* const    comp2
233                                    , const XMLSize_t       maxChars)
234{
235    if (maxChars > 0)
236    {
237        // Note that this function has somewhat broken semantics, as it's
238        // possible for two strings of different lengths to compare as equal
239        // in a case-insensitive manner, since one character could be
240        // represented as a surrogate pair.
241        size_t  i = 0;
242        size_t  j = 0;
243
244        for(;;)
245        {
246            UChar32 ch1;
247            UChar32 ch2;
248
249            U16_NEXT_UNSAFE(comp1, i, ch1);
250            U16_NEXT_UNSAFE(comp2, j, ch2);
251
252            const UChar32   folded1 =
253                u_foldCase(ch1, U_FOLD_CASE_DEFAULT);
254
255            const UChar32   folded2 =
256                u_foldCase(ch2, U_FOLD_CASE_DEFAULT);
257
258            if (folded1 != folded2)
259            {
260                return folded1 - folded2;
261            }
262            else if (i == maxChars)
263            {
264                // If we're at the end of both strings, return 0.
265                // Otherwise, we've run out of characters in the
266                // left string, so return -1.
267                return j == maxChars ? 0 : -1;
268            }
269            else if (j == maxChars)
270            {
271                // We've run out of characters in the right string,
272                // but not the left, so return 1.
273                return 1;
274            }
275        }
276    }
277
278    return 0;
279}
280
281
282const XMLCh* ICUTransService::getId() const
283{
284    return gMyServiceId;
285}
286
287XMLLCPTranscoder* ICUTransService::makeNewLCPTranscoder(MemoryManager* manager)
288{
289    //
290    //  Try to create a default converter. If it fails, return a null
291    //  pointer which will basically cause the system to give up because
292    //  we really can't do anything without one.
293    //
294    UErrorCode uerr = U_ZERO_ERROR;
295    UConverter* converter = ucnv_open(NULL, &uerr);
296    if (!converter)
297        return 0;
298
299    // That went ok, so create an ICU LCP transcoder wrapper and return it
300    return new (manager) ICULCPTranscoder(converter);
301}
302
303
304bool ICUTransService::supportsSrcOfs() const
305{
306    // This implementation supports source offset information
307    return true;
308}
309
310
311template <class FunctionType>
312static void doCaseConvert(XMLCh*          convertString,
313                          FunctionType    caseFunction)
314{
315    // Note the semantics of this function are broken, since it's
316    // possible that changing the case of a string could increase
317    // its length, but there's no way to handle such a situation.
318    const XMLSize_t len = XMLString::stringLen(convertString);
319
320    size_t  readPos = 0;
321    size_t  writePos = 0;
322
323    while(readPos < len)
324    {
325        UChar32     original;
326
327        // Get the next Unicode code point.
328        U16_NEXT_UNSAFE(convertString, readPos, original);
329
330        // Convert the code point
331        const UChar32   converted = caseFunction(original);
332
333        // OK, now here's where it gets ugly.
334        if (!U_IS_BMP(converted) && U_IS_BMP(original) &&
335            readPos - writePos == 1)
336        {
337            // We do not have room to convert the
338            // character without overwriting the next
339            // character, so we will just stop.
340            break;
341        }
342        else
343        {
344            U16_APPEND_UNSAFE(convertString, writePos, converted);
345        }
346    }
347
348    convertString[writePos] = 0;
349}
350
351
352
353void ICUTransService::upperCase(XMLCh* const toUpperCase)
354{
355    doCaseConvert(toUpperCase, u_toupper);
356}
357
358void ICUTransService::lowerCase(XMLCh* const toLowerCase)
359{
360    doCaseConvert(toLowerCase, u_tolower);
361}
362
363
364
365// ---------------------------------------------------------------------------
366//  ICUTransService: The protected virtual transcoding service API
367// ---------------------------------------------------------------------------
368XMLTranscoder* ICUTransService::
369makeNewXMLTranscoder(const  XMLCh* const            encodingName
370                    ,       XMLTransService::Codes& resValue
371                    , const XMLSize_t               blockSize
372                    ,       MemoryManager* const    manager)
373{
374    //
375    //  For encodings that end with "s390" we need to strip off the "s390"
376    //  from the encoding name and add ",swaplfnl" to the encoding name
377    //  that we pass into ICU on the ucnv_openU.
378    //
379    XMLCh* encodingNameToUse = (XMLCh*) encodingName;
380    XMLCh* workBuffer = 0;
381
382    if ( (XMLString::endsWith(encodingNameToUse, gs390Id)) ||
383         (XMLString::endsWith(encodingNameToUse, gS390Id)) )
384    {
385       XMLSize_t workBufferSize = (XMLString::stringLen(encodingNameToUse) + XMLString::stringLen(gswaplfnlId) - XMLString::stringLen(gS390Id) + 1);
386       workBuffer = (XMLCh*) manager->allocate(workBufferSize * sizeof(XMLCh));
387       XMLSize_t moveSize = XMLString::stringLen(encodingNameToUse) - XMLString::stringLen(gS390Id);
388       XMLString::moveChars(workBuffer, encodingNameToUse, moveSize);
389       XMLString::moveChars((workBuffer + moveSize), gswaplfnlId, XMLString::stringLen(gswaplfnlId));
390       encodingNameToUse = workBuffer;
391    }
392
393    //
394    //  If UChar and XMLCh are not the same size, then we have premassage the
395    //  encoding name into a UChar type string.
396    //
397    const UChar* actualName;
398    UChar* tmpName = 0;
399    if (sizeof(UChar) == sizeof(XMLCh))
400    {
401        actualName = (const UChar*)encodingNameToUse;
402    }
403    else
404    {
405        tmpName = convertToUChar(encodingNameToUse, 0, manager);
406        actualName = tmpName;
407    }
408
409    ArrayJanitor<UChar> janTmp(tmpName, manager);
410    ArrayJanitor<XMLCh> janTmp1(workBuffer, manager);
411
412    UErrorCode uerr = U_ZERO_ERROR;
413    UConverter* converter = ucnv_openU(actualName, &uerr);
414    if (!converter)
415    {
416        resValue = XMLTransService::UnsupportedEncoding;
417        return 0;
418    }
419
420    return new (manager) ICUTranscoder(encodingName, converter, blockSize, manager);
421}
422
423
424
425
426// ---------------------------------------------------------------------------
427//  ICUTranscoder: Constructors and Destructor
428// ---------------------------------------------------------------------------
429ICUTranscoder::ICUTranscoder(const  XMLCh* const        encodingName
430                            ,       UConverter* const   toAdopt
431                            , const XMLSize_t           blockSize
432                            , MemoryManager* const      manager) :
433
434    XMLTranscoder(encodingName, blockSize, manager)
435    , fConverter(toAdopt)
436    , fFixed(false)
437    , fSrcOffsets(0)
438{
439    // If there is a block size, then allocate our source offset array
440    if (blockSize)
441        fSrcOffsets = (XMLUInt32*) manager->allocate
442        (
443            blockSize * sizeof(XMLUInt32)
444        );//new XMLUInt32[blockSize];
445
446    // Remember if its a fixed size encoding
447    fFixed = (ucnv_getMaxCharSize(fConverter) == ucnv_getMinCharSize(fConverter));
448}
449
450ICUTranscoder::~ICUTranscoder()
451{
452    getMemoryManager()->deallocate(fSrcOffsets);//delete [] fSrcOffsets;
453
454    // If there is a converter, ask ICU to clean it up
455    if (fConverter)
456    {
457        // <TBD> Does this actually delete the structure???
458        ucnv_close(fConverter);
459        fConverter = 0;
460    }
461}
462
463
464// ---------------------------------------------------------------------------
465//  ICUTranscoder: The virtual transcoder API
466// ---------------------------------------------------------------------------
467XMLSize_t
468ICUTranscoder::transcodeFrom(const  XMLByte* const          srcData
469                            , const XMLSize_t               srcCount
470                            ,       XMLCh* const            toFill
471                            , const XMLSize_t               maxChars
472                            ,       XMLSize_t&              bytesEaten
473                            ,       unsigned char* const    charSizes)
474{
475    // Set up pointers to the start and end of the source buffer
476    const XMLByte*  startSrc = srcData;
477    const XMLByte*  endSrc = srcData + srcCount;
478
479    //
480    //  And now do the target buffer. This works differently according to
481    //  whether XMLCh and UChar are the same size or not.
482    //
483    UChar* startTarget;
484    if (sizeof(XMLCh) == sizeof(UChar))
485        startTarget = (UChar*)toFill;
486     else
487        startTarget = (UChar*) getMemoryManager()->allocate
488        (
489            maxChars * sizeof(UChar)
490        );//new UChar[maxChars];
491    UChar* orgTarget = startTarget;
492
493    //
494    //  Transcode the buffer.  Buffer overflow errors are normal, occuring
495    //  when the raw input buffer holds more characters than will fit in
496    //  the Unicode output buffer.
497    //
498    UErrorCode  err = U_ZERO_ERROR;
499    ucnv_toUnicode
500    (
501        fConverter
502        , &startTarget
503        , startTarget + maxChars
504        , (const char**)&startSrc
505        , (const char*)endSrc
506        , (fFixed ? 0 : (int32_t*)fSrcOffsets)
507        , false
508        , &err
509    );
510
511    if ((err != U_ZERO_ERROR) && (err != U_BUFFER_OVERFLOW_ERROR))
512    {
513        if (orgTarget != (UChar*)toFill)
514            getMemoryManager()->deallocate(orgTarget);//delete [] orgTarget;
515
516        if (fFixed)
517        {
518            XMLCh tmpBuf[17];
519            XMLString::binToText((unsigned int)(*startTarget), tmpBuf, 16, 16, getMemoryManager());
520            ThrowXMLwithMemMgr2
521            (
522                TranscodingException
523                , XMLExcepts::Trans_BadSrcCP
524                , tmpBuf
525                , getEncodingName()
526                , getMemoryManager()
527            );
528        }
529        else
530        {
531            ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq, getMemoryManager());
532        }
533    }
534
535    // Calculate the bytes eaten and store in caller's param
536    bytesEaten = startSrc - srcData;
537
538    // And the characters decoded
539    const XMLSize_t charsDecoded = startTarget - orgTarget;
540
541    //
542    //  Translate the array of char offsets into an array of character
543    //  sizes, which is what the transcoder interface semantics requires.
544    //  If its fixed, then we can optimize it.
545    //
546    if (fFixed)
547    {
548        const unsigned char fillSize = (unsigned char)ucnv_getMaxCharSize(fConverter);
549        memset(charSizes, fillSize, maxChars);
550    }
551     else
552    {
553        //
554        //  We have to convert the series of offsets into a series of
555        //  sizes. If just one char was decoded, then its the total bytes
556        //  eaten. Otherwise, do a loop and subtract out each element from
557        //  its previous element.
558        //
559        if (charsDecoded == 1)
560        {
561            charSizes[0] = (unsigned char)bytesEaten;
562        }
563         else
564        {
565            //  ICU does not return an extra element to allow us to figure
566            //  out the last char size, so we have to compute it from the
567            //  total bytes used.
568            unsigned int index;
569            for (index = 0; index < charsDecoded - 1; index++)
570            {
571                charSizes[index] = (unsigned char)(fSrcOffsets[index + 1]
572                                                    - fSrcOffsets[index]);
573            }
574            if( charsDecoded > 0 ) {
575                charSizes[charsDecoded - 1] = (unsigned char)(bytesEaten
576                                              - fSrcOffsets[charsDecoded - 1]);
577            }
578        }
579    }
580
581    //
582    //  If XMLCh and UChar are not the same size, then we need to copy over
583    //  the temp buffer to the new one.
584    //
585    if (sizeof(UChar) != sizeof(XMLCh))
586    {
587        XMLCh* outPtr = toFill;
588        startTarget = orgTarget;
589        for (unsigned int index = 0; index < charsDecoded; index++)
590            *outPtr++ = XMLCh(*startTarget++);
591
592        // And delete the temp buffer
593        getMemoryManager()->deallocate(orgTarget);//delete [] orgTarget;
594    }
595
596    // Return the chars we put into the target buffer
597    return charsDecoded;
598}
599
600
601XMLSize_t
602ICUTranscoder::transcodeTo( const   XMLCh* const    srcData
603                            , const XMLSize_t       srcCount
604                            ,       XMLByte* const  toFill
605                            , const XMLSize_t       maxBytes
606                            ,       XMLSize_t&      charsEaten
607                            , const UnRepOpts       options)
608{
609    //
610    //  Get a pointer to the buffer to transcode. If UChar and XMLCh are
611    //  the same size here, then use the original. Else, create a temp
612    //  one and put a janitor on it.
613    //
614    const UChar* srcPtr;
615    UChar* tmpBufPtr = 0;
616    if (sizeof(XMLCh) == sizeof(UChar))
617    {
618        srcPtr = (const UChar*)srcData;
619    }
620    else
621    {
622        tmpBufPtr = convertToUChar(srcData, srcCount, getMemoryManager());
623        srcPtr = tmpBufPtr;
624    }
625    ArrayJanitor<UChar> janTmpBuf(tmpBufPtr, getMemoryManager());
626
627    //
628    //  Set the appropriate callback so that it will either fail or use
629    //  the rep char. Remember the old one so we can put it back.
630    //
631    UErrorCode  err = U_ZERO_ERROR;
632    UConverterFromUCallback oldCB = NULL;
633    #if (U_ICU_VERSION_MAJOR_NUM < 2)
634    void* orgContent;
635    #else
636    const void* orgContent;
637    #endif
638    ucnv_setFromUCallBack
639    (
640        fConverter
641        , (options == UnRep_Throw) ? UCNV_FROM_U_CALLBACK_STOP
642                                   : UCNV_FROM_U_CALLBACK_SUBSTITUTE
643        , NULL
644        , &oldCB
645        , &orgContent
646        , &err
647    );
648
649    //
650    //  Ok, lets transcode as many chars as we we can in one shot. The
651    //  ICU API gives enough info not to have to do this one char by char.
652    //
653    XMLByte*        startTarget = toFill;
654    const UChar*    startSrc = srcPtr;
655    err = U_ZERO_ERROR;
656    ucnv_fromUnicode
657    (
658        fConverter
659        , (char**)&startTarget
660        , (char*)(startTarget + maxBytes)
661        , &startSrc
662        , srcPtr + srcCount
663        , 0
664        , false
665        , &err
666    );
667
668    // Rememember the status before we possibly overite the error code
669    const bool res = (err == U_ZERO_ERROR);
670
671    // Put the old handler back
672    err = U_ZERO_ERROR;
673    UConverterFromUCallback orgAction = NULL;
674
675    ucnv_setFromUCallBack(fConverter, oldCB, NULL, &orgAction, &orgContent, &err);
676
677    if (!res)
678    {
679        XMLCh tmpBuf[17];
680        XMLString::binToText((unsigned int)*startSrc, tmpBuf, 16, 16, getMemoryManager());
681        ThrowXMLwithMemMgr2
682        (
683            TranscodingException
684            , XMLExcepts::Trans_Unrepresentable
685            , tmpBuf
686            , getEncodingName()
687            , getMemoryManager()
688        );
689    }
690
691    // Fill in the chars we ate from the input
692    charsEaten = startSrc - srcPtr;
693
694    // Return the chars we stored
695    return startTarget - toFill;
696}
697
698
699bool ICUTranscoder::canTranscodeTo(const unsigned int toCheck)
700{
701    //
702    //  If the passed value is really a surrogate embedded together, then
703    //  we need to break it out into its two chars. Else just one. While
704    //  we are ate it, convert them to UChar format if required.
705    //
706    UChar           srcBuf[2];
707    unsigned int    srcCount = 1;
708    if (toCheck & 0xFFFF0000)
709    {
710        srcBuf[0] = UChar((toCheck >> 10) + 0xD800);
711        srcBuf[1] = UChar(toCheck & 0x3FF) + 0xDC00;
712        srcCount++;
713    }
714     else
715    {
716        srcBuf[0] = UChar(toCheck);
717    }
718
719    //
720    //  Set the callback so that it will fail instead of using the rep char.
721    //  Remember the old one so we can put it back.
722    //
723     UErrorCode  err = U_ZERO_ERROR;
724     UConverterFromUCallback oldCB = NULL;
725     #if (U_ICU_VERSION_MAJOR_NUM < 2)
726     void* orgContent;
727     #else
728     const void* orgContent;
729     #endif
730
731     ucnv_setFromUCallBack
732         (
733         fConverter
734         , UCNV_FROM_U_CALLBACK_STOP
735         , NULL
736         , &oldCB
737         , &orgContent
738         , &err
739         );
740
741    // Set upa temp buffer to format into. Make it more than big enough
742    char            tmpBuf[64];
743    char*           startTarget = tmpBuf;
744    const UChar*    startSrc = srcBuf;
745
746    err = U_ZERO_ERROR;
747    ucnv_fromUnicode
748    (
749        fConverter
750        , &startTarget
751        , startTarget + 64
752        , &startSrc
753        , srcBuf + srcCount
754        , 0
755        , false
756        , &err
757    );
758
759    // Save the result before we overight the error code
760    const bool res = (err == U_ZERO_ERROR);
761
762    // Put the old handler back
763    err = U_ZERO_ERROR;
764    UConverterFromUCallback orgAction = NULL;
765
766    ucnv_setFromUCallBack(fConverter, oldCB, NULL, &orgAction, &orgContent, &err);
767
768    return res;
769}
770
771
772
773// ---------------------------------------------------------------------------
774//  ICULCPTranscoder: Constructors and Destructor
775// ---------------------------------------------------------------------------
776ICULCPTranscoder::ICULCPTranscoder(UConverter* const toAdopt) :
777
778    fConverter(toAdopt)
779{
780}
781
782ICULCPTranscoder::~ICULCPTranscoder()
783{
784    // If there is a converter, ask ICU to clean it up
785    if (fConverter)
786    {
787        // <TBD> Does this actually delete the structure???
788        ucnv_close(fConverter);
789        fConverter = 0;
790    }
791}
792
793
// ---------------------------------------------------------------------------
//  ICULCPTranscoder: Size calculation and transcoding methods
// ---------------------------------------------------------------------------
797XMLSize_t ICULCPTranscoder::calcRequiredSize(const XMLCh* const srcText
798                                                , MemoryManager* const manager)
799{
800    if (!srcText)
801        return 0;
802
803    //
804    //  We do two different versions of this, according to whether XMLCh
805    //  is the same size as UChar or not.
806    //
807    UErrorCode err = U_ZERO_ERROR;
808    int32_t targetCap;
809    if (sizeof(XMLCh) == sizeof(UChar))
810    {
811        // Use a faux scope to synchronize while we do this
812        {
813            XMLMutexLock lockConverter(&fMutex);
814
815            targetCap = ucnv_fromUChars
816            (
817                fConverter
818                , 0
819                , 0
820                , (const UChar*)srcText
821                , -1
822                , &err
823            );
824        }
825    }
826    else
827    {
828        // Copy the source to a local temp
829        UChar* tmpBuf = convertToUChar(srcText, 0, manager);
830        ArrayJanitor<UChar> janTmp(tmpBuf, manager);
831
832        // Use a faux scope to synchronize while we do this
833        {
834            XMLMutexLock lockConverter(&fMutex);
835
836            targetCap = ucnv_fromUChars
837            (
838                fConverter
839                , 0
840                , 0
841                , tmpBuf
842                , -1
843                , &err
844            );
845        }
846    }
847
848    if (err != U_BUFFER_OVERFLOW_ERROR)
849        return 0;
850
851    return (XMLSize_t)targetCap;
852}
853
854XMLSize_t ICULCPTranscoder::calcRequiredSize(const char* const srcText
855                                                , MemoryManager* const /*manager*/)
856{
857    if (!srcText)
858        return 0;
859
860    int32_t targetCap;
861    UErrorCode err = U_ZERO_ERROR;
862
863    // Use a faux scope to synchronize while we do this
864    {
865        XMLMutexLock lockConverter(&fMutex);
866        targetCap = ucnv_toUChars
867        (
868            fConverter
869            , 0
870            , 0
871            , srcText
872            , (int32_t)strlen(srcText)
873            , &err
874        );
875    }
876
877    if (err != U_BUFFER_OVERFLOW_ERROR)
878        return 0;
879
880#if (U_ICU_VERSION_MAJOR_NUM < 2)
881    // Subtract one since it includes the terminator space
882    return (XMLSize_t)(targetCap - 1);
883#else
884    // Starting ICU 2.0, this is fixed and all ICU String functions have consistent NUL-termination behavior.
885    // The returned length is always the number of output UChar's, not counting an additional, terminating NUL.
886    return (XMLSize_t)(targetCap);
887#endif
888}
889
890
891char* ICULCPTranscoder::transcode(const XMLCh* const toTranscode,
892                                  MemoryManager* const manager)
893{
894    char* retBuf = 0;
895
896    // Check for a couple of special cases
897    if (!toTranscode)
898        return retBuf;
899
900    if (!*toTranscode)
901    {
902        retBuf = (char*) manager->allocate(sizeof(char));//new char[1];
903        retBuf[0] = 0;
904        return retBuf;
905    }
906
907    //
908    //  Get the length of the source string since we'll have to use it in
909    //  a couple places below.
910    //
911    const XMLSize_t srcLen = XMLString::stringLen(toTranscode);
912
913    //
914    //  If XMLCh and UChar are not the same size, then we have to make a
915    //  temp copy of the text to pass to ICU.
916    //
917    const UChar* actualSrc;
918    UChar* ncActual = 0;
919    if (sizeof(XMLCh) == sizeof(UChar))
920    {
921        actualSrc = (const UChar*)toTranscode;
922    }
923     else
924    {
925        // Allocate a non-const temp buf, but store it also in the actual
926        ncActual = convertToUChar(toTranscode, 0, manager);
927        actualSrc = ncActual;
928    }
929
930    // Insure that the temp buffer, if any, gets cleaned up via the nc pointer
931    ArrayJanitor<UChar> janTmp(ncActual, manager);
932
933    // Caculate a return buffer size not too big, but less likely to overflow
934    int32_t targetLen = (int32_t)(srcLen * 1.25);
935
936    // Allocate the return buffer
937    retBuf = (char*) manager->allocate((targetLen + 1) * sizeof(char));//new char[targetLen + 1];
938
939    //
940    //  Lock now while we call the converter. Use a faux block to do the
941    //  lock so that it unlocks immediately afterwards.
942    //
943    UErrorCode err = U_ZERO_ERROR;
944    int32_t targetCap;
945    {
946        XMLMutexLock lockConverter(&fMutex);
947
948        targetCap = ucnv_fromUChars
949        (
950            fConverter
951            , retBuf
952            , targetLen + 1
953            , actualSrc
954            , -1
955            , &err
956        );
957    }
958
959    // If targetLen is not enough then buffer overflow might occur
960    if ((err == U_BUFFER_OVERFLOW_ERROR) || (err == U_STRING_NOT_TERMINATED_WARNING))
961    {
962        //
963        //  Reset the error, delete the old buffer, allocate a new one,
964        //  and try again.
965        //
966        err = U_ZERO_ERROR;
967        manager->deallocate(retBuf);//delete [] retBuf;
968        retBuf = (char*) manager->allocate((targetCap + 1) * sizeof(char));//new char[targetCap + 1];
969
970        // Lock again before we retry
971        XMLMutexLock lockConverter(&fMutex);
972        targetCap = ucnv_fromUChars
973        (
974            fConverter
975            , retBuf
976            , targetCap + 1
977            , actualSrc
978            , -1
979            , &err
980        );
981    }
982
983    if (U_FAILURE(err))
984    {
985        manager->deallocate(retBuf);//delete [] retBuf;
986        return 0;
987    }
988
989    return retBuf;
990}
991
992XMLCh* ICULCPTranscoder::transcode(const char* const toTranscode,
993                                   MemoryManager* const manager)
994{
995    // Watch for a few pyscho corner cases
996    if (!toTranscode)
997        return 0;
998
999    if (!*toTranscode)
1000    {
1001        XMLCh* retVal = (XMLCh*) manager->allocate(sizeof(XMLCh));//new XMLCh[1];
1002        retVal[0] = 0;
1003        return retVal;
1004    }
1005
1006    //
1007    //  Get the length of the string to transcode. The Unicode string will
1008    //  almost always be no more chars than were in the source, so this is
1009    //  the best guess as to the storage needed.
1010    //
1011    const int32_t srcLen = (int32_t)strlen(toTranscode);
1012
1013    // We need a target buffer of UChars to fill in
1014    UChar* targetBuf = 0;
1015
1016    // Now lock while we do these calculations
1017    UErrorCode err = U_ZERO_ERROR;
1018    int32_t targetCap;
1019    {
1020        XMLMutexLock lockConverter(&fMutex);
1021
1022        //
1023        //  Here we don't know what the target length will be so use 0 and
1024        //  expect an U_BUFFER_OVERFLOW_ERROR in which case it'd get resolved
1025        //  by the correct capacity value.
1026        //
1027        targetCap = ucnv_toUChars
1028        (
1029            fConverter
1030            , 0
1031            , 0
1032            , toTranscode
1033            , srcLen
1034            , &err
1035        );
1036
1037        if (err != U_BUFFER_OVERFLOW_ERROR)
1038            return 0;
1039
1040        err = U_ZERO_ERROR;
1041        targetBuf = (UChar*) manager->allocate((targetCap+1) * sizeof(UChar));//new UChar[targetCap + 1];
1042        ucnv_toUChars
1043        (
1044            fConverter
1045            , targetBuf
1046            , targetCap + 1
1047            , toTranscode
1048            , srcLen
1049            , &err
1050        );
1051    }
1052
1053    if (U_FAILURE(err))
1054    {
1055        // Clean up if we got anything allocated
1056        manager->deallocate(targetBuf);//delete [] targetBuf;
1057        return 0;
1058    }
1059
1060    // Cap it off to make sure
1061    targetBuf[targetCap] = 0;
1062
1063    //
1064    //  If XMLCh and UChar are the same size, then we can return retVal
1065    //  as is. Else, we have to allocate another buffer and copy the data
1066    //  over to it.
1067    //
1068    XMLCh* actualRet;
1069    if (sizeof(XMLCh) == sizeof(UChar))
1070    {
1071        actualRet = (XMLCh*)targetBuf;
1072    }
1073     else
1074    {
1075        actualRet = convertToXMLCh(targetBuf, manager);
1076        manager->deallocate(targetBuf);//delete [] targetBuf;
1077    }
1078    return actualRet;
1079}
1080
1081
1082bool ICULCPTranscoder::transcode(const  char* const     toTranscode
1083                                ,       XMLCh* const    toFill
1084                                , const XMLSize_t       maxChars
1085                                , MemoryManager* const  manager)
1086{
1087    // Check for a couple of psycho corner cases
1088    if (!toTranscode || !maxChars)
1089    {
1090        toFill[0] = 0;
1091        return true;
1092    }
1093
1094    if (!*toTranscode)
1095    {
1096        toFill[0] = 0;
1097        return true;
1098    }
1099
1100    // We'll need this in a couple of places below
1101    const XMLSize_t srcLen = strlen(toTranscode);
1102
1103    //
1104    //  Set up the target buffer. If XMLCh and UChar are not the same size
1105    //  then we have to use a temp buffer and convert over.
1106    //
1107    UChar* targetBuf;
1108    if (sizeof(XMLCh) == sizeof(UChar))
1109        targetBuf = (UChar*)toFill;
1110    else
1111        targetBuf = (UChar*) manager->allocate
1112        (
1113            (maxChars + 1) * sizeof(UChar)
1114        );//new UChar[maxChars + 1];
1115
1116    //
1117    //  Use a faux block to enforce a lock on the converter, which will
1118    //  unlock immediately after its completed.
1119    //
1120    UErrorCode err = U_ZERO_ERROR;
1121    {
1122        XMLMutexLock lockConverter(&fMutex);
1123        ucnv_toUChars
1124        (
1125            fConverter
1126            , targetBuf
1127            , (int32_t)maxChars + 1
1128            , toTranscode
1129            , (int32_t)srcLen
1130            , &err
1131        );
1132    }
1133
1134    if (U_FAILURE(err))
1135    {
1136        if (targetBuf != (UChar*)toFill)
1137            manager->deallocate(targetBuf);//delete [] targetBuf;
1138        return false;
1139    }
1140
1141    // If the sizes are not the same, then copy the data over
1142    if (sizeof(XMLCh) != sizeof(UChar))
1143    {
1144        UChar* srcPtr = targetBuf;
1145        XMLCh* outPtr = toFill;
1146        while (*srcPtr)
1147            *outPtr++ = XMLCh(*srcPtr++);
1148        *outPtr = 0;
1149
1150        // And delete the temp buffer
1151        manager->deallocate(targetBuf);//delete [] targetBuf;
1152    }
1153
1154    return true;
1155}
1156
1157
1158bool ICULCPTranscoder::transcode(   const   XMLCh* const    toTranscode
1159                                    ,       char* const     toFill
1160                                    , const XMLSize_t       maxChars
1161                                    , MemoryManager* const  manager)
1162{
1163    // Watch for a few psycho corner cases
1164    if (!toTranscode || !maxChars)
1165    {
1166        toFill[0] = 0;
1167        return true;
1168    }
1169
1170    if (!*toTranscode)
1171    {
1172        toFill[0] = 0;
1173        return true;
1174    }
1175
1176    //
1177    //  If XMLCh and UChar are not the same size, then we have to make a
1178    //  temp copy of the text to pass to ICU.
1179    //
1180    const UChar* actualSrc;
1181    UChar* ncActual = 0;
1182    if (sizeof(XMLCh) == sizeof(UChar))
1183    {
1184        actualSrc = (const UChar*)toTranscode;
1185    }
1186     else
1187    {
1188        // Allocate a non-const temp buf, but store it also in the actual
1189        ncActual = convertToUChar(toTranscode, 0, manager);
1190        actualSrc = ncActual;
1191    }
1192
1193    // Insure that the temp buffer, if any, gets cleaned up via the nc pointer
1194    ArrayJanitor<UChar> janTmp(ncActual, manager);
1195
1196    //
1197    //  Use a faux block to enforce a lock on the converter while we do this.
1198    //  It will be released immediately after its done.
1199    //
1200    UErrorCode err = U_ZERO_ERROR;
1201    int32_t targetCap;
1202    {
1203        XMLMutexLock lockConverter(&fMutex);
1204        targetCap = ucnv_fromUChars
1205        (
1206            fConverter
1207            , toFill
1208            , (int32_t)maxChars
1209            , actualSrc
1210            , -1
1211            , &err
1212        );
1213    }
1214
1215    if (U_FAILURE(err))
1216        return false;
1217
1218    toFill[targetCap] = 0;
1219    return true;
1220}
1221
1222XERCES_CPP_NAMESPACE_END
Note: See TracBrowser for help on using the repository browser.