1 | /* |
---|
2 | * Licensed to the Apache Software Foundation (ASF) under one or more |
---|
3 | * contributor license agreements. See the NOTICE file distributed with |
---|
4 | * this work for additional information regarding copyright ownership. |
---|
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 |
---|
6 | * (the "License"); you may not use this file except in compliance with |
---|
7 | * the License. You may obtain a copy of the License at |
---|
8 | * |
---|
9 | * http://www.apache.org/licenses/LICENSE-2.0 |
---|
10 | * |
---|
11 | * Unless required by applicable law or agreed to in writing, software |
---|
12 | * distributed under the License is distributed on an "AS IS" BASIS, |
---|
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
---|
14 | * See the License for the specific language governing permissions and |
---|
15 | * limitations under the License. |
---|
16 | */ |
---|
17 | |
---|
18 | /* |
---|
19 | * $Id: ICUTransService.cpp 901107 2010-01-20 08:45:02Z borisk $ |
---|
20 | */ |
---|
21 | |
---|
22 | |
---|
23 | // --------------------------------------------------------------------------- |
---|
24 | // Includes |
---|
25 | // --------------------------------------------------------------------------- |
---|
26 | #include <icxercesc/util/PlatformUtils.hpp> |
---|
27 | #include <xercesc/util/Janitor.hpp> |
---|
28 | #include <xercesc/util/TranscodingException.hpp> |
---|
29 | #include <icxercesc/util/XMLString.hpp> |
---|
30 | #include <xercesc/util/XMLUniDefs.hpp> |
---|
31 | #include "ICUTransService.hpp" |
---|
32 | #include <string.h> |
---|
33 | #include <unicode/uloc.h> |
---|
34 | #include <unicode/uchar.h> |
---|
35 | #include <unicode/ucnv.h> |
---|
36 | #include <unicode/ucnv_err.h> |
---|
37 | #include <unicode/ustring.h> |
---|
38 | #include <unicode/udata.h> |
---|
39 | #if (U_ICU_VERSION_MAJOR_NUM >= 2) |
---|
40 | #include <unicode/uclean.h> |
---|
41 | #endif |
---|
42 | |
---|
#if !defined(XML_OS390) && !defined(XML_AS400) && !defined(XML_HPUX) && !defined(XML_PTX)
// Forward reference the symbol which points to the ICU converter data.
// It is only declared for ICU releases older than 2.0; the ICUTransService
// constructor passes it to udata_setCommonData() under the same guards.
#if (U_ICU_VERSION_MAJOR_NUM < 2)
extern "C" const uint8_t U_IMPORT icudata_dat[];
#endif
#endif

// Compatibility shims: when the U16_* / U_IS_BMP macros used by this file are
// not defined but the older UTF16_* macros are, map the newer names onto the
// older ones so the rest of the file can use a single spelling.
#if !defined(U16_NEXT_UNSAFE) && defined(UTF16_NEXT_CHAR_UNSAFE)
#define U16_NEXT_UNSAFE UTF16_NEXT_CHAR_UNSAFE
#endif

#if !defined(U16_APPEND_UNSAFE) && defined(UTF16_APPEND_CHAR_UNSAFE)
#define U16_APPEND_UNSAFE UTF16_APPEND_CHAR_UNSAFE
#endif

// A code point is treated as BMP when it needs exactly one UTF-16 code unit.
#if !defined(U_IS_BMP) && defined(UTF16_CHAR_LENGTH)
#define U_IS_BMP(c) (UTF16_CHAR_LENGTH(c)==1)
#endif
---|
61 | |
---|
62 | |
---|
63 | XERCES_CPP_NAMESPACE_BEGIN |
---|
64 | |
---|
65 | // --------------------------------------------------------------------------- |
---|
66 | // Local, const data |
---|
67 | // --------------------------------------------------------------------------- |
---|
// Identifier of this transcoding service ("ICU"), returned by getId().
static const XMLCh gMyServiceId[] =
{
    chLatin_I, chLatin_C, chLatin_U, chNull
};

// Upper-case spelling of the "S390" encoding-name suffix that
// makeNewXMLTranscoder() looks for.
static const XMLCh gS390Id[] =
{
    chLatin_S, chDigit_3, chDigit_9, chDigit_0, chNull
};

// Lower-case spelling of the same suffix ("s390").
static const XMLCh gs390Id[] =
{
    chLatin_s, chDigit_3, chDigit_9, chDigit_0, chNull
};

// The ",swaplfnl" option that replaces the s390 suffix in the name handed
// to ICU.
static const XMLCh gswaplfnlId[] =
{
    chComma, chLatin_s, chLatin_w, chLatin_a, chLatin_p,
    chLatin_l, chLatin_f, chLatin_n, chLatin_l, chNull
};
---|
88 | // --------------------------------------------------------------------------- |
---|
89 | // Local functions |
---|
90 | // --------------------------------------------------------------------------- |
---|
91 | |
---|
92 | // |
---|
93 | // When XMLCh and ICU's UChar are not the same size, we have to do a temp |
---|
94 | // conversion of all strings. These local helper methods make that easier. |
---|
95 | // |
---|
96 | static UChar* convertToUChar( const XMLCh* const toConvert |
---|
97 | , const XMLSize_t srcLen = 0 |
---|
98 | , MemoryManager* const manager = 0) |
---|
99 | { |
---|
100 | const XMLSize_t actualLen = srcLen |
---|
101 | ? srcLen : XMLString::stringLen(toConvert); |
---|
102 | |
---|
103 | UChar* tmpBuf = (manager) |
---|
104 | ? (UChar*) manager->allocate((actualLen + 1) * sizeof(UChar)) |
---|
105 | : new UChar[actualLen + 1]; |
---|
106 | const XMLCh* srcPtr = toConvert; |
---|
107 | UChar* outPtr = tmpBuf; |
---|
108 | while (*srcPtr) |
---|
109 | *outPtr++ = UChar(*srcPtr++); |
---|
110 | *outPtr = 0; |
---|
111 | |
---|
112 | return tmpBuf; |
---|
113 | } |
---|
114 | |
---|
115 | |
---|
116 | static XMLCh* convertToXMLCh( const UChar* const toConvert, |
---|
117 | MemoryManager* const manager = 0) |
---|
118 | { |
---|
119 | const unsigned int srcLen = u_strlen(toConvert); |
---|
120 | XMLCh* retBuf = (manager) |
---|
121 | ? (XMLCh*) manager->allocate((srcLen+1) * sizeof(XMLCh)) |
---|
122 | : new XMLCh[srcLen + 1]; |
---|
123 | |
---|
124 | XMLCh* outPtr = retBuf; |
---|
125 | const UChar* srcPtr = toConvert; |
---|
126 | while (*srcPtr) |
---|
127 | *outPtr++ = XMLCh(*srcPtr++); |
---|
128 | *outPtr = 0; |
---|
129 | |
---|
130 | return retBuf; |
---|
131 | } |
---|
132 | |
---|
133 | |
---|
134 | |
---|
135 | |
---|
136 | // --------------------------------------------------------------------------- |
---|
137 | // ICUTransService: Constructors and Destructor |
---|
138 | // --------------------------------------------------------------------------- |
---|
139 | ICUTransService::ICUTransService(MemoryManager*) |
---|
140 | { |
---|
141 | // Starting with ICU 3.4 we don't need to call init anymore. |
---|
142 | // |
---|
143 | #if (U_ICU_VERSION_MAJOR_NUM > 2 || (U_ICU_VERSION_MAJOR_NUM == 2 && U_ICU_VERSION_MINOR_NUM >= 6)) && \ |
---|
144 | (U_ICU_VERSION_MAJOR_NUM < 3 || (U_ICU_VERSION_MAJOR_NUM == 3 && U_ICU_VERSION_MINOR_NUM < 4)) |
---|
145 | UErrorCode errorCode=U_ZERO_ERROR; |
---|
146 | u_init(&errorCode); |
---|
147 | if(U_FAILURE(errorCode)) { |
---|
148 | XMLPlatformUtils::panic(PanicHandler::Panic_NoTransService); |
---|
149 | } |
---|
150 | #endif |
---|
151 | |
---|
152 | #if !defined(XML_OS390) && !defined(XML_AS400) && !defined(XML_HPUX) && !defined(XML_PTX) |
---|
153 | #if (U_ICU_VERSION_MAJOR_NUM < 2) |
---|
154 | // Starting with ICU 2.0, ICU itself includes a static reference to the data |
---|
155 | // entrypoint symbol. |
---|
156 | // |
---|
157 | // ICU 1.8 (and previous) did not include a static reference, but would |
---|
158 | // dynamically load the data dll when it was first needed, however this dynamic |
---|
159 | // loading proved unreliable in some of the odd environments that Xerces needed |
---|
160 | // to run in. Hence, the static reference. |
---|
161 | |
---|
162 | // Pass the location of the converter data to ICU. By doing so, we are |
---|
163 | // forcing the load of ICU converter data DLL, after the Xerces-C DLL is |
---|
164 | // loaded. This implies that Xerces-C, now has to explicitly link with the |
---|
165 | // ICU converter dll. However, the advantage is that we no longer depend |
---|
166 | // on the code which does demand dynamic loading of DLL's. The demand |
---|
167 | // loading is highly system dependent and was a constant source of support |
---|
168 | // calls. |
---|
169 | UErrorCode uerr = U_ZERO_ERROR; |
---|
170 | udata_setCommonData((void *) icudata_dat, &uerr); |
---|
171 | #endif |
---|
172 | #endif |
---|
173 | } |
---|
174 | |
---|
175 | ICUTransService::~ICUTransService() |
---|
176 | { |
---|
177 | /* |
---|
178 | * commented out the following clean up code |
---|
179 | * in case users use ICU outside of the parser |
---|
180 | * if we clean up here, users' code may crash |
---|
181 | * |
---|
182 | #if (U_ICU_VERSION_MAJOR_NUM >= 2) |
---|
183 | // release all lazily allocated data |
---|
184 | u_cleanup(); |
---|
185 | #endif |
---|
186 | */ |
---|
187 | } |
---|
188 | |
---|
189 | |
---|
190 | // --------------------------------------------------------------------------- |
---|
191 | // ICUTransService: The virtual transcoding service API |
---|
192 | // --------------------------------------------------------------------------- |
---|
193 | int ICUTransService::compareIString(const XMLCh* const comp1 |
---|
194 | , const XMLCh* const comp2) |
---|
195 | { |
---|
196 | size_t i = 0; |
---|
197 | size_t j = 0; |
---|
198 | |
---|
199 | for(;;) |
---|
200 | { |
---|
201 | UChar32 ch1; |
---|
202 | UChar32 ch2; |
---|
203 | |
---|
204 | U16_NEXT_UNSAFE(comp1, i, ch1); |
---|
205 | U16_NEXT_UNSAFE(comp2, j, ch2); |
---|
206 | |
---|
207 | const UChar32 folded1 = |
---|
208 | u_foldCase(ch1, U_FOLD_CASE_DEFAULT); |
---|
209 | |
---|
210 | const UChar32 folded2 = |
---|
211 | u_foldCase(ch2, U_FOLD_CASE_DEFAULT); |
---|
212 | |
---|
213 | if (folded1 != |
---|
214 | folded2) |
---|
215 | { |
---|
216 | return folded1 - folded2; |
---|
217 | } |
---|
218 | else if (ch1 == 0) |
---|
219 | { |
---|
220 | // If ch1 is 0, the ch2 must also be |
---|
221 | // 0. Otherwise, the previous if |
---|
222 | // would have failed. |
---|
223 | break; |
---|
224 | } |
---|
225 | } |
---|
226 | |
---|
227 | return 0; |
---|
228 | } |
---|
229 | |
---|
230 | |
---|
231 | int ICUTransService::compareNIString(const XMLCh* const comp1 |
---|
232 | , const XMLCh* const comp2 |
---|
233 | , const XMLSize_t maxChars) |
---|
234 | { |
---|
235 | if (maxChars > 0) |
---|
236 | { |
---|
237 | // Note that this function has somewhat broken semantics, as it's |
---|
238 | // possible for two strings of different lengths to compare as equal |
---|
239 | // in a case-insensitive manner, since one character could be |
---|
240 | // represented as a surrogate pair. |
---|
241 | size_t i = 0; |
---|
242 | size_t j = 0; |
---|
243 | |
---|
244 | for(;;) |
---|
245 | { |
---|
246 | UChar32 ch1; |
---|
247 | UChar32 ch2; |
---|
248 | |
---|
249 | U16_NEXT_UNSAFE(comp1, i, ch1); |
---|
250 | U16_NEXT_UNSAFE(comp2, j, ch2); |
---|
251 | |
---|
252 | const UChar32 folded1 = |
---|
253 | u_foldCase(ch1, U_FOLD_CASE_DEFAULT); |
---|
254 | |
---|
255 | const UChar32 folded2 = |
---|
256 | u_foldCase(ch2, U_FOLD_CASE_DEFAULT); |
---|
257 | |
---|
258 | if (folded1 != folded2) |
---|
259 | { |
---|
260 | return folded1 - folded2; |
---|
261 | } |
---|
262 | else if (i == maxChars) |
---|
263 | { |
---|
264 | // If we're at the end of both strings, return 0. |
---|
265 | // Otherwise, we've run out of characters in the |
---|
266 | // left string, so return -1. |
---|
267 | return j == maxChars ? 0 : -1; |
---|
268 | } |
---|
269 | else if (j == maxChars) |
---|
270 | { |
---|
271 | // We've run out of characters in the right string, |
---|
272 | // but not the left, so return 1. |
---|
273 | return 1; |
---|
274 | } |
---|
275 | } |
---|
276 | } |
---|
277 | |
---|
278 | return 0; |
---|
279 | } |
---|
280 | |
---|
281 | |
---|
282 | const XMLCh* ICUTransService::getId() const |
---|
283 | { |
---|
284 | return gMyServiceId; |
---|
285 | } |
---|
286 | |
---|
287 | XMLLCPTranscoder* ICUTransService::makeNewLCPTranscoder(MemoryManager* manager) |
---|
288 | { |
---|
289 | // |
---|
290 | // Try to create a default converter. If it fails, return a null |
---|
291 | // pointer which will basically cause the system to give up because |
---|
292 | // we really can't do anything without one. |
---|
293 | // |
---|
294 | UErrorCode uerr = U_ZERO_ERROR; |
---|
295 | UConverter* converter = ucnv_open(NULL, &uerr); |
---|
296 | if (!converter) |
---|
297 | return 0; |
---|
298 | |
---|
299 | // That went ok, so create an ICU LCP transcoder wrapper and return it |
---|
300 | return new (manager) ICULCPTranscoder(converter); |
---|
301 | } |
---|
302 | |
---|
303 | |
---|
304 | bool ICUTransService::supportsSrcOfs() const |
---|
305 | { |
---|
306 | // This implementation supports source offset information |
---|
307 | return true; |
---|
308 | } |
---|
309 | |
---|
310 | |
---|
311 | template <class FunctionType> |
---|
312 | static void doCaseConvert(XMLCh* convertString, |
---|
313 | FunctionType caseFunction) |
---|
314 | { |
---|
315 | // Note the semantics of this function are broken, since it's |
---|
316 | // possible that changing the case of a string could increase |
---|
317 | // its length, but there's no way to handle such a situation. |
---|
318 | const XMLSize_t len = XMLString::stringLen(convertString); |
---|
319 | |
---|
320 | size_t readPos = 0; |
---|
321 | size_t writePos = 0; |
---|
322 | |
---|
323 | while(readPos < len) |
---|
324 | { |
---|
325 | UChar32 original; |
---|
326 | |
---|
327 | // Get the next Unicode code point. |
---|
328 | U16_NEXT_UNSAFE(convertString, readPos, original); |
---|
329 | |
---|
330 | // Convert the code point |
---|
331 | const UChar32 converted = caseFunction(original); |
---|
332 | |
---|
333 | // OK, now here's where it gets ugly. |
---|
334 | if (!U_IS_BMP(converted) && U_IS_BMP(original) && |
---|
335 | readPos - writePos == 1) |
---|
336 | { |
---|
337 | // We do not have room to convert the |
---|
338 | // character without overwriting the next |
---|
339 | // character, so we will just stop. |
---|
340 | break; |
---|
341 | } |
---|
342 | else |
---|
343 | { |
---|
344 | U16_APPEND_UNSAFE(convertString, writePos, converted); |
---|
345 | } |
---|
346 | } |
---|
347 | |
---|
348 | convertString[writePos] = 0; |
---|
349 | } |
---|
350 | |
---|
351 | |
---|
352 | |
---|
353 | void ICUTransService::upperCase(XMLCh* const toUpperCase) |
---|
354 | { |
---|
355 | doCaseConvert(toUpperCase, u_toupper); |
---|
356 | } |
---|
357 | |
---|
358 | void ICUTransService::lowerCase(XMLCh* const toLowerCase) |
---|
359 | { |
---|
360 | doCaseConvert(toLowerCase, u_tolower); |
---|
361 | } |
---|
362 | |
---|
363 | |
---|
364 | |
---|
365 | // --------------------------------------------------------------------------- |
---|
366 | // ICUTransService: The protected virtual transcoding service API |
---|
367 | // --------------------------------------------------------------------------- |
---|
368 | XMLTranscoder* ICUTransService:: |
---|
369 | makeNewXMLTranscoder(const XMLCh* const encodingName |
---|
370 | , XMLTransService::Codes& resValue |
---|
371 | , const XMLSize_t blockSize |
---|
372 | , MemoryManager* const manager) |
---|
373 | { |
---|
374 | // |
---|
375 | // For encodings that end with "s390" we need to strip off the "s390" |
---|
376 | // from the encoding name and add ",swaplfnl" to the encoding name |
---|
377 | // that we pass into ICU on the ucnv_openU. |
---|
378 | // |
---|
379 | XMLCh* encodingNameToUse = (XMLCh*) encodingName; |
---|
380 | XMLCh* workBuffer = 0; |
---|
381 | |
---|
382 | if ( (XMLString::endsWith(encodingNameToUse, gs390Id)) || |
---|
383 | (XMLString::endsWith(encodingNameToUse, gS390Id)) ) |
---|
384 | { |
---|
385 | XMLSize_t workBufferSize = (XMLString::stringLen(encodingNameToUse) + XMLString::stringLen(gswaplfnlId) - XMLString::stringLen(gS390Id) + 1); |
---|
386 | workBuffer = (XMLCh*) manager->allocate(workBufferSize * sizeof(XMLCh)); |
---|
387 | XMLSize_t moveSize = XMLString::stringLen(encodingNameToUse) - XMLString::stringLen(gS390Id); |
---|
388 | XMLString::moveChars(workBuffer, encodingNameToUse, moveSize); |
---|
389 | XMLString::moveChars((workBuffer + moveSize), gswaplfnlId, XMLString::stringLen(gswaplfnlId)); |
---|
390 | encodingNameToUse = workBuffer; |
---|
391 | } |
---|
392 | |
---|
393 | // |
---|
394 | // If UChar and XMLCh are not the same size, then we have premassage the |
---|
395 | // encoding name into a UChar type string. |
---|
396 | // |
---|
397 | const UChar* actualName; |
---|
398 | UChar* tmpName = 0; |
---|
399 | if (sizeof(UChar) == sizeof(XMLCh)) |
---|
400 | { |
---|
401 | actualName = (const UChar*)encodingNameToUse; |
---|
402 | } |
---|
403 | else |
---|
404 | { |
---|
405 | tmpName = convertToUChar(encodingNameToUse, 0, manager); |
---|
406 | actualName = tmpName; |
---|
407 | } |
---|
408 | |
---|
409 | ArrayJanitor<UChar> janTmp(tmpName, manager); |
---|
410 | ArrayJanitor<XMLCh> janTmp1(workBuffer, manager); |
---|
411 | |
---|
412 | UErrorCode uerr = U_ZERO_ERROR; |
---|
413 | UConverter* converter = ucnv_openU(actualName, &uerr); |
---|
414 | if (!converter) |
---|
415 | { |
---|
416 | resValue = XMLTransService::UnsupportedEncoding; |
---|
417 | return 0; |
---|
418 | } |
---|
419 | |
---|
420 | return new (manager) ICUTranscoder(encodingName, converter, blockSize, manager); |
---|
421 | } |
---|
422 | |
---|
423 | |
---|
424 | |
---|
425 | |
---|
426 | // --------------------------------------------------------------------------- |
---|
427 | // ICUTranscoder: Constructors and Destructor |
---|
428 | // --------------------------------------------------------------------------- |
---|
429 | ICUTranscoder::ICUTranscoder(const XMLCh* const encodingName |
---|
430 | , UConverter* const toAdopt |
---|
431 | , const XMLSize_t blockSize |
---|
432 | , MemoryManager* const manager) : |
---|
433 | |
---|
434 | XMLTranscoder(encodingName, blockSize, manager) |
---|
435 | , fConverter(toAdopt) |
---|
436 | , fFixed(false) |
---|
437 | , fSrcOffsets(0) |
---|
438 | { |
---|
439 | // If there is a block size, then allocate our source offset array |
---|
440 | if (blockSize) |
---|
441 | fSrcOffsets = (XMLUInt32*) manager->allocate |
---|
442 | ( |
---|
443 | blockSize * sizeof(XMLUInt32) |
---|
444 | );//new XMLUInt32[blockSize]; |
---|
445 | |
---|
446 | // Remember if its a fixed size encoding |
---|
447 | fFixed = (ucnv_getMaxCharSize(fConverter) == ucnv_getMinCharSize(fConverter)); |
---|
448 | } |
---|
449 | |
---|
450 | ICUTranscoder::~ICUTranscoder() |
---|
451 | { |
---|
452 | getMemoryManager()->deallocate(fSrcOffsets);//delete [] fSrcOffsets; |
---|
453 | |
---|
454 | // If there is a converter, ask ICU to clean it up |
---|
455 | if (fConverter) |
---|
456 | { |
---|
457 | // <TBD> Does this actually delete the structure??? |
---|
458 | ucnv_close(fConverter); |
---|
459 | fConverter = 0; |
---|
460 | } |
---|
461 | } |
---|
462 | |
---|
463 | |
---|
464 | // --------------------------------------------------------------------------- |
---|
465 | // ICUTranscoder: The virtual transcoder API |
---|
466 | // --------------------------------------------------------------------------- |
---|
467 | XMLSize_t |
---|
468 | ICUTranscoder::transcodeFrom(const XMLByte* const srcData |
---|
469 | , const XMLSize_t srcCount |
---|
470 | , XMLCh* const toFill |
---|
471 | , const XMLSize_t maxChars |
---|
472 | , XMLSize_t& bytesEaten |
---|
473 | , unsigned char* const charSizes) |
---|
474 | { |
---|
475 | // Set up pointers to the start and end of the source buffer |
---|
476 | const XMLByte* startSrc = srcData; |
---|
477 | const XMLByte* endSrc = srcData + srcCount; |
---|
478 | |
---|
479 | // |
---|
480 | // And now do the target buffer. This works differently according to |
---|
481 | // whether XMLCh and UChar are the same size or not. |
---|
482 | // |
---|
483 | UChar* startTarget; |
---|
484 | if (sizeof(XMLCh) == sizeof(UChar)) |
---|
485 | startTarget = (UChar*)toFill; |
---|
486 | else |
---|
487 | startTarget = (UChar*) getMemoryManager()->allocate |
---|
488 | ( |
---|
489 | maxChars * sizeof(UChar) |
---|
490 | );//new UChar[maxChars]; |
---|
491 | UChar* orgTarget = startTarget; |
---|
492 | |
---|
493 | // |
---|
494 | // Transcode the buffer. Buffer overflow errors are normal, occuring |
---|
495 | // when the raw input buffer holds more characters than will fit in |
---|
496 | // the Unicode output buffer. |
---|
497 | // |
---|
498 | UErrorCode err = U_ZERO_ERROR; |
---|
499 | ucnv_toUnicode |
---|
500 | ( |
---|
501 | fConverter |
---|
502 | , &startTarget |
---|
503 | , startTarget + maxChars |
---|
504 | , (const char**)&startSrc |
---|
505 | , (const char*)endSrc |
---|
506 | , (fFixed ? 0 : (int32_t*)fSrcOffsets) |
---|
507 | , false |
---|
508 | , &err |
---|
509 | ); |
---|
510 | |
---|
511 | if ((err != U_ZERO_ERROR) && (err != U_BUFFER_OVERFLOW_ERROR)) |
---|
512 | { |
---|
513 | if (orgTarget != (UChar*)toFill) |
---|
514 | getMemoryManager()->deallocate(orgTarget);//delete [] orgTarget; |
---|
515 | |
---|
516 | if (fFixed) |
---|
517 | { |
---|
518 | XMLCh tmpBuf[17]; |
---|
519 | XMLString::binToText((unsigned int)(*startTarget), tmpBuf, 16, 16, getMemoryManager()); |
---|
520 | ThrowXMLwithMemMgr2 |
---|
521 | ( |
---|
522 | TranscodingException |
---|
523 | , XMLExcepts::Trans_BadSrcCP |
---|
524 | , tmpBuf |
---|
525 | , getEncodingName() |
---|
526 | , getMemoryManager() |
---|
527 | ); |
---|
528 | } |
---|
529 | else |
---|
530 | { |
---|
531 | ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq, getMemoryManager()); |
---|
532 | } |
---|
533 | } |
---|
534 | |
---|
535 | // Calculate the bytes eaten and store in caller's param |
---|
536 | bytesEaten = startSrc - srcData; |
---|
537 | |
---|
538 | // And the characters decoded |
---|
539 | const XMLSize_t charsDecoded = startTarget - orgTarget; |
---|
540 | |
---|
541 | // |
---|
542 | // Translate the array of char offsets into an array of character |
---|
543 | // sizes, which is what the transcoder interface semantics requires. |
---|
544 | // If its fixed, then we can optimize it. |
---|
545 | // |
---|
546 | if (fFixed) |
---|
547 | { |
---|
548 | const unsigned char fillSize = (unsigned char)ucnv_getMaxCharSize(fConverter); |
---|
549 | memset(charSizes, fillSize, maxChars); |
---|
550 | } |
---|
551 | else |
---|
552 | { |
---|
553 | // |
---|
554 | // We have to convert the series of offsets into a series of |
---|
555 | // sizes. If just one char was decoded, then its the total bytes |
---|
556 | // eaten. Otherwise, do a loop and subtract out each element from |
---|
557 | // its previous element. |
---|
558 | // |
---|
559 | if (charsDecoded == 1) |
---|
560 | { |
---|
561 | charSizes[0] = (unsigned char)bytesEaten; |
---|
562 | } |
---|
563 | else |
---|
564 | { |
---|
565 | // ICU does not return an extra element to allow us to figure |
---|
566 | // out the last char size, so we have to compute it from the |
---|
567 | // total bytes used. |
---|
568 | unsigned int index; |
---|
569 | for (index = 0; index < charsDecoded - 1; index++) |
---|
570 | { |
---|
571 | charSizes[index] = (unsigned char)(fSrcOffsets[index + 1] |
---|
572 | - fSrcOffsets[index]); |
---|
573 | } |
---|
574 | if( charsDecoded > 0 ) { |
---|
575 | charSizes[charsDecoded - 1] = (unsigned char)(bytesEaten |
---|
576 | - fSrcOffsets[charsDecoded - 1]); |
---|
577 | } |
---|
578 | } |
---|
579 | } |
---|
580 | |
---|
581 | // |
---|
582 | // If XMLCh and UChar are not the same size, then we need to copy over |
---|
583 | // the temp buffer to the new one. |
---|
584 | // |
---|
585 | if (sizeof(UChar) != sizeof(XMLCh)) |
---|
586 | { |
---|
587 | XMLCh* outPtr = toFill; |
---|
588 | startTarget = orgTarget; |
---|
589 | for (unsigned int index = 0; index < charsDecoded; index++) |
---|
590 | *outPtr++ = XMLCh(*startTarget++); |
---|
591 | |
---|
592 | // And delete the temp buffer |
---|
593 | getMemoryManager()->deallocate(orgTarget);//delete [] orgTarget; |
---|
594 | } |
---|
595 | |
---|
596 | // Return the chars we put into the target buffer |
---|
597 | return charsDecoded; |
---|
598 | } |
---|
599 | |
---|
600 | |
---|
601 | XMLSize_t |
---|
602 | ICUTranscoder::transcodeTo( const XMLCh* const srcData |
---|
603 | , const XMLSize_t srcCount |
---|
604 | , XMLByte* const toFill |
---|
605 | , const XMLSize_t maxBytes |
---|
606 | , XMLSize_t& charsEaten |
---|
607 | , const UnRepOpts options) |
---|
608 | { |
---|
609 | // |
---|
610 | // Get a pointer to the buffer to transcode. If UChar and XMLCh are |
---|
611 | // the same size here, then use the original. Else, create a temp |
---|
612 | // one and put a janitor on it. |
---|
613 | // |
---|
614 | const UChar* srcPtr; |
---|
615 | UChar* tmpBufPtr = 0; |
---|
616 | if (sizeof(XMLCh) == sizeof(UChar)) |
---|
617 | { |
---|
618 | srcPtr = (const UChar*)srcData; |
---|
619 | } |
---|
620 | else |
---|
621 | { |
---|
622 | tmpBufPtr = convertToUChar(srcData, srcCount, getMemoryManager()); |
---|
623 | srcPtr = tmpBufPtr; |
---|
624 | } |
---|
625 | ArrayJanitor<UChar> janTmpBuf(tmpBufPtr, getMemoryManager()); |
---|
626 | |
---|
627 | // |
---|
628 | // Set the appropriate callback so that it will either fail or use |
---|
629 | // the rep char. Remember the old one so we can put it back. |
---|
630 | // |
---|
631 | UErrorCode err = U_ZERO_ERROR; |
---|
632 | UConverterFromUCallback oldCB = NULL; |
---|
633 | #if (U_ICU_VERSION_MAJOR_NUM < 2) |
---|
634 | void* orgContent; |
---|
635 | #else |
---|
636 | const void* orgContent; |
---|
637 | #endif |
---|
638 | ucnv_setFromUCallBack |
---|
639 | ( |
---|
640 | fConverter |
---|
641 | , (options == UnRep_Throw) ? UCNV_FROM_U_CALLBACK_STOP |
---|
642 | : UCNV_FROM_U_CALLBACK_SUBSTITUTE |
---|
643 | , NULL |
---|
644 | , &oldCB |
---|
645 | , &orgContent |
---|
646 | , &err |
---|
647 | ); |
---|
648 | |
---|
649 | // |
---|
650 | // Ok, lets transcode as many chars as we we can in one shot. The |
---|
651 | // ICU API gives enough info not to have to do this one char by char. |
---|
652 | // |
---|
653 | XMLByte* startTarget = toFill; |
---|
654 | const UChar* startSrc = srcPtr; |
---|
655 | err = U_ZERO_ERROR; |
---|
656 | ucnv_fromUnicode |
---|
657 | ( |
---|
658 | fConverter |
---|
659 | , (char**)&startTarget |
---|
660 | , (char*)(startTarget + maxBytes) |
---|
661 | , &startSrc |
---|
662 | , srcPtr + srcCount |
---|
663 | , 0 |
---|
664 | , false |
---|
665 | , &err |
---|
666 | ); |
---|
667 | |
---|
668 | // Rememember the status before we possibly overite the error code |
---|
669 | const bool res = (err == U_ZERO_ERROR); |
---|
670 | |
---|
671 | // Put the old handler back |
---|
672 | err = U_ZERO_ERROR; |
---|
673 | UConverterFromUCallback orgAction = NULL; |
---|
674 | |
---|
675 | ucnv_setFromUCallBack(fConverter, oldCB, NULL, &orgAction, &orgContent, &err); |
---|
676 | |
---|
677 | if (!res) |
---|
678 | { |
---|
679 | XMLCh tmpBuf[17]; |
---|
680 | XMLString::binToText((unsigned int)*startSrc, tmpBuf, 16, 16, getMemoryManager()); |
---|
681 | ThrowXMLwithMemMgr2 |
---|
682 | ( |
---|
683 | TranscodingException |
---|
684 | , XMLExcepts::Trans_Unrepresentable |
---|
685 | , tmpBuf |
---|
686 | , getEncodingName() |
---|
687 | , getMemoryManager() |
---|
688 | ); |
---|
689 | } |
---|
690 | |
---|
691 | // Fill in the chars we ate from the input |
---|
692 | charsEaten = startSrc - srcPtr; |
---|
693 | |
---|
694 | // Return the chars we stored |
---|
695 | return startTarget - toFill; |
---|
696 | } |
---|
697 | |
---|
698 | |
---|
699 | bool ICUTranscoder::canTranscodeTo(const unsigned int toCheck) |
---|
700 | { |
---|
701 | // |
---|
702 | // If the passed value is really a surrogate embedded together, then |
---|
703 | // we need to break it out into its two chars. Else just one. While |
---|
704 | // we are ate it, convert them to UChar format if required. |
---|
705 | // |
---|
706 | UChar srcBuf[2]; |
---|
707 | unsigned int srcCount = 1; |
---|
708 | if (toCheck & 0xFFFF0000) |
---|
709 | { |
---|
710 | srcBuf[0] = UChar((toCheck >> 10) + 0xD800); |
---|
711 | srcBuf[1] = UChar(toCheck & 0x3FF) + 0xDC00; |
---|
712 | srcCount++; |
---|
713 | } |
---|
714 | else |
---|
715 | { |
---|
716 | srcBuf[0] = UChar(toCheck); |
---|
717 | } |
---|
718 | |
---|
719 | // |
---|
720 | // Set the callback so that it will fail instead of using the rep char. |
---|
721 | // Remember the old one so we can put it back. |
---|
722 | // |
---|
723 | UErrorCode err = U_ZERO_ERROR; |
---|
724 | UConverterFromUCallback oldCB = NULL; |
---|
725 | #if (U_ICU_VERSION_MAJOR_NUM < 2) |
---|
726 | void* orgContent; |
---|
727 | #else |
---|
728 | const void* orgContent; |
---|
729 | #endif |
---|
730 | |
---|
731 | ucnv_setFromUCallBack |
---|
732 | ( |
---|
733 | fConverter |
---|
734 | , UCNV_FROM_U_CALLBACK_STOP |
---|
735 | , NULL |
---|
736 | , &oldCB |
---|
737 | , &orgContent |
---|
738 | , &err |
---|
739 | ); |
---|
740 | |
---|
741 | // Set upa temp buffer to format into. Make it more than big enough |
---|
742 | char tmpBuf[64]; |
---|
743 | char* startTarget = tmpBuf; |
---|
744 | const UChar* startSrc = srcBuf; |
---|
745 | |
---|
746 | err = U_ZERO_ERROR; |
---|
747 | ucnv_fromUnicode |
---|
748 | ( |
---|
749 | fConverter |
---|
750 | , &startTarget |
---|
751 | , startTarget + 64 |
---|
752 | , &startSrc |
---|
753 | , srcBuf + srcCount |
---|
754 | , 0 |
---|
755 | , false |
---|
756 | , &err |
---|
757 | ); |
---|
758 | |
---|
759 | // Save the result before we overight the error code |
---|
760 | const bool res = (err == U_ZERO_ERROR); |
---|
761 | |
---|
762 | // Put the old handler back |
---|
763 | err = U_ZERO_ERROR; |
---|
764 | UConverterFromUCallback orgAction = NULL; |
---|
765 | |
---|
766 | ucnv_setFromUCallBack(fConverter, oldCB, NULL, &orgAction, &orgContent, &err); |
---|
767 | |
---|
768 | return res; |
---|
769 | } |
---|
770 | |
---|
771 | |
---|
772 | |
---|
773 | // --------------------------------------------------------------------------- |
---|
774 | // ICULCPTranscoder: Constructors and Destructor |
---|
775 | // --------------------------------------------------------------------------- |
---|
776 | ICULCPTranscoder::ICULCPTranscoder(UConverter* const toAdopt) : |
---|
777 | |
---|
778 | fConverter(toAdopt) |
---|
779 | { |
---|
780 | } |
---|
781 | |
---|
782 | ICULCPTranscoder::~ICULCPTranscoder() |
---|
783 | { |
---|
784 | // If there is a converter, ask ICU to clean it up |
---|
785 | if (fConverter) |
---|
786 | { |
---|
787 | // <TBD> Does this actually delete the structure??? |
---|
788 | ucnv_close(fConverter); |
---|
789 | fConverter = 0; |
---|
790 | } |
---|
791 | } |
---|
792 | |
---|
793 | |
---|
794 | // --------------------------------------------------------------------------- |
---|
795 | // ICULCPTranscoder: The virtual transcoder API |
---|
796 | // --------------------------------------------------------------------------- |
---|
797 | XMLSize_t ICULCPTranscoder::calcRequiredSize(const XMLCh* const srcText |
---|
798 | , MemoryManager* const manager) |
---|
799 | { |
---|
800 | if (!srcText) |
---|
801 | return 0; |
---|
802 | |
---|
803 | // |
---|
804 | // We do two different versions of this, according to whether XMLCh |
---|
805 | // is the same size as UChar or not. |
---|
806 | // |
---|
807 | UErrorCode err = U_ZERO_ERROR; |
---|
808 | int32_t targetCap; |
---|
809 | if (sizeof(XMLCh) == sizeof(UChar)) |
---|
810 | { |
---|
811 | // Use a faux scope to synchronize while we do this |
---|
812 | { |
---|
813 | XMLMutexLock lockConverter(&fMutex); |
---|
814 | |
---|
815 | targetCap = ucnv_fromUChars |
---|
816 | ( |
---|
817 | fConverter |
---|
818 | , 0 |
---|
819 | , 0 |
---|
820 | , (const UChar*)srcText |
---|
821 | , -1 |
---|
822 | , &err |
---|
823 | ); |
---|
824 | } |
---|
825 | } |
---|
826 | else |
---|
827 | { |
---|
828 | // Copy the source to a local temp |
---|
829 | UChar* tmpBuf = convertToUChar(srcText, 0, manager); |
---|
830 | ArrayJanitor<UChar> janTmp(tmpBuf, manager); |
---|
831 | |
---|
832 | // Use a faux scope to synchronize while we do this |
---|
833 | { |
---|
834 | XMLMutexLock lockConverter(&fMutex); |
---|
835 | |
---|
836 | targetCap = ucnv_fromUChars |
---|
837 | ( |
---|
838 | fConverter |
---|
839 | , 0 |
---|
840 | , 0 |
---|
841 | , tmpBuf |
---|
842 | , -1 |
---|
843 | , &err |
---|
844 | ); |
---|
845 | } |
---|
846 | } |
---|
847 | |
---|
848 | if (err != U_BUFFER_OVERFLOW_ERROR) |
---|
849 | return 0; |
---|
850 | |
---|
851 | return (XMLSize_t)targetCap; |
---|
852 | } |
---|
853 | |
---|
854 | XMLSize_t ICULCPTranscoder::calcRequiredSize(const char* const srcText |
---|
855 | , MemoryManager* const /*manager*/) |
---|
856 | { |
---|
857 | if (!srcText) |
---|
858 | return 0; |
---|
859 | |
---|
860 | int32_t targetCap; |
---|
861 | UErrorCode err = U_ZERO_ERROR; |
---|
862 | |
---|
863 | // Use a faux scope to synchronize while we do this |
---|
864 | { |
---|
865 | XMLMutexLock lockConverter(&fMutex); |
---|
866 | targetCap = ucnv_toUChars |
---|
867 | ( |
---|
868 | fConverter |
---|
869 | , 0 |
---|
870 | , 0 |
---|
871 | , srcText |
---|
872 | , (int32_t)strlen(srcText) |
---|
873 | , &err |
---|
874 | ); |
---|
875 | } |
---|
876 | |
---|
877 | if (err != U_BUFFER_OVERFLOW_ERROR) |
---|
878 | return 0; |
---|
879 | |
---|
880 | #if (U_ICU_VERSION_MAJOR_NUM < 2) |
---|
881 | // Subtract one since it includes the terminator space |
---|
882 | return (XMLSize_t)(targetCap - 1); |
---|
883 | #else |
---|
884 | // Starting ICU 2.0, this is fixed and all ICU String functions have consistent NUL-termination behavior. |
---|
885 | // The returned length is always the number of output UChar's, not counting an additional, terminating NUL. |
---|
886 | return (XMLSize_t)(targetCap); |
---|
887 | #endif |
---|
888 | } |
---|
889 | |
---|
890 | |
---|
891 | char* ICULCPTranscoder::transcode(const XMLCh* const toTranscode, |
---|
892 | MemoryManager* const manager) |
---|
893 | { |
---|
894 | char* retBuf = 0; |
---|
895 | |
---|
896 | // Check for a couple of special cases |
---|
897 | if (!toTranscode) |
---|
898 | return retBuf; |
---|
899 | |
---|
900 | if (!*toTranscode) |
---|
901 | { |
---|
902 | retBuf = (char*) manager->allocate(sizeof(char));//new char[1]; |
---|
903 | retBuf[0] = 0; |
---|
904 | return retBuf; |
---|
905 | } |
---|
906 | |
---|
907 | // |
---|
908 | // Get the length of the source string since we'll have to use it in |
---|
909 | // a couple places below. |
---|
910 | // |
---|
911 | const XMLSize_t srcLen = XMLString::stringLen(toTranscode); |
---|
912 | |
---|
913 | // |
---|
914 | // If XMLCh and UChar are not the same size, then we have to make a |
---|
915 | // temp copy of the text to pass to ICU. |
---|
916 | // |
---|
917 | const UChar* actualSrc; |
---|
918 | UChar* ncActual = 0; |
---|
919 | if (sizeof(XMLCh) == sizeof(UChar)) |
---|
920 | { |
---|
921 | actualSrc = (const UChar*)toTranscode; |
---|
922 | } |
---|
923 | else |
---|
924 | { |
---|
925 | // Allocate a non-const temp buf, but store it also in the actual |
---|
926 | ncActual = convertToUChar(toTranscode, 0, manager); |
---|
927 | actualSrc = ncActual; |
---|
928 | } |
---|
929 | |
---|
930 | // Insure that the temp buffer, if any, gets cleaned up via the nc pointer |
---|
931 | ArrayJanitor<UChar> janTmp(ncActual, manager); |
---|
932 | |
---|
933 | // Caculate a return buffer size not too big, but less likely to overflow |
---|
934 | int32_t targetLen = (int32_t)(srcLen * 1.25); |
---|
935 | |
---|
936 | // Allocate the return buffer |
---|
937 | retBuf = (char*) manager->allocate((targetLen + 1) * sizeof(char));//new char[targetLen + 1]; |
---|
938 | |
---|
939 | // |
---|
940 | // Lock now while we call the converter. Use a faux block to do the |
---|
941 | // lock so that it unlocks immediately afterwards. |
---|
942 | // |
---|
943 | UErrorCode err = U_ZERO_ERROR; |
---|
944 | int32_t targetCap; |
---|
945 | { |
---|
946 | XMLMutexLock lockConverter(&fMutex); |
---|
947 | |
---|
948 | targetCap = ucnv_fromUChars |
---|
949 | ( |
---|
950 | fConverter |
---|
951 | , retBuf |
---|
952 | , targetLen + 1 |
---|
953 | , actualSrc |
---|
954 | , -1 |
---|
955 | , &err |
---|
956 | ); |
---|
957 | } |
---|
958 | |
---|
959 | // If targetLen is not enough then buffer overflow might occur |
---|
960 | if ((err == U_BUFFER_OVERFLOW_ERROR) || (err == U_STRING_NOT_TERMINATED_WARNING)) |
---|
961 | { |
---|
962 | // |
---|
963 | // Reset the error, delete the old buffer, allocate a new one, |
---|
964 | // and try again. |
---|
965 | // |
---|
966 | err = U_ZERO_ERROR; |
---|
967 | manager->deallocate(retBuf);//delete [] retBuf; |
---|
968 | retBuf = (char*) manager->allocate((targetCap + 1) * sizeof(char));//new char[targetCap + 1]; |
---|
969 | |
---|
970 | // Lock again before we retry |
---|
971 | XMLMutexLock lockConverter(&fMutex); |
---|
972 | targetCap = ucnv_fromUChars |
---|
973 | ( |
---|
974 | fConverter |
---|
975 | , retBuf |
---|
976 | , targetCap + 1 |
---|
977 | , actualSrc |
---|
978 | , -1 |
---|
979 | , &err |
---|
980 | ); |
---|
981 | } |
---|
982 | |
---|
983 | if (U_FAILURE(err)) |
---|
984 | { |
---|
985 | manager->deallocate(retBuf);//delete [] retBuf; |
---|
986 | return 0; |
---|
987 | } |
---|
988 | |
---|
989 | return retBuf; |
---|
990 | } |
---|
991 | |
---|
992 | XMLCh* ICULCPTranscoder::transcode(const char* const toTranscode, |
---|
993 | MemoryManager* const manager) |
---|
994 | { |
---|
995 | // Watch for a few pyscho corner cases |
---|
996 | if (!toTranscode) |
---|
997 | return 0; |
---|
998 | |
---|
999 | if (!*toTranscode) |
---|
1000 | { |
---|
1001 | XMLCh* retVal = (XMLCh*) manager->allocate(sizeof(XMLCh));//new XMLCh[1]; |
---|
1002 | retVal[0] = 0; |
---|
1003 | return retVal; |
---|
1004 | } |
---|
1005 | |
---|
1006 | // |
---|
1007 | // Get the length of the string to transcode. The Unicode string will |
---|
1008 | // almost always be no more chars than were in the source, so this is |
---|
1009 | // the best guess as to the storage needed. |
---|
1010 | // |
---|
1011 | const int32_t srcLen = (int32_t)strlen(toTranscode); |
---|
1012 | |
---|
1013 | // We need a target buffer of UChars to fill in |
---|
1014 | UChar* targetBuf = 0; |
---|
1015 | |
---|
1016 | // Now lock while we do these calculations |
---|
1017 | UErrorCode err = U_ZERO_ERROR; |
---|
1018 | int32_t targetCap; |
---|
1019 | { |
---|
1020 | XMLMutexLock lockConverter(&fMutex); |
---|
1021 | |
---|
1022 | // |
---|
1023 | // Here we don't know what the target length will be so use 0 and |
---|
1024 | // expect an U_BUFFER_OVERFLOW_ERROR in which case it'd get resolved |
---|
1025 | // by the correct capacity value. |
---|
1026 | // |
---|
1027 | targetCap = ucnv_toUChars |
---|
1028 | ( |
---|
1029 | fConverter |
---|
1030 | , 0 |
---|
1031 | , 0 |
---|
1032 | , toTranscode |
---|
1033 | , srcLen |
---|
1034 | , &err |
---|
1035 | ); |
---|
1036 | |
---|
1037 | if (err != U_BUFFER_OVERFLOW_ERROR) |
---|
1038 | return 0; |
---|
1039 | |
---|
1040 | err = U_ZERO_ERROR; |
---|
1041 | targetBuf = (UChar*) manager->allocate((targetCap+1) * sizeof(UChar));//new UChar[targetCap + 1]; |
---|
1042 | ucnv_toUChars |
---|
1043 | ( |
---|
1044 | fConverter |
---|
1045 | , targetBuf |
---|
1046 | , targetCap + 1 |
---|
1047 | , toTranscode |
---|
1048 | , srcLen |
---|
1049 | , &err |
---|
1050 | ); |
---|
1051 | } |
---|
1052 | |
---|
1053 | if (U_FAILURE(err)) |
---|
1054 | { |
---|
1055 | // Clean up if we got anything allocated |
---|
1056 | manager->deallocate(targetBuf);//delete [] targetBuf; |
---|
1057 | return 0; |
---|
1058 | } |
---|
1059 | |
---|
1060 | // Cap it off to make sure |
---|
1061 | targetBuf[targetCap] = 0; |
---|
1062 | |
---|
1063 | // |
---|
1064 | // If XMLCh and UChar are the same size, then we can return retVal |
---|
1065 | // as is. Else, we have to allocate another buffer and copy the data |
---|
1066 | // over to it. |
---|
1067 | // |
---|
1068 | XMLCh* actualRet; |
---|
1069 | if (sizeof(XMLCh) == sizeof(UChar)) |
---|
1070 | { |
---|
1071 | actualRet = (XMLCh*)targetBuf; |
---|
1072 | } |
---|
1073 | else |
---|
1074 | { |
---|
1075 | actualRet = convertToXMLCh(targetBuf, manager); |
---|
1076 | manager->deallocate(targetBuf);//delete [] targetBuf; |
---|
1077 | } |
---|
1078 | return actualRet; |
---|
1079 | } |
---|
1080 | |
---|
1081 | |
---|
1082 | bool ICULCPTranscoder::transcode(const char* const toTranscode |
---|
1083 | , XMLCh* const toFill |
---|
1084 | , const XMLSize_t maxChars |
---|
1085 | , MemoryManager* const manager) |
---|
1086 | { |
---|
1087 | // Check for a couple of psycho corner cases |
---|
1088 | if (!toTranscode || !maxChars) |
---|
1089 | { |
---|
1090 | toFill[0] = 0; |
---|
1091 | return true; |
---|
1092 | } |
---|
1093 | |
---|
1094 | if (!*toTranscode) |
---|
1095 | { |
---|
1096 | toFill[0] = 0; |
---|
1097 | return true; |
---|
1098 | } |
---|
1099 | |
---|
1100 | // We'll need this in a couple of places below |
---|
1101 | const XMLSize_t srcLen = strlen(toTranscode); |
---|
1102 | |
---|
1103 | // |
---|
1104 | // Set up the target buffer. If XMLCh and UChar are not the same size |
---|
1105 | // then we have to use a temp buffer and convert over. |
---|
1106 | // |
---|
1107 | UChar* targetBuf; |
---|
1108 | if (sizeof(XMLCh) == sizeof(UChar)) |
---|
1109 | targetBuf = (UChar*)toFill; |
---|
1110 | else |
---|
1111 | targetBuf = (UChar*) manager->allocate |
---|
1112 | ( |
---|
1113 | (maxChars + 1) * sizeof(UChar) |
---|
1114 | );//new UChar[maxChars + 1]; |
---|
1115 | |
---|
1116 | // |
---|
1117 | // Use a faux block to enforce a lock on the converter, which will |
---|
1118 | // unlock immediately after its completed. |
---|
1119 | // |
---|
1120 | UErrorCode err = U_ZERO_ERROR; |
---|
1121 | { |
---|
1122 | XMLMutexLock lockConverter(&fMutex); |
---|
1123 | ucnv_toUChars |
---|
1124 | ( |
---|
1125 | fConverter |
---|
1126 | , targetBuf |
---|
1127 | , (int32_t)maxChars + 1 |
---|
1128 | , toTranscode |
---|
1129 | , (int32_t)srcLen |
---|
1130 | , &err |
---|
1131 | ); |
---|
1132 | } |
---|
1133 | |
---|
1134 | if (U_FAILURE(err)) |
---|
1135 | { |
---|
1136 | if (targetBuf != (UChar*)toFill) |
---|
1137 | manager->deallocate(targetBuf);//delete [] targetBuf; |
---|
1138 | return false; |
---|
1139 | } |
---|
1140 | |
---|
1141 | // If the sizes are not the same, then copy the data over |
---|
1142 | if (sizeof(XMLCh) != sizeof(UChar)) |
---|
1143 | { |
---|
1144 | UChar* srcPtr = targetBuf; |
---|
1145 | XMLCh* outPtr = toFill; |
---|
1146 | while (*srcPtr) |
---|
1147 | *outPtr++ = XMLCh(*srcPtr++); |
---|
1148 | *outPtr = 0; |
---|
1149 | |
---|
1150 | // And delete the temp buffer |
---|
1151 | manager->deallocate(targetBuf);//delete [] targetBuf; |
---|
1152 | } |
---|
1153 | |
---|
1154 | return true; |
---|
1155 | } |
---|
1156 | |
---|
1157 | |
---|
1158 | bool ICULCPTranscoder::transcode( const XMLCh* const toTranscode |
---|
1159 | , char* const toFill |
---|
1160 | , const XMLSize_t maxChars |
---|
1161 | , MemoryManager* const manager) |
---|
1162 | { |
---|
1163 | // Watch for a few psycho corner cases |
---|
1164 | if (!toTranscode || !maxChars) |
---|
1165 | { |
---|
1166 | toFill[0] = 0; |
---|
1167 | return true; |
---|
1168 | } |
---|
1169 | |
---|
1170 | if (!*toTranscode) |
---|
1171 | { |
---|
1172 | toFill[0] = 0; |
---|
1173 | return true; |
---|
1174 | } |
---|
1175 | |
---|
1176 | // |
---|
1177 | // If XMLCh and UChar are not the same size, then we have to make a |
---|
1178 | // temp copy of the text to pass to ICU. |
---|
1179 | // |
---|
1180 | const UChar* actualSrc; |
---|
1181 | UChar* ncActual = 0; |
---|
1182 | if (sizeof(XMLCh) == sizeof(UChar)) |
---|
1183 | { |
---|
1184 | actualSrc = (const UChar*)toTranscode; |
---|
1185 | } |
---|
1186 | else |
---|
1187 | { |
---|
1188 | // Allocate a non-const temp buf, but store it also in the actual |
---|
1189 | ncActual = convertToUChar(toTranscode, 0, manager); |
---|
1190 | actualSrc = ncActual; |
---|
1191 | } |
---|
1192 | |
---|
1193 | // Insure that the temp buffer, if any, gets cleaned up via the nc pointer |
---|
1194 | ArrayJanitor<UChar> janTmp(ncActual, manager); |
---|
1195 | |
---|
1196 | // |
---|
1197 | // Use a faux block to enforce a lock on the converter while we do this. |
---|
1198 | // It will be released immediately after its done. |
---|
1199 | // |
---|
1200 | UErrorCode err = U_ZERO_ERROR; |
---|
1201 | int32_t targetCap; |
---|
1202 | { |
---|
1203 | XMLMutexLock lockConverter(&fMutex); |
---|
1204 | targetCap = ucnv_fromUChars |
---|
1205 | ( |
---|
1206 | fConverter |
---|
1207 | , toFill |
---|
1208 | , (int32_t)maxChars |
---|
1209 | , actualSrc |
---|
1210 | , -1 |
---|
1211 | , &err |
---|
1212 | ); |
---|
1213 | } |
---|
1214 | |
---|
1215 | if (U_FAILURE(err)) |
---|
1216 | return false; |
---|
1217 | |
---|
1218 | toFill[targetCap] = 0; |
---|
1219 | return true; |
---|
1220 | } |
---|
1221 | |
---|
1222 | XERCES_CPP_NAMESPACE_END |
---|