source: icXML/icXML-devel/tests/src/EncodingTest/EncodingTest.cpp @ 2733

Last change on this file since 2733 was 2733, checked in by cameron, 6 years ago

More path fixes.

File size: 13.7 KB
Line 
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements.  See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License.  You may obtain a copy of the License at
8 *
9 *      http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18//---------------------------------------------------------------------
19//
20//  This test program is used, in conjunction with a set of test data files,
21//  to verify support for different character encodings in XML.
22//
23//---------------------------------------------------------------------
24
25
26// ---------------------------------------------------------------------------
27//  Includes
28// ---------------------------------------------------------------------------
29#include <icxercesc/framework/XMLBuffer.hpp>
30#include <icxercesc/util/PlatformUtils.hpp>
31#include <icxercesc/util/XMLString.hpp>
32#include <xercesc/util/XMLException.hpp>
33
34#include <xercesc/sax/SAXException.hpp>
35#include <xercesc/sax/ErrorHandler.hpp>
36#include <xercesc/sax/SAXParseException.hpp>
37
38#include <xercesc/util/OutOfMemoryException.hpp>
39#include <xercesc/parsers/XercesDOMParser.hpp>
40#include <xercesc/dom/DOM.hpp>
41#include <stdio.h>
42
43XERCES_CPP_NAMESPACE_USE
44
45static int gTestsFailed = 0;
46static int gTestsRun    = 0;
47static XercesDOMParser* parser = 0;
48
49
50//-----------------------------------------------------------------------
51//
52//  ErrorHandler.   The DOM Parser will report any parsing errors by means
53//                  of call-backs to the methods of this class.
54//                  This is just necessary boilerplate, as far as this
55//                  program is concerned.
56//
57//-----------------------------------------------------------------------
58
59class  ParseErrorHandler: public ErrorHandler
60{
61public:
62    void warning(const SAXParseException& e);
63    void error(const SAXParseException& e);
64    void fatalError(const SAXParseException& e);
65    void resetErrors() {};
66
67};
68
69void ParseErrorHandler::error(const SAXParseException& e)
70{
71    char* systemId = XMLString::transcode(e.getSystemId());
72    char* message = XMLString::transcode(e.getMessage());
73
74    fprintf(stderr, "\nError at file \"%s\", line %d, char %d:  %s\n",
75        systemId, e.getLineNumber(),
76        e.getColumnNumber(), message);
77
78    XMLString::release(&systemId);
79    XMLString::release(&message);
80    throw e;
81
82};
83
84void ParseErrorHandler::fatalError(const SAXParseException& e)
85{
86    char* systemId = XMLString::transcode(e.getSystemId());
87    char* message = XMLString::transcode(e.getMessage());
88
89    fprintf(stderr, "\nFatal Error at file \"%s\", line %d, char %d:  %s\n",
90        systemId, e.getLineNumber(),
91        e.getColumnNumber(), message);
92
93    XMLString::release(&systemId);
94    XMLString::release(&message);
95    throw e;
96};
97
98void ParseErrorHandler::warning(const SAXParseException& e)
99{
100    char* systemId = XMLString::transcode(e.getSystemId());
101    char* message = XMLString::transcode(e.getMessage());
102
103    fprintf(stderr, "\nWarning at file \"%s\", line %d, char %d:  %s\n",
104        systemId, e.getLineNumber(),
105        e.getColumnNumber(), message);
106
107    XMLString::release(&systemId);
108    XMLString::release(&message);
109    throw e;
110
111};
112
113
114//------------------------------------------------------------------------
115//
116//   parseFile  - a simpler to use function for just parsing an XML file
117//                and getting the DOM Document back.
118//
119//------------------------------------------------------------------------
120static DOMDocument* parseFile(char *fileName)
121{
122    ParseErrorHandler eh;
123    if (!parser)
124        parser = new XercesDOMParser;
125    parser->setValidationScheme(AbstractDOMParser::Val_Never);
126    parser->setErrorHandler(&eh);
127    try
128    {
129        parser->parse(fileName);
130    }
131    catch (const OutOfMemoryException&)
132    {
133            fprintf(stderr, "OutOfMemoryException during parsing: %s\n", fileName);       
134            return 0;
135    }
136    catch (const XMLException& e )
137    {
138                fprintf(stderr, "Exception Occurred \"%s\"\n",
139                        XMLString::transcode(e.getMessage()));
140                fprintf(stderr, "File being parsed is \"%s\".\n", fileName);
141        return 0;  // A null document.
142    }
143
144        catch (...)
145        {
146                fprintf(stderr, "Unexpected Exception thrown during parse of file \"%s\".\n",
147                                 fileName);
148                return 0;
149        }
150    return parser->getDocument();
151}
152
153
154//------------------------------------------------------------------------
155//
156//  writeUData - Write out a udata xml element for a XMLCh* contents.
157//
158//------------------------------------------------------------------------
159static void writeUData(const XMLCh* s)
160{
161    unsigned int i;
162    printf("<udata>\n");
163    size_t len = XMLString::stringLen(s);
164    for (i=0; i<len; i++)
165    {
166        if (i % 16 == 0)
167            printf("\n");
168        XMLCh c = s[i];
169        printf("%4x ", c);
170    }
171    printf("\n</udata>\n");
172};
173
174
175
176//------------------------------------------------------------------------
177//
178//  eatWhiteSpace -  XMLCh*s are kind of short on utility functions :-(
179//
180//------------------------------------------------------------------------
181static void eatWhiteSpace(XMLCh* s, unsigned int &i)
182{
183    size_t len = XMLString::stringLen(s);
184    while (i < len)
185    {
186    XMLCh c = s[i];
187    if (!(c == 0x20 ||           // These are the official XML space characters,
188        c == 0x09 ||             //   expressed as Unicode constants.
189        c == 0x0A))
190        break;
191    i++;
192    }
193}
194
195//------------------------------------------------------------------------
196//
197//   convertHexValue     if the XMLCh* contains a hex number at position i,
198//                       convert it and return it, and update i to index the
199//                       first char not in the string.
200//                       return 0 if string[i] didn't have a hex digit.
201//                       0 return is ambiguous, but it doesn't matter for XML,
202//                       where 0 is not a valid character.
203//
204//------------------------------------------------------------------------
205static int convertHexValue(XMLCh* s, unsigned int &i)
206{
207    int value = 0;
208
209                                   // For reference, the digits  0-9 are Unicode 0x30-39
210                                   //                the letters A-F are Unicode 0x41-0x46
211                                   //                the letters a-f are Unicode 0x61-66
212                                   // We can't use character literals - we might be
213                                   //  building on an EBCDIC machine.
214    size_t len = XMLString::stringLen(s);
215    while (i < len)
216    {
217        XMLCh c = s[i];
218        if (c >= 0x61 && c <= 0x66)     // Uppercase a-f to A-F.
219            c -= 0x20;
220
221        if (c < 0x30 || c >0x46)        // Stop if not a hex digit
222            break;
223        if (c > 0x39 && c <0x41)
224            break;
225
226        value = value << 4;             // Append this digit to accumulating value
227        if (c <= 0x39)
228            value += c-0x30;
229        else
230            value += 0xA + c - 0x41;
231
232        i++;
233    }
234    return value;
235}
236
237
238
239//------------------------------------------------------------------------
240//
241//  processTestFile   Given the file name of an encoding test xml file,
242//                    run it.
243//
244//------------------------------------------------------------------------
245static bool  processTestFile(const XMLCh* fileName)
246{
247    //
248    //  Send the input file through the parse, create a DOM document for it.
249    //
250    char cFileName[4000];
251    XMLString::transcode(fileName, cFileName, 3999);
252    DOMDocument* testDoc = parseFile(cFileName);
253    if (testDoc == 0)
254        return false;    // parse errors in the source xml.
255
256    //
257    //  Pull the "data" element out of the document.
258    //
259    XMLCh tempStr[4000];
260    XMLString::transcode("data", tempStr, 3999);
261    DOMNodeList* nl = testDoc->getElementsByTagName(tempStr);
262    if (nl->getLength() != 1) {
263        fprintf(stderr, "Test file \"%s\" must have exactly one \"data\" element.\n", cFileName);
264        return false;
265    };
266    DOMNode* tmpNode = nl->item(0);
267    DOMElement* data = (DOMElement*) tmpNode;
268
269
270    //
271    //  Build up a string containing the character data contents of the data element.
272    //
273    DOMNode* child;
274    XMLBuffer elData;
275    for (child=data->getFirstChild(); child != 0; child= child->getNextSibling())
276    {
277                if (child->getNodeType() == DOMNode::COMMENT_NODE)
278                        continue;
279        if (! (child->getNodeType() == DOMNode::TEXT_NODE ||
280               child->getNodeType() == DOMNode::CDATA_SECTION_NODE ||
281               child->getNodeType() == DOMNode::ENTITY_REFERENCE_NODE))
282        {
283               fprintf(stderr, "Test file \"%s\": data element contains unexpected children.",
284                    cFileName);
285               return false;
286        }
287        elData.append(((DOMCharacterData *)child)->getData());
288    };
289
290    //
291    //  Pull the "udata" element out of the document
292    //
293    XMLString::transcode("udata", tempStr, 3999);
294    nl = testDoc->getElementsByTagName(tempStr);
295    if (nl->getLength() != 1) {
296        fprintf(stderr, "Test file \"%s\" must have exactly one \"udata\" element.\n", cFileName);
297        return false;
298    };
299    DOMNode* tmpNode1 = nl->item(0);
300    DOMElement* udata = (DOMElement*) tmpNode1;
301
302    //
303    //  Build up a string containing the character data contents of the udata element.
304    //  This will consist of a whole bunch hex numbers, still in string from
305    //
306
307    XMLBuffer rawUData;
308    for (child=udata->getFirstChild(); child != 0; child= child->getNextSibling())
309    {
310        if (child->getNodeType() == DOMNode::COMMENT_NODE)
311            continue;
312        if (! (child->getNodeType() == DOMNode::TEXT_NODE ||
313            child->getNodeType() == DOMNode::CDATA_SECTION_NODE ||
314            child->getNodeType() == DOMNode::ENTITY_REFERENCE_NODE))
315        {
316            fprintf(stderr, "Test file \"%s\": udata element contains unexpected children.",
317                cFileName);
318            return false;
319        }
320        rawUData.append(((DOMCharacterData *)child)->getData());
321    };
322
323
324    //
325    // Convert the raw (hex numbers)  form of the udata to the corresponding string.
326    //
327    XMLBuffer uData;
328    unsigned int rawIndex = 0;
329
330    while (rawIndex < rawUData.getLen())
331    {
332        eatWhiteSpace(rawUData.getRawBuffer(), rawIndex);
333        XMLCh c = convertHexValue(rawUData.getRawBuffer(), rawIndex);
334        if (c > 0)
335            uData.append(c);
336        else
337            if (rawIndex < rawUData.getLen())
338            {
339                fprintf(stderr, "Test file \"%s\": Bad hex number in udata element.  "
340                    "Data character number %d\n", cFileName, uData.getLen());
341                return false;
342            }
343    }
344
345
346    //
347    // Compare the two strings.
348    //
349    unsigned int i;
350    for (i=0; i< elData.getLen(); i++)
351    {
352        XMLCh* elDataRaw = elData.getRawBuffer();
353        XMLCh* uDataRaw = uData.getRawBuffer();
354        if (i >= uData.getLen())
355        {
356            fprintf(stderr, "Test file \"%s\": udata element shorter than data at char number %d\n",
357                cFileName, i);
358            writeUData(elDataRaw);
359            return false;
360        }
361        if (uDataRaw[i] != elDataRaw[i])
362        {
363            fprintf(stderr, "Test file \"%s\": comparison failure at character number %d\n",
364                cFileName, i);
365            writeUData(elDataRaw);
366            return false;
367        };
368    }
369
370    if (elData.getLen() != uData.getLen())
371    {
372        fprintf(stderr, "Test file \"%s\": udata element longer than data at char number %d\n",
373            cFileName, i);
374        writeUData(elData.getRawBuffer());
375        return false;
376    }
377
378    return true;
379}
380
381
382int main(int argc, char ** argv) {
383
384   //
385    // Initialize the Xerces-c environment
386    //
387        try
388    {
389        XMLPlatformUtils::Initialize();
390    }
391
392    catch (const XMLException& toCatch)
393    {
394        fprintf(stderr, "Error during initialization of xerces-c: %s\n",
395            XMLString::transcode(toCatch.getMessage()));
396         return 1;
397    }
398
399    //
400    // Parse the command line, which should specify exactly one file, which is an
401    //   xml file containing the list of test files to be processed.
402    //
403    if (argc != 2) {
404        printf("usage: %s file_name \n"
405               "   where file name is the xml file specifying the list of test files.", argv[0]);
406        return 1;
407    }
408    DOMDocument* fileListDoc = parseFile(argv[1]);
409    if (fileListDoc == 0) return 1;
410
411
412    //
413    // Iterate over the list of files, running each as a test.
414    //
415    XMLCh tempStr[4000];
416    XMLString::transcode("testFile", tempStr, 3999);
417    DOMNodeList* list = fileListDoc->getElementsByTagName(tempStr);
418    XMLSize_t i;
419    XMLSize_t numFiles = list->getLength();
420    for (i=0; i<numFiles; i++)
421    {
422        ++gTestsRun;
423        DOMNode* tmpNode3 = list->item(i);
424        XMLString::transcode("name", tempStr, 3999);
425        const XMLCh* fileName = ((DOMElement*) tmpNode3)->getAttribute(tempStr);
426        if (processTestFile(fileName) == false)
427            ++gTestsFailed;
428    };
429
430
431
432    //
433    // We are done.  Print out a summary of the results
434    //
435    printf("Encoding Tests Results Summary: \n"
436           "   %d encoding tests run.\n"
437           "   %d tests passed,\n"
438           "   %d tests failed\n", gTestsRun, gTestsRun-gTestsFailed, gTestsFailed);
439
440    delete parser;
441    parser = 0;
442   return 0;
443};
Note: See TracBrowser for help on using the repository browser.