source: tags/parabix-0.39/src/xmlparam.h @ 4027

Last change on this file since 4027 was 24, checked in by cameron, 12 years ago


File size: 2.7 KB
/*  xmlparam - XML parsing parameters
    Copyright (c) 2008, Robert D. Cameron.
    Licensed to the public under the Open Software License 3.0.
    Licensed to International Characters, Inc., under the Academic
    Free License 3.0.
*/
8#ifndef XMLPARAM_H
9#define XMLPARAM_H
enum XML_version {
    XML_1_0,              /* document is in accord with XML 1.0 */
    XML_1_1,              /* document is in accord with XML 1.1 */
    no_XML_version_value  /* no XML version is declared */
};
/* Documents may be encoded in accord with either XML 1.0 or XML 1.1,
   or there may be no XML version declared ("no value" in the
   XML infoset parlance). */
enum CodeUnit_Base {
    ASCII,   /* code units are ASCII-compatible (see below) */
    EBCDIC   /* code units are EBCDIC-compatible (see below) */
};
/* Code units of the underlying character set may be either ASCII-compatible
   or EBCDIC-compatible.
   ASCII-compatibility means that all code units satisfy the following properties.
     (1) Any code unit whose numeric value is in the ASCII range (0 to 0x7F)
         is a complete character sequence (single code unit sequence) representing
         that ASCII character.
     (2) Any code units above the ASCII range are non-ASCII code units.
         No code units or code unit sequences containing a non-ASCII code unit
         may represent an ASCII character.  (This property ensures that
         non-ASCII code units may be ignored in making ASCII-based parsing decisions).
   EBCDIC-compatible, for the purposes of XML, means that the following property
         applies.
     (*) Code units may form all or part of a code unit sequence representing
         a character in the Unicode range 0 to 0x9F if and only if that code
         unit has the same interpretation under the basic EBCDIC code page cp037.
*/
/* The value of each enumerator is the code unit width in bytes. */
enum CodeUnit_Size {
    SingleByte = 1,  /* 8-bit code units */
    DoubleByte = 2,  /* 16-bit code units */
    QuadByte   = 4   /* 32-bit code units */
};
/* ASCII, EBCDIC, ISO-8859-X and UTF-8 have 8-bit code units (singlebytes);
   The UTF-16 and UCS-2 families have 16-bit code units (doublebyte);
   The UTF-32/UCS-4 family has 32-bit code units. */
enum CodeUnit_ByteOrder {
    BigEndian,     /* most significant octet first */
    LittleEndian,  /* least significant octet first */
    Unusual_3412,  /* UTF-32/UCS-4 only: byte order mark FE FF 00 00 */
    Unusual_2143   /* UTF-32/UCS-4 only: byte order mark 00 00 FF FE */
};
/* The byte order of 16-bit or 32-bit code units.  The possibilities are:
   BigEndian:  UTF-16BE, UCS-2BE, UTF-16 or UCS-2 with a BigEndian byte order mark,
               UTF-16 without a byte order mark,
               UTF-32BE/UCS-4BE, or UTF-32/UCS-4 with a BigEndian byte order mark.
   LittleEndian: UTF-16LE, UCS-2LE, UTF-16 or UCS-2 with a LittleEndian byte order mark,
                 UTF-32LE/UCS-4LE, or UTF-32/UCS-4 with a LittleEndian byte order mark.
   Unusual_3412: Unusual octet order of UTF-32/UCS-4 with byte order mark FE FF 00 00.
   Unusual_2143: Unusual octet order of UTF-32/UCS-4 with byte order mark 00 00 FF FE.
*/
enum XML_standalone {
    Standalone_yes,       /* XML declaration carries standalone="yes" */
    Standalone_no,        /* XML declaration carries standalone="no" */
    Standalone_no_value   /* no standalone component is present */
};
/* Possible values depending on the optional standalone component of an
   XML declaration. */
Note: See TracBrowser for help on using the repository browser.