Changeset 163 for trunk/src/xmlmodel.h


Ignore:
Timestamp:
Jun 22, 2008, 1:45:20 PM (11 years ago)
Author:
cameron
Message:

Restructuring: Document/External Entity Info into xmldecl.h

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/xmlmodel.h

    r160 r163  
    2222#ifndef XMLMODEL_H
    2323#define XMLMODEL_H
     24
     25//  Encoding signature, XML declaration processing included in xmldecl.h
     26#include "xmldecl.h"
     27
    2428#include <vector>
    2529#include <iostream>
     
    3236#include "contentmodel.h"
    3337#include "symtab.h"
    34 enum XML_version {XML_1_0, XML_1_1, no_XML_version_value};
    35 /* Documents may be encoded in accord with either XML 1.0 or XML 1.1,
    36    or there may be no XML version declared ("no value" in the
    37    XML infoset parlance). */
    38 
    39 enum CodeUnit_Base {ASCII, EBCDIC};
    40 /* Code units of the underlying character set may be either ASCII-compatible
    41    or EBCDIC-compatible.
    42    ASCII-compatibility means that all code units satisfy the following properties.
    43      (1) Any code unit whose numeric value is in the ASCII range (0 to 0x7F)
    44          is a complete character sequence (single code unit sequence) representing
    45          that ASCII character.
    46      (2) Any code units above the ASCII range are non-ASCII code units.
    47          No code units or code unit sequences containing a non-ASCII code unit
    48          may represent an ASCII character.  (This property ensures that
    49          non-ASCII code units may be ignored in making ASCII-based parsing decisions).
    50    EBCDIC-compatible, for the purposes of XML, means that the following property
    51          applies.
    52      (*) Code units may form all or part of a code unit sequence representing
    53          a character in the Unicode range 0 to 0x9F if and only if that code
    54          unit has the same interpretation under the basic EBCDIC code page cp037.
    55 */
    56 
    57 enum CodeUnit_Size {SingleByte = 1, DoubleByte = 2, QuadByte = 4};
    58 /* ASCII, EBCDIC, ISO-8859-X and UTF-8 have 8-bit code units (singlebytes);
    59    The UTF-16 and UCS-2 families have 16-bit code units (doublebyte);
    60    The UTF-32/UCS-4 family has 32-bit code units. */
    61 
    62 enum CodeUnit_ByteOrder {BigEndian, LittleEndian, Unusual_3412, Unusual_2143};
    63 /* The byte order of 16-bit or 32-bit code units.  The possibilities are:
    64    BigEndian:  UTF-16BE, UCS-2BE, UTF-16 or UCS-2 with a BigEndian byte order mark,
    65                UTF-16 without a byte order mark,
    66                UTF-32BE/UCS-4BE, or UTF-32/UCS-4 with a BigEndian byte order mark.
    67    LittleEndian: UTF-16LE, UCS-2LE, UTF-16 or UCS-2 with a LittleEndian byte order mark,
    68                  UTF-32LE/UCS-4LE, or UTF-32/UCS-4 with a LittleEndian byte order mark.
    69    Unusual_3412: Unusual octet order of UTF-32/UCS-4 with byte order mark FE FF 00 00
    70    Unusual_2143: Unusual octet order of UTF-32/UCS-4 with byte order mark 00 00 FF FE.
    71 */
    72 
    73 enum XML_standalone {Standalone_yes, Standalone_no, Standalone_no_value};
    74 /* Possible values depending on the optional standalone component of an
    75    XML declaration. */
    7638
    7739
     
    13092
    13193
    132 class Entity_Info {
    133        
    134 public:
    135         Entity_Info();
    136         ~Entity_Info();
    137 
    138         /*  Information computed by analyzing the 4-byte initial signature
    139             of an XML document. */
    140         int BOM_units; /* no of initial code units for a Byte Order Mark */
    141 
    142         CodeUnit_Base code_unit_base;
    143         CodeUnit_Size code_unit_size;
    144         CodeUnit_ByteOrder byte_order; 
    145 
    146         void AnalyzeSignature(unsigned char * signature);
    147 
    148         /* Information computed from the XML or text declaration. */
    149         XML_version version;
    150         bool has_encoding_decl;
    151         unsigned char * encoding;
    152         XML_standalone standalone;
    153         int content_start;  /* position after BOM and XML/text decl.*/
    154        
    155 private:
    156         void set_charset_family(CodeUnit_Base C, CodeUnit_Size S, CodeUnit_ByteOrder O, int B);
    157 };
    158 
    15994class Model_Info {
    16095       
Note: See TracChangeset for help on using the changeset viewer.