Changeset 11 for trunk/src/engine.c


Ignore:
Timestamp:
Dec 24, 2007, 8:52:08 AM (11 years ago)
Author:
cameron
Message:

Reading XML declaration: version/encoding.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/engine.c

    r9 r11  
    1717#include <errno.h>
    1818#include <string.h>
    19 #include <sys/types.h>
    20 #include <sys/stat.h>
    21 
    22 char sentinel[] = "<]]>?>-->'>\">";
    2319
    2420ParsingEngine::ParsingEngine (char * filename) {
     
    4642  buffer_base_pos = 0;
    4743  buffer_rel_pos = 0;
    48   xml_buf = new XML_Buffer::XML_Buffer(filename, 4*strlen(sentinel));
     44  xml_buf = new XML_Buffer::XML_Buffer(filename, BLOCKSIZE);
    4945}
    5046
     
    164160}
    165161
     // Cheap whitespace test on the single byte at x8dataPtr(offset).
     // NOTE(review): the comparison accepts every byte value <= 0x20, not only the
     // four XML 1.0 space characters; presumably other control characters are
     // rejected elsewhere before this result is relied on -- confirm.
     162inline bool ParsingEngine::S_at(int offset) const {
     163  // True for any byte <= 0x20, which includes 0x09, 0x0A, 0x0D, 0x20: the XML1.0 space chars.
     164  return *(x8dataPtr(offset)) <= 0x20;
     165}
     166
    166167#include "multiliteral.h"
    167168/* Now the XML recognizers. */
     
    215216  return s2int16(x8dataPtr(0)) == c2int16('/', '>');
    216217}
     218
     // True when the five bytes at x8dataPtr(0) compare equal to "<?xml" and the
     // byte at offset 5 passes S_at(). The && short-circuits, so S_at(5) is only
     // evaluated after the literal has matched.
     // NOTE(review): s5int64/c5int64 are defined outside this view (see
     // multiliteral.h); presumably they pack five bytes into a 64-bit integer.
     219inline bool ParsingEngine::at_XmlDecl_start() const {
     220  return (s5int64(x8dataPtr(0)) == c5int64('<', '?', 'x', 'm', 'l')) &&
     221         S_at(5);
     222}
     223
     // True when the seven bytes at x8dataPtr(0) compare equal to the keyword
     // "version". Nothing after the seventh byte is examined.
     224inline bool ParsingEngine::at_version() const {
     225  return s7int64(x8dataPtr(0)) == c7int64('v', 'e', 'r', 's', 'i', 'o', 'n');
     226}
     227
     // True when the five bytes at x8dataPtr(0) are the quoted version literal
     // 1.0, written with either double quotes or single quotes. The quotes are
     // part of the comparison, so mismatched quote styles do not match.
     228inline bool ParsingEngine::at_1_0() const {
     229  return (s5int64(x8dataPtr(0)) == c5int64('"', '1', '.', '0', '"')) ||
     230         (s5int64(x8dataPtr(0)) == c5int64('\'', '1', '.', '0', '\''));
     231}
     232
     // True when the five bytes at x8dataPtr(0) are the quoted version literal
     // 1.1, written with either double quotes or single quotes. The quotes are
     // part of the comparison, so mismatched quote styles do not match.
     233inline bool ParsingEngine::at_1_1() const {
     234  return (s5int64(x8dataPtr(0)) == c5int64('"', '1', '.', '1', '"')) ||
     235         (s5int64(x8dataPtr(0)) == c5int64('\'', '1', '.', '1', '\''));
     236}
     237
     // True when the eight bytes at x8dataPtr(0) compare equal to the keyword
     // "encoding". Nothing after the eighth byte is examined.
     238inline bool ParsingEngine::at_encoding() const {
     239  return s8int64(x8dataPtr(0)) == c8int64('e', 'n', 'c', 'o', 'd', 'i', 'n', 'g');
     240}
     241
     // True when the ten bytes at x8dataPtr(0) compare equal to the keyword
     // "standalone". The keyword is checked in two pieces: the first eight bytes
     // ("standalo") and the two bytes at offset 8 ("ne").
     // The pieces are combined with bitwise & rather than &&, so both comparisons
     // are always evaluated (no short-circuit).
     242inline bool ParsingEngine::at_standalone() const {
     243  return (s8int64(x8dataPtr(0)) == c8int64('s', 't', 'a', 'n', 'd', 'a', 'l', 'o')) &
     244         (s2int16(x8dataPtr(8)) == c2int16('n', 'e'));
     245}
     246
     // True when the five bytes at x8dataPtr(0) are the quoted literal yes, written
     // with either double quotes or single quotes (quotes included in the match).
     // The alternatives are combined with bitwise | rather than ||, so both
     // comparisons are always evaluated (no short-circuit).
     247inline bool ParsingEngine::at_yes() const {
     248  return (s5int64(x8dataPtr(0)) == c5int64('"', 'y', 'e', 's', '"')) |
     249         (s5int64(x8dataPtr(0)) == c5int64('\'', 'y', 'e', 's', '\''));
     250}
     251
     // True when the four bytes at x8dataPtr(0) are the quoted literal no, written
     // with either double quotes or single quotes (quotes included in the match).
     // The alternatives are combined with bitwise | rather than ||, so both
     // comparisons are always evaluated (no short-circuit).
     252inline bool ParsingEngine::at_no() const {
     253  return (s4int32(x8dataPtr(0)) == c4int32('"', 'n', 'o', '"')) |
     254         (s4int32(x8dataPtr(0)) == c4int32('\'', 'n', 'o', '\''));
     255}
     256
     257
     258
    217259
    218260
     
    316358        int target_start = AbsPos();
    317359        ScanTo(NameFollow);  /* Name delimiter */
     360        // Check for illegal [Xx][Mm][Ll] target.
     361        if ((AbsPos() - markup_start == 5) &&
     362            ((s3int32(x8dataPtr(-3)) | c3int32(0x20, 0x20, 0x20)) == c3int32('x', 'm', 'l'))) {
     363                Error_action(markup_start, AbsPos());
     364                return;
     365        }
    318366        PI_Target_action(target_start, AbsPos());
    319367        ScanTo(QMark);
     
    500548#endif
    501549}
     550
     551//
     552// The following does not yet validate the syntax of EncNames.
     553// EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
     554// Future approach: first use lookup in EncNameTable,
     555//           if not found, do case convert, try again,
     556//             (avoids cost of case convert normally)
     557//           if not found, validate syntax of EncNames,
     558//           report error or EncName unknown.
     559//
     // Reads the optional byte order mark and XML declaration at the current
     // position and records what was found in xml_info.
     //
     //   xml_info: output. Receives has_ByteOrderMark, has_version_decl, version
     //             (0 for "1.0", 1 for "1.1"), has_encoding_decl,
     //             encoding_start_pos / encoding_end_pos, has_standalone_decl
     //             and standalone.
     //
     // The attributes are accepted only in the order version, encoding,
     // standalone. Any syntax problem is reported through
     // Error_action(decl_start, AbsPos()) followed by an immediate return.
     // Each has_* flag is set as soon as its keyword is recognized, before the
     // value is validated, so a flag can remain true on an error path.
     // NOTE(review): ScanTo(NonWS) presumably advances to the next non-whitespace
     // byte and at_PI_End() presumably matches "?>"; both are defined outside
     // this view -- confirm.
     560void ParsingEngine::ReadXmlInfo(Entity_Declaration_Info& xml_info) {
     561  int BOM = lex->BOM_size(0);
     562  xml_info.has_ByteOrderMark = BOM > 0;
     563  xml_info.has_version_decl = false;
     564  xml_info.has_encoding_decl = false;
     565  xml_info.has_standalone_decl = false;
          // Step over the byte order mark (a no-op when BOM is 0).
     566  Advance(BOM);
          // Start position used as the first argument of every error report below.
     567  int decl_start = AbsPos();
     568  // It is possible that there is no XML declaration.
     569  if (!at_XmlDecl_start()) return;
     570  // Otherwise, the XML declaration exists and must have
     571  // at least version information.
     572  xml_info.has_version_decl = true;
          // 6 = the five bytes of "<?xml" plus the one byte that passed S_at(5).
     573  Advance(6);
     574  ScanTo(NonWS);
     575  if (!at_version()) {Error_action(decl_start, AbsPos()); return;}
          // 7 = length of "version".
     576  Advance(7);
     577  ScanTo(NonWS);
     578  if (!AtChar('=')) {Error_action(decl_start, AbsPos()); return;}
     579  Advance(1);
     580  ScanTo(NonWS);
     581  if (at_1_0()) xml_info.version = 0;
     582  else if (at_1_1()) xml_info.version = 1;
     583  else {Error_action(decl_start, AbsPos()); return;}
          // 5 = the quoted version literal, quotes included.
     584  Advance(5);
          // Already set above; assigned again once the version literal is accepted.
     585  xml_info.has_version_decl = true;
          // The declaration may end right after the version value; otherwise a
          // space byte must separate it from the next attribute.
     586  if (at_PI_End()) {Advance(2); return;}
     587  if (!S_at(0)) {Error_action(decl_start, AbsPos()); return;}
     588  ScanTo(NonWS);
     589  if (at_encoding()) {
     590      xml_info.has_encoding_decl = true;
              // 8 = length of "encoding".
     591      Advance(8);
     592      ScanTo(NonWS);
     593      if (!AtChar('=')) {Error_action(decl_start, AbsPos()); return;}
     594      Advance(1);
     595      ScanTo(NonWS);
              // +1 steps past the opening quote; the quote itself is only
              // verified by the checks that follow.
     596      xml_info.encoding_start_pos = AbsPos()+1;
     597      if (AtChar('"')) {
     598        Advance(1);
     599        ScanTo(DQuote);
     600        if (!AtChar('"')) {Error_action(decl_start, AbsPos()); return;}
     601      }
     602      else if (AtChar('\'')) {
     603        Advance(1);
     604        ScanTo(SQuote);
     605        if (!AtChar('\'')) {Error_action(decl_start, AbsPos()); return;}
     606      }
     607      else {Error_action(decl_start, AbsPos()); return;}
              // Position of the closing quote, so the encoding name occupies
              // [encoding_start_pos, encoding_end_pos).
     608      xml_info.encoding_end_pos = AbsPos();
     609      Advance(1);
              // As after the version value: either the declaration ends here or
              // a space byte is required before a following attribute.
     610      if (at_PI_End()) {Advance(2); return;}
     611      if (!S_at(0)) {Error_action(decl_start, AbsPos()); return;}
     612      ScanTo(NonWS);
     613  }
     614  if (at_standalone()) {
     615      xml_info.has_standalone_decl = true;
              // 10 = length of "standalone".
     616      Advance(10);
     617      ScanTo(NonWS);
     618      if (!AtChar('=')) {Error_action(decl_start, AbsPos()); return;}
     619      Advance(1);
     620      ScanTo(NonWS);
              // 5 and 4 = the quoted yes / no literals, quotes included.
     621      if (at_yes()) {Advance(5); xml_info.standalone = true;}
     622      else if (at_no()) {Advance(4); xml_info.standalone = false;}
     623      else {Error_action(decl_start, AbsPos()); return;}
     624  }
          // Anything other than the closing delimiter at this point is an error.
     625  ScanTo(NonWS);
     626  if (at_PI_End()) {Advance(2); return;}
     627  else {Error_action(decl_start, AbsPos()); return;}
     628}
Note: See TracChangeset for help on using the changeset viewer.