Changeset 125


Ignore:
Timestamp:
May 2, 2008, 10:10:52 AM (11 years ago)
Author:
lindanl
Message:

Character references: calculation and error detection.

Location:
trunk/src
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/bytelex.h

    r124 r125  
    2020inline bool AtQuote(unsigned char x8data[]) {
    2121  return (x8data[0] == Ord<C, '"'>::value) | (x8data[0] == Ord<C, '\''>::value);
     22}
     23
     24/* In both ASCII and EBCDIC, digits are in a contiguous range
     25   from 0 through 9 */
     26template<CodeUnit_Base C>
     27inline bool at_Digit(unsigned char x8data[]) {
     28  return (x8data[0] >= Ord<C, '0'>::value) & (x8data[0] <= Ord<C, '9'>::value);
     29}
     30
     31template<CodeUnit_Base C>
     32inline unsigned int DigitVal(unsigned char d) {
     33  return d - Ord<C, '0'>::value;
     34}
     35
     36/* In both ASCII and EBCDIC, digits are in a contiguous range
     37   from 0 through 9.  Similarly the hex characters A through F
     38   and a through f are also in contiguous ranges that differ in
     39   only one bit position (Ord<C, 'a'>::value ^ Ord<C, 'A'>::value).*/
     40template<CodeUnit_Base C>
     41inline bool at_HexDigit(unsigned char x8data[]) {
     42  const unsigned char case_bit = Ord<C, 'a'>::value ^ Ord<C, 'A'>::value;
     43  const unsigned char canon_A =  Ord<C, 'a'>::value | case_bit;
     44  const unsigned char canon_F =  Ord<C, 'f'>::value | case_bit;
     45  unsigned char ch = x8data[0];
     46  unsigned char canon_ch = ch | case_bit;
     47  return ((ch >= Ord<C, '0'>::value) & (ch <= Ord<C, '9'>::value)) |
     48         ((canon_ch >= canon_A) & (canon_ch <= canon_F));
     49}
     50
     51template<CodeUnit_Base C>
     52inline unsigned int HexVal(unsigned char ch) {
     53  const unsigned char case_bit = Ord<C, 'a'>::value ^ Ord<C, 'A'>::value;
     54  const unsigned char canon_A =  Ord<C, 'a'>::value | case_bit;
     55  unsigned char canon_ch = ch | case_bit;
     56  if ((ch >= Ord<C, '0'>::value) & (ch <= Ord<C, '9'>::value)) return ch - Ord<C, '0'>::value;
     57  else return (ch | case_bit) - canon_A + 10;
    2258}
    2359
  • trunk/src/engine.c

    r124 r125  
    447447        }
    448448}
    449 
     449       
    450450template <CodeUnit_Base C>
    451451inline void ParsingEngine<C>::Parse_CharRef() {
    452452        Advance(2);  // skip "&#"
     453        int ch_val = 0;
    453454        if (AtChar<C,'x'>(cur())) {
    454455                Advance(1);
    455                 int hex_pos = AbsPos();
    456                 ScanTo(NonHex);
    457                 if (AbsPos() == hex_pos) Syntax_Error(NT_CharRef);
     456                while(at_HexDigit<C>(cur())){
     457                        ch_val = HexVal<C>(cur()[0]) + (ch_val<<4);
     458                        if (ch_val> 0x10FFFF )
     459                                WF_Error(wfErr_wf_Legalchar);
     460                        Advance(1);
     461                }
    458462        }
    459463        else {
    460                 int num_pos = AbsPos();
    461                 ScanTo(NonDigit);
    462                 if (AbsPos() == num_pos) Syntax_Error(NT_CharRef); 
    463         }       
     464                while(at_Digit<C>(cur())){
     465                        ch_val = DigitVal<C>(cur()[0]) + ch_val*10;
     466                        if (ch_val> 0x10FFFF )
     467                                WF_Error(wfErr_wf_Legalchar);
     468                        Advance(1);
     469                }
     470        }
     471        if ((ch_val == 0x0) || ((ch_val | 0x7FF) == 0xDFFF)|| ((ch_val | 0x1) == 0xFFFF))
     472                                WF_Error(wfErr_wf_Legalchar);   
     473                else  if (entity_Info->version != XML_1_1)
     474                        if (((ch_val < 0x20) && (ch_val != 0x9) && (ch_val != 0xD) && (ch_val != 0xA)))
     475                                WF_Error(wfErr_wf_Legalchar);
     476                               
    464477        if (!AtChar<C,';'>(cur())) {
    465478                        Syntax_Error(NT_CharRef);
    466479        }
    467         else {
    468                 Advance(1);
    469                 Reference_action(GetCodeUnitPtr(text_or_markup_start), LengthFrom(text_or_markup_start));
    470         }
     480                else {
     481                        Advance(1);
     482                        Reference_action(GetCodeUnitPtr(text_or_markup_start), LengthFrom(text_or_markup_start));
     483                }
    471484}
    472485
     
    16821695                       
    16831696                }
    1684                 char * temp = replText;
    1685                 replText = cat_string (temp,(char *)GetCodeUnitPtr(quot_start), strlen(temp), AbsPos()-quot_start);
    1686                 free(temp);
     1697                replText = cat_string (replText,(char *)GetCodeUnitPtr(quot_start), strlen(replText), AbsPos()-quot_start);
    16871698        }
    16881699        this_info->ReplacementText = replText;
Note: See TracChangeset for help on using the changeset viewer.