Changeset 33


Ignore:
Timestamp:
Feb 10, 2008, 5:55:02 AM (11 years ago)
Author:
cameron
Message:

Caseless comparison fix; encoding names (prelim)

Location:
trunk/src
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/bytelex.h

    r27 r33  
    1717}
    1818
     19template<CodeUnit_Base C>
     20inline bool AtQuote(unsigned char x8data[]) {
     21  return (x8data[0] == Ord<C, '"'>::value) | (x8data[0] == Ord<C, '\''>::value);
     22}
     23
    1924// Whitespace recognition.  This varies between XML 1.0 and
    2025// XML 1.1, but only the XML 1.0 version is needed.
     
    3944}
    4045
    41 
    42 
    4346template<CodeUnit_Base C>
    4447inline bool at_EndTag_Start(unsigned char x8data[]) {
     
    8386
    8487template<CodeUnit_Base C>
    85 inline bool at_EqualsDQuote(unsigned char x8data[]) {
    86   return s2int16(x8data) == c2int16<C, '=', '"'>::value;
    87 }
    88 
    89 template<CodeUnit_Base C>
    90 inline bool at_EqualsSQuote(unsigned char x8data[]) {
    91   return s2int16(x8data) == c2int16<C, '=', '\''>::value;
     88inline bool at_EqualsQuote(unsigned char x8data[]) {
     89  uint16_t EQ = s2int16(x8data);
     90  return (EQ == c2int16<C, '=', '"'>::value) | (EQ == c2int16<C, '=', '\''>::value);
    9291}
    9392
     
    154153}
    155154
    156 
    157155/* The at_ElementTag_Start recognizer rules out '<!', '<?', '</'
    158156   combinations while returning true for '<' followed by any NameStrt
    159157   character.
    160 
    161    The following is ugly and works only for ASCII.
    162158*/
    163159template<CodeUnit_Base C>
    164 inline bool at_ElementTag_Start(unsigned char x8data[]);
    165 
     160inline bool at_ElementTag_Start(unsigned char x8data[]) {
     161  return (x8data[0] == Ord<C, '<'>::value) & (x8data[1] != Ord<C, '!'>::value) &
     162         (x8data[1] != Ord<C, '?'>::value) & (x8data[1] != Ord<C, '/'>::value);
     163}
     164
     165/* The following ugly hack optimizes for ASCII. */
    166166template<>
    167167inline bool at_ElementTag_Start<ASCII>(unsigned char x8data[]) {
     
    170170}
    171171
    172 template<CodeUnit_Base C>
     172
    173173inline bool at_UTF_8(unsigned char x8data[]) {
    174   return caseless_comp<C, 'u', 't', 'f', '-', '8'>(x8data);
    175 }
    176 
    177 template<CodeUnit_Base C>
     174  return caseless_comp<ASCII, 'u', 't', 'f', '-', '8'>(x8data);
     175}
     176
    178177inline bool at_UCS_2(unsigned char x8data[]) {
    179   return caseless_comp<C, 'u', 'c', 's', '-', '2'>(x8data);
    180 }
    181 
    182 template<CodeUnit_Base C>
     178  return caseless_comp<ASCII, 'u', 'c', 's', '-', '2'>(x8data);
     179}
     180
    183181inline bool at_UCS_4(unsigned char x8data[]) {
    184   return caseless_comp<C, 'u', 'c', 's', '-', '4'>(x8data);
    185 }
    186 
    187 template<CodeUnit_Base C>
     182  return caseless_comp<ASCII, 'u', 'c', 's', '-', '4'>(x8data);
     183}
     184
    188185inline bool at_UCS_2LE(unsigned char x8data[]) {
    189   return caseless_comp<C, 'u', 'c', 's', '-', '2', 'l', 'e'>(x8data);
    190 }
    191 
    192 template<CodeUnit_Base C>
     186  return caseless_comp<ASCII, 'u', 'c', 's', '-', '2', 'l', 'e'>(x8data);
     187}
     188
    193189inline bool at_UCS_2BE(unsigned char x8data[]) {
    194   return caseless_comp<C, 'u', 'c', 's', '-', '2', 'b', 'e'>(x8data);
    195 }
    196 
    197 template<CodeUnit_Base C>
     190  return caseless_comp<ASCII, 'u', 'c', 's', '-', '2', 'b', 'e'>(x8data);
     191}
     192
    198193inline bool at_UCS_4LE(unsigned char x8data[]) {
    199   return caseless_comp<C, 'u', 'c', 's', '-', '4', 'l', 'e'>(x8data);
    200 }
    201 
    202 template<CodeUnit_Base C>
     194  return caseless_comp<ASCII, 'u', 'c', 's', '-', '4', 'l', 'e'>(x8data);
     195}
     196
    203197inline bool at_UCS_4BE(unsigned char x8data[]) {
    204   return caseless_comp<C, 'u', 'c', 's', '-', '4', 'b', 'e'>(x8data);
    205 }
    206 
    207 template<CodeUnit_Base C>
     198  return caseless_comp<ASCII, 'u', 'c', 's', '-', '4', 'b', 'e'>(x8data);
     199}
     200
    208201inline bool at_UTF_16(unsigned char x8data[]) {
    209   return caseless_comp<C, 'u', 't', 'f', '-', '1', '6'>(x8data);
    210 }
    211 
    212 template<CodeUnit_Base C>
     202  return caseless_comp<ASCII, 'u', 't', 'f', '-', '1', '6'>(x8data);
     203}
     204
    213205inline bool at_UTF_32(unsigned char x8data[]) {
    214   return caseless_comp<C, 'u', 't', 'f', '-', '3', '2'>(x8data);
    215 }
    216 
    217 template<CodeUnit_Base C>
     206  return caseless_comp<ASCII, 'u', 't', 'f', '-', '3', '2'>(x8data);
     207}
     208
    218209inline bool at_UTF_16LE(unsigned char x8data[]) {
    219   return caseless_comp<C, 'u', 't', 'f', '-', '1', '6', 'l', 'e'>(x8data);
    220 }
    221 
    222 template<CodeUnit_Base C>
     210  return caseless_comp<ASCII, 'u', 't', 'f', '-', '1', '6', 'l', 'e'>(x8data);
     211}
     212
    223213inline bool at_UTF_32LE(unsigned char x8data[]) {
    224   return caseless_comp<C, 'u', 't', 'f', '-', '3', '2', 'l', 'e'>(x8data);
    225 }
    226 
    227 template<CodeUnit_Base C>
     214  return caseless_comp<ASCII, 'u', 't', 'f', '-', '3', '2', 'l', 'e'>(x8data);
     215}
     216
    228217inline bool at_UTF_16BE(unsigned char x8data[]) {
    229   return caseless_comp<C, 'u', 't', 'f', '-', '1', '6', 'b', 'e'>(x8data);
    230 }
    231 
    232 template<CodeUnit_Base C>
     218  return caseless_comp<ASCII, 'u', 't', 'f', '-', '1', '6', 'b', 'e'>(x8data);
     219}
     220
    233221inline bool at_UTF_32BE(unsigned char x8data[]) {
    234   return caseless_comp<C, 'u', 't', 'f', '-', '3', '2', 'b', 'e'>(x8data);
     222  return caseless_comp<ASCII, 'u', 't', 'f', '-', '3', '2', 'b', 'e'>(x8data);
     223}
     224
     225inline bool at_ASCII(unsigned char x8data[]) {
     226  return caseless_comp<ASCII, 'a', 's', 'c', 'i', 'i'>(x8data);
     227}
     228
     229inline bool at_Latin1(unsigned char x8data[]) {
     230  return caseless_comp<ASCII, 'l', 'a', 't', 'i', 'n', '1'>(x8data);
     231}
     232
     233inline bool at_EBCDIC(unsigned char x8data[]) {
     234  return caseless_comp<EBCDIC, 'e', 'b', 'c', 'd', 'i', 'c'>(x8data);
    235235}
    236236
  • trunk/src/multiliteral.h

    r26 r33  
    144144  const uint16_t UC = c2int16<C, lc2UC<c1>::value, lc2UC<c2>::value>::value;
    145145  const uint16_t case_mask = lc ^ UC;
    146   const uint16_t canon = lc & case_mask;
    147   return (s2int16(s) & case_mask) == canon;
     146  const uint16_t canon = lc & ~case_mask;
     147  return (s2int16(s) & ~case_mask) == canon;
    148148}
    149149
     
    153153  const uint32_t UC = c3int32<C, lc2UC<c1>::value, lc2UC<c2>::value, lc2UC<c3>::value>::value;
    154154  const uint32_t case_mask = lc ^ UC;
    155   const uint32_t canon = lc & case_mask;
    156   return (s3int32(s) & case_mask) == canon;
     155  const uint32_t canon = lc & ~case_mask;
     156  return (s3int32(s) & ~case_mask) == canon;
    157157}
    158158
     
    165165                                 lc2UC<c3>::value, lc2UC<c4>::value>::value;
    166166  const uint32_t case_mask = lc ^ UC;
    167   const uint32_t canon = lc & case_mask;
    168   return (s4int32(s) & case_mask) == canon;
     167  const uint32_t canon = lc & ~case_mask;
     168  return (s4int32(s) & ~case_mask) == canon;
    169169}
    170170
     
    180180                                 lc2UC<c5>::value>::value;
    181181  const uint64_t case_mask = lc ^ UC;
    182   const uint64_t canon = lc & case_mask;
    183   return (s5int64(s) & case_mask) == canon;
     182  const uint64_t canon = lc & ~case_mask;
     183  return (s5int64(s) & ~case_mask) == canon;
    184184}
    185185
     
    195195                                 lc2UC<c5>::value, lc2UC<c6>::value>::value;
    196196  const uint64_t case_mask = lc ^ UC;
    197   const uint64_t canon = lc & case_mask;
    198   return (s6int64(s) & case_mask) == canon;
     197  const uint64_t canon = lc & ~case_mask;
     198  return (s6int64(s) & ~case_mask) == canon;
    199199}
    200200
     
    213213                                 lc2UC<c7>::value>::value;
    214214  const uint64_t case_mask = lc ^ UC;
    215   const uint64_t canon = lc & case_mask;
    216   return (s7int64(s) & case_mask) == canon;
     215  const uint64_t canon = lc & ~case_mask;
     216  return (s7int64(s) & ~case_mask) == canon;
    217217}
    218218
     
    231231                                 lc2UC<c7>::value, lc2UC<c8>::value>::value;
    232232  const uint64_t case_mask = lc ^ UC;
    233   const uint64_t canon = lc & case_mask;
    234   return (s8int64(s) & case_mask) == canon;
     233  const uint64_t canon = lc & ~case_mask;
     234  return (s8int64(s) & ~case_mask) == canon;
    235235}
    236236
Note: See TracChangeset for help on using the changeset viewer.