Changeset 26


Ignore:
Timestamp:
Jan 25, 2008, 4:29:05 AM (11 years ago)
Author:
cameron
Message:

Caseless string functions; encoding names

Location:
trunk/src
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/bytelex.h

    r23 r26  
    // at_XxMmLll_WS<C>: true when the three code units at x8data pass the letter test for "xml"
    // and the code unit at x8data[3] satisfies at_WhiteSpace.
    // Rows numbered only in the left (r23) column are the removed mask-based letter test;
    // rows numbered only in the right (r26) column replace it with a call to caseless_comp
    // and pass XML_1_0 as an extra template argument to at_WhiteSpace.
    150150template<CodeUnit_Base C>
    151151inline bool at_XxMmLll_WS(unsigned char x8data[]) {
    152   uint32_t case_mask = ~(CaseFoldBit<C>::value * 0x01010101);
    153   return ((s3int32(x8data) & case_mask) == (c3int32<C, 'x', 'm', 'l'>::value & case_mask)) &&
    154          at_WhiteSpace<C>(&x8data[3]);
     152  return caseless_comp<C, 'x', 'm', 'l'>(x8data) &&
     153         at_WhiteSpace<XML_1_0, C>(&x8data[3]);
    155154}
    156155
     
    170169         ((x8data[1] & 0xE1) != 0x21);
    171170}
     171
     172template<CodeUnit_Base C>
     173inline bool at_UTF_8(unsigned char x8data[]) {
     174  return caseless_comp<C, 'u', 't', 'f', '-', '8'>(x8data);
     175}
     176
     177template<CodeUnit_Base C>
     178inline bool at_UCS_2(unsigned char x8data[]) {
     179  return caseless_comp<C, 'u', 'c', 's', '-', '2'>(x8data);
     180}
     181
     182template<CodeUnit_Base C>
     183inline bool at_UCS_4(unsigned char x8data[]) {
     184  return caseless_comp<C, 'u', 'c', 's', '-', '4'>(x8data);
     185}
     186
     187template<CodeUnit_Base C>
     188inline bool at_UCS_2LE(unsigned char x8data[]) {
     189  return caseless_comp<C, 'u', 'c', 's', '-', '2', 'l', 'e'>(x8data);
     190}
     191
     192template<CodeUnit_Base C>
     193inline bool at_UCS_2BE(unsigned char x8data[]) {
     194  return caseless_comp<C, 'u', 'c', 's', '-', '2', 'b', 'e'>(x8data);
     195}
     196
     197template<CodeUnit_Base C>
     198inline bool at_UCS_4LE(unsigned char x8data[]) {
     199  return caseless_comp<C, 'u', 'c', 's', '-', '4', 'l', 'e'>(x8data);
     200}
     201
     202template<CodeUnit_Base C>
     203inline bool at_UCS_4BE(unsigned char x8data[]) {
     204  return caseless_comp<C, 'u', 'c', 's', '-', '4', 'b', 'e'>(x8data);
     205}
     206
     207template<CodeUnit_Base C>
     208inline bool at_UTF_16(unsigned char x8data[]) {
     209  return caseless_comp<C, 'u', 't', 'f', '-', '1', '6'>(x8data);
     210}
     211
     212template<CodeUnit_Base C>
     213inline bool at_UTF_32(unsigned char x8data[]) {
     214  return caseless_comp<C, 'u', 't', 'f', '-', '3', '2'>(x8data);
     215}
     216
     217template<CodeUnit_Base C>
     218inline bool at_UTF_16LE(unsigned char x8data[]) {
     219  return caseless_comp<C, 'u', 't', 'f', '-', '1', '6', 'l', 'e'>(x8data);
     220}
     221
     222template<CodeUnit_Base C>
     223inline bool at_UTF_32LE(unsigned char x8data[]) {
     224  return caseless_comp<C, 'u', 't', 'f', '-', '3', '2', 'l', 'e'>(x8data);
     225}
     226
     227template<CodeUnit_Base C>
     228inline bool at_UTF_16BE(unsigned char x8data[]) {
     229  return caseless_comp<C, 'u', 't', 'f', '-', '1', '6', 'b', 'e'>(x8data);
     230}
     231
     232template<CodeUnit_Base C>
     233inline bool at_UTF_32BE(unsigned char x8data[]) {
     234  return caseless_comp<C, 'u', 't', 'f', '-', '3', '2', 'b', 'e'>(x8data);
     235}
     236
     237
     238
    172239#endif
  • trunk/src/charsets/ASCII_EBCDIC.h

    r22 r26  
    209209template<> struct Ord<EBCDIC,'}'> {static uint8_t const value = 0xd0;};
    210210
    // These rows carry only left-column (r23) numbers, i.e. they are the definition removed by r26.
    // The constant is computed from the 'a'/'A' pair alone: Ord<C,'a'> XOR Ord<C,'A'>.
    211 // CaseFoldBit is the bit that differs between upper and lower
    212 // case versions of the same letter.  Both ASCII and EBCDIC have
    213 // the property that a single case fold bit exists and is the same
    214 // for all letters. 
    215 template <CodeUnit_Base C>
    216 struct CaseFoldBit
    217   {static uint8_t const value = Ord<C, 'a'>::value ^ Ord<C, 'A'>::value;};
     211template <unsigned char _> struct UC2lc {static unsigned char const value = _;};
     212template <> struct UC2lc<'A'> {static unsigned char const value = 'a';};
     213template <> struct UC2lc<'B'> {static unsigned char const value = 'b';};
     214template <> struct UC2lc<'C'> {static unsigned char const value = 'c';};
     215template <> struct UC2lc<'D'> {static unsigned char const value = 'd';};
     216template <> struct UC2lc<'E'> {static unsigned char const value = 'e';};
     217template <> struct UC2lc<'F'> {static unsigned char const value = 'f';};
     218template <> struct UC2lc<'G'> {static unsigned char const value = 'g';};
     219template <> struct UC2lc<'H'> {static unsigned char const value = 'h';};
     220template <> struct UC2lc<'I'> {static unsigned char const value = 'i';};
     221template <> struct UC2lc<'J'> {static unsigned char const value = 'j';};
     222template <> struct UC2lc<'K'> {static unsigned char const value = 'k';};
     223template <> struct UC2lc<'L'> {static unsigned char const value = 'l';};
     224template <> struct UC2lc<'M'> {static unsigned char const value = 'm';};
     225template <> struct UC2lc<'N'> {static unsigned char const value = 'n';};
     226template <> struct UC2lc<'O'> {static unsigned char const value = 'o';};
     227template <> struct UC2lc<'P'> {static unsigned char const value = 'p';};
     228template <> struct UC2lc<'Q'> {static unsigned char const value = 'q';};
     229template <> struct UC2lc<'R'> {static unsigned char const value = 'r';};
     230template <> struct UC2lc<'S'> {static unsigned char const value = 's';};
     231template <> struct UC2lc<'T'> {static unsigned char const value = 't';};
     232template <> struct UC2lc<'U'> {static unsigned char const value = 'u';};
     233template <> struct UC2lc<'V'> {static unsigned char const value = 'v';};
     234template <> struct UC2lc<'W'> {static unsigned char const value = 'w';};
     235template <> struct UC2lc<'X'> {static unsigned char const value = 'x';};
     236template <> struct UC2lc<'Y'> {static unsigned char const value = 'y';};
     237template <> struct UC2lc<'Z'> {static unsigned char const value = 'z';};
     238
     239template <unsigned char _> struct lc2UC {static unsigned char const value = _;};
     240template <> struct lc2UC<'a'> {static unsigned char const value = 'A';};
     241template <> struct lc2UC<'b'> {static unsigned char const value = 'B';};
     242template <> struct lc2UC<'c'> {static unsigned char const value = 'C';};
     243template <> struct lc2UC<'d'> {static unsigned char const value = 'D';};
     244template <> struct lc2UC<'e'> {static unsigned char const value = 'E';};
     245template <> struct lc2UC<'f'> {static unsigned char const value = 'F';};
     246template <> struct lc2UC<'g'> {static unsigned char const value = 'G';};
     247template <> struct lc2UC<'h'> {static unsigned char const value = 'H';};
     248template <> struct lc2UC<'i'> {static unsigned char const value = 'I';};
     249template <> struct lc2UC<'j'> {static unsigned char const value = 'J';};
     250template <> struct lc2UC<'k'> {static unsigned char const value = 'K';};
     251template <> struct lc2UC<'l'> {static unsigned char const value = 'L';};
     252template <> struct lc2UC<'m'> {static unsigned char const value = 'M';};
     253template <> struct lc2UC<'n'> {static unsigned char const value = 'N';};
     254template <> struct lc2UC<'o'> {static unsigned char const value = 'O';};
     255template <> struct lc2UC<'p'> {static unsigned char const value = 'P';};
     256template <> struct lc2UC<'q'> {static unsigned char const value = 'Q';};
     257template <> struct lc2UC<'r'> {static unsigned char const value = 'R';};
     258template <> struct lc2UC<'s'> {static unsigned char const value = 'S';};
     259template <> struct lc2UC<'t'> {static unsigned char const value = 'T';};
     260template <> struct lc2UC<'u'> {static unsigned char const value = 'U';};
     261template <> struct lc2UC<'v'> {static unsigned char const value = 'V';};
     262template <> struct lc2UC<'w'> {static unsigned char const value = 'W';};
     263template <> struct lc2UC<'x'> {static unsigned char const value = 'X';};
     264template <> struct lc2UC<'y'> {static unsigned char const value = 'Y';};
     265template <> struct lc2UC<'z'> {static unsigned char const value = 'Z';};
    218266
    219267#endif
  • trunk/src/multiliteral.h

    r25 r26  
    4444*/
    4545
     46template <unsigned char byte1, unsigned char byte2>
     47struct b2int16 {
     48  static uint16_t const value =
     49    (((uint16_t) byte1) << LOW_BYTE_SHIFT) +
     50    (((uint16_t) byte2) << HIGH_BYTE_SHIFT);
     51};
     52
    // c2int16<C, c1, c2>::value is the 16-bit constant for the two characters c1, c2 after
    // each has been translated to code unit base C through Ord<C, .>::value.
    // Left-column-only rows (r23) are the removed inline shift-and-add; the right-column-only
    // row (r26) delegates the packing of the two translated bytes to b2int16.
    4653template <CodeUnit_Base C, unsigned char c1, unsigned char c2>
    4754struct c2int16 {
    48   static uint16_t const value =
    49     (((uint16_t) Ord<C,c1>::value) << LOW_BYTE_SHIFT) +
    50     (((uint16_t) Ord<C,c2>::value) << HIGH_BYTE_SHIFT);
     55  static uint16_t const value = b2int16<Ord<C,c1>::value, Ord<C,c2>::value>::value;
    5156};
    5257
     
    134139}
    135140
     141template <CodeUnit_Base C, unsigned char c1, unsigned char c2>
     142static inline bool caseless_comp(unsigned char s[]) {
     143  const uint16_t lc = c2int16<C, UC2lc<c1>::value, UC2lc<c2>::value>::value;
     144  const uint16_t UC = c2int16<C, lc2UC<c1>::value, lc2UC<c2>::value>::value;
     145  const uint16_t case_mask = lc ^ UC;
     146  const uint16_t canon = lc & case_mask;
     147  return (s2int16(s) & case_mask) == canon;
     148}
     149
     150template <CodeUnit_Base C, unsigned char c1, unsigned char c2, unsigned char c3>
     151static inline bool caseless_comp(unsigned char s[]) {
     152  const uint32_t lc = c3int32<C, UC2lc<c1>::value, UC2lc<c2>::value, UC2lc<c3>::value>::value;
     153  const uint32_t UC = c3int32<C, lc2UC<c1>::value, lc2UC<c2>::value, lc2UC<c3>::value>::value;
     154  const uint32_t case_mask = lc ^ UC;
     155  const uint32_t canon = lc & case_mask;
     156  return (s3int32(s) & case_mask) == canon;
     157}
     158
     159template <CodeUnit_Base C, unsigned char c1, unsigned char c2,
     160                           unsigned char c3, unsigned char c4>
     161static inline bool caseless_comp(unsigned char s[]) {
     162  const uint32_t lc = c4int32<C, UC2lc<c1>::value, UC2lc<c2>::value,
     163                                 UC2lc<c3>::value, UC2lc<c4>::value>::value;
     164  const uint32_t UC = c4int32<C, lc2UC<c1>::value, lc2UC<c2>::value,
     165                                 lc2UC<c3>::value, lc2UC<c4>::value>::value;
     166  const uint32_t case_mask = lc ^ UC;
     167  const uint32_t canon = lc & case_mask;
     168  return (s4int32(s) & case_mask) == canon;
     169}
     170
     171template <CodeUnit_Base C, unsigned char c1, unsigned char c2,
     172                           unsigned char c3, unsigned char c4,
     173                           unsigned char c5>
     174static inline bool caseless_comp(unsigned char s[]) {
     175  const uint64_t lc = c5int64<C, UC2lc<c1>::value, UC2lc<c2>::value,
     176                                 UC2lc<c3>::value, UC2lc<c4>::value,
     177                                 UC2lc<c5>::value>::value;
     178  const uint64_t UC = c5int64<C, lc2UC<c1>::value, lc2UC<c2>::value,
     179                                 lc2UC<c3>::value, lc2UC<c4>::value,
     180                                 lc2UC<c5>::value>::value;
     181  const uint64_t case_mask = lc ^ UC;
     182  const uint64_t canon = lc & case_mask;
     183  return (s5int64(s) & case_mask) == canon;
     184}
     185
     186template <CodeUnit_Base C, unsigned char c1, unsigned char c2,
     187                           unsigned char c3, unsigned char c4,
     188                           unsigned char c5, unsigned char c6>
     189static inline bool caseless_comp(unsigned char s[]) {
     190  const uint64_t lc = c6int64<C, UC2lc<c1>::value, UC2lc<c2>::value,
     191                                 UC2lc<c3>::value, UC2lc<c4>::value,
     192                                 UC2lc<c5>::value, UC2lc<c6>::value>::value;
     193  const uint64_t UC = c6int64<C, lc2UC<c1>::value, lc2UC<c2>::value,
     194                                 lc2UC<c3>::value, lc2UC<c4>::value,
     195                                 lc2UC<c5>::value, lc2UC<c6>::value>::value;
     196  const uint64_t case_mask = lc ^ UC;
     197  const uint64_t canon = lc & case_mask;
     198  return (s6int64(s) & case_mask) == canon;
     199}
     200
     201template <CodeUnit_Base C, unsigned char c1, unsigned char c2,
     202                           unsigned char c3, unsigned char c4,
     203                           unsigned char c5, unsigned char c6,
     204                           unsigned char c7>
     205static inline bool caseless_comp(unsigned char s[]) {
     206  const uint64_t lc = c7int64<C, UC2lc<c1>::value, UC2lc<c2>::value,
     207                                 UC2lc<c3>::value, UC2lc<c4>::value,
     208                                 UC2lc<c5>::value, UC2lc<c6>::value,
     209                                 UC2lc<c7>::value>::value;
     210  const uint64_t UC = c7int64<C, lc2UC<c1>::value, lc2UC<c2>::value,
     211                                 lc2UC<c3>::value, lc2UC<c4>::value,
     212                                 lc2UC<c5>::value, lc2UC<c6>::value,
     213                                 lc2UC<c7>::value>::value;
     214  const uint64_t case_mask = lc ^ UC;
     215  const uint64_t canon = lc & case_mask;
     216  return (s7int64(s) & case_mask) == canon;
     217}
     218
     219template <CodeUnit_Base C, unsigned char c1, unsigned char c2,
     220                           unsigned char c3, unsigned char c4,
     221                           unsigned char c5, unsigned char c6,
     222                           unsigned char c7, unsigned char c8>
     223static inline bool caseless_comp(unsigned char s[]) {
     224  const uint64_t lc = c8int64<C, UC2lc<c1>::value, UC2lc<c2>::value,
     225                                 UC2lc<c3>::value, UC2lc<c4>::value,
     226                                 UC2lc<c5>::value, UC2lc<c6>::value,
     227                                 UC2lc<c7>::value, UC2lc<c8>::value>::value;
     228  const uint64_t UC = c8int64<C, lc2UC<c1>::value, lc2UC<c2>::value,
     229                                 lc2UC<c3>::value, lc2UC<c4>::value,
     230                                 lc2UC<c5>::value, lc2UC<c6>::value,
     231                                 lc2UC<c7>::value, lc2UC<c8>::value>::value;
     232  const uint64_t case_mask = lc ^ UC;
     233  const uint64_t canon = lc & case_mask;
     234  return (s8int64(s) & case_mask) == canon;
     235}
     236
     237
     238
    136239#endif
Note: See TracChangeset for help on using the changeset viewer.