Ignore:
Timestamp:
Dec 16, 2010, 7:13:23 AM (9 years ago)
Author:
cameron
Message:

Move bitmap-based NameStart/NameChar tests into namechars.h

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/parabix2/pablo_template.c

    r815 r827  
    6565        return 0;
    6666}
    67 
    68 
    69 
    /*
     * Test a single bit of a packed bitmap.
     *
     * The bitmap stores eight flags per byte, most significant bit first:
     * the flag for `codepoint` lives in byte `codepoint / 8`, at bit position
     * `7 - codepoint % 8` counted from the least significant bit.
     *
     * bit_Map    packed bitmap; it is only read, never written.
     * codepoint  index of the flag to test.  Negative values yield false.
     *            The caller must make sure a non-negative index lies inside
     *            the bitmap; its size is not known here, so no upper bound
     *            check is possible.
     *
     * Returns true when the selected bit is set, false otherwise.
     *
     * Declared `static inline` so that the translation unit always carries its
     * own definition (a plain `inline` definition provides no external
     * definition in C99/C11 and can fail to link when the call is not inlined).
     */
    static inline bool bit_test(const unsigned char *bit_Map, int codepoint) {
        if (codepoint < 0) {
            /* A negative index would read before the table and shift by > 7. */
            return false;
        }
        const unsigned int index = (unsigned int) codepoint;
        return ((bit_Map[index >> 3] >> (7u - (index & 7u))) & 1u) != 0;
    }
    73 
    74 bool is_XML10_NameStrt_codepoint(int codepoint) {
    75         switch (codepoint >> 12) {
    76                 case 0: return bit_test(NameStrt_XML10_0000_11FF, codepoint);
    77                 case 1: if (codepoint <= 0x11FF)
    78                                 return bit_test(NameStrt_XML10_0000_11FF, codepoint);
    79                         else if (codepoint < 0x1E00) return false;
    80                         else return bit_test(NameStrt_XML10_1E00_1FFF, codepoint & 0x1FF);
    81                 case 2: if (codepoint > 0x2182) return false;
    82                         else return bit_test(NameStrt_XML10_2000_21FF, codepoint & 0x1FF);
    83                 case 3: if (codepoint > 0x312C) return false;
    84                         else return bit_test(NameStrt_XML10_3000_31FF, codepoint & 0x1FF);
    85                 case 4: return codepoint >= 0x4E00;
    86                 case 5: case 6: case 7: case 8: return true;
    87                 case 9: return codepoint <= 0x9FA5;
    88                 case 0xA: return codepoint >= 0xAC00;
    89                 case 0xB: case 0xC: return true;
    90                 case 0xD: return codepoint <= 0xD7A3;
    91                 default: return false;
    92         }
    93 }
    94 
    95 bool is_XML10_NameChar_codepoint(int codepoint) {
    96         switch (codepoint >> 12) {
    97                 case 0: return bit_test(NameChar_XML10_0000_11FF, codepoint);
    98                 case 1: if (codepoint <= 0x11FF)
    99                                 return bit_test(NameChar_XML10_0000_11FF, codepoint);
    100                         else if (codepoint < 0x1E00) return false;
    101                         else return bit_test(NameStrt_XML10_1E00_1FFF, codepoint & 0x1FF);
    102                 case 2: if (codepoint > 0x2182) return false;
    103                         else return bit_test(NameChar_XML10_2000_21FF, codepoint & 0x1FF);
    104                 case 3: if (codepoint > 0x312C) return false;
    105                         else return bit_test(NameChar_XML10_3000_31FF, codepoint & 0x1FF);
    106                 case 4: return codepoint >= 0x4E00;
    107                 case 5: case 6: case 7: case 8: return true;
    108                 case 9: return codepoint <= 0x9FA5;
    109                 case 0xA:       return codepoint >= 0xAC00;
    110                 case 0xB: case 0xC: return true;
    111                 case 0xD: return codepoint <= 0xD7A3;
    112                 default: return false;
    113         }
    114 }
    115 
    116 inline int XML_10_UTF8_NameStrt_bytes (unsigned char bytes[]) {
    117         if (bytes[0] <= 0x7F) {
    118                 if (bit_test(NameStrt_XML10_0000_11FF, (int) bytes[0])) return 1;
    119                 else return 0;
    120         }
    121         else if (bytes[0] <= 0xDF) {
    122                 int codepoint = ((bytes[0] & 0x3F) << 6) | (bytes[1] & 0x3F);
    123                 if (bit_test(NameStrt_XML10_0000_11FF, codepoint)) return 2;
    124                 else return 0;
    125         }
    126         else if (bytes[0] <= 0xEF) {
    127                 int codepoint = ((bytes[0] & 0x0F) << 12)| ((bytes[1] & 0x3F) << 6) | (bytes[2] & 0x3F);
    128                 return is_XML10_NameStrt_codepoint(codepoint) ? 3 : 0;
    129         }
    130         else return 0;
    131 }
    132 
    133 inline int XML_10_UTF8_NameChar_bytes (unsigned char bytes[]) {
    134         if (bytes[0] <= 0x7F) {
    135                 if (bit_test(NameChar_XML10_0000_11FF, (int) bytes[0])) return 1;
    136                 else return 0;
    137         }
    138         else if (bytes[0] <= 0xDF) {
    139                 int codepoint = ((bytes[0] & 0x3F) << 6) | (bytes[1] & 0x3F);
    140                 if (bit_test(NameChar_XML10_0000_11FF, codepoint)) return 2;
    141                 else return 0;
    142         }
    143         else if (bytes[0] <= 0xEF) {
    144                 int codepoint = ((bytes[0] & 0x0F) << 12)| ((bytes[1] & 0x3F) << 6) | (bytes[2] & 0x3F);
    145                 return is_XML10_NameChar_codepoint(codepoint) ? 3 : 0;
    146         }
    147         else return 0;
    148 }
    14967
    15068
Note: See TracChangeset for help on using the changeset viewer.