Changeset 825 for trunk/src/symtab.c


Ignore:
Timestamp:
Dec 16, 2010, 7:05:41 AM (9 years ago)
Author:
cameron
Message:

Move bitmap-based NameStart/NameChar tests into namechars.h

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/symtab.c

    r267 r825  
    11#include "symtab.h"
    2 
    3 
    /*
     * Test a single bit of a packed bitmap.
     *
     * bit_Map    byte array holding eight flags per byte, most significant
     *            bit first (bit 0 of the map is the 0x80 bit of byte 0).
     * codepoint  index of the bit to test.
     *
     * Returns true when the selected bit is set.  No bounds checking is done:
     * the caller must guarantee that codepoint/8 lies inside bit_Map.
     *
     * Defined without `inline`: in C99/C11 a file-scope function declared
     * plain `inline` (no `static`, no `extern`) is only an inline definition,
     * so any call the compiler chooses not to inline fails at link time.
     */
    bool bit_test(unsigned char * bit_Map, int codepoint) {
        return (bit_Map[codepoint/8] >> (7 - codepoint % 8)) & 1;
    }
    7 
    8 
    9 bool is_XML10_NameStrt_codepoint(int codepoint) {
    10         switch (codepoint >> 12) {
    11                 case 0: return bit_test(NameStrt_XML10_0000_11FF, codepoint);
    12                 case 1: if (codepoint <= 0x11FF)
    13                                 return bit_test(NameStrt_XML10_0000_11FF, codepoint);
    14                         else if (codepoint < 0x1E00) return false;
    15                         else return bit_test(NameStrt_XML10_1E00_1FFF, codepoint & 0x1FF);
    16                 case 2: if (codepoint > 0x2182) return false;
    17                         else return bit_test(NameStrt_XML10_2000_21FF, codepoint & 0x1FF);
    18                 case 3: if (codepoint > 0x312C) return false;
    19                         else return bit_test(NameStrt_XML10_3000_31FF, codepoint & 0x1FF);
    20                 case 4: return codepoint >= 0x4E00;
    21                 case 5: case 6: case 7: case 8: return true;
    22                 case 9: return codepoint <= 0x9FA5;
    23                 case 0xA: return codepoint >= 0xAC00;
    24                 case 0xB: case 0xC: return true;
    25                 case 0xD: return codepoint <= 0xD7A3;
    26                 default: return false;
    27         }
    28 }
    29 
    30 bool is_XML10_NameChar_codepoint(int codepoint) {
    31         switch (codepoint >> 12) {
    32                 case 0: return bit_test(NameChar_XML10_0000_11FF, codepoint);
    33                 case 1: if (codepoint <= 0x11FF)
    34                                 return bit_test(NameChar_XML10_0000_11FF, codepoint);
    35                         else if (codepoint < 0x1E00) return false;
    36                         else return bit_test(NameStrt_XML10_1E00_1FFF, codepoint & 0x1FF);
    37                 case 2: if (codepoint > 0x2182) return false;
    38                         else return bit_test(NameChar_XML10_2000_21FF, codepoint & 0x1FF);
    39                 case 3: if (codepoint > 0x312C) return false;
    40                         else return bit_test(NameChar_XML10_3000_31FF, codepoint & 0x1FF);
    41                 case 4: return codepoint >= 0x4E00;
    42                 case 5: case 6: case 7: case 8: return true;
    43                 case 9: return codepoint <= 0x9FA5;
    44                 case 0xA:       return codepoint >= 0xAC00;
    45                 case 0xB: case 0xC: return true;
    46                 case 0xD: return codepoint <= 0xD7A3;
    47                 default: return false;
    48         }
    49 }
    50 
    51 bool is_XML11_NameStrt_codepoint(int codepoint) {
    52         if (likely(codepoint) <= 0x03FF) return bit_test(NameStrt_XML11_0000_03FF, codepoint);
    53         else switch (codepoint >> 12) {
    54                 case 0: case 1: return true;
    55                 case 2: if (codepoint >= 0x2070)
    56                                 if (codepoint <= 0x218F) return true;
    57                                 else return (codepoint >= 0x2C00) & (codepoint <= 0x2FEF);
    58                         else return (codepoint >= 0x200C) & (codepoint <= 0x200D);
    59                 case 3: return codepoint >= 0x3001;
    60                 case 4: case 5: case 6: case 7: case 8: case 9: case 0xA: case 0xB: case 0xC: return true;
    61                 case 0xD: return codepoint <= 0xD7FF;
    62                 case 0xE: return false;
    63                 case 0xF: if (codepoint <= 0xFDCF) return codepoint >= 0xF900;
    64                           else return (codepoint >= 0xFDF0) & (codepoint <= 0xFFFD);
    65                 default: return codepoint <= 0xEFFFF;
    66         }
    67 }
    68 
    69 bool is_XML11_NameChar_codepoint(int codepoint) {
    70         if (likely(codepoint) <= 0x03FF) return bit_test(NameChar_XML11_0000_03FF, codepoint);
    71         else switch (codepoint >> 12) {
    72                 case 0: case 1: return true;
    73                 case 2: if (codepoint >= 0x2070)
    74                                 if (codepoint <= 0x218F) return true;
    75                                 else return (codepoint >= 0x2C00) & (codepoint <= 0x2FEF);
    76                         else if (codepoint <= 0x200D) return codepoint >= 0x200C;
    77                         else return (codepoint == 0x203F) | (codepoint == 0x2040);
    78                 case 3: return codepoint >= 0x3001;
    79                 case 4: case 5: case 6: case 7: case 8: case 9: case 0xA: case 0xB: case 0xC: return true;
    80                 case 0xD: return codepoint <= 0xD7FF;
    81                 case 0xE: return false;
    82                 case 0xF: if (codepoint <= 0xFDCF) return codepoint >= 0xF900;
    83                           else return (codepoint >= 0xFDF0) & (codepoint <= 0xFFFD);
    84                 default: return codepoint <= 0xEFFFF;
    85         }
    86 }
    87 
    88 inline int XML_10_UTF8_NameStrt_bytes (unsigned char bytes[]) {
    89         if (bytes[0] <= 0x7F) {
    90                 if (bit_test(NameStrt_XML10_0000_11FF, (int) bytes[0])) return 1;
    91                 else return 0;
    92         }
    93         else if (bytes[0] <= 0xDF) {
    94                 int codepoint = ((bytes[0] & 0x3F) << 6) | (bytes[1] & 0x3F);
    95                 if (bit_test(NameStrt_XML10_0000_11FF, codepoint)) return 2;
    96                 else return 0;
    97         }
    98         else if (bytes[0] <= 0xEF) {
    99                 int codepoint = ((bytes[0] & 0x0F) << 12)| ((bytes[1] & 0x3F) << 6) | (bytes[2] & 0x3F);
    100                 return is_XML10_NameStrt_codepoint(codepoint) ? 3 : 0;
    101         }
    102         else return 0;
    103 }
    104 
    105 inline int XML_10_UTF8_NameChar_bytes (unsigned char bytes[]) {
    106         if (bytes[0] <= 0x7F) {
    107                 if (bit_test(NameChar_XML10_0000_11FF, (int) bytes[0])) return 1;
    108                 else return 0;
    109         }
    110         else if (bytes[0] <= 0xDF) {
    111                 int codepoint = ((bytes[0] & 0x3F) << 6) | (bytes[1] & 0x3F);
    112                 if (bit_test(NameChar_XML10_0000_11FF, codepoint)) return 2;
    113                 else return 0;
    114         }
    115         else if (bytes[0] <= 0xEF) {
    116                 int codepoint = ((bytes[0] & 0x0F) << 12)| ((bytes[1] & 0x3F) << 6) | (bytes[2] & 0x3F);
    117                 return is_XML10_NameChar_codepoint(codepoint) ? 3 : 0;
    118         }
    119         else return 0;
    120 }
    121 
    122 inline int XML_11_UTF8_NameStrt_bytes (unsigned char bytes[]) {
    123         if (bytes[0] <= 0x7F) {
    124                 if (bit_test(NameStrt_XML11_0000_03FF, (int) bytes[0])) return 1;
    125                 else return 0;
    126         }
    127         else if (bytes[0] <= 0xDF) {
    128                 int codepoint = ((bytes[0] & 0x3F) << 6) | (bytes[1] & 0x3F);
    129                 return is_XML11_NameStrt_codepoint(codepoint) ? 2 : 0;
    130         }
    131         else if (bytes[0] <= 0xEF) {
    132                 int codepoint = ((bytes[0] & 0x0F) << 12)| ((bytes[1] & 0x3F) << 6) | (bytes[2] & 0x3F);
    133                 return is_XML11_NameStrt_codepoint(codepoint) ? 3 : 0;
    134         }
    135         else {
    136                 int codepoint = ((bytes[0] & 0x0F) << 18)| ((bytes[1] & 0x3F) << 12) |
    137                                 ((bytes[2] & 0x3F) << 6) | (bytes[3] & 0x3F);
    138                 return is_XML11_NameStrt_codepoint(codepoint) ? 4 : 0;
    139         }
    140 }
    141 
    142 inline int XML_11_UTF8_NameChar_bytes (unsigned char bytes[]) {
    143         if (bytes[0] <= 0x7F) {
    144                 if (bit_test(NameChar_XML11_0000_03FF, (int) bytes[0])) return 1;
    145                 else return 0;
    146         }
    147         else if (bytes[0] <= 0xDF) {
    148                 int codepoint = ((bytes[0] & 0x3F) << 6) | (bytes[1] & 0x3F);
    149                 return is_XML11_NameChar_codepoint(codepoint) ? 2 : 0;
    150         }
    151         else if (bytes[0] <= 0xEF) {
    152                 int codepoint = ((bytes[0] & 0x0F) << 12)| ((bytes[1] & 0x3F) << 6) | (bytes[2] & 0x3F);
    153                 return is_XML11_NameChar_codepoint(codepoint) ? 3 : 0;
    154         }
    155         else {
    156                 int codepoint = ((bytes[0] & 0x0F) << 18)| ((bytes[1] & 0x3F) << 12) |
    157                                 ((bytes[2] & 0x3F) << 6) | (bytes[3] & 0x3F);
    158                 return is_XML11_NameChar_codepoint(codepoint) ? 4 : 0;
    159         }
    160 }
    1612
    1623bool is_XML10_UTF8_Name(char protoname[], int lgth) {
Note: See TracChangeset for help on using the changeset viewer.