Changeset 160 for trunk/src/symtab.c


Ignore:
Timestamp:
Jun 20, 2008, 3:30:40 PM (11 years ago)
Author:
lindanl
Message:

Restructured the character set architecture; the symbol table now stores its names in a StringPool.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/symtab.c

    r151 r160  
    11#include "symtab.h"
    22
    3 inline char * copy_name (char * s, int lgth){           
    4         char * d = new char[lgth+1];
    5         memcpy(d, s,lgth);
    6         d[lgth] = '\0';
    7         return d;
    8 }
     3const int INIT_STRINGPOOL_SIZE = 4096;
    94
    105inline bool bit_test(unsigned char * bit_Map, int codepoint) {
     
    121116        else if (bytes[0] <= 0xEF) {
    122117                int codepoint = ((bytes[0] & 0x0F) << 12)| ((bytes[1] & 0x3F) << 6) | (bytes[2] & 0x3F);
    123                 return is_XML10_NameStrt_codepoint(codepoint) ? 3 : 0;
     118                return is_XML10_NameChar_codepoint(codepoint) ? 3 : 0;
    124119        }
    125120        else return 0;
     
    172167                valid_bytes = XML_10_UTF8_NameChar_bytes((unsigned char *) &protoname[pos]);
    173168                pos += valid_bytes;
     169                 
    174170        }
    175171        /* Success requires that every byte sequence processed be valid
    176172           and that the total lgth processed be exactly that provided on
    177173           input. */
     174         
    178175        return (valid_bytes > 0) & (pos == lgth);
    179176}
     
    193190
    194191int Symbol_Table::Insert_Name(char * name, int lgth) {
    195         char * s = copy_name(name,lgth);
     192//      char * s = copy_name(name,lgth);
     193        char * s = pool->Insert(name,lgth);
    196194        UTF8NameMap[s]=++(globalNameCount);
    197195        Name_Data name_data;
     
    202200}
    203201
    204 int Symbol_Table::UTF8_Lookup_or_Insert_XML10_Name(char * name, int lgth) {
     202
     203inline bool Verify_ASCII(char * name_ptr, int name_lgth) {
     204        /* To verify that a name is ASCII, ensure that the high bit
     205           of each byte is 0.  A SIMD compare can verify this for
     206           up to sizeof(BytePack) bytes.  For less than 16 bytes,
     207           first shift out bytes beyond the name length.  For more
     208           than 16 bytes, form the logical "or" of the successive byte
     209           packs together so that a high 1 bit in any byte is preserved
     210           for the final SIMD test. */
     211        BytePack b = sisd_load_unaligned((BytePack *) name_ptr);
     212        if (name_lgth <= sizeof(BytePack)) {
     213                /* Clear bytes beyond the length of the name. */
     214                b = sisd_sfl(b, sisd_from_int(8 * (sizeof(BytePack) - name_lgth)));
     215        }
     216        else {
     217                int offset = name_lgth % sizeof(BytePack);
     218                for (int i = offset; i < name_lgth; i += sizeof(BytePack)) {
     219                        b = simd_or(sisd_load_unaligned((BytePack *) &name_ptr[i]),b);
     220                }
     221        }
     222        return !simd_any_sign_bit_8(b);
     223}
     224
     225
     226/* ASCII_LookupOrInsert determines the nameID for any ASCII name
     227from the global name table, inserting the name and allocating
     228a nameID if necessary.  If the name is non-ASCII, 0 is returned. */
     229
     230inline int Symbol_Table::ASCII_Lookup_or_Insert(char * name_ptr, int name_lgth) {
     231        if (Verify_ASCII(name_ptr, name_lgth)) {
     232                return UTF8_Lookup_or_Insert(name_ptr, name_lgth);
     233        }
     234        return 0;
     235}
     236
     237
     238
     239int Symbol_Table::UTF8_Lookup_or_Insert(char * name, int lgth) {
    205240        char delim = name[lgth];
    206241        name[lgth] = '\0';
    207242        int nameID = UTF8NameMap[name];
    208243        name[lgth] = delim;     
    209         if (nameID == 0){
     244       
     245        if(nameID == 0){
    210246        #if (not defined(OMISSION)) or (OMISSION != NAME_VALIDATION)
    211                 if (!is_XML10_UTF8_Name(name,lgth)) {
     247                if (!is_XML11_UTF8_Name(name,lgth)) {
    212248                        ShowSyntaxError(NT_Name);
    213249                        exit(-1);
    214250                }
    215251        #endif
    216                 char * s = copy_name(name,lgth);
     252//              char * s = copy_name(name,lgth);
     253                char * s = pool->Insert(name,lgth);
    217254                UTF8NameMap[s]=++(globalNameCount);
    218255                nameID = globalNameCount;
     
    226263}
    227264
    228 int Symbol_Table::UTF8_Lookup_or_Insert_XML11_Name(char * name, int lgth) {
    229         char * s = copy_name(name,lgth);
    230         int nameID = UTF8NameMap[s];
     265//char * Symbol_Table::Get_UTF8_name(int nameID) {
     266//      return  UTF8NameTable[nameID];
     267//}
     268
     269char * Symbol_Table::Get_UTF8_name(int nameID) {
     270        return  UTF8NameTable[nameID].name_string;
     271}
     272
     273int Symbol_Table::Get_UTF8_lgth(int nameID) {
     274        return  UTF8NameTable[nameID].lgth;
     275}
     276
     277char * Symbol_Table::ReserveSymbolSpace(int u8_lgth) {
     278        reserved = new char[u8_lgth+1];
     279        reserved_lgth = u8_lgth;
     280        return reserved;
     281}
     282
     283int Symbol_Table::LookupOrInsertReserved(){             
     284        int nameID = UTF8NameMap[reserved];
    231285        if(nameID == 0){
    232286        #if (not defined(OMISSION)) or (OMISSION != NAME_VALIDATION)
    233                 if (!is_XML11_UTF8_Name(name,lgth))  {
     287                if (!is_XML10_UTF8_Name(reserved,reserved_lgth))  {
    234288                        ShowSyntaxError(NT_Name);
    235289                        exit(-1);
    236290                }
    237291        #endif
    238                 UTF8NameMap[s]=++(globalNameCount);
     292                UTF8NameMap[reserved]=++(globalNameCount);
    239293                nameID = globalNameCount;
    240294                Name_Data name_data;
    241                 name_data.name_string = s;
    242                 name_data.lgth = lgth;
     295                name_data.name_string = reserved;
     296                name_data.lgth = reserved_lgth;
    243297                UTF8NameTable.push_back(name_data);
    244298//              UTF8NameTable.push_back(s);
    245299        }
     300        else {
     301                delete [] reserved;
     302        }
    246303        return nameID;
    247 }
    248 
    249 //char * Symbol_Table::Get_UTF8_name(int nameID) {
    250 //      return  UTF8NameTable[nameID];
    251 //}
    252 
    253 char * Symbol_Table::Get_UTF8_name(int nameID) {
    254         return  UTF8NameTable[nameID].name_string;
    255 }
    256 
    257 int Symbol_Table::Get_UTF8_lgth(int nameID) {
    258         return  UTF8NameTable[nameID].lgth;
    259304}
    260305
     
    274319//              UTF8NameTable.push_back(predefined[i]);
    275320        }
    276 }
    277 
    278 
     321        pool = new StringPool;
     322}
     323
     324
     325StringPool::StringPool() {
     326       buffer_capacity = INIT_STRINGPOOL_SIZE;
     327       buffer_space_used = 0;
     328       pool_buffers.push_back(new char [buffer_capacity]);
     329}
     330
     331StringPool::~StringPool() {
     332       vector<char * >::iterator i;
     333       for (i = pool_buffers.begin(); i != pool_buffers.end(); i++) {
     334               delete [] *i;
     335       }
     336}
     337
     338char * StringPool::Insert(char * s, int lgth) {
     339       while (lgth + buffer_space_used >= buffer_capacity) {
     340               buffer_capacity *= 2;
     341               pool_buffers.push_back(new char [buffer_capacity]);
     342               buffer_space_used = 0;
     343       }
     344       char * insertion_ptr = &pool_buffers.back()[buffer_space_used];
     345       memcpy(insertion_ptr, s, lgth);
     346       insertion_ptr[lgth] = '\0';
     347       buffer_space_used += lgth + 1;
     348       return insertion_ptr;
     349}
     350
     351
     352
Note: See TracChangeset for help on using the changeset viewer.