Changeset 1986


Ignore:
Timestamp:
Mar 28, 2012, 7:29:32 PM (7 years ago)
Author:
ksherdy
Message:

Updated hash table.

Location:
trunk/symbol_table
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/symbol_table/src/hash_table.hpp

    r1979 r1986  
    1010
    1111/*=============================================================================
    12   allocator.hpp - Coterminal memory pool allocators.
     12  hash_table.hpp - Hashing with chaining
    1313  Created on: 18-December-2011
    1414  Author: Ken Herdy
    1515=============================================================================*/
    1616
    17 //#define HASH_TABLE_HPP_DEBUG
     17#define HASH_TABLE_HPP_DEBUG
    1818
    1919#define MIN(a, b) ((a < b) ? a : b)
     
    5656uint64_t gid::value = 1;
    5757
    58 /*
    59 TODO - Move raw_data_pool up into a base class
    60 that so all hash table implementation share the same memory pool.
    61 */
    62 
    6358template<class COMPARE_STRATEGY, class HASH_STRATEGY, class ALLOCATOR>
    6459class hash_table {
     
    6661
    6762    hash_table(const uint32_t table_size=4096, const uint32_t load_factor=2) {
     63        #ifdef HASH_TABLE_HPP_DEBUG
     64            lookups = 0;
     65            elements = 0;
     66            collisions = 0;
     67            table_expansions = 0;
     68        #endif
    6869        init(table_size,load_factor);
    6970    }
     
    7475
    7576    IDISA_ALWAYS_INLINE uint64_t get_bucket(const uint8_t * h0, const uint8_t * h1, const uint32_t idx, const uint32_t slice_bits) {
     77        #ifdef HASH_TABLE_HPP_DEBUG
     78            lookups++;
     79        #endif
    7680        return hash_strategy.hash(h0,h1,idx,slice_bits,hash_size);
    7781    }
     
    8488            abort();
    8589        }
    86 
    8790
    8891        next->raw_bytes = raw_bytes;
     
    112115            prev = crt;
    113116            crt = crt->next;
     117            #ifdef HASH_TABLE_HPP_DEBUG
     118                collisions++;
     119            #endif
    114120        }
    115121
     
    119125        uint64_t x0 = bit_slice(h0, idx, hash_bit_lgth);
    120126        uint64_t x1 = bit_slice(h1, idx, hash_bit_lgth);
    121 
    122 #ifdef HASH_TABLE_HPP_DEBUG
    123         cout << "Insert: ";
    124         cout << "Idx: " << idx << ", ";
    125         cout << "GID: " << gid << ", ";
    126         cout << "Value: '" << string((char *) &raw_bytes[idx],0,raw_byte_lgth) << "', ";
    127         cout << "Bucket: "      << bucket << endl;
    128 #endif
    129127
    130128        insert( bucket,
     
    137135
    138136        if(expansion_test(table[bucket])) {
     137
     138            #ifdef HASH_TABLE_HPP_DEBUG
     139                this->table_expansions++;
     140            #endif
     141
    139142            expand();
    140143        }
    141         // elements++;
     144
     145        #ifdef HASH_TABLE_HPP_DEBUG
     146            elements++;
     147        #endif
     148
    142149        return gid;
    143150    }
     
    153160
    154161            while(crt != NULL) {
    155                 cout << "->"    << "(GID:" << crt->gid
    156                                 << ", Lgth:" << crt->raw_bytes_lgth
    157                                 << ", Value:'" << string((char *)&crt->raw_bytes[0], crt->raw_bytes_lgth) << "')";
     162                cout << "->";
     163                print_node(crt);
    158164                crt = crt->next;
    159165            }
     
    163169    }
    164170
     171    void print_node(node * crt) const {
     172        cout    << "(GID:"      << crt->gid
     173                << ",Lgth:"     << crt->raw_bytes_lgth
     174                << ",Value:'"   << string((char *)&crt->raw_bytes[0], crt->raw_bytes_lgth)
     175                << "')";
     176    }
     177
     178
     179    void print_diagnostics() const {
     180        #ifdef HASH_TABLE_HPP_DEBUG
     181            cout << "Table Size:"                       << table_size << endl;
     182            cout << "Total Elements:"                   << elements << endl;
     183            cout << "Total Lookups (with resizes):"     << lookups << endl;
     184            cout << "Collisions(with resizes):"         << collisions << endl;
     185            cout << "Table Expansions:"                 << table_expansions << endl;
     186            cout << "Resize Chain Length:"              << resize_chain_lgth << endl;
     187            cout << endl;
     188        #else
     189            cout << "#define  for diagnostics." << endl;
     190        #endif
     191    }
     192
    165193private:
    166194    // ----- Members -----
    167     uint32_t table_size;
    168     uint32_t hash_size;
    169     uint32_t load_factor;
     195    uint32_t table_size;            // power of 2
     196    uint32_t hash_size;             // maximum number of bits hashed on
     197    uint32_t resize_chain_lgth;     // maximum chain length
    170198
    171199    node ** table;
     
    175203    HASH_STRATEGY hash_strategy;
    176204
     205    // ----- Diagnostics -----
     206    #ifdef HASH_TABLE_HPP_DEBUG
     207        uint32_t lookups;
     208        uint32_t elements;
     209        uint32_t collisions;
     210        uint32_t table_expansions;
     211    #endif
     212
    177213    // ----- Helpers -----
    178214    uint32_t log2msb(uint32_t v) {
     
    192228                abort();
    193229        }
    194         // elements = 0;
    195         load_factor = factor;
     230        resize_chain_lgth = factor;
    196231    }
    197232
     
    218253        while(NULL != crt) {
    219254            lgth++;
    220             crt = crt->next; //?
     255            crt = crt->next;
    221256        }
    222257        return lgth;
     
    225260    bool expansion_test(const node * chain) {
    226261
    227         if ((chain_lgth(chain) > load_factor)
    228                 && (hash_size < hash_strategy.max_hashsize())
    229                 && table_size <= MAX_TABLE_SIZE) {
     262        if ((chain_lgth(chain) > resize_chain_lgth)
     263                &&  (hash_size < hash_strategy.max_hashsize())
     264                &&  (table_size < MAX_TABLE_SIZE))
     265        {
     266
    230267            return true;
    231268        }
     
    238275        uint32_t prev_size = this->table_size;
    239276
    240         init(table_size*2, load_factor); // TODO - WARNING - CONSIDER MAX MEMORY
     277        init(table_size*2, resize_chain_lgth);
    241278
    242279        uint64_t bucket;
     
    277314/* Length specialized strategy classes. Hash, Compare */
    278315
    279 /* Templated approach:
    280 (1) Avoids duplicate code,
    281 (2) Avoid vtable overhead,
    282 (3) Introduces additional coding complexity.
    283 */
     316/* Templated approach: (i) avoids duplicate code, (ii) avoids vtable overhead, (iii) introduces coding complexity.*/
    284317
    285318/* Hash Strategy */
     
    292325};
    293326
     327/* Hash functions specialized on symbol length. */
    294328template<uint32_t LGTH>
    295329class hash_strategy_t: public hash_strategy {
     
    299333};
    300334
    301 /* Specialized 'Byte' hash functions. */
    302335template<> class hash_strategy_t<1> {
    303336public:
     
    311344public:
    312345    static IDISA_ALWAYS_INLINE uint64_t hash(const uint8_t * h0, const uint8_t * h1, const int32_t idx, const uint32_t slice_bits, const uint32_t hash_bits) {
    313         return bit_compress_hash(h0, h1, bytes2bits(idx), slice_bits, hash_bits); // TODO byte_compress_hash
     346        return bit_compress_hash(h0, h1, bytes2bits(idx), slice_bits, hash_bits);
    314347    }
    315348    static IDISA_ALWAYS_INLINE uint32_t max_hashsize() { return bytes2bits(2); }
     
    371404    }
    372405    /* WARNING: Min bit count of any default (non-specialized) bit_compress_hash. */
    373     static IDISA_ALWAYS_INLINE uint32_t max_hashsize() { return 9; }
     406    static IDISA_ALWAYS_INLINE uint32_t max_hashsize() { return 17; }
    374407};
    375408
  • trunk/symbol_table/src/id_symbol_table.hpp

    r1984 r1986  
    1818static void print_symbol_debug(const uint8_t buffer [], const uint32_t spos, const uint32_t epos, const uint32_t lgth) {
    1919    cout << "Symbol(";
    20     cout << "Start:" << spos;
     20    cout << "Lgth:" << lgth;
     21    cout << ",Value:'" << string((char *)&(buffer[spos]), lgth) << "'";
     22    cout << ",Start:" << spos;
    2123    cout << ",End:" << epos;
    22     cout << ",Lgth:" << lgth;
    23     cout << ",Value:'" << string((char *)&(buffer[spos]), lgth);
    24     cout << "')" << endl;
     24    cout << ")" << endl;
    2525}
    2626#endif
     
    3737class id_symbol_table:public symbol_table {
    3838public:
    39     id_symbol_table()/*:hash_table_1(256)*/{}
     39    id_symbol_table():hash_table_1(256),hash_table_5(4096),hash_table_gte_17(65536){}
    4040    ~id_symbol_table() {
    4141#ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG
     
    5757        hash_table_16.print_table();
    5858        hash_table_gte_17.print_table();
     59        hash_table_1.print_diagnostics();
     60        hash_table_2.print_diagnostics();
     61        hash_table_3.print_diagnostics();
     62        hash_table_4.print_diagnostics();
     63        hash_table_5.print_diagnostics();
     64        hash_table_6.print_diagnostics();
     65        hash_table_7.print_diagnostics();
     66        hash_table_8.print_diagnostics();
     67        hash_table_9.print_diagnostics();
     68        hash_table_10.print_diagnostics();
     69        hash_table_11.print_diagnostics();
     70        hash_table_12.print_diagnostics();
     71        hash_table_13.print_diagnostics();
     72        hash_table_14.print_diagnostics();
     73        hash_table_15.print_diagnostics();
     74        hash_table_16.print_diagnostics();
     75        hash_table_gte_17.print_diagnostics();
    5976#endif
    6077    }
     
    180197    if(!rscanner.is_done() && (spos < 0)) { // block boundary case
    181198
    182         assert(lgth <= (LOOKBACK_SIZE+BLOCK_SIZE)); // segment boundary
    183         if(lgth > (LOOKBACK_SIZE+BLOCK_SIZE)) {
     199        if(lgth > (LOOKBACK_SIZE)) {
    184200            cerr << "Fatal Error.";
    185             cerr << " Symbol length exceeds " << (LOOKBACK_SIZE+BLOCK_SIZE) << " bytes.";
     201            cerr << " Symbol length exceeds " << (LOOKBACK_SIZE) << " bytes.";
    186202            cerr << " Symbol tail : ";
    187203            cerr << string((char *)&(buffer[rscanner.get_pos()-(LOOKBACK_SIZE+BLOCK_SIZE)]), LOOKBACK_SIZE+BLOCK_SIZE) << endl;
     
    255271                cerr << lb_blk << endl;
    256272                cerr << "Fatal Error.";
    257                 cerr << " Symbol length exceeds " << (LOOKBACK_SIZE+BLOCK_SIZE) << " bytes." << endl;
     273                cerr << " Symbol length exceeds " << (LOOKBACK_SIZE) << " bytes." << endl;
    258274                cerr << " Symbol tail : ";
    259275                cerr << string((char *)&(buffer[starts_rscanner.get_pos()-(LOOKBACK_SIZE+BLOCK_SIZE)]), LOOKBACK_SIZE+BLOCK_SIZE) << endl;
  • trunk/symbol_table/symbol_table.pro

    r1983 r1986  
    7878    lib/bitblock_iterator.hpp \
    7979    lib/bitblock_scan.hpp \
    80     src/buffer.hpp
     80    src/buffer.hpp \
     81    lib/hash.hpp
Note: See TracChangeset for help on using the changeset viewer.