Changeset 1979 for trunk/symbol_table


Ignore:
Timestamp:
Mar 27, 2012, 8:19:26 PM (7 years ago)
Author:
ksherdy
Message:

Added arbitrary length symbol resolution support.

Location:
trunk/symbol_table/src
Files:
1 added
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/symbol_table/src/Makefile

    r1968 r1979  
    1313endif
    1414
    15 all: basis_bits.hpp byte_pool.hpp  hash_strms.hpp  hash_table.hpp  id_group_strms.hpp  id_symbol_table.hpp  main.cpp  Makefile  marker_strms.hpp  symbol_table.hpp  transpose.hpp
     15all: basis_bits.hpp buffer.hpp byte_pool.hpp  hash_strms.hpp  hash_table.hpp  id_group_strms.hpp  id_symbol_table.hpp  main.cpp  Makefile  marker_strms.hpp  symbol_table.hpp  transpose.hpp
    1616        $(CC) -o main main.cpp $(AFLAGS) -DID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG #-DHASH_TABLE_HPP_DEBUG -DBUFFER_PROFILING #
    1717
  • trunk/symbol_table/src/hash_table.hpp

    r1967 r1979  
    124124        cout << "Idx: " << idx << ", ";
    125125        cout << "GID: " << gid << ", ";
    126         cout << "Val: '" << string((char *) &raw_bytes[idx],0,raw_byte_lgth) << "', ";
    127         cout << "Bkt: " << bucket << endl;
     126        cout << "Value: '" << string((char *) &raw_bytes[idx],0,raw_byte_lgth) << "', ";
     127        cout << "Bucket: "      << bucket << endl;
    128128#endif
    129129
     
    154154            while(crt != NULL) {
    155155                cout << "->"    << "(GID:" << crt->gid
    156                                 << ", Val:'" << string((char *)&crt->raw_bytes[0], crt->raw_bytes_lgth)
    157                                 << "', Len:" << crt->raw_bytes_lgth << ")";
     156                                << ", Lgth:" << crt->raw_bytes_lgth
     157                                << ", Value:'" << string((char *)&crt->raw_bytes[0], crt->raw_bytes_lgth) << "')";
    158158                crt = crt->next;
    159159            }
  • trunk/symbol_table/src/id_symbol_table.hpp

    r1968 r1979  
    1515
    1616//#define ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG
     17#ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG
     18static void print_symbol_debug(const uint8_t buffer [], const uint32_t spos, const uint32_t epos, const uint32_t lgth) {
     19    cout << "Symbol(";
     20    cout << "Start: " << spos;
     21    cout << ",End: " << epos;
     22    cout << ",Lgth: " << lgth;
     23    cout << ",Value:'" << string((char *)&(buffer[spos]), lgth);
     24    cout << "')" << endl;
     25}
     26#endif
    1727
    1828#include "symbol_table.hpp"
    1929#include "hash_table.hpp"
     30#include "buffer.hpp"
    2031#include "../lib/carryQ.hpp"
    2132#include "../lib/bitblock_iterator.hpp"
    2233#include "../lib/bitblock_scan.hpp"
    23 
    24 /* NOTE: C++ template code and Pablo generated length groups must coincide. */
    25 
    26 template<class HASH_TABLE>
    27 IDISA_ALWAYS_INLINE void do_block(HASH_TABLE & h_table, BitBlock ends, uint8_t buffer [], const uint32_t lgth,
    28                                   uint8_t h0 [], uint8_t h1 [], const uint32_t h_lgth, const uint32_t h_block_size) {
    29 
    30     int32_t spos;
    31     int32_t epos;
    32     ForwardScanner<BitBlock, scanword_t> scanner(&ends);
    33     scanner.scan_to_next();
    34     epos = scanner.get_pos();
    35     spos = (epos - lgth);
    36 
    37     if((epos != -1) && (spos < 0)) { // block boundary case
    38 
    39         assert(lgth<=BLOCK_SIZE); // segment boundary
    40 
    41         uint8_t * lb_buffer = buffer - ((lgth / BLOCK_SIZE) + 1)*BLOCK_SIZE;
    42         int32_t lb_spos = BLOCK_SIZE - ((-1*spos) & (BLOCK_SIZE-1));
    43 
    44         uint8_t * lb_h0 = h0 - ((lgth / BLOCK_SIZE) + 1)*h_block_size;
    45         uint8_t * lb_h1 = h1 - ((lgth / BLOCK_SIZE) + 1)*h_block_size;
    46 
    47         h_table.lookup_or_insert(lb_buffer, lb_spos, lgth,
    48                                  lb_h0, lb_h1, h_lgth);
    49 
    50 #ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG
    51         cout << "Symbol(";
    52         cout << "End:" << epos;
    53         cout << ",Start:" << lb_spos;
    54         cout << ",Text:'" << string((char *)&(lb_buffer[lb_spos]), lgth);
    55         cout << "')" << endl;
    56 #endif
    57         epos = scanner.scan_to_next();
    58         spos = (epos - lgth);
    59 
    60     }
    61 
    62     while(epos != -1) {
    63 
    64         h_table.lookup_or_insert(buffer, spos, lgth,
    65                                  h0, h1, h_lgth);
    66 
    67 #ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG
    68         cout << "Symbol(";
    69         cout << "End:" << epos;
    70         cout << ",Start:" << spos;
    71         cout << ",Text:'" << string((char *)&(buffer[spos]), lgth);
    72         cout << "')" << endl;
    73 #endif
    74         epos = scanner.scan_to_next();
    75         spos = (epos - lgth);
    76 
    77     }
    78 }
     34#include <cstdlib>
    7935
    8036template<class ALLOCATOR>
    81 class id_symbol_table : public symbol_table {
     37class id_symbol_table:public symbol_table {
    8238public:
    8339    id_symbol_table()/*:hash_table_1(256)*/{}
     
    10561
    10662    // Groups & groups
    107     void resolve(uint8_t buffer [], Groups groups [], BitBlock h0 [], BitBlock h1 [], uint32_t blocks, AoS_symbol symbol_ary [], const uint32_t symbols) {
     63    void resolve(uint8_t buffer [], Groups groups [],  BitBlock starts [], BitBlock ends_gte_17 [],
     64                 BitBlock h0 [], BitBlock h1 [], uint32_t blocks, AoS_symbol aos [], const uint32_t symbols) {
    10865
    10966        for(uint32_t blk=0;blk<blocks;blk++) {
     
    163120        }
    164121        if(bitblock::any(groups[blk].ends_gte_17)) {
    165         do_block<hash_table<identity_strategy_d, hash_strategy_d, ALLOCATOR> >(hash_table_gte_17, groups[blk].ends_gte_17, &buffer[blk*BLOCK_SIZE], 17, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 17, BLOCK_SIZE/8);
    166         }
    167 
    168 
    169 
    170 //      do_segment<hash_table<identity_strategy_d, hash_strategy_d, ALLOCATOR> >(hgte_17, starts, groups.ends_gte_17, segment_size);
    171 
    172         }
    173     }
    174 
    175     // void resolve(uint8_t * segment, uint32_t segment_blocks, BitBlock * starts, BitBlock * ends, BitBlock * hash_values, SoA_symbol & symbols) {}
     122        do_block<hash_table<identity_strategy_d, hash_strategy_d, ALLOCATOR> >(hash_table_gte_17, &starts[blk], &ends_gte_17[blk], &buffer[blk*BLOCK_SIZE], (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], BLOCK_SIZE/8);
     123        }
     124
     125        }
     126    }
     127
    176128private:
    177 
     129    ///////////////////////////////////////////////////////////////////////////////
     130    // Byte Space Hash
     131    ///////////////////////////////////////////////////////////////////////////////
    178132    hash_table<identity_strategy_t<uint8_t,1>, hash_strategy_t<1>, ALLOCATOR> hash_table_1;
    179133    hash_table<identity_strategy_t<uint16_t,2>, hash_strategy_t<2>, ALLOCATOR> hash_table_2;
     
    183137    hash_table<identity_strategy_t<uint32_t,6>, hash_strategy_t<6>, ALLOCATOR> hash_table_6;
    184138    hash_table<identity_strategy_t<uint32_t,7>, hash_strategy_t<7>, ALLOCATOR> hash_table_7;
    185 
     139    ///////////////////////////////////////////////////////////////////////////////
     140    // Bit Space Hash
     141    ///////////////////////////////////////////////////////////////////////////////
    186142    hash_table<identity_strategy_t<uint64_t,8>, hash_strategy_t<8>, ALLOCATOR> hash_table_8;
    187143    hash_table<identity_strategy_t<uint64_t,9>, hash_strategy_d, ALLOCATOR> hash_table_9;
     
    193149    hash_table<identity_strategy_t<uint64_t,15>, hash_strategy_d, ALLOCATOR> hash_table_15;
    194150    hash_table<identity_strategy_t<BitBlock,16>, hash_strategy_d, ALLOCATOR> hash_table_16;
    195 
    196151    hash_table<identity_strategy_d, hash_strategy_d, ALLOCATOR> hash_table_gte_17;
    197152
    198153};
    199154
     155/* NOTE: C++ template code and Pablo generated length groups must coincide. */
     156
     157// Fixed Lengths - REVERSE SCAN LOGIC - Scan each BLOCK MSB to LSB (high to low memory address)
     158template<class HASH_TABLE>
     159IDISA_ALWAYS_INLINE void do_block(HASH_TABLE & h_table, BitBlock ends, uint8_t buffer [], const uint32_t lgth,
     160                                  uint8_t h0 [], uint8_t h1 [], const uint32_t h_lgth, const uint32_t h_block_size) {
     161
     162    int32_t spos;
     163    ReverseScanner<BitBlock, scanword_t> rscanner(&ends);
     164
     165    rscanner.scan_to_next();
     166    spos = (rscanner.get_pos() - lgth);
     167
     168    while(!rscanner.is_done() && (spos >= 0)) {
     169
     170        h_table.lookup_or_insert(buffer, spos, lgth,
     171                                 h0, h1, h_lgth);
     172
     173#ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG
     174        print_symbol_debug(buffer, spos, rscanner.get_pos(), lgth);
     175#endif
     176        rscanner.scan_to_next();
     177        spos = (rscanner.get_pos() - lgth);
     178    }
     179
     180    if(!rscanner.is_done() && (spos < 0)) { // block boundary case
     181
     182        assert(lgth <= (LOOKBACK_SIZE+BLOCK_SIZE)); // segment boundary
     183        if(lgth > (LOOKBACK_SIZE+BLOCK_SIZE)) {
     184            cerr << "Fatal Error.";
     185            cerr << " Symbol length exceeds " << (LOOKBACK_SIZE+BLOCK_SIZE) << " bytes.";
     186            cerr << " Symbol tail : ";
     187            cerr << string((char *)&(buffer[rscanner.get_pos()-(LOOKBACK_SIZE+BLOCK_SIZE)]), LOOKBACK_SIZE+BLOCK_SIZE) << endl;
     188            abort();
     189        }
     190
     191        uint8_t * lb_buffer = buffer - ((lgth / BLOCK_SIZE) + 1)*BLOCK_SIZE;
     192        int32_t lb_spos = BLOCK_SIZE - ((-1*spos) & (BLOCK_SIZE-1));
     193
     194        uint8_t * lb_h0 = h0 - ((lgth / BLOCK_SIZE) + 1)*h_block_size;
     195        uint8_t * lb_h1 = h1 - ((lgth / BLOCK_SIZE) + 1)*h_block_size;
     196
     197        h_table.lookup_or_insert(lb_buffer, lb_spos, lgth,
     198                                 lb_h0, lb_h1, h_lgth);
     199
     200#ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG
     201        print_symbol_debug(buffer, spos, rscanner.get_pos(), lgth);
     202#endif
     203
     204    }
     205}
     206
     207// Variable Lengths - REVERSE SCAN LOGIC - Scan each BLOCK MSB to LSB (high to low memory address)
     208template<class HASH_TABLE>
     209IDISA_ALWAYS_INLINE void do_block(HASH_TABLE & h_table, BitBlock starts [], BitBlock ends [], uint8_t buffer [],
     210                                  uint8_t h0 [], uint8_t h1 [], const uint32_t h_block_size) {
     211
     212    uint32_t lgth;
     213    ReverseScanner<BitBlock, scanword_t> ends_rscanner(ends);
     214    ReverseScanner<BitBlock, scanword_t> starts_rscanner(starts);
     215
     216    ends_rscanner.scan_to_next();
     217    starts_rscanner.scan_to_next();
     218
     219    while(!starts_rscanner.is_done() && (starts_rscanner.get_pos() > ends_rscanner.get_pos())) { // synchronize
     220        starts_rscanner.scan_to_next();
     221    }
     222
     223    while((!ends_rscanner.is_done()) && (!starts_rscanner.is_done())) {
     224        lgth = ends_rscanner.get_pos() - starts_rscanner.get_pos();
     225        h_table.lookup_or_insert(buffer, starts_rscanner.get_pos(), lgth, h0, h1, lgth);
     226
     227#ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG
     228    print_symbol_debug(buffer, starts_rscanner.get_pos(), ends_rscanner.get_pos(), lgth);
     229#endif
     230
     231        ends_rscanner.scan_to_next();
     232        starts_rscanner.scan_to_next();
     233    }
     234
     235    if((!ends_rscanner.is_done()) && (starts_rscanner.is_done())) { // block boundary case, previous block
     236
     237        uint32_t lb_blk = 0;
     238        while(1) {
     239            starts_rscanner.init(--starts);
     240            starts_rscanner.scan_to_next();
     241
     242            if(!starts_rscanner.is_done()) {
     243                break;
     244            }
     245
     246            if (lb_blk > LOOKBACK_BLOCKS) {
     247                cerr << lb_blk << endl;
     248                cerr << "Fatal Error.";
     249                cerr << " Symbol length exceeds " << (LOOKBACK_SIZE+BLOCK_SIZE) << " bytes." << endl;
     250                cerr << " Symbol tail : ";
     251                cerr << string((char *)&(buffer[starts_rscanner.get_pos()-(LOOKBACK_SIZE+BLOCK_SIZE)]), LOOKBACK_SIZE+BLOCK_SIZE) << endl;
     252                abort();
     253            }
     254            lb_blk++;
     255        }
     256
     257        lgth = ends_rscanner.get_pos() + (BLOCK_SIZE - starts_rscanner.get_pos()) + lb_blk * BLOCK_SIZE;
     258
     259        uint8_t * lb_buffer = buffer - ((lgth / BLOCK_SIZE) + 1)*BLOCK_SIZE;
     260        int32_t lb_spos = BLOCK_SIZE - ((-1*starts_rscanner.get_pos()) & (BLOCK_SIZE-1));
     261
     262        uint8_t * lb_h0 = h0 - ((lgth / BLOCK_SIZE) + 1)*h_block_size;
     263        uint8_t * lb_h1 = h1 - ((lgth / BLOCK_SIZE) + 1)*h_block_size;
     264
     265        h_table.lookup_or_insert(lb_buffer, lb_spos, lgth, lb_h0, lb_h1, lgth);
     266
     267#ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG
     268        print_symbol_debug(lb_buffer, lb_spos, ends_rscanner.get_pos(), lgth);
     269#endif
     270
     271    }
     272}
     273
     274// Fixed Lengths - FORWARD SCAN LOGIC - Scan each BLOCK LSB to MSB (low to high memory address)
     275//template<class HASH_TABLE>
     276//IDISA_ALWAYS_INLINE void do_block(HASH_TABLE & h_table, BitBlock ends, uint8_t buffer [], const uint32_t lgth,
     277//                                uint8_t h0 [], uint8_t h1 [], const uint32_t h_lgth, const uint32_t h_block_size) {
     278
     279//    int32_t spos;
     280//    int32_t epos;
     281//    ForwardScanner<BitBlock, scanword_t> fscanner(&ends);
     282
     283//    epos = fscanner.scan_to_next();
     284//    spos = (epos - lgth);
     285
     286//    if((epos != -1) && (spos < 0)) { // block boundary case
     287
     288//      assert(lgth<=LOOKBACK_SIZE); // segment boundary
     289
     290//      uint8_t * lb_buffer = buffer - ((lgth / BLOCK_SIZE) + 1)*BLOCK_SIZE;
     291//      int32_t lb_spos = BLOCK_SIZE - ((-1*spos) & (BLOCK_SIZE-1));
     292
     293//      uint8_t * lb_h0 = h0 - ((lgth / BLOCK_SIZE) + 1)*h_block_size;
     294//      uint8_t * lb_h1 = h1 - ((lgth / BLOCK_SIZE) + 1)*h_block_size;
     295
     296//      h_table.lookup_or_insert(lb_buffer, lb_spos, lgth,
     297//                               lb_h0, lb_h1, h_lgth);
     298
     299//#ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG
     300//      cout << "Symbol(";
     301//      cout << "End:" << epos;
     302//      cout << ",Start:" << lb_spos;
     303//      cout << ",Text:'" << string((char *)&(lb_buffer[lb_spos]), lgth);
     304//      cout << "')" << endl;
     305//#endif
     306//      epos = fscanner.scan_to_next();
     307//      spos = (epos - lgth);
     308
     309//    }
     310
     311//    while(epos != -1) {
     312
     313//      h_table.lookup_or_insert(buffer, spos, lgth,
     314//                               h0, h1, h_lgth);
     315
     316//#ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG
     317//      cout << "Symbol(";
     318//      cout << "End:" << epos;
     319//      cout << ",Start:" << spos;
     320//      cout << ",Text:'" << string((char *)&(buffer[spos]), lgth);
     321//      cout << "')" << endl;
     322//#endif
     323//      epos = fscanner.scan_to_next();
     324//      spos = (epos - lgth);
     325
     326//    }
     327//}
     328
    200329#endif // ID_SYMBOL_TABLE_TEMPLATE_HPP
    201330
    202 /*
    203 template<class SYMBOL, class ALLOCATOR>
    204 class div2: public symbol_table_strategy<SYMBOL, ALLOCATOR> {
    205 
    206 public:
    207     div2() {}
    208     ~div2() {}
    209 
    210     void resolve(BitBlock * buffer, BitBlock * starts, BitBlock * ends, BitBlock * hash_basis_0, BitBlock * hash_basis_1, uint32_t segment_size, AoS_symbol * symbol_ary, const uint32_t symbol_ary_size) {
    211         cout << "div2 Strategy" << endl;
    212     }
    213     //void resolve(uint8_t * segment, uint32_t segment_blocks, BitBlock * starts, BitBlock * ends, SoA_symbol & symbols);
    214 };
    215 */
    216 /*
    217 template<class SYMBOL, class ALLOCATOR>
    218 class log2: public symbol_table_strategy<SYMBOL, ALLOCATOR> {
    219 
    220 public:
    221     log2() {}
    222     ~log2() {}
    223 
    224     void resolve(BitBlock * buffer, BitBlock * starts, BitBlock * ends, BitBlock * hash_basis_0, BitBlock * hash_basis_1, uint32_t segment_size, AoS_symbol * symbol_ary, const uint32_t symbol_ary_size) {
    225         cout << "log2 Strategy" << endl;
    226     }
    227     //void resolve(uint8_t * segment, uint32_t segment_blocks, BitBlock * starts, BitBlock * ends, SoA_symbol & symbols);
    228 };
    229 */
    230 
     331
  • trunk/symbol_table/src/symbol_table.hpp

    r1967 r1979  
    3131class symbol_table {
    3232public:
    33     // TODO - Update Interface
    34     //void resolve(uint8_t * raw_buffer, Markers * markers, Groups * groups, const Hash * hash, uint32_t blocks, AoS_symbol * symbol_ary, const uint32_t symbol_ary_size);
    35     //void resolve(uint8_t * segment, uint32_t segment_blocks, BitBlock * starts, BitBlock * ends, BitBlock * hash_values, SoA_symbol & symbols);
     33    void resolve(uint8_t buffer [], Groups groups [],  BitBlock starts [], BitBlock ends_gte_17 [],
     34                 BitBlock h0 [], BitBlock h1 [], uint32_t blocks, AoS_symbol aos [], const uint32_t symbols);
     35
     36    void resolve(uint8_t buffer [], Groups groups [],  BitBlock starts [], BitBlock ends_gte_17 [],
     37                 BitBlock h0 [], BitBlock h1 [], uint32_t blocks, SoA_symbol & soa, const uint32_t symbols);
    3638
    3739protected:
     
    4042};
    4143
     44template<class HASH_TABLE>
     45IDISA_ALWAYS_INLINE void do_block(HASH_TABLE & h_table, BitBlock ends, uint8_t buffer [], const uint32_t lgth,
     46                                  uint8_t h0 [], uint8_t h1 [], const uint32_t h_lgth, const uint32_t h_block_size);
     47
     48template<class HASH_TABLE>
     49IDISA_ALWAYS_INLINE void do_block(HASH_TABLE & h_table, BitBlock starts [], BitBlock ends [], uint8_t buffer [],
     50                                  uint8_t h0 [], uint8_t h1 [], const uint32_t h_block_size);
     51
    4252#endif // SYMBOL_TABLE_HPP
    4353
     54/*
     55template<class SYMBOL, class ALLOCATOR>
     56class div2: public symbol_table_strategy<SYMBOL, ALLOCATOR> {
    4457
     58public:
     59    div2() {}
     60    ~div2() {}
    4561
     62    void resolve(...) {
     63        cout << "div2 Strategy" << endl;
     64    }
     65    //void resolve(uint8_t * segment, uint32_t segment_blocks, BitBlock * starts, BitBlock * ends, SoA_symbol & symbols);
     66};
     67*/
     68
     69/*
     70template<class SYMBOL, class ALLOCATOR>
     71class log2: public symbol_table_strategy<SYMBOL, ALLOCATOR> {
     72
     73public:
     74    log2() {}
     75    ~log2() {}
     76
     77    void resolve(...) {
     78        cout << "log2 Strategy" << endl;
     79    }
     80    //void resolve(uint8_t * segment, uint32_t segment_blocks, BitBlock * starts, BitBlock * ends, SoA_symbol & symbols);
     81};
     82*/
     83
Note: See TracChangeset for help on using the changeset viewer.