Changeset 1979

Show
Ignore:
Timestamp:
03/27/12 20:19:26 (15 months ago)
Author:
ksherdy
Message:

Added arbitrary length symbol resolution support.

Location:
trunk/symbol_table/src
Files:
1 added
4 modified

Legend:

Unmodified
Added
Removed
  • trunk/symbol_table/src/Makefile

    r1968 r1979  
    1313endif 
    1414 
    15 all: basis_bits.hpp byte_pool.hpp  hash_strms.hpp  hash_table.hpp  id_group_strms.hpp  id_symbol_table.hpp  main.cpp  Makefile  marker_strms.hpp  symbol_table.hpp  transpose.hpp 
     15all: basis_bits.hpp buffer.hpp byte_pool.hpp  hash_strms.hpp  hash_table.hpp  id_group_strms.hpp  id_symbol_table.hpp  main.cpp  Makefile  marker_strms.hpp  symbol_table.hpp  transpose.hpp 
    1616        $(CC) -o main main.cpp $(AFLAGS) -DID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG #-DHASH_TABLE_HPP_DEBUG -DBUFFER_PROFILING # 
    1717 
  • trunk/symbol_table/src/hash_table.hpp

    r1967 r1979  
    124124        cout << "Idx: " << idx << ", "; 
    125125        cout << "GID: " << gid << ", "; 
    126         cout << "Val: '" << string((char *) &raw_bytes[idx],0,raw_byte_lgth) << "', "; 
    127         cout << "Bkt: " << bucket << endl; 
     126        cout << "Value: '" << string((char *) &raw_bytes[idx],0,raw_byte_lgth) << "', "; 
     127        cout << "Bucket: "      << bucket << endl; 
    128128#endif 
    129129 
     
    154154            while(crt != NULL) { 
    155155                cout << "->"    << "(GID:" << crt->gid 
    156                                 << ", Val:'" << string((char *)&crt->raw_bytes[0], crt->raw_bytes_lgth) 
    157                                 << "', Len:" << crt->raw_bytes_lgth << ")"; 
     156                                << ", Lgth:" << crt->raw_bytes_lgth 
     157                                << ", Value:'" << string((char *)&crt->raw_bytes[0], crt->raw_bytes_lgth) << "')"; 
    158158                crt = crt->next; 
    159159            } 
  • trunk/symbol_table/src/id_symbol_table.hpp

    r1968 r1979  
    1515 
    1616//#define ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG 
     17#ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG 
     18static void print_symbol_debug(const uint8_t buffer [], const uint32_t spos, const uint32_t epos, const uint32_t lgth) { 
     19    cout << "Symbol("; 
     20    cout << "Start: " << spos; 
     21    cout << ",End: " << epos; 
     22    cout << ",Lgth: " << lgth; 
     23    cout << ",Value:'" << string((char *)&(buffer[spos]), lgth); 
     24    cout << "')" << endl; 
     25} 
     26#endif 
    1727 
    1828#include "symbol_table.hpp" 
    1929#include "hash_table.hpp" 
     30#include "buffer.hpp" 
    2031#include "../lib/carryQ.hpp" 
    2132#include "../lib/bitblock_iterator.hpp" 
    2233#include "../lib/bitblock_scan.hpp" 
    23  
    24 /* NOTE: C++ template code and Pablo generated length groups must coincide. */ 
    25  
    26 template<class HASH_TABLE> 
    27 IDISA_ALWAYS_INLINE void do_block(HASH_TABLE & h_table, BitBlock ends, uint8_t buffer [], const uint32_t lgth, 
    28                                   uint8_t h0 [], uint8_t h1 [], const uint32_t h_lgth, const uint32_t h_block_size) { 
    29  
    30     int32_t spos; 
    31     int32_t epos; 
    32     ForwardScanner<BitBlock, scanword_t> scanner(&ends); 
    33     scanner.scan_to_next(); 
    34     epos = scanner.get_pos(); 
    35     spos = (epos - lgth); 
    36  
    37     if((epos != -1) && (spos < 0)) { // block boundary case 
    38  
    39         assert(lgth<=BLOCK_SIZE); // segment boundary 
    40  
    41         uint8_t * lb_buffer = buffer - ((lgth / BLOCK_SIZE) + 1)*BLOCK_SIZE; 
    42         int32_t lb_spos = BLOCK_SIZE - ((-1*spos) & (BLOCK_SIZE-1)); 
    43  
    44         uint8_t * lb_h0 = h0 - ((lgth / BLOCK_SIZE) + 1)*h_block_size; 
    45         uint8_t * lb_h1 = h1 - ((lgth / BLOCK_SIZE) + 1)*h_block_size; 
    46  
    47         h_table.lookup_or_insert(lb_buffer, lb_spos, lgth, 
    48                                  lb_h0, lb_h1, h_lgth); 
    49  
    50 #ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG 
    51         cout << "Symbol("; 
    52         cout << "End:" << epos; 
    53         cout << ",Start:" << lb_spos; 
    54         cout << ",Text:'" << string((char *)&(lb_buffer[lb_spos]), lgth); 
    55         cout << "')" << endl; 
    56 #endif 
    57         epos = scanner.scan_to_next(); 
    58         spos = (epos - lgth); 
    59  
    60     } 
    61  
    62     while(epos != -1) { 
    63  
    64         h_table.lookup_or_insert(buffer, spos, lgth, 
    65                                  h0, h1, h_lgth); 
    66  
    67 #ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG 
    68         cout << "Symbol("; 
    69         cout << "End:" << epos; 
    70         cout << ",Start:" << spos; 
    71         cout << ",Text:'" << string((char *)&(buffer[spos]), lgth); 
    72         cout << "')" << endl; 
    73 #endif 
    74         epos = scanner.scan_to_next(); 
    75         spos = (epos - lgth); 
    76  
    77     } 
    78 } 
     34#include <cstdlib> 
    7935 
    8036template<class ALLOCATOR> 
    81 class id_symbol_table : public symbol_table { 
     37class id_symbol_table:public symbol_table { 
    8238public: 
    8339    id_symbol_table()/*:hash_table_1(256)*/{} 
     
    10561 
    10662    // Groups & groups 
    107     void resolve(uint8_t buffer [], Groups groups [], BitBlock h0 [], BitBlock h1 [], uint32_t blocks, AoS_symbol symbol_ary [], const uint32_t symbols) { 
     63    void resolve(uint8_t buffer [], Groups groups [],  BitBlock starts [], BitBlock ends_gte_17 [], 
     64                 BitBlock h0 [], BitBlock h1 [], uint32_t blocks, AoS_symbol aos [], const uint32_t symbols) { 
    10865 
    10966        for(uint32_t blk=0;blk<blocks;blk++) { 
     
    163120        } 
    164121        if(bitblock::any(groups[blk].ends_gte_17)) { 
    165         do_block<hash_table<identity_strategy_d, hash_strategy_d, ALLOCATOR> >(hash_table_gte_17, groups[blk].ends_gte_17, &buffer[blk*BLOCK_SIZE], 17, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 17, BLOCK_SIZE/8); 
    166         } 
    167  
    168  
    169  
    170 //      do_segment<hash_table<identity_strategy_d, hash_strategy_d, ALLOCATOR> >(hgte_17, starts, groups.ends_gte_17, segment_size); 
    171  
    172         } 
    173     } 
    174  
    175     // void resolve(uint8_t * segment, uint32_t segment_blocks, BitBlock * starts, BitBlock * ends, BitBlock * hash_values, SoA_symbol & symbols) {} 
     122        do_block<hash_table<identity_strategy_d, hash_strategy_d, ALLOCATOR> >(hash_table_gte_17, &starts[blk], &ends_gte_17[blk], &buffer[blk*BLOCK_SIZE], (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], BLOCK_SIZE/8); 
     123        } 
     124 
     125        } 
     126    } 
     127 
    176128private: 
    177  
     129    /////////////////////////////////////////////////////////////////////////////// 
     130    // Byte Space Hash 
     131    /////////////////////////////////////////////////////////////////////////////// 
    178132    hash_table<identity_strategy_t<uint8_t,1>, hash_strategy_t<1>, ALLOCATOR> hash_table_1; 
    179133    hash_table<identity_strategy_t<uint16_t,2>, hash_strategy_t<2>, ALLOCATOR> hash_table_2; 
     
    183137    hash_table<identity_strategy_t<uint32_t,6>, hash_strategy_t<6>, ALLOCATOR> hash_table_6; 
    184138    hash_table<identity_strategy_t<uint32_t,7>, hash_strategy_t<7>, ALLOCATOR> hash_table_7; 
    185  
     139    /////////////////////////////////////////////////////////////////////////////// 
     140    // Bit Space Hash 
     141    /////////////////////////////////////////////////////////////////////////////// 
    186142    hash_table<identity_strategy_t<uint64_t,8>, hash_strategy_t<8>, ALLOCATOR> hash_table_8; 
    187143    hash_table<identity_strategy_t<uint64_t,9>, hash_strategy_d, ALLOCATOR> hash_table_9; 
     
    193149    hash_table<identity_strategy_t<uint64_t,15>, hash_strategy_d, ALLOCATOR> hash_table_15; 
    194150    hash_table<identity_strategy_t<BitBlock,16>, hash_strategy_d, ALLOCATOR> hash_table_16; 
    195  
    196151    hash_table<identity_strategy_d, hash_strategy_d, ALLOCATOR> hash_table_gte_17; 
    197152 
    198153}; 
    199154 
     155/* NOTE: C++ template code and Pablo generated length groups must coincide. */ 
     156 
     157// Fixed Lengths - REVERSE SCAN LOGIC - Scan each BLOCK MSB to LSB (high to low memory address) 
     158template<class HASH_TABLE> 
     159IDISA_ALWAYS_INLINE void do_block(HASH_TABLE & h_table, BitBlock ends, uint8_t buffer [], const uint32_t lgth, 
     160                                  uint8_t h0 [], uint8_t h1 [], const uint32_t h_lgth, const uint32_t h_block_size) { 
     161 
     162    int32_t spos; 
     163    ReverseScanner<BitBlock, scanword_t> rscanner(&ends); 
     164 
     165    rscanner.scan_to_next(); 
     166    spos = (rscanner.get_pos() - lgth); 
     167 
     168    while(!rscanner.is_done() && (spos >= 0)) { 
     169 
     170        h_table.lookup_or_insert(buffer, spos, lgth, 
     171                                 h0, h1, h_lgth); 
     172 
     173#ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG 
     174        print_symbol_debug(buffer, spos, rscanner.get_pos(), lgth); 
     175#endif 
     176        rscanner.scan_to_next(); 
     177        spos = (rscanner.get_pos() - lgth); 
     178    } 
     179 
     180    if(!rscanner.is_done() && (spos < 0)) { // block boundary case 
     181 
     182        assert(lgth <= (LOOKBACK_SIZE+BLOCK_SIZE)); // segment boundary 
     183        if(lgth > (LOOKBACK_SIZE+BLOCK_SIZE)) { 
     184            cerr << "Fatal Error."; 
     185            cerr << " Symbol length exceeds " << (LOOKBACK_SIZE+BLOCK_SIZE) << " bytes."; 
     186            cerr << " Symbol tail : "; 
     187            cerr << string((char *)&(buffer[rscanner.get_pos()-(LOOKBACK_SIZE+BLOCK_SIZE)]), LOOKBACK_SIZE+BLOCK_SIZE) << endl; 
     188            abort(); 
     189        } 
     190 
     191        uint8_t * lb_buffer = buffer - ((lgth / BLOCK_SIZE) + 1)*BLOCK_SIZE; 
     192        int32_t lb_spos = BLOCK_SIZE - ((-1*spos) & (BLOCK_SIZE-1)); 
     193 
     194        uint8_t * lb_h0 = h0 - ((lgth / BLOCK_SIZE) + 1)*h_block_size; 
     195        uint8_t * lb_h1 = h1 - ((lgth / BLOCK_SIZE) + 1)*h_block_size; 
     196 
     197        h_table.lookup_or_insert(lb_buffer, lb_spos, lgth, 
     198                                 lb_h0, lb_h1, h_lgth); 
     199 
     200#ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG 
     201        print_symbol_debug(buffer, spos, rscanner.get_pos(), lgth); 
     202#endif 
     203 
     204    } 
     205} 
     206 
     207// Variable Lengths - REVERSE SCAN LOGIC - Scan each BLOCK MSB to LSB (high to low memory address) 
     208template<class HASH_TABLE> 
     209IDISA_ALWAYS_INLINE void do_block(HASH_TABLE & h_table, BitBlock starts [], BitBlock ends [], uint8_t buffer [], 
     210                                  uint8_t h0 [], uint8_t h1 [], const uint32_t h_block_size) { 
     211 
     212    uint32_t lgth; 
     213    ReverseScanner<BitBlock, scanword_t> ends_rscanner(ends); 
     214    ReverseScanner<BitBlock, scanword_t> starts_rscanner(starts); 
     215 
     216    ends_rscanner.scan_to_next(); 
     217    starts_rscanner.scan_to_next(); 
     218 
     219    while(!starts_rscanner.is_done() && (starts_rscanner.get_pos() > ends_rscanner.get_pos())) { // synchronize 
     220        starts_rscanner.scan_to_next(); 
     221    } 
     222 
     223    while((!ends_rscanner.is_done()) && (!starts_rscanner.is_done())) { 
     224        lgth = ends_rscanner.get_pos() - starts_rscanner.get_pos(); 
     225        h_table.lookup_or_insert(buffer, starts_rscanner.get_pos(), lgth, h0, h1, lgth); 
     226 
     227#ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG 
     228    print_symbol_debug(buffer, starts_rscanner.get_pos(), ends_rscanner.get_pos(), lgth); 
     229#endif 
     230 
     231        ends_rscanner.scan_to_next(); 
     232        starts_rscanner.scan_to_next(); 
     233    } 
     234 
     235    if((!ends_rscanner.is_done()) && (starts_rscanner.is_done())) { // block boundary case, previous block 
     236 
     237        uint32_t lb_blk = 0; 
     238        while(1) { 
     239            starts_rscanner.init(--starts); 
     240            starts_rscanner.scan_to_next(); 
     241 
     242            if(!starts_rscanner.is_done()) { 
     243                break; 
     244            } 
     245 
     246            if (lb_blk > LOOKBACK_BLOCKS) { 
     247                cerr << lb_blk << endl; 
     248                cerr << "Fatal Error."; 
     249                cerr << " Symbol length exceeds " << (LOOKBACK_SIZE+BLOCK_SIZE) << " bytes." << endl; 
     250                cerr << " Symbol tail : "; 
     251                cerr << string((char *)&(buffer[starts_rscanner.get_pos()-(LOOKBACK_SIZE+BLOCK_SIZE)]), LOOKBACK_SIZE+BLOCK_SIZE) << endl; 
     252                abort(); 
     253            } 
     254            lb_blk++; 
     255        } 
     256 
     257        lgth = ends_rscanner.get_pos() + (BLOCK_SIZE - starts_rscanner.get_pos()) + lb_blk * BLOCK_SIZE; 
     258 
     259        uint8_t * lb_buffer = buffer - ((lgth / BLOCK_SIZE) + 1)*BLOCK_SIZE; 
     260        int32_t lb_spos = BLOCK_SIZE - ((-1*starts_rscanner.get_pos()) & (BLOCK_SIZE-1)); 
     261 
     262        uint8_t * lb_h0 = h0 - ((lgth / BLOCK_SIZE) + 1)*h_block_size; 
     263        uint8_t * lb_h1 = h1 - ((lgth / BLOCK_SIZE) + 1)*h_block_size; 
     264 
     265        h_table.lookup_or_insert(lb_buffer, lb_spos, lgth, lb_h0, lb_h1, lgth); 
     266 
     267#ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG 
     268        print_symbol_debug(lb_buffer, lb_spos, ends_rscanner.get_pos(), lgth); 
     269#endif 
     270 
     271    } 
     272} 
     273 
     274// Fixed Lengths - FORWARD SCAN LOGIC - Scan each BLOCK LSB to MSB (low to high memory address) 
     275//template<class HASH_TABLE> 
     276//IDISA_ALWAYS_INLINE void do_block(HASH_TABLE & h_table, BitBlock ends, uint8_t buffer [], const uint32_t lgth, 
     277//                                uint8_t h0 [], uint8_t h1 [], const uint32_t h_lgth, const uint32_t h_block_size) { 
     278 
     279//    int32_t spos; 
     280//    int32_t epos; 
     281//    ForwardScanner<BitBlock, scanword_t> fscanner(&ends); 
     282 
     283//    epos = fscanner.scan_to_next(); 
     284//    spos = (epos - lgth); 
     285 
     286//    if((epos != -1) && (spos < 0)) { // block boundary case 
     287 
     288//      assert(lgth<=LOOKBACK_SIZE); // segment boundary 
     289 
     290//      uint8_t * lb_buffer = buffer - ((lgth / BLOCK_SIZE) + 1)*BLOCK_SIZE; 
     291//      int32_t lb_spos = BLOCK_SIZE - ((-1*spos) & (BLOCK_SIZE-1)); 
     292 
     293//      uint8_t * lb_h0 = h0 - ((lgth / BLOCK_SIZE) + 1)*h_block_size; 
     294//      uint8_t * lb_h1 = h1 - ((lgth / BLOCK_SIZE) + 1)*h_block_size; 
     295 
     296//      h_table.lookup_or_insert(lb_buffer, lb_spos, lgth, 
     297//                               lb_h0, lb_h1, h_lgth); 
     298 
     299//#ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG 
     300//      cout << "Symbol("; 
     301//      cout << "End:" << epos; 
     302//      cout << ",Start:" << lb_spos; 
     303//      cout << ",Text:'" << string((char *)&(lb_buffer[lb_spos]), lgth); 
     304//      cout << "')" << endl; 
     305//#endif 
     306//      epos = fscanner.scan_to_next(); 
     307//      spos = (epos - lgth); 
     308 
     309//    } 
     310 
     311//    while(epos != -1) { 
     312 
     313//      h_table.lookup_or_insert(buffer, spos, lgth, 
     314//                               h0, h1, h_lgth); 
     315 
     316//#ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG 
     317//      cout << "Symbol("; 
     318//      cout << "End:" << epos; 
     319//      cout << ",Start:" << spos; 
     320//      cout << ",Text:'" << string((char *)&(buffer[spos]), lgth); 
     321//      cout << "')" << endl; 
     322//#endif 
     323//      epos = fscanner.scan_to_next(); 
     324//      spos = (epos - lgth); 
     325 
     326//    } 
     327//} 
     328 
    200329#endif // ID_SYMBOL_TABLE_TEMPLATE_HPP 
    201330 
    202 /* 
    203 template<class SYMBOL, class ALLOCATOR> 
    204 class div2: public symbol_table_strategy<SYMBOL, ALLOCATOR> { 
    205  
    206 public: 
    207     div2() {} 
    208     ~div2() {} 
    209  
    210     void resolve(BitBlock * buffer, BitBlock * starts, BitBlock * ends, BitBlock * hash_basis_0, BitBlock * hash_basis_1, uint32_t segment_size, AoS_symbol * symbol_ary, const uint32_t symbol_ary_size) { 
    211         cout << "div2 Strategy" << endl; 
    212     } 
    213     //void resolve(uint8_t * segment, uint32_t segment_blocks, BitBlock * starts, BitBlock * ends, SoA_symbol & symbols); 
    214 }; 
    215 */ 
    216 /* 
    217 template<class SYMBOL, class ALLOCATOR> 
    218 class log2: public symbol_table_strategy<SYMBOL, ALLOCATOR> { 
    219  
    220 public: 
    221     log2() {} 
    222     ~log2() {} 
    223  
    224     void resolve(BitBlock * buffer, BitBlock * starts, BitBlock * ends, BitBlock * hash_basis_0, BitBlock * hash_basis_1, uint32_t segment_size, AoS_symbol * symbol_ary, const uint32_t symbol_ary_size) { 
    225         cout << "log2 Strategy" << endl; 
    226     } 
    227     //void resolve(uint8_t * segment, uint32_t segment_blocks, BitBlock * starts, BitBlock * ends, SoA_symbol & symbols); 
    228 }; 
    229 */ 
    230  
     331 
  • trunk/symbol_table/src/symbol_table.hpp

    r1967 r1979  
    3131class symbol_table { 
    3232public: 
    33     // TODO - Update Interface 
    34     //void resolve(uint8_t * raw_buffer, Markers * markers, Groups * groups, const Hash * hash, uint32_t blocks, AoS_symbol * symbol_ary, const uint32_t symbol_ary_size); 
    35     //void resolve(uint8_t * segment, uint32_t segment_blocks, BitBlock * starts, BitBlock * ends, BitBlock * hash_values, SoA_symbol & symbols); 
     33    void resolve(uint8_t buffer [], Groups groups [],  BitBlock starts [], BitBlock ends_gte_17 [], 
     34                 BitBlock h0 [], BitBlock h1 [], uint32_t blocks, AoS_symbol aos [], const uint32_t symbols); 
     35 
     36    void resolve(uint8_t buffer [], Groups groups [],  BitBlock starts [], BitBlock ends_gte_17 [], 
     37                 BitBlock h0 [], BitBlock h1 [], uint32_t blocks, SoA_symbol & soa, const uint32_t symbols); 
    3638 
    3739protected: 
     
    4042}; 
    4143 
     44template<class HASH_TABLE> 
     45IDISA_ALWAYS_INLINE void do_block(HASH_TABLE & h_table, BitBlock ends, uint8_t buffer [], const uint32_t lgth, 
     46                                  uint8_t h0 [], uint8_t h1 [], const uint32_t h_lgth, const uint32_t h_block_size); 
     47 
     48template<class HASH_TABLE> 
     49IDISA_ALWAYS_INLINE void do_block(HASH_TABLE & h_table, BitBlock starts [], BitBlock ends [], uint8_t buffer [], 
     50                                  uint8_t h0 [], uint8_t h1 [], const uint32_t h_block_size); 
     51 
    4252#endif // SYMBOL_TABLE_HPP 
    4353 
     54/* 
     55template<class SYMBOL, class ALLOCATOR> 
     56class div2: public symbol_table_strategy<SYMBOL, ALLOCATOR> { 
    4457 
     58public: 
     59    div2() {} 
     60    ~div2() {} 
    4561 
     62    void resolve(...) { 
     63        cout << "div2 Strategy" << endl; 
     64    } 
     65    //void resolve(uint8_t * segment, uint32_t segment_blocks, BitBlock * starts, BitBlock * ends, SoA_symbol & symbols); 
     66}; 
     67*/ 
     68 
     69/* 
     70template<class SYMBOL, class ALLOCATOR> 
     71class log2: public symbol_table_strategy<SYMBOL, ALLOCATOR> { 
     72 
     73public: 
     74    log2() {} 
     75    ~log2() {} 
     76 
     77    void resolve(...) { 
     78        cout << "log2 Strategy" << endl; 
     79    } 
     80    //void resolve(uint8_t * segment, uint32_t segment_blocks, BitBlock * starts, BitBlock * ends, SoA_symbol & symbols); 
     81}; 
     82*/ 
     83