Changeset 2001

Show
Ignore:
Timestamp:
04/07/12 20:39:25 (15 months ago)
Author:
ksherdy
Message:

Significant refactor to sync with the current ICXML buffer model. Report GIDs at symbol start positions and scan forward, to support file diffs for QA.

Location:
trunk/symbol_table
Files:
6 modified

Legend:

Unmodified
Added
Removed
  • trunk/symbol_table/main_template.cpp

    r1995 r2001  
    5858#endif 
    5959 
    60  
    61 // Target symbol type must inherit from AoS_symbol 
    62 class MySymbol: public AoS_symbol 
    63 { 
    64 public: 
    65     bool param_1; 
    66 }; 
    67  
    6860int main(int argc, char * argv[]) { 
    6961 
     
    121113    BitBlock * lookback_ends_gte_17 = (BitBlock *) aligned_ends_gte_17; 
    122114    memset(lookback_ends_gte_17,0,LOOKBACK_SIZE/BLOCK_SIZE); 
    123     BitBlock * ends_gte_17 = &lookback_h1[LOOKBACK_SIZE/BLOCK_SIZE]; 
    124  
     115    BitBlock * ends_gte_17 = &lookback_ends_gte_17[LOOKBACK_SIZE/BLOCK_SIZE]; 
    125116 
    126117    // BitStreams - Without lookback 
     
    131122 
    132123    // Symbol Table 
    133     const uint32_t SYMBOL_COUNT = LOOKBACK_SIZE + SEGMENT_SIZE; 
    134     //AoS_symbol symbol_ary[SYMBOL_COUNT]; 
    135     MySymbol symbol_ary[SYMBOL_COUNT]; 
    136     id_symbol_table<MySymbol, fast_pool_allocator<1024> > symbol_table; 
     124    const uint32_t SYMBOL_COUNT = SEGMENT_SIZE; 
     125 
     126    Symbol symbols(SYMBOL_COUNT); 
     127    id_symbol_table<Symbol, fast_pool_allocator<1024> > symbol_table; 
    137128 
    138129    is.read ((char *)raw_buffer, SEGMENT_SIZE); 
     
    164155 
    165156      PERF_SEC_START(parser_timer); 
    166       symbol_table.resolve(raw_buffer, groups, starts, ends_gte_17, h0, h1, SEGMENT_BLOCKS, symbol_ary /*, SYMBOL_COUNT*/); 
     157      symbol_table.resolve(raw_buffer, groups, starts, ends_gte_17, h0, h1, SEGMENT_BLOCKS, symbols /*, SYMBOL_COUNT*/); 
    167158      PERF_SEC_END(parser_timer, SEGMENT_SIZE); 
    168159 
     
    182173 
    183174      // test 
    184       uint32_t blk_base = 0; 
    185       uint32_t idx = 0; 
    186       for(blk=0;blk<SEGMENT_BLOCKS + LOOKBACK_BLOCKS;blk++) { 
    187           blk_base = blk * BLOCK_SIZE; 
    188           ForwardScanner<BitBlock, scanword_t> fscanner(&starts[blk]); 
     175      uint32_t blk_offset; 
     176      for(int blk=0;blk<SEGMENT_BLOCKS;blk++) { 
     177          blk_offset = blk * BLOCKSIZE; 
     178          gid_type gid; 
     179          ForwardScanner<BitBlock, scanword_t> fscanner(&(groups[blk].starts)); 
     180 
    189181          fscanner.scan_to_next(); 
    190182          while(!fscanner.is_done()) { 
    191               idx = LOOKBACK_SIZE + blk_base + fscanner.get_pos(); 
    192               cout <<"[" << idx << "]" << "=" << "(" << symbol_ary[idx].gid << "," << symbol_ary[idx].lgth << ")" << endl; 
     183              gid = symbols.gids[fscanner.get_pos() + blk_offset]; 
     184  //        cout <<"Symbol[" << fscanner.get_pos() << "] = " 
     185  //                << "(gid:" << gid << ",raw:" 
     186  //                << string((char *)symbol_table.get_raw_data(gid), symbol_table.get_lgth(gid))<< ")" << endl; 
     187              cout << string((char *)symbol_table.get_raw_data(gid), symbol_table.get_lgth(gid)) << ","; 
     188 
    193189              fscanner.scan_to_next(); 
     190 
    194191          } 
    195192      } 
    196193    } 
    197  
    198194    /* Resolve Partial Segments */ 
    199195    uint32_t remaining = chars_avail; 
     
    237233 
    238234    //PERF_SEC_START(parser_timer); 
    239     symbol_table.resolve(raw_buffer, groups, starts, ends_gte_17, h0, h1, blk, symbol_ary/*, SYMBOL_COUNT*/); 
     235    symbol_table.resolve(raw_buffer, groups, starts, ends_gte_17, h0, h1, segment_size, symbols/*, SYMBOL_COUNT*/); 
    240236    //PERF_SEC_END(parser_timer, chars_avail+1); 
    241237 
    242     // test 
    243     uint32_t blk_base = 0; 
    244     uint32_t idx = 0; 
    245     for(blk=0;blk<segment_size + LOOKBACK_BLOCKS;blk++) { 
    246         blk_base = blk * BLOCK_SIZE; 
    247         ForwardScanner<BitBlock, scanword_t> fscanner(&starts[blk]); 
     238    uint32_t blk_offset; 
     239    for(int blk=0;blk<segment_size;blk++) { 
     240        blk_offset = blk * BLOCKSIZE; 
     241        gid_type gid; 
     242        ForwardScanner<BitBlock, scanword_t> fscanner(&(groups[blk].starts)); 
     243 
    248244        fscanner.scan_to_next(); 
    249245        while(!fscanner.is_done()) { 
    250             idx = LOOKBACK_SIZE + blk_base + fscanner.get_pos(); 
    251             cout <<"[" << idx << "]" << "=" << "(" << symbol_ary[idx].gid << "," << symbol_ary[idx].lgth << ")" << endl; 
     246            gid = symbols.gids[fscanner.get_pos() + blk_offset]; 
     247//          cout <<"Symbol[" << fscanner.get_pos() << "] = " 
     248//                  << "(gid:" << gid << ",raw:" 
     249//                  << string((char *)symbol_table.get_raw_data(gid), symbol_table.get_lgth(gid))<< ")" << endl; 
     250            cout << string((char *)symbol_table.get_raw_data(gid), symbol_table.get_lgth(gid)) << ","; 
     251 
    252252            fscanner.scan_to_next(); 
     253 
    253254        } 
    254255    } 
  • trunk/symbol_table/src/Makefile

    r1989 r2001  
    1414 
    1515all: basis_bits.hpp buffer.hpp byte_pool.hpp  hash_strms.hpp  hash_table.hpp  id_group_strms.hpp  id_symbol_table.hpp  main.cpp  Makefile  marker_strms.hpp  symbol_table.hpp  transpose.hpp 
    16         $(CC) -o main main.cpp $(AFLAGS) -DHASH_TABLE_HPP_DEBUG # -DBUFFER_PROFILING -DID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG 
     16        $(CC) -o main main.cpp $(AFLAGS) #-DHASH_TABLE_HPP_DEBUG # -DBUFFER_PROFILING -DID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG 
    1717 
    1818clean:  
  • trunk/symbol_table/src/buffer.hpp

    r1979 r2001  
    99#define LOOKBACK_BLOCKS 1 
    1010#define LOOKBACK_SIZE BLOCK_SIZE * LOOKBACK_BLOCKS 
    11 #define SEGMENT_BLOCKS 10 
     11#define SEGMENT_BLOCKS 100 // Starts and NO COPY BACK 
    1212#define SEGMENT_SIZE BLOCK_SIZE * (SEGMENT_BLOCKS)                                          // (bytes) a multiple of BLOCK_SIZE 
    1313#define SEGMENT_ALLOC_SIZE (LOOKBACK_SIZE + SEGMENT_SIZE + PADDING_SIZE) / sizeof(BitBlock) // (bytes) 
  • trunk/symbol_table/src/hash_table.hpp

    r1992 r2001  
    3333#include <sstream> 
    3434#include <iostream> 
     35#include <vector> 
    3536using namespace std; 
    3637 
     
    4748} node; 
    4849 
     50// TODO -   Single GID. 
     51//          For multiple GID sets refactor such that 
     52//          Hash Tables consult the parent Symbol Table for a per Symbol Table instance GID. 
    4953class gid { 
    5054public: 
     
    5458}; 
    5559 
    56 /* Global GID for all hash_tables */ 
    57 uint64_t gid::value = 1; 
     60// TODO -   Single GID data. 
     61// WARNING - No bounds checking. 
     62uint64_t gid::value = 0; 
     63 
     64class gid_data { // Static-only store of (byte pointer, byte length) records; one shared vector for the whole program.
     65public: 
     66    // Appends one record. Only the pointer is stored; the bytes themselves are not copied here.
     67    static void add_data(uint8_t * raw_bytes, uint32_t raw_bytes_lgth) { 
     68        data next; 
     69        next.raw_bytes = raw_bytes; 
     70        next.raw_bytes_lgth = raw_bytes_lgth; 
     71        values.push_back(next); 
     72    } 
     73    // Number of records appended so far (valid indices are 0 .. max()-1).
     74    static size_t max() { return values.size(); } 
     75    // Pointer stored in record idx; vector::at throws std::out_of_range for an invalid idx.
     76    static uint8_t * get_raw_bytes(size_t idx) { 
     77        return values.at(idx).raw_bytes; 
     78    } 
     79    // Byte length stored in record idx; vector::at throws std::out_of_range for an invalid idx.
     80    static uint32_t get_bytes_lgth(size_t idx) { 
     81        return values.at(idx).raw_bytes_lgth; 
     82    } 
     83    // NOTE(review): callers presumably pass a GID as idx; verify that add_data calls stay in step with GID assignment.
     84private: 
     85    typedef struct data { // One record: where the bytes live and how many there are.
     86        uint8_t * raw_bytes; 
     87        uint32_t raw_bytes_lgth; 
     88    } data; 
     89    // Backing storage, in insertion order; declared here and defined once at namespace scope.
     90    static vector<data> values; 
     91}; 
     92 
     93/* Global GID data for all hash tables. */ 
     94vector<gid_data::data> gid_data::values; 
    5895 
    5996template<class COMPARE_STRATEGY, class HASH_STRATEGY, class ALLOCATOR> 
    6097class hash_table { 
     98 
    6199public: 
    62100 
     
    127165        uint64_t x1 = bit_slice(h1, idx, hash_bit_lgth); 
    128166 
     167        uint8_t * data_pool_raw_bytes = raw_data_pool.insert(&raw_bytes[idx],raw_byte_lgth); // persist 
     168 
    129169        insert( bucket, 
    130                 raw_data_pool.insert(&raw_bytes[idx],raw_byte_lgth), 
     170                data_pool_raw_bytes, 
    131171                raw_byte_lgth, 
    132172                raw_data_pool.insert((uint8_t *)&x0, bits2bytes(hash_bit_lgth)), 
     
    147187            elements++; 
    148188        #endif 
     189 
     190        gid_data::add_data(data_pool_raw_bytes,raw_byte_lgth); 
    149191 
    150192        return gid; 
  • trunk/symbol_table/src/id_symbol_table.hpp

    r1995 r2001  
    2727 
    2828#include "symbol_table.hpp" 
    29 #include "hash_table.hpp" 
    3029#include "buffer.hpp" 
    3130#include "../lib/carryQ.hpp" 
     
    8180    // Groups & groups 
    8281    void resolve(uint8_t buffer [], Groups groups [],  BitBlock starts [], BitBlock ends_gte_17 [], 
    83                  BitBlock h0 [], BitBlock h1 [], uint32_t blocks, SYMBOL * aos/*, const uint32_t symbols*/) { 
    84  
    85  
    86         uint32_t blk_base; 
     82                 BitBlock h0 [], BitBlock h1 [], uint32_t blocks, SYMBOL & symbols) { 
     83 
     84        uint32_t blk_offset; 
    8785 
    8886        for(uint32_t blk=0;blk<blocks;blk++) { 
    8987 
    90         blk_base = blk*BLOCK_SIZE; 
    91  
     88        blk_offset = blk * BLOCKSIZE; 
    9289        /////////////////////////////////////////////////////////////////////////////// 
    9390        // Byte Space Hash 
    9491        /////////////////////////////////////////////////////////////////////////////// 
    9592        if(bitblock::any(groups[blk].ends_1)) { 
    96         do_block<SYMBOL, hash_table <identity_strategy_t<uint8_t,1>, hash_strategy_t<1>, ALLOCATOR> >(hash_table_1, groups[blk].ends_1, &buffer[blk_base], 1, &buffer[blk_base], &buffer[blk_base], bytes2bits(1), BLOCK_SIZE, aos, blk_base); 
     93            do_block<SYMBOL, hash_table <identity_strategy_t<uint8_t,1>, hash_strategy_t<1>, ALLOCATOR> > 
     94                    (blk_offset, 
     95                     hash_table_1, 
     96                     groups[blk].ends_1, 
     97                     &buffer[blk_offset], 1,                                                    /* buffer, symbol length */ 
     98                     &buffer[blk_offset], &buffer[blk_offset], bytes2bits(1), BLOCK_SIZE, /* h0, h1, hash lgth (bits), hash block size (bits) */ 
     99                     symbols); 
     100            } 
    97101        } 
    98         if(bitblock::any(groups[blk].ends_2)) { 
    99         do_block<SYMBOL, hash_table <identity_strategy_t<uint16_t,2>, hash_strategy_t<2>, ALLOCATOR> >(hash_table_2, groups[blk].ends_2, &buffer[blk_base], 2, &buffer[blk_base], &buffer[blk_base], bytes2bits(2), BLOCK_SIZE, aos, blk_base); 
    100         } 
    101         if(bitblock::any(groups[blk].ends_3)) { 
    102         do_block<SYMBOL, hash_table <identity_strategy_t<uint16_t,3>, hash_strategy_t<3>, ALLOCATOR> >(hash_table_3, groups[blk].ends_3, &buffer[blk_base], 3, &buffer[blk_base], &buffer[blk_base], bytes2bits(3), BLOCK_SIZE, aos, blk_base); 
    103         } 
    104         if(bitblock::any(groups[blk].ends_4)) { 
    105         do_block<SYMBOL, hash_table <identity_strategy_t<uint32_t,4>, hash_strategy_t<4>, ALLOCATOR> >(hash_table_4, groups[blk].ends_4, &buffer[blk_base], 4, &buffer[blk_base], &buffer[blk_base], bytes2bits(4), BLOCK_SIZE, aos, blk_base); 
    106         } 
    107         if(bitblock::any(groups[blk].ends_5)) { 
    108         do_block<SYMBOL, hash_table <identity_strategy_t<uint32_t,5>, hash_strategy_t<5>, ALLOCATOR> >(hash_table_5, groups[blk].ends_5, &buffer[blk_base], 5, &buffer[blk_base], &buffer[blk_base], bytes2bits(5), BLOCK_SIZE, aos, blk_base); 
    109         } 
    110         if(bitblock::any(groups[blk].ends_6)) { 
    111         do_block<SYMBOL, hash_table <identity_strategy_t<uint32_t,6>, hash_strategy_t<6>, ALLOCATOR> >(hash_table_6, groups[blk].ends_6, &buffer[blk_base], 6, &buffer[blk_base], &buffer[blk_base], bytes2bits(6), BLOCK_SIZE, aos, blk_base); 
    112         } 
    113         if(bitblock::any(groups[blk].ends_7)) { 
    114         do_block<SYMBOL, hash_table <identity_strategy_t<uint32_t,7>, hash_strategy_t<7>, ALLOCATOR> >(hash_table_7, groups[blk].ends_7, &buffer[blk_base], 7, &buffer[blk_base], &buffer[blk_base], bytes2bits(7), BLOCK_SIZE, aos, blk_base); 
    115         } 
    116         /////////////////////////////////////////////////////////////////////////////// 
    117         // Bit Space Hash 
    118         /////////////////////////////////////////////////////////////////////////////// 
    119         if(bitblock::any(groups[blk].ends_8)) { 
    120         do_block<SYMBOL, hash_table <identity_strategy_t<uint64_t,8>, hash_strategy_d, ALLOCATOR> >(hash_table_8, groups[blk].ends_8, &buffer[blk_base], 8, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 8, BLOCK_SIZE/8, aos, blk_base); 
    121         } 
    122         if(bitblock::any(groups[blk].ends_9)) { 
    123         do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,9>, hash_strategy_d, ALLOCATOR> >(hash_table_9, groups[blk].ends_9, &buffer[blk_base], 9, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 9, BLOCK_SIZE/8, aos, blk_base); 
    124         } 
    125         if(bitblock::any(groups[blk].ends_10)) { 
    126         do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,10>, hash_strategy_d, ALLOCATOR> >(hash_table_10, groups[blk].ends_10, &buffer[blk_base], 10, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 10, BLOCK_SIZE/8, aos, blk_base); 
    127         } 
    128         if(bitblock::any(groups[blk].ends_11)) { 
    129         do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,11>, hash_strategy_d, ALLOCATOR> >(hash_table_11, groups[blk].ends_11, &buffer[blk_base], 11, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 11, BLOCK_SIZE/8, aos, blk_base); 
    130         } 
    131         if(bitblock::any(groups[blk].ends_12)) { 
    132         do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,12>, hash_strategy_d, ALLOCATOR> >(hash_table_12, groups[blk].ends_12, &buffer[blk_base], 12, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 12, BLOCK_SIZE/8, aos, blk_base); 
    133         } 
    134         if(bitblock::any(groups[blk].ends_13)) { 
    135         do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,13>, hash_strategy_d, ALLOCATOR> >(hash_table_13, groups[blk].ends_13, &buffer[blk_base], 13, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 13, BLOCK_SIZE/8, aos, blk_base); 
    136         } 
    137         if(bitblock::any(groups[blk].ends_14)) { 
    138         do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,14>, hash_strategy_d, ALLOCATOR> >(hash_table_14, groups[blk].ends_14, &buffer[blk_base], 14, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 14, BLOCK_SIZE/8, aos, blk_base); 
    139         } 
    140         if(bitblock::any(groups[blk].ends_15)) { 
    141         do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,15>, hash_strategy_d, ALLOCATOR> >(hash_table_15, groups[blk].ends_15, &buffer[blk_base], 15, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 15, BLOCK_SIZE/8, aos, blk_base); 
    142         } 
    143         if(bitblock::any(groups[blk].ends_16)) { 
    144         do_block<SYMBOL, hash_table<identity_strategy_t<BitBlock,16>, hash_strategy_d, ALLOCATOR> >(hash_table_16, groups[blk].ends_16, &buffer[blk_base], 16, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 16, BLOCK_SIZE/8, aos, blk_base); 
    145         } 
    146         if(bitblock::any(ends_gte_17[blk])) { 
    147         do_block<SYMBOL, hash_table<identity_strategy_d, hash_strategy_d, ALLOCATOR> >(hash_table_gte_17, &starts[blk], &ends_gte_17[blk], &buffer[blk_base], (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], BLOCK_SIZE/8, aos, blk_base); 
    148         } 
    149  
    150         } 
     102//      if(bitblock::any(groups[blk].ends_2)) { 
     103//      do_block<SYMBOL, hash_table <identity_strategy_t<uint16_t,2>, hash_strategy_t<2>, ALLOCATOR> >(hash_table_2, groups[blk].ends_2, &buffer[blk], 2, &buffer[blk], &buffer[blk], bytes2bits(2), BLOCK_SIZE, symbols); 
     104//      } 
     105//      if(bitblock::any(groups[blk].ends_3)) { 
     106//      do_block<SYMBOL, hash_table <identity_strategy_t<uint16_t,3>, hash_strategy_t<3>, ALLOCATOR> >(hash_table_3, groups[blk].ends_3, &buffer[blk], 3, &buffer[blk], &buffer[blk], bytes2bits(3), BLOCK_SIZE, symbols); 
     107//      } 
     108//      if(bitblock::any(groups[blk].ends_4)) { 
     109//      do_block<SYMBOL, hash_table <identity_strategy_t<uint32_t,4>, hash_strategy_t<4>, ALLOCATOR> >(hash_table_4, groups[blk].ends_4, &buffer[blk], 4, &buffer[blk], &buffer[blk], bytes2bits(4), BLOCK_SIZE, symbols); 
     110//      } 
     111//      if(bitblock::any(groups[blk].ends_5)) { 
     112//      do_block<SYMBOL, hash_table <identity_strategy_t<uint32_t,5>, hash_strategy_t<5>, ALLOCATOR> >(hash_table_5, groups[blk].ends_5, &buffer[blk], 5, &buffer[blk], &buffer[blk], bytes2bits(5), BLOCK_SIZE, symbols); 
     113//      } 
     114//      if(bitblock::any(groups[blk].ends_6)) { 
     115//      do_block<SYMBOL, hash_table <identity_strategy_t<uint32_t,6>, hash_strategy_t<6>, ALLOCATOR> >(hash_table_6, groups[blk].ends_6, &buffer[blk], 6, &buffer[blk], &buffer[blk], bytes2bits(6), BLOCK_SIZE, symbols); 
     116//      } 
     117//      if(bitblock::any(groups[blk].ends_7)) { 
     118//      do_block<SYMBOL, hash_table <identity_strategy_t<uint32_t,7>, hash_strategy_t<7>, ALLOCATOR> >(hash_table_7, groups[blk].ends_7, &buffer[blk], 7, &buffer[blk], &buffer[blk], bytes2bits(7), BLOCK_SIZE, symbols); 
     119//      } 
     120//      /////////////////////////////////////////////////////////////////////////////// 
     121//      // Bit Space Hash 
     122//      /////////////////////////////////////////////////////////////////////////////// 
     123//      if(bitblock::any(groups[blk].ends_8)) { 
     124//      do_block<SYMBOL, hash_table <identity_strategy_t<uint64_t,8>, hash_strategy_d, ALLOCATOR> >(hash_table_8, groups[blk].ends_8, &buffer[blk], 8, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 8, BLOCK_SIZE/8, symbols); 
     125//      } 
     126//      if(bitblock::any(groups[blk].ends_9)) { 
     127//      do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,9>, hash_strategy_d, ALLOCATOR> >(hash_table_9, groups[blk].ends_9, &buffer[blk], 9, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 9, BLOCK_SIZE/8, symbols); 
     128//      } 
     129//      if(bitblock::any(groups[blk].ends_10)) { 
     130//      do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,10>, hash_strategy_d, ALLOCATOR> >(hash_table_10, groups[blk].ends_10, &buffer[blk], 10, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 10, BLOCK_SIZE/8, symbols); 
     131//      } 
     132//      if(bitblock::any(groups[blk].ends_11)) { 
     133//      do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,11>, hash_strategy_d, ALLOCATOR> >(hash_table_11, groups[blk].ends_11, &buffer[blk], 11, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 11, BLOCK_SIZE/8, symbols); 
     134//      } 
     135//      if(bitblock::any(groups[blk].ends_12)) { 
     136//      do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,12>, hash_strategy_d, ALLOCATOR> >(hash_table_12, groups[blk].ends_12, &buffer[blk], 12, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 12, BLOCK_SIZE/8, symbols); 
     137//      } 
     138//      if(bitblock::any(groups[blk].ends_13)) { 
     139//      do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,13>, hash_strategy_d, ALLOCATOR> >(hash_table_13, groups[blk].ends_13, &buffer[blk], 13, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 13, BLOCK_SIZE/8, symbols); 
     140//      } 
     141//      if(bitblock::any(groups[blk].ends_14)) { 
     142//      do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,14>, hash_strategy_d, ALLOCATOR> >(hash_table_14, groups[blk].ends_14, &buffer[blk], 14, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 14, BLOCK_SIZE/8, symbols); 
     143//      } 
     144//      if(bitblock::any(groups[blk].ends_15)) { 
     145//      do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,15>, hash_strategy_d, ALLOCATOR> >(hash_table_15, groups[blk].ends_15, &buffer[blk], 15, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 15, BLOCK_SIZE/8, symbols); 
     146//      } 
     147//      if(bitblock::any(groups[blk].ends_16)) { 
     148//      do_block<SYMBOL, hash_table<identity_strategy_t<BitBlock,16>, hash_strategy_d, ALLOCATOR> >(hash_table_16, groups[blk].ends_16, &buffer[blk], 16, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 16, BLOCK_SIZE/8, symbols); 
     149//      } 
     150//      if(bitblock::any(ends_gte_17[blk])) { 
     151//      do_block<SYMBOL, hash_table<identity_strategy_d, hash_strategy_d, ALLOCATOR> >(hash_table_gte_17, &starts[blk], &ends_gte_17[blk], &buffer[blk], (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], BLOCK_SIZE/8, symbols); 
     152//      } 
     153 
    151154    } 
    152155 
     
    180183/* NOTE: C++ template code and Pablo generated length groups must coincide. */ 
    181184 
    182 // Fixed Lengths - REVERSE SCAN LOGIC - Scan each BLOCK MSB to LSB (high to low memory address) 
     185// Fixed Lengths - REVERSE SCAN LOGIC - Scan each BLOCK MSB to LSB 
    183186template<class SYMBOL, class HASH_TABLE> 
    184 IDISA_ALWAYS_INLINE void do_block(HASH_TABLE & h_table, BitBlock ends, uint8_t buffer [], const uint32_t lgth, 
    185                                   uint8_t h0 [], uint8_t h1 [], const uint32_t h_lgth, const uint32_t h_block_size, 
    186                                   SYMBOL * aos , const int32_t block_base){ 
    187  
     187void do_block(uint32_t blk_offset, 
     188              HASH_TABLE & h_table, 
     189              BitBlock ends, 
     190              uint8_t buffer [], const uint32_t lgth, 
     191              uint8_t h0 [], uint8_t h1 [], const uint32_t h_lgth, const uint32_t h_block_size, 
     192              SYMBOL & symbols) { 
     193 
     194    gid_type gid; 
    188195    int32_t spos; 
    189     ReverseScanner<BitBlock, scanword_t> rscanner(&ends); 
    190  
    191     rscanner.scan_to_next(); 
    192     spos = (rscanner.get_pos() - lgth); 
    193  
    194     while(!rscanner.is_done() && (spos >= 0)) { 
    195  
    196         // Comment out LOOKBACK_SIZE . Nigel's code *should* not produce this case. 
    197         aos[LOOKBACK_SIZE + block_base + spos].gid = h_table.lookup_or_insert(buffer, spos, lgth, h0, h1, h_lgth); 
    198         aos[LOOKBACK_SIZE + block_base + spos].lgth = lgth; 
    199  
    200 #ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG 
    201         print_symbol_debug(buffer, spos, rscanner.get_pos(), lgth); 
    202 #endif 
    203         rscanner.scan_to_next(); 
    204         spos = (rscanner.get_pos() - lgth); 
    205     } 
    206  
    207     // Comment out. Nigel's code *should* not produce this case. 
    208     if(!rscanner.is_done() && (spos < 0)) { // block boundary case. 
    209  
    210         if(lgth > (LOOKBACK_SIZE)) { 
    211             cerr << "Fatal Error."; 
    212             cerr << " Symbol length exceeds " << (LOOKBACK_SIZE) << " bytes."; 
    213             cerr << " Symbol tail : "; 
    214             cerr << string((char *)&(buffer[rscanner.get_pos()-(LOOKBACK_SIZE+BLOCK_SIZE)]), LOOKBACK_SIZE+BLOCK_SIZE) << endl; 
    215             abort(); 
    216         } 
     196    int32_t epos; 
     197    ForwardScanner<BitBlock, scanword_t> fscanner(&ends); 
     198 
     199    fscanner.scan_to_next(); 
     200    epos = fscanner.get_pos(); 
     201    spos = (epos - lgth); 
     202 
     203    if(!fscanner.is_done() && (spos < 0)) { // block boundary case 
    217204 
    218205        uint8_t * lb_buffer = buffer - ((lgth / BLOCK_SIZE) + 1)*BLOCK_SIZE; 
     
    222209        uint8_t * lb_h1 = h1 - ((lgth / BLOCK_SIZE) + 1)*h_block_size; 
    223210 
    224         // Comment out LOOKBACK_SIZE . Nigel's code *should* not produce this case. 
    225         aos[LOOKBACK_SIZE + block_base + lb_spos].gid = h_table.lookup_or_insert(lb_buffer, lb_spos, lgth, lb_h0, lb_h1, h_lgth); 
    226         aos[LOOKBACK_SIZE + block_base + lb_spos].lgth = lgth; 
     211        gid = h_table.lookup_or_insert(lb_buffer, lb_spos, lgth, 
     212                                 lb_h0, lb_h1, h_lgth); 
     213 
     214        symbols.gids[blk_offset + spos] = gid; 
     215 
     216        epos = fscanner.scan_to_next(); 
     217        spos = (epos - lgth); 
     218 
     219    } 
     220 
     221    while(!fscanner.is_done() && (spos >= 0)) { 
     222 
     223        gid = h_table.lookup_or_insert(buffer, spos, lgth, h0, h1, h_lgth); 
     224        //symbols.gids[epos] = gid; 
     225        symbols.gids[blk_offset + spos] = gid; 
    227226 
    228227#ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG 
    229         print_symbol_debug(buffer, spos, rscanner.get_pos(), lgth); 
    230 #endif 
    231  
    232     } 
     228        print_symbol_debug(buffer, spos, epos, lgth); 
     229#endif 
     230        fscanner.scan_to_next(); 
     231        epos = fscanner.get_pos(); 
     232        spos = (epos - lgth); 
     233    } 
     234 
    233235} 
    234236 
     
    255257        lgth = ends_rscanner.get_pos() - spos; 
    256258        // Comment out LOOKBACK_SIZE . Nigel's code *should* not produce this case. 
    257         aos[LOOKBACK_SIZE + block_base + spos].gid = h_table.lookup_or_insert(buffer, spos, lgth, h0, h1, lgth); 
    258         aos[LOOKBACK_SIZE + block_base + spos].lgth = lgth; 
     259//      aos[LOOKBACK_SIZE + block_base + spos].gid = h_table.lookup_or_insert(buffer, spos, lgth, h0, h1, lgth); 
     260//      aos[LOOKBACK_SIZE + block_base + spos].lgth = lgth; 
     261 
     262 
    259263 
    260264#ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG 
     
    304308 
    305309        // Comment out LOOKBACK_SIZE. Nigel's code *should* not produce this case. 
    306         aos[LOOKBACK_SIZE + block_base + lb_spos].gid = h_table.lookup_or_insert(lb_buffer, lb_spos, lgth, lb_h0, lb_h1, lgth); 
    307         aos[LOOKBACK_SIZE + block_base + lb_spos].lgth = lgth; 
     310//      aos[LOOKBACK_SIZE + block_base + lb_spos].gid = h_table.lookup_or_insert(lb_buffer, lb_spos, lgth, lb_h0, lb_h1, lgth); 
     311//      aos[LOOKBACK_SIZE + block_base + lb_spos].lgth = lgth; 
    308312 
    309313        if((lb_spos) < 0) { 
  • trunk/symbol_table/src/symbol_table.hpp

    r1995 r2001  
    1111#include "../lib/bitblock.hpp" 
    1212#include "../lib/byte_pool.hpp" 
     13#include "hash_table.hpp" 
    1314 
     15#include <vector> 
    1416#include <iostream> 
    1517using namespace std; 
     
    1820 
    1921/////////////////////////////////////////////////////////////////////////// 
    20 // Symbol Type Definitions - Warning: No bounds checks. 
     22// Symbol Type - Array of gid_type 
    2123/////////////////////////////////////////////////////////////////////////// 
     24 
    2225class Symbol { 
    2326public: 
    24 //    IDISA_ALWAYS_INLINE void add_symbol(gid_type gid, uint8_t * raw_data, uint32_t lgth, uint32_t idx=0); 
    25 //    IDISA_ALWAYS_INLINE gid_type get_gid(uint32_t idx=0) const; 
    26 //    IDISA_ALWAYS_INLINE uint8_t * get_raw_data(uint32_t idx=0) const; 
    27 //    IDISA_ALWAYS_INLINE uint32_t get_lgth(uint32_t idx=0) const; 
     27    Symbol (uint32_t n) { 
     28        init(n); 
     29    } 
     30 
     31    void init(uint32_t n) { 
     32        gids.reserve(n); 
     33        //gids_idx.reserve((n/BLOCK_SIZE) + 1); 
     34    } 
     35 
     36    vector<gid_type> gids; 
     37    //vector<BitBlock> gids_idx;   // gids index 
    2838}; 
    2939 
    30 class AoS_symbol: public Symbol { // Xerces 
    31 public: 
    32  
    33 //    IDISA_ALWAYS_INLINE void add_symbol(gid_type gid, uint8_t * raw_data, uint32_t lgth, uint32_t idx=0) { 
    34 //      this->gid = gid; 
    35 //      this->raw_data = raw_data; 
    36 //      this->lgth = lgth; 
    37 //    } 
    38  
    39 //    IDISA_ALWAYS_INLINE gid_type get_gid(uint32_t idx=0) const { return gid; } 
    40 //    IDISA_ALWAYS_INLINE uint8_t * get_raw_data(uint32_t idx=0) const { return raw_data; } 
    41 //    IDISA_ALWAYS_INLINE uint32_t get_lgth(uint32_t idx=0) const { return lgth; } 
    42  
    43 //private: 
    44     gid_type gid; 
    45     uint8_t * raw_data; 
    46     uint32_t lgth; 
    47 }; 
    48  
    49 //class SoA_symbol: public Symbol { // Regex 
    50 //public: 
    51  
    52 //    IDISA_ALWAYS_INLINE void add_symbol(gid_type gid, uint8_t * raw_data, uint32_t lgth, uint32_t idx=0) { 
    53 //      this->gid[idx] = gid; 
    54 //      this->raw_data[idx] = raw_data; 
    55 //      this->lgth[idx] = lgth; 
    56 //    } 
    57  
    58 //    IDISA_ALWAYS_INLINE gid_type get_gid(uint32_t idx=0) const { return this->gid[idx]; } 
    59 //    IDISA_ALWAYS_INLINE uint8_t * get_raw_data(uint32_t idx=0) const { return this->raw_data[idx]; } 
    60 //    IDISA_ALWAYS_INLINE uint32_t get_lgth(uint32_t idx=0) const { return this->lgth[idx]; } 
    61  
    62 //private: 
    63 //    gid_type * gid; 
    64 //    uint8_t ** raw_data; 
    65 //    uint32_t * lgth; 
    66 //}; 
    67  
    6840/////////////////////////////////////////////////////////////////////////// 
    69 // Symbol Table Definition 
     41// GID indexed POD array 
    7042/////////////////////////////////////////////////////////////////////////// 
    7143 
     
    7345public: 
    7446    void resolve(uint8_t buffer [], Groups groups [],  BitBlock starts [], BitBlock ends_gte_17 [], 
    75                  BitBlock h0 [], BitBlock h1 [], uint32_t blocks, SYMBOL * aos /*, const uint32_t symbols*/); 
     47                 BitBlock h0 [], BitBlock h1 [], uint32_t blocks, SYMBOL & symbols /*, const uint32_t symbols*/); 
    7648 
    7749    //void resolve(uint8_t buffer [], Groups groups [],  BitBlock starts [], BitBlock ends_gte_17 [], 
    78 //               BitBlock h0 [], BitBlock h1 [], uint32_t blocks, SoA_symbol & soa/*, const uint32_t symbols*/); 
     50    //           BitBlock h0 [], BitBlock h1 [], uint32_t blocks, SoA_symbol & soa/*, const uint32_t symbols*/); 
     51 
     52    IDISA_ALWAYS_INLINE uint8_t * get_raw_data(uint32_t idx) const { return gid_data::get_raw_bytes(idx); } 
     53    IDISA_ALWAYS_INLINE uint32_t get_lgth(uint32_t idx) const { return gid_data::get_bytes_lgth(idx); } 
    7954 
    8055protected: 
    8156    symbol_table() {} 
    8257    ~symbol_table() {} 
     58 
    8359}; 
    8460 
    85 template<class HASH_TABLE> 
    86 IDISA_ALWAYS_INLINE void do_block(HASH_TABLE & h_table, BitBlock ends, uint8_t buffer [], const uint32_t lgth, 
    87                                   uint8_t h0 [], uint8_t h1 [], const uint32_t h_lgth, const uint32_t h_block_size); 
     61template<class SYMBOL, class HASH_TABLE> 
     62IDISA_ALWAYS_INLINE void do_block(uint32_t blk_offset, 
     63                                  HASH_TABLE & h_table, 
     64                                  BitBlock ends, 
     65                                  uint8_t buffer [], const uint32_t lgth, 
     66                                  uint8_t h0 [], uint8_t h1 [], const uint32_t h_lgth, const uint32_t h_block_size, 
     67                                  SYMBOL & symbols); 
    8868 
    89 template<class HASH_TABLE> 
    90 IDISA_ALWAYS_INLINE void do_block(HASH_TABLE & h_table, BitBlock starts [], BitBlock ends [], uint8_t buffer [], 
    91                                   uint8_t h0 [], uint8_t h1 [], const uint32_t h_block_size); 
     69 
     70//template<class HASH_TABLE> 
     71//IDISA_ALWAYS_INLINE void do_block(uint32_t blk, HASH_TABLE & h_table, BitBlock starts [], BitBlock ends [], uint8_t buffer [], 
     72//                                uint8_t h0 [], uint8_t h1 [], const uint32_t h_block_size); 
    9273 
    9374#endif // SYMBOL_TABLE_HPP