Changeset 2001


Ignore:
Timestamp:
Apr 7, 2012, 8:39:25 PM (7 years ago)
Author:
ksherdy
Message:

Significant refactor to sync with the current ICXML buffer model. Report GIDs at start positions and scan forward to support file diffs for QA.

Location:
trunk/symbol_table
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • trunk/symbol_table/main_template.cpp

    r1995 r2001  
    5858#endif
    5959
    60 
    61 // Target symbol type must inherit from AoS_symbol
    62 class MySymbol: public AoS_symbol
    63 {
    64 public:
    65     bool param_1;
    66 };
    67 
    6860int main(int argc, char * argv[]) {
    6961
     
    121113    BitBlock * lookback_ends_gte_17 = (BitBlock *) aligned_ends_gte_17;
    122114    memset(lookback_ends_gte_17,0,LOOKBACK_SIZE/BLOCK_SIZE);
    123     BitBlock * ends_gte_17 = &lookback_h1[LOOKBACK_SIZE/BLOCK_SIZE];
    124 
     115    BitBlock * ends_gte_17 = &lookback_ends_gte_17[LOOKBACK_SIZE/BLOCK_SIZE];
    125116
    126117    // BitSteams - Without lookback
     
    131122
    132123    // Symbol Table
    133     const uint32_t SYMBOL_COUNT = LOOKBACK_SIZE + SEGMENT_SIZE;
    134     //AoS_symbol symbol_ary[SYMBOL_COUNT];
    135     MySymbol symbol_ary[SYMBOL_COUNT];
    136     id_symbol_table<MySymbol, fast_pool_allocator<1024> > symbol_table;
     124    const uint32_t SYMBOL_COUNT = SEGMENT_SIZE;
     125
     126    Symbol symbols(SYMBOL_COUNT);
     127    id_symbol_table<Symbol, fast_pool_allocator<1024> > symbol_table;
    137128
    138129    is.read ((char *)raw_buffer, SEGMENT_SIZE);
     
    164155
    165156      PERF_SEC_START(parser_timer);
    166       symbol_table.resolve(raw_buffer, groups, starts, ends_gte_17, h0, h1, SEGMENT_BLOCKS, symbol_ary /*, SYMBOL_COUNT*/);
     157      symbol_table.resolve(raw_buffer, groups, starts, ends_gte_17, h0, h1, SEGMENT_BLOCKS, symbols /*, SYMBOL_COUNT*/);
    167158      PERF_SEC_END(parser_timer, SEGMENT_SIZE);
    168159
     
    182173
    183174      // test
    184       uint32_t blk_base = 0;
    185       uint32_t idx = 0;
    186       for(blk=0;blk<SEGMENT_BLOCKS + LOOKBACK_BLOCKS;blk++) {
    187           blk_base = blk * BLOCK_SIZE;
    188           ForwardScanner<BitBlock, scanword_t> fscanner(&starts[blk]);
     175      uint32_t blk_offset;
     176      for(int blk=0;blk<SEGMENT_BLOCKS;blk++) {
     177          blk_offset = blk * BLOCKSIZE;
     178          gid_type gid;
     179          ForwardScanner<BitBlock, scanword_t> fscanner(&(groups[blk].starts));
     180
    189181          fscanner.scan_to_next();
    190182          while(!fscanner.is_done()) {
    191               idx = LOOKBACK_SIZE + blk_base + fscanner.get_pos();
    192               cout <<"[" << idx << "]" << "=" << "(" << symbol_ary[idx].gid << "," << symbol_ary[idx].lgth << ")" << endl;
     183              gid = symbols.gids[fscanner.get_pos() + blk_offset];
     184  //        cout <<"Symbol[" << fscanner.get_pos() << "] = "
     185  //                << "(gid:" << gid << ",raw:"
     186  //                << string((char *)symbol_table.get_raw_data(gid), symbol_table.get_lgth(gid))<< ")" << endl;
     187              cout << string((char *)symbol_table.get_raw_data(gid), symbol_table.get_lgth(gid)) << ",";
     188
    193189              fscanner.scan_to_next();
     190
    194191          }
    195192      }
    196193    }
    197 
    198194    /* Resolve Partial Segments */
    199195    uint32_t remaining = chars_avail;
     
    237233
    238234    //PERF_SEC_START(parser_timer);
    239     symbol_table.resolve(raw_buffer, groups, starts, ends_gte_17, h0, h1, blk, symbol_ary/*, SYMBOL_COUNT*/);
     235    symbol_table.resolve(raw_buffer, groups, starts, ends_gte_17, h0, h1, segment_size, symbols/*, SYMBOL_COUNT*/);
    240236    //PERF_SEC_END(parser_timer, chars_avail+1);
    241237
    242     // test
    243     uint32_t blk_base = 0;
    244     uint32_t idx = 0;
    245     for(blk=0;blk<segment_size + LOOKBACK_BLOCKS;blk++) {
    246         blk_base = blk * BLOCK_SIZE;
    247         ForwardScanner<BitBlock, scanword_t> fscanner(&starts[blk]);
     238    uint32_t blk_offset;
     239    for(int blk=0;blk<segment_size;blk++) {
     240        blk_offset = blk * BLOCKSIZE;
     241        gid_type gid;
     242        ForwardScanner<BitBlock, scanword_t> fscanner(&(groups[blk].starts));
     243
    248244        fscanner.scan_to_next();
    249245        while(!fscanner.is_done()) {
    250             idx = LOOKBACK_SIZE + blk_base + fscanner.get_pos();
    251             cout <<"[" << idx << "]" << "=" << "(" << symbol_ary[idx].gid << "," << symbol_ary[idx].lgth << ")" << endl;
     246            gid = symbols.gids[fscanner.get_pos() + blk_offset];
     247//          cout <<"Symbol[" << fscanner.get_pos() << "] = "
     248//                  << "(gid:" << gid << ",raw:"
     249//                  << string((char *)symbol_table.get_raw_data(gid), symbol_table.get_lgth(gid))<< ")" << endl;
     250            cout << string((char *)symbol_table.get_raw_data(gid), symbol_table.get_lgth(gid)) << ",";
     251
    252252            fscanner.scan_to_next();
     253
    253254        }
    254255    }
  • trunk/symbol_table/src/Makefile

    r1989 r2001  
    1414
    1515all: basis_bits.hpp buffer.hpp byte_pool.hpp  hash_strms.hpp  hash_table.hpp  id_group_strms.hpp  id_symbol_table.hpp  main.cpp  Makefile  marker_strms.hpp  symbol_table.hpp  transpose.hpp
    16         $(CC) -o main main.cpp $(AFLAGS) -DHASH_TABLE_HPP_DEBUG # -DBUFFER_PROFILING -DID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG
     16        $(CC) -o main main.cpp $(AFLAGS) #-DHASH_TABLE_HPP_DEBUG # -DBUFFER_PROFILING -DID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG
    1717
    1818clean:
  • trunk/symbol_table/src/buffer.hpp

    r1979 r2001  
    99#define LOOKBACK_BLOCKS 1
    1010#define LOOKBACK_SIZE BLOCK_SIZE * LOOKBACK_BLOCKS
    11 #define SEGMENT_BLOCKS 10
     11#define SEGMENT_BLOCKS 100 // Starts and NO COPY BACK
    1212#define SEGMENT_SIZE BLOCK_SIZE * (SEGMENT_BLOCKS)                                          // (bytes) a multiple of BLOCK_SIZE
    1313#define SEGMENT_ALLOC_SIZE (LOOKBACK_SIZE + SEGMENT_SIZE + PADDING_SIZE) / sizeof(BitBlock) // (bytes)
  • trunk/symbol_table/src/hash_table.hpp

    r1992 r2001  
    3333#include <sstream>
    3434#include <iostream>
     35#include <vector>
    3536using namespace std;
    3637
     
    4748} node;
    4849
     50// TODO -   Single GID.
     51//          For multiple GID sets refactor such that
     52//          Hash Tables consult the parent Symbol Table for a per Symbol Table instance GID.
    4953class gid {
    5054public:
     
    5458};
    5559
    56 /* Global GID for all hash_tables */
    57 uint64_t gid::value = 1;
     60// TODO -   Single GID data.
     61// WARNING - No bounds checking.
     62uint64_t gid::value = 0;
     63
     64class gid_data {
     65public:
     66
     67    static void add_data(uint8_t * raw_bytes, uint32_t raw_bytes_lgth) {
     68        data next;
     69        next.raw_bytes = raw_bytes;
     70        next.raw_bytes_lgth = raw_bytes_lgth;
     71        values.push_back(next);
     72    }
     73
     74    static size_t max() { return values.size(); }
     75
     76    static uint8_t * get_raw_bytes(size_t idx) {
     77        return values.at(idx).raw_bytes;
     78    }
     79
     80    static uint32_t get_bytes_lgth(size_t idx) {
     81        return values.at(idx).raw_bytes_lgth;
     82    }
     83
     84private:
     85    typedef struct data {
     86        uint8_t * raw_bytes;
     87        uint32_t raw_bytes_lgth;
     88    } data;
     89
     90    static vector<data> values;
     91};
     92
     93/* Global GID data for all hash tables. */
     94vector<gid_data::data> gid_data::values;
    5895
    5996template<class COMPARE_STRATEGY, class HASH_STRATEGY, class ALLOCATOR>
    6097class hash_table {
     98
    6199public:
    62100
     
    127165        uint64_t x1 = bit_slice(h1, idx, hash_bit_lgth);
    128166
     167        uint8_t * data_pool_raw_bytes = raw_data_pool.insert(&raw_bytes[idx],raw_byte_lgth); // persist
     168
    129169        insert( bucket,
    130                 raw_data_pool.insert(&raw_bytes[idx],raw_byte_lgth),
     170                data_pool_raw_bytes,
    131171                raw_byte_lgth,
    132172                raw_data_pool.insert((uint8_t *)&x0, bits2bytes(hash_bit_lgth)),
     
    147187            elements++;
    148188        #endif
     189
     190        gid_data::add_data(data_pool_raw_bytes,raw_byte_lgth);
    149191
    150192        return gid;
  • trunk/symbol_table/src/id_symbol_table.hpp

    r1995 r2001  
    2727
    2828#include "symbol_table.hpp"
    29 #include "hash_table.hpp"
    3029#include "buffer.hpp"
    3130#include "../lib/carryQ.hpp"
     
    8180    // Groups & groups
    8281    void resolve(uint8_t buffer [], Groups groups [],  BitBlock starts [], BitBlock ends_gte_17 [],
    83                  BitBlock h0 [], BitBlock h1 [], uint32_t blocks, SYMBOL * aos/*, const uint32_t symbols*/) {
    84 
    85 
    86         uint32_t blk_base;
     82                 BitBlock h0 [], BitBlock h1 [], uint32_t blocks, SYMBOL & symbols) {
     83
     84        uint32_t blk_offset;
    8785
    8886        for(uint32_t blk=0;blk<blocks;blk++) {
    8987
    90         blk_base = blk*BLOCK_SIZE;
    91 
     88        blk_offset = blk * BLOCKSIZE;
    9289        ///////////////////////////////////////////////////////////////////////////////
    9390        // Byte Space Hash
    9491        ///////////////////////////////////////////////////////////////////////////////
    9592        if(bitblock::any(groups[blk].ends_1)) {
    96         do_block<SYMBOL, hash_table <identity_strategy_t<uint8_t,1>, hash_strategy_t<1>, ALLOCATOR> >(hash_table_1, groups[blk].ends_1, &buffer[blk_base], 1, &buffer[blk_base], &buffer[blk_base], bytes2bits(1), BLOCK_SIZE, aos, blk_base);
     93            do_block<SYMBOL, hash_table <identity_strategy_t<uint8_t,1>, hash_strategy_t<1>, ALLOCATOR> >
     94                    (blk_offset,
     95                     hash_table_1,
     96                     groups[blk].ends_1,
     97                     &buffer[blk_offset], 1,                                                    /* buffer, symbol length */
     98                     &buffer[blk_offset], &buffer[blk_offset], bytes2bits(1), BLOCK_SIZE, /* h0, h1, hash lgth (bits), hash block size (bits) */
     99                     symbols);
     100            }
    97101        }
    98         if(bitblock::any(groups[blk].ends_2)) {
    99         do_block<SYMBOL, hash_table <identity_strategy_t<uint16_t,2>, hash_strategy_t<2>, ALLOCATOR> >(hash_table_2, groups[blk].ends_2, &buffer[blk_base], 2, &buffer[blk_base], &buffer[blk_base], bytes2bits(2), BLOCK_SIZE, aos, blk_base);
    100         }
    101         if(bitblock::any(groups[blk].ends_3)) {
    102         do_block<SYMBOL, hash_table <identity_strategy_t<uint16_t,3>, hash_strategy_t<3>, ALLOCATOR> >(hash_table_3, groups[blk].ends_3, &buffer[blk_base], 3, &buffer[blk_base], &buffer[blk_base], bytes2bits(3), BLOCK_SIZE, aos, blk_base);
    103         }
    104         if(bitblock::any(groups[blk].ends_4)) {
    105         do_block<SYMBOL, hash_table <identity_strategy_t<uint32_t,4>, hash_strategy_t<4>, ALLOCATOR> >(hash_table_4, groups[blk].ends_4, &buffer[blk_base], 4, &buffer[blk_base], &buffer[blk_base], bytes2bits(4), BLOCK_SIZE, aos, blk_base);
    106         }
    107         if(bitblock::any(groups[blk].ends_5)) {
    108         do_block<SYMBOL, hash_table <identity_strategy_t<uint32_t,5>, hash_strategy_t<5>, ALLOCATOR> >(hash_table_5, groups[blk].ends_5, &buffer[blk_base], 5, &buffer[blk_base], &buffer[blk_base], bytes2bits(5), BLOCK_SIZE, aos, blk_base);
    109         }
    110         if(bitblock::any(groups[blk].ends_6)) {
    111         do_block<SYMBOL, hash_table <identity_strategy_t<uint32_t,6>, hash_strategy_t<6>, ALLOCATOR> >(hash_table_6, groups[blk].ends_6, &buffer[blk_base], 6, &buffer[blk_base], &buffer[blk_base], bytes2bits(6), BLOCK_SIZE, aos, blk_base);
    112         }
    113         if(bitblock::any(groups[blk].ends_7)) {
    114         do_block<SYMBOL, hash_table <identity_strategy_t<uint32_t,7>, hash_strategy_t<7>, ALLOCATOR> >(hash_table_7, groups[blk].ends_7, &buffer[blk_base], 7, &buffer[blk_base], &buffer[blk_base], bytes2bits(7), BLOCK_SIZE, aos, blk_base);
    115         }
    116         ///////////////////////////////////////////////////////////////////////////////
    117         // Bit Space Hash
    118         ///////////////////////////////////////////////////////////////////////////////
    119         if(bitblock::any(groups[blk].ends_8)) {
    120         do_block<SYMBOL, hash_table <identity_strategy_t<uint64_t,8>, hash_strategy_d, ALLOCATOR> >(hash_table_8, groups[blk].ends_8, &buffer[blk_base], 8, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 8, BLOCK_SIZE/8, aos, blk_base);
    121         }
    122         if(bitblock::any(groups[blk].ends_9)) {
    123         do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,9>, hash_strategy_d, ALLOCATOR> >(hash_table_9, groups[blk].ends_9, &buffer[blk_base], 9, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 9, BLOCK_SIZE/8, aos, blk_base);
    124         }
    125         if(bitblock::any(groups[blk].ends_10)) {
    126         do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,10>, hash_strategy_d, ALLOCATOR> >(hash_table_10, groups[blk].ends_10, &buffer[blk_base], 10, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 10, BLOCK_SIZE/8, aos, blk_base);
    127         }
    128         if(bitblock::any(groups[blk].ends_11)) {
    129         do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,11>, hash_strategy_d, ALLOCATOR> >(hash_table_11, groups[blk].ends_11, &buffer[blk_base], 11, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 11, BLOCK_SIZE/8, aos, blk_base);
    130         }
    131         if(bitblock::any(groups[blk].ends_12)) {
    132         do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,12>, hash_strategy_d, ALLOCATOR> >(hash_table_12, groups[blk].ends_12, &buffer[blk_base], 12, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 12, BLOCK_SIZE/8, aos, blk_base);
    133         }
    134         if(bitblock::any(groups[blk].ends_13)) {
    135         do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,13>, hash_strategy_d, ALLOCATOR> >(hash_table_13, groups[blk].ends_13, &buffer[blk_base], 13, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 13, BLOCK_SIZE/8, aos, blk_base);
    136         }
    137         if(bitblock::any(groups[blk].ends_14)) {
    138         do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,14>, hash_strategy_d, ALLOCATOR> >(hash_table_14, groups[blk].ends_14, &buffer[blk_base], 14, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 14, BLOCK_SIZE/8, aos, blk_base);
    139         }
    140         if(bitblock::any(groups[blk].ends_15)) {
    141         do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,15>, hash_strategy_d, ALLOCATOR> >(hash_table_15, groups[blk].ends_15, &buffer[blk_base], 15, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 15, BLOCK_SIZE/8, aos, blk_base);
    142         }
    143         if(bitblock::any(groups[blk].ends_16)) {
    144         do_block<SYMBOL, hash_table<identity_strategy_t<BitBlock,16>, hash_strategy_d, ALLOCATOR> >(hash_table_16, groups[blk].ends_16, &buffer[blk_base], 16, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 16, BLOCK_SIZE/8, aos, blk_base);
    145         }
    146         if(bitblock::any(ends_gte_17[blk])) {
    147         do_block<SYMBOL, hash_table<identity_strategy_d, hash_strategy_d, ALLOCATOR> >(hash_table_gte_17, &starts[blk], &ends_gte_17[blk], &buffer[blk_base], (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], BLOCK_SIZE/8, aos, blk_base);
    148         }
    149 
    150         }
     102//      if(bitblock::any(groups[blk].ends_2)) {
     103//      do_block<SYMBOL, hash_table <identity_strategy_t<uint16_t,2>, hash_strategy_t<2>, ALLOCATOR> >(hash_table_2, groups[blk].ends_2, &buffer[blk], 2, &buffer[blk], &buffer[blk], bytes2bits(2), BLOCK_SIZE, symbols);
     104//      }
     105//      if(bitblock::any(groups[blk].ends_3)) {
     106//      do_block<SYMBOL, hash_table <identity_strategy_t<uint16_t,3>, hash_strategy_t<3>, ALLOCATOR> >(hash_table_3, groups[blk].ends_3, &buffer[blk], 3, &buffer[blk], &buffer[blk], bytes2bits(3), BLOCK_SIZE, symbols);
     107//      }
     108//      if(bitblock::any(groups[blk].ends_4)) {
     109//      do_block<SYMBOL, hash_table <identity_strategy_t<uint32_t,4>, hash_strategy_t<4>, ALLOCATOR> >(hash_table_4, groups[blk].ends_4, &buffer[blk], 4, &buffer[blk], &buffer[blk], bytes2bits(4), BLOCK_SIZE, symbols);
     110//      }
     111//      if(bitblock::any(groups[blk].ends_5)) {
     112//      do_block<SYMBOL, hash_table <identity_strategy_t<uint32_t,5>, hash_strategy_t<5>, ALLOCATOR> >(hash_table_5, groups[blk].ends_5, &buffer[blk], 5, &buffer[blk], &buffer[blk], bytes2bits(5), BLOCK_SIZE, symbols);
     113//      }
     114//      if(bitblock::any(groups[blk].ends_6)) {
     115//      do_block<SYMBOL, hash_table <identity_strategy_t<uint32_t,6>, hash_strategy_t<6>, ALLOCATOR> >(hash_table_6, groups[blk].ends_6, &buffer[blk], 6, &buffer[blk], &buffer[blk], bytes2bits(6), BLOCK_SIZE, symbols);
     116//      }
     117//      if(bitblock::any(groups[blk].ends_7)) {
     118//      do_block<SYMBOL, hash_table <identity_strategy_t<uint32_t,7>, hash_strategy_t<7>, ALLOCATOR> >(hash_table_7, groups[blk].ends_7, &buffer[blk], 7, &buffer[blk], &buffer[blk], bytes2bits(7), BLOCK_SIZE, symbols);
     119//      }
     120//      ///////////////////////////////////////////////////////////////////////////////
     121//      // Bit Space Hash
     122//      ///////////////////////////////////////////////////////////////////////////////
     123//      if(bitblock::any(groups[blk].ends_8)) {
     124//      do_block<SYMBOL, hash_table <identity_strategy_t<uint64_t,8>, hash_strategy_d, ALLOCATOR> >(hash_table_8, groups[blk].ends_8, &buffer[blk], 8, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 8, BLOCK_SIZE/8, symbols);
     125//      }
     126//      if(bitblock::any(groups[blk].ends_9)) {
     127//      do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,9>, hash_strategy_d, ALLOCATOR> >(hash_table_9, groups[blk].ends_9, &buffer[blk], 9, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 9, BLOCK_SIZE/8, symbols);
     128//      }
     129//      if(bitblock::any(groups[blk].ends_10)) {
     130//      do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,10>, hash_strategy_d, ALLOCATOR> >(hash_table_10, groups[blk].ends_10, &buffer[blk], 10, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 10, BLOCK_SIZE/8, symbols);
     131//      }
     132//      if(bitblock::any(groups[blk].ends_11)) {
     133//      do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,11>, hash_strategy_d, ALLOCATOR> >(hash_table_11, groups[blk].ends_11, &buffer[blk], 11, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 11, BLOCK_SIZE/8, symbols);
     134//      }
     135//      if(bitblock::any(groups[blk].ends_12)) {
     136//      do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,12>, hash_strategy_d, ALLOCATOR> >(hash_table_12, groups[blk].ends_12, &buffer[blk], 12, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 12, BLOCK_SIZE/8, symbols);
     137//      }
     138//      if(bitblock::any(groups[blk].ends_13)) {
     139//      do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,13>, hash_strategy_d, ALLOCATOR> >(hash_table_13, groups[blk].ends_13, &buffer[blk], 13, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 13, BLOCK_SIZE/8, symbols);
     140//      }
     141//      if(bitblock::any(groups[blk].ends_14)) {
     142//      do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,14>, hash_strategy_d, ALLOCATOR> >(hash_table_14, groups[blk].ends_14, &buffer[blk], 14, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 14, BLOCK_SIZE/8, symbols);
     143//      }
     144//      if(bitblock::any(groups[blk].ends_15)) {
     145//      do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,15>, hash_strategy_d, ALLOCATOR> >(hash_table_15, groups[blk].ends_15, &buffer[blk], 15, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 15, BLOCK_SIZE/8, symbols);
     146//      }
     147//      if(bitblock::any(groups[blk].ends_16)) {
     148//      do_block<SYMBOL, hash_table<identity_strategy_t<BitBlock,16>, hash_strategy_d, ALLOCATOR> >(hash_table_16, groups[blk].ends_16, &buffer[blk], 16, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 16, BLOCK_SIZE/8, symbols);
     149//      }
     150//      if(bitblock::any(ends_gte_17[blk])) {
     151//      do_block<SYMBOL, hash_table<identity_strategy_d, hash_strategy_d, ALLOCATOR> >(hash_table_gte_17, &starts[blk], &ends_gte_17[blk], &buffer[blk], (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], BLOCK_SIZE/8, symbols);
     152//      }
     153
    151154    }
    152155
     
    180183/* NOTE: C++ template code and Pablo generated length groups must coincide. */
    181184
    182 // Fixed Lengths - REVERSE SCAN LOGIC - Scan each BLOCK MSB to LSB (high to low memory address)
     185// Fixed Lengths - REVERSE SCAN LOGIC - Scan each BLOCK MSB to LSB
    183186template<class SYMBOL, class HASH_TABLE>
    184 IDISA_ALWAYS_INLINE void do_block(HASH_TABLE & h_table, BitBlock ends, uint8_t buffer [], const uint32_t lgth,
    185                                   uint8_t h0 [], uint8_t h1 [], const uint32_t h_lgth, const uint32_t h_block_size,
    186                                   SYMBOL * aos , const int32_t block_base){
    187 
     187void do_block(uint32_t blk_offset,
     188              HASH_TABLE & h_table,
     189              BitBlock ends,
     190              uint8_t buffer [], const uint32_t lgth,
     191              uint8_t h0 [], uint8_t h1 [], const uint32_t h_lgth, const uint32_t h_block_size,
     192              SYMBOL & symbols) {
     193
     194    gid_type gid;
    188195    int32_t spos;
    189     ReverseScanner<BitBlock, scanword_t> rscanner(&ends);
    190 
    191     rscanner.scan_to_next();
    192     spos = (rscanner.get_pos() - lgth);
    193 
    194     while(!rscanner.is_done() && (spos >= 0)) {
    195 
    196         // Comment out LOOKBACK_SIZE . Nigel's code *should* not produce this case.
    197         aos[LOOKBACK_SIZE + block_base + spos].gid = h_table.lookup_or_insert(buffer, spos, lgth, h0, h1, h_lgth);
    198         aos[LOOKBACK_SIZE + block_base + spos].lgth = lgth;
    199 
    200 #ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG
    201         print_symbol_debug(buffer, spos, rscanner.get_pos(), lgth);
    202 #endif
    203         rscanner.scan_to_next();
    204         spos = (rscanner.get_pos() - lgth);
    205     }
    206 
    207     // Comment out. Nigel's code *should* not produce this case.
    208     if(!rscanner.is_done() && (spos < 0)) { // block boundary case.
    209 
    210         if(lgth > (LOOKBACK_SIZE)) {
    211             cerr << "Fatal Error.";
    212             cerr << " Symbol length exceeds " << (LOOKBACK_SIZE) << " bytes.";
    213             cerr << " Symbol tail : ";
    214             cerr << string((char *)&(buffer[rscanner.get_pos()-(LOOKBACK_SIZE+BLOCK_SIZE)]), LOOKBACK_SIZE+BLOCK_SIZE) << endl;
    215             abort();
    216         }
     196    int32_t epos;
     197    ForwardScanner<BitBlock, scanword_t> fscanner(&ends);
     198
     199    fscanner.scan_to_next();
     200    epos = fscanner.get_pos();
     201    spos = (epos - lgth);
     202
     203    if(!fscanner.is_done() && (spos < 0)) { // block boundary case
    217204
    218205        uint8_t * lb_buffer = buffer - ((lgth / BLOCK_SIZE) + 1)*BLOCK_SIZE;
     
    222209        uint8_t * lb_h1 = h1 - ((lgth / BLOCK_SIZE) + 1)*h_block_size;
    223210
    224         // Comment out LOOKBACK_SIZE . Nigel's code *should* not produce this case.
    225         aos[LOOKBACK_SIZE + block_base + lb_spos].gid = h_table.lookup_or_insert(lb_buffer, lb_spos, lgth, lb_h0, lb_h1, h_lgth);
    226         aos[LOOKBACK_SIZE + block_base + lb_spos].lgth = lgth;
     211        gid = h_table.lookup_or_insert(lb_buffer, lb_spos, lgth,
     212                                 lb_h0, lb_h1, h_lgth);
     213
     214        symbols.gids[blk_offset + spos] = gid;
     215
     216        epos = fscanner.scan_to_next();
     217        spos = (epos - lgth);
     218
     219    }
     220
     221    while(!fscanner.is_done() && (spos >= 0)) {
     222
     223        gid = h_table.lookup_or_insert(buffer, spos, lgth, h0, h1, h_lgth);
     224        //symbols.gids[epos] = gid;
     225        symbols.gids[blk_offset + spos] = gid;
    227226
    228227#ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG
    229         print_symbol_debug(buffer, spos, rscanner.get_pos(), lgth);
    230 #endif
    231 
    232     }
     228        print_symbol_debug(buffer, spos, epos, lgth);
     229#endif
     230        fscanner.scan_to_next();
     231        epos = fscanner.get_pos();
     232        spos = (epos - lgth);
     233    }
     234
    233235}
    234236
     
    255257        lgth = ends_rscanner.get_pos() - spos;
    256258        // Comment out LOOKBACK_SIZE . Nigel's code *should* not produce this case.
    257         aos[LOOKBACK_SIZE + block_base + spos].gid = h_table.lookup_or_insert(buffer, spos, lgth, h0, h1, lgth);
    258         aos[LOOKBACK_SIZE + block_base + spos].lgth = lgth;
     259//      aos[LOOKBACK_SIZE + block_base + spos].gid = h_table.lookup_or_insert(buffer, spos, lgth, h0, h1, lgth);
     260//      aos[LOOKBACK_SIZE + block_base + spos].lgth = lgth;
     261
     262
    259263
    260264#ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG
     
    304308
    305309        // Comment out LOOKBACK_SIZE. Nigel's code *should* not produce this case.
    306         aos[LOOKBACK_SIZE + block_base + lb_spos].gid = h_table.lookup_or_insert(lb_buffer, lb_spos, lgth, lb_h0, lb_h1, lgth);
    307         aos[LOOKBACK_SIZE + block_base + lb_spos].lgth = lgth;
     310//      aos[LOOKBACK_SIZE + block_base + lb_spos].gid = h_table.lookup_or_insert(lb_buffer, lb_spos, lgth, lb_h0, lb_h1, lgth);
     311//      aos[LOOKBACK_SIZE + block_base + lb_spos].lgth = lgth;
    308312
    309313        if((lb_spos) < 0) {
  • trunk/symbol_table/src/symbol_table.hpp

    r1995 r2001  
    1111#include "../lib/bitblock.hpp"
    1212#include "../lib/byte_pool.hpp"
     13#include "hash_table.hpp"
    1314
     15#include <vector>
    1416#include <iostream>
    1517using namespace std;
     
    1820
    1921///////////////////////////////////////////////////////////////////////////
    20 // Symbol Type Definitions - Warning: No bounds checks.
     22// Symbol Type - Array of gid_type
    2123///////////////////////////////////////////////////////////////////////////
     24
    2225class Symbol {
    2326public:
    24 //    IDISA_ALWAYS_INLINE void add_symbol(gid_type gid, uint8_t * raw_data, uint32_t lgth, uint32_t idx=0);
    25 //    IDISA_ALWAYS_INLINE gid_type get_gid(uint32_t idx=0) const;
    26 //    IDISA_ALWAYS_INLINE uint8_t * get_raw_data(uint32_t idx=0) const;
    27 //    IDISA_ALWAYS_INLINE uint32_t get_lgth(uint32_t idx=0) const;
     27    Symbol (uint32_t n) {
     28        init(n);
     29    }
     30
     31    void init(uint32_t n) {
     32        gids.reserve(n);
     33        //gids_idx.reserve((n/BLOCK_SIZE) + 1);
     34    }
     35
     36    vector<gid_type> gids;
     37    //vector<BitBlock> gids_idx;   // gids index
    2838};
    2939
    30 class AoS_symbol: public Symbol { // Xerces
    31 public:
    32 
    33 //    IDISA_ALWAYS_INLINE void add_symbol(gid_type gid, uint8_t * raw_data, uint32_t lgth, uint32_t idx=0) {
    34 //      this->gid = gid;
    35 //      this->raw_data = raw_data;
    36 //      this->lgth = lgth;
    37 //    }
    38 
    39 //    IDISA_ALWAYS_INLINE gid_type get_gid(uint32_t idx=0) const { return gid; }
    40 //    IDISA_ALWAYS_INLINE uint8_t * get_raw_data(uint32_t idx=0) const { return raw_data; }
    41 //    IDISA_ALWAYS_INLINE uint32_t get_lgth(uint32_t idx=0) const { return lgth; }
    42 
    43 //private:
    44     gid_type gid;
    45     uint8_t * raw_data;
    46     uint32_t lgth;
    47 };
    48 
    49 //class SoA_symbol: public Symbol { // Regex
    50 //public:
    51 
    52 //    IDISA_ALWAYS_INLINE void add_symbol(gid_type gid, uint8_t * raw_data, uint32_t lgth, uint32_t idx=0) {
    53 //      this->gid[idx] = gid;
    54 //      this->raw_data[idx] = raw_data;
    55 //      this->lgth[idx] = lgth;
    56 //    }
    57 
    58 //    IDISA_ALWAYS_INLINE gid_type get_gid(uint32_t idx=0) const { return this->gid[idx]; }
    59 //    IDISA_ALWAYS_INLINE uint8_t * get_raw_data(uint32_t idx=0) const { return this->raw_data[idx]; }
    60 //    IDISA_ALWAYS_INLINE uint32_t get_lgth(uint32_t idx=0) const { return this->lgth[idx]; }
    61 
    62 //private:
    63 //    gid_type * gid;
    64 //    uint8_t ** raw_data;
    65 //    uint32_t * lgth;
    66 //};
    67 
    6840///////////////////////////////////////////////////////////////////////////
    69 // Symbol Table Definition
     41// GID indexed POD array
    7042///////////////////////////////////////////////////////////////////////////
    7143
     
    7345public:
    7446    void resolve(uint8_t buffer [], Groups groups [],  BitBlock starts [], BitBlock ends_gte_17 [],
    75                  BitBlock h0 [], BitBlock h1 [], uint32_t blocks, SYMBOL * aos /*, const uint32_t symbols*/);
     47                 BitBlock h0 [], BitBlock h1 [], uint32_t blocks, SYMBOL & symbols /*, const uint32_t symbols*/);
    7648
    7749    //void resolve(uint8_t buffer [], Groups groups [],  BitBlock starts [], BitBlock ends_gte_17 [],
    78 //               BitBlock h0 [], BitBlock h1 [], uint32_t blocks, SoA_symbol & soa/*, const uint32_t symbols*/);
     50    //           BitBlock h0 [], BitBlock h1 [], uint32_t blocks, SoA_symbol & soa/*, const uint32_t symbols*/);
     51
     52    IDISA_ALWAYS_INLINE uint8_t * get_raw_data(uint32_t idx) const { return gid_data::get_raw_bytes(idx); }
     53    IDISA_ALWAYS_INLINE uint32_t get_lgth(uint32_t idx) const { return gid_data::get_bytes_lgth(idx); }
    7954
    8055protected:
    8156    symbol_table() {}
    8257    ~symbol_table() {}
     58
    8359};
    8460
    85 template<class HASH_TABLE>
    86 IDISA_ALWAYS_INLINE void do_block(HASH_TABLE & h_table, BitBlock ends, uint8_t buffer [], const uint32_t lgth,
    87                                   uint8_t h0 [], uint8_t h1 [], const uint32_t h_lgth, const uint32_t h_block_size);
     61template<class SYMBOL, class HASH_TABLE>
     62IDISA_ALWAYS_INLINE void do_block(uint32_t blk_offset,
     63                                  HASH_TABLE & h_table,
     64                                  BitBlock ends,
     65                                  uint8_t buffer [], const uint32_t lgth,
     66                                  uint8_t h0 [], uint8_t h1 [], const uint32_t h_lgth, const uint32_t h_block_size,
     67                                  SYMBOL & symbols);
    8868
    89 template<class HASH_TABLE>
    90 IDISA_ALWAYS_INLINE void do_block(HASH_TABLE & h_table, BitBlock starts [], BitBlock ends [], uint8_t buffer [],
    91                                   uint8_t h0 [], uint8_t h1 [], const uint32_t h_block_size);
     69
     70//template<class HASH_TABLE>
     71//IDISA_ALWAYS_INLINE void do_block(uint32_t blk, HASH_TABLE & h_table, BitBlock starts [], BitBlock ends [], uint8_t buffer [],
     72//                                uint8_t h0 [], uint8_t h1 [], const uint32_t h_block_size);
    9273
    9374#endif // SYMBOL_TABLE_HPP
Note: See TracChangeset for help on using the changeset viewer.