Changeset 2001
- Timestamp:
- 04/07/12 20:39:25 (15 months ago)
- Location:
- trunk/symbol_table
- Files:
-
- 6 modified
-
main_template.cpp (modified) (6 diffs)
-
src/Makefile (modified) (1 diff)
-
src/buffer.hpp (modified) (1 diff)
-
src/hash_table.hpp (modified) (5 diffs)
-
src/id_symbol_table.hpp (modified) (6 diffs)
-
src/symbol_table.hpp (modified) (3 diffs)
Legend:
- Unmodified
- Added
- Removed
-
trunk/symbol_table/main_template.cpp
r1995 r2001 58 58 #endif 59 59 60 61 // Target symbol type must inherit from AoS_symbol62 class MySymbol: public AoS_symbol63 {64 public:65 bool param_1;66 };67 68 60 int main(int argc, char * argv[]) { 69 61 … … 121 113 BitBlock * lookback_ends_gte_17 = (BitBlock *) aligned_ends_gte_17; 122 114 memset(lookback_ends_gte_17,0,LOOKBACK_SIZE/BLOCK_SIZE); 123 BitBlock * ends_gte_17 = &lookback_h1[LOOKBACK_SIZE/BLOCK_SIZE]; 124 115 BitBlock * ends_gte_17 = &lookback_ends_gte_17[LOOKBACK_SIZE/BLOCK_SIZE]; 125 116 126 117 // BitSteams - Without lookback … … 131 122 132 123 // Symbol Table 133 const uint32_t SYMBOL_COUNT = LOOKBACK_SIZE +SEGMENT_SIZE;134 //AoS_symbol symbol_ary[SYMBOL_COUNT]; 135 MySymbol symbol_ary[SYMBOL_COUNT];136 id_symbol_table< MySymbol, fast_pool_allocator<1024> > symbol_table;124 const uint32_t SYMBOL_COUNT = SEGMENT_SIZE; 125 126 Symbol symbols(SYMBOL_COUNT); 127 id_symbol_table<Symbol, fast_pool_allocator<1024> > symbol_table; 137 128 138 129 is.read ((char *)raw_buffer, SEGMENT_SIZE); … … 164 155 165 156 PERF_SEC_START(parser_timer); 166 symbol_table.resolve(raw_buffer, groups, starts, ends_gte_17, h0, h1, SEGMENT_BLOCKS, symbol _ary/*, SYMBOL_COUNT*/);157 symbol_table.resolve(raw_buffer, groups, starts, ends_gte_17, h0, h1, SEGMENT_BLOCKS, symbols /*, SYMBOL_COUNT*/); 167 158 PERF_SEC_END(parser_timer, SEGMENT_SIZE); 168 159 … … 182 173 183 174 // test 184 uint32_t blk_base = 0; 185 uint32_t idx = 0; 186 for(blk=0;blk<SEGMENT_BLOCKS + LOOKBACK_BLOCKS;blk++) { 187 blk_base = blk * BLOCK_SIZE; 188 ForwardScanner<BitBlock, scanword_t> fscanner(&starts[blk]); 175 uint32_t blk_offset; 176 for(int blk=0;blk<SEGMENT_BLOCKS;blk++) { 177 blk_offset = blk * BLOCKSIZE; 178 gid_type gid; 179 ForwardScanner<BitBlock, scanword_t> fscanner(&(groups[blk].starts)); 180 189 181 fscanner.scan_to_next(); 190 182 while(!fscanner.is_done()) { 191 idx = LOOKBACK_SIZE + blk_base + fscanner.get_pos(); 192 cout <<"[" << idx << "]" << "=" << "(" << symbol_ary[idx].gid << 
"," << symbol_ary[idx].lgth << ")" << endl; 183 gid = symbols.gids[fscanner.get_pos() + blk_offset]; 184 // cout <<"Symbol[" << fscanner.get_pos() << "] = " 185 // << "(gid:" << gid << ",raw:" 186 // << string((char *)symbol_table.get_raw_data(gid), symbol_table.get_lgth(gid))<< ")" << endl; 187 cout << string((char *)symbol_table.get_raw_data(gid), symbol_table.get_lgth(gid)) << ","; 188 193 189 fscanner.scan_to_next(); 190 194 191 } 195 192 } 196 193 } 197 198 194 /* Resolve Partial Segments */ 199 195 uint32_t remaining = chars_avail; … … 237 233 238 234 //PERF_SEC_START(parser_timer); 239 symbol_table.resolve(raw_buffer, groups, starts, ends_gte_17, h0, h1, blk, symbol_ary/*, SYMBOL_COUNT*/);235 symbol_table.resolve(raw_buffer, groups, starts, ends_gte_17, h0, h1, segment_size, symbols/*, SYMBOL_COUNT*/); 240 236 //PERF_SEC_END(parser_timer, chars_avail+1); 241 237 242 // test243 uint32_t blk_base = 0;244 uint32_t idx = 0;245 for(blk=0;blk<segment_size + LOOKBACK_BLOCKS;blk++) { 246 blk_base = blk * BLOCK_SIZE;247 ForwardScanner<BitBlock, scanword_t> fscanner(&starts[blk]); 238 uint32_t blk_offset; 239 for(int blk=0;blk<segment_size;blk++) { 240 blk_offset = blk * BLOCKSIZE; 241 gid_type gid; 242 ForwardScanner<BitBlock, scanword_t> fscanner(&(groups[blk].starts)); 243 248 244 fscanner.scan_to_next(); 249 245 while(!fscanner.is_done()) { 250 idx = LOOKBACK_SIZE + blk_base + fscanner.get_pos(); 251 cout <<"[" << idx << "]" << "=" << "(" << symbol_ary[idx].gid << "," << symbol_ary[idx].lgth << ")" << endl; 246 gid = symbols.gids[fscanner.get_pos() + blk_offset]; 247 // cout <<"Symbol[" << fscanner.get_pos() << "] = " 248 // << "(gid:" << gid << ",raw:" 249 // << string((char *)symbol_table.get_raw_data(gid), symbol_table.get_lgth(gid))<< ")" << endl; 250 cout << string((char *)symbol_table.get_raw_data(gid), symbol_table.get_lgth(gid)) << ","; 251 252 252 fscanner.scan_to_next(); 253 253 254 } 254 255 } -
trunk/symbol_table/src/Makefile
r1989 r2001 14 14 15 15 all: basis_bits.hpp buffer.hpp byte_pool.hpp hash_strms.hpp hash_table.hpp id_group_strms.hpp id_symbol_table.hpp main.cpp Makefile marker_strms.hpp symbol_table.hpp transpose.hpp 16 $(CC) -o main main.cpp $(AFLAGS) -DHASH_TABLE_HPP_DEBUG # -DBUFFER_PROFILING -DID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG 16 $(CC) -o main main.cpp $(AFLAGS) #-DHASH_TABLE_HPP_DEBUG # -DBUFFER_PROFILING -DID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG 17 17 18 18 clean: -
trunk/symbol_table/src/buffer.hpp
r1979 r2001 9 9 #define LOOKBACK_BLOCKS 1 10 10 #define LOOKBACK_SIZE BLOCK_SIZE * LOOKBACK_BLOCKS 11 #define SEGMENT_BLOCKS 10 11 #define SEGMENT_BLOCKS 100 // Starts and NO COPY BACK 12 12 #define SEGMENT_SIZE BLOCK_SIZE * (SEGMENT_BLOCKS) // (bytes) a multiple of BLOCK_SIZE 13 13 #define SEGMENT_ALLOC_SIZE (LOOKBACK_SIZE + SEGMENT_SIZE + PADDING_SIZE) / sizeof(BitBlock) // (bytes) -
trunk/symbol_table/src/hash_table.hpp
r1992 r2001 33 33 #include <sstream> 34 34 #include <iostream> 35 #include <vector> 35 36 using namespace std; 36 37 … … 47 48 } node; 48 49 50 // TODO - Single GID. 51 // For multiple GID sets refactor such that 52 // Hash Tables consult the parent Symbol Table for a per Symbol Table instance GID. 49 53 class gid { 50 54 public: … … 54 58 }; 55 59 56 /* Global GID for all hash_tables */ 57 uint64_t gid::value = 1; 60 // TODO - Single GID data. 61 // WARNING - No bounds checking. 62 uint64_t gid::value = 0; 63 64 class gid_data { 65 public: 66 67 static void add_data(uint8_t * raw_bytes, uint32_t raw_bytes_lgth) { 68 data next; 69 next.raw_bytes = raw_bytes; 70 next.raw_bytes_lgth = raw_bytes_lgth; 71 values.push_back(next); 72 } 73 74 static size_t max() { return values.size(); } 75 76 static uint8_t * get_raw_bytes(size_t idx) { 77 return values.at(idx).raw_bytes; 78 } 79 80 static uint32_t get_bytes_lgth(size_t idx) { 81 return values.at(idx).raw_bytes_lgth; 82 } 83 84 private: 85 typedef struct data { 86 uint8_t * raw_bytes; 87 uint32_t raw_bytes_lgth; 88 } data; 89 90 static vector<data> values; 91 }; 92 93 /* Global GID data for all hash tables. */ 94 vector<gid_data::data> gid_data::values; 58 95 59 96 template<class COMPARE_STRATEGY, class HASH_STRATEGY, class ALLOCATOR> 60 97 class hash_table { 98 61 99 public: 62 100 … … 127 165 uint64_t x1 = bit_slice(h1, idx, hash_bit_lgth); 128 166 167 uint8_t * data_pool_raw_bytes = raw_data_pool.insert(&raw_bytes[idx],raw_byte_lgth); // persist 168 129 169 insert( bucket, 130 raw_data_pool.insert(&raw_bytes[idx],raw_byte_lgth),170 data_pool_raw_bytes, 131 171 raw_byte_lgth, 132 172 raw_data_pool.insert((uint8_t *)&x0, bits2bytes(hash_bit_lgth)), … … 147 187 elements++; 148 188 #endif 189 190 gid_data::add_data(data_pool_raw_bytes,raw_byte_lgth); 149 191 150 192 return gid; -
trunk/symbol_table/src/id_symbol_table.hpp
r1995 r2001 27 27 28 28 #include "symbol_table.hpp" 29 #include "hash_table.hpp"30 29 #include "buffer.hpp" 31 30 #include "../lib/carryQ.hpp" … … 81 80 // Groups & groups 82 81 void resolve(uint8_t buffer [], Groups groups [], BitBlock starts [], BitBlock ends_gte_17 [], 83 BitBlock h0 [], BitBlock h1 [], uint32_t blocks, SYMBOL * aos/*, const uint32_t symbols*/) { 84 85 86 uint32_t blk_base; 82 BitBlock h0 [], BitBlock h1 [], uint32_t blocks, SYMBOL & symbols) { 83 84 uint32_t blk_offset; 87 85 88 86 for(uint32_t blk=0;blk<blocks;blk++) { 89 87 90 blk_base = blk*BLOCK_SIZE; 91 88 blk_offset = blk * BLOCKSIZE; 92 89 /////////////////////////////////////////////////////////////////////////////// 93 90 // Byte Space Hash 94 91 /////////////////////////////////////////////////////////////////////////////// 95 92 if(bitblock::any(groups[blk].ends_1)) { 96 do_block<SYMBOL, hash_table <identity_strategy_t<uint8_t,1>, hash_strategy_t<1>, ALLOCATOR> >(hash_table_1, groups[blk].ends_1, &buffer[blk_base], 1, &buffer[blk_base], &buffer[blk_base], bytes2bits(1), BLOCK_SIZE, aos, blk_base); 93 do_block<SYMBOL, hash_table <identity_strategy_t<uint8_t,1>, hash_strategy_t<1>, ALLOCATOR> > 94 (blk_offset, 95 hash_table_1, 96 groups[blk].ends_1, 97 &buffer[blk_offset], 1, /* buffer, symbol length */ 98 &buffer[blk_offset], &buffer[blk_offset], bytes2bits(1), BLOCK_SIZE, /* h0, h1, hash lgth (bits), hash block size (bits) */ 99 symbols); 100 } 97 101 } 98 if(bitblock::any(groups[blk].ends_2)) { 99 do_block<SYMBOL, hash_table <identity_strategy_t<uint16_t,2>, hash_strategy_t<2>, ALLOCATOR> >(hash_table_2, groups[blk].ends_2, &buffer[blk_base], 2, &buffer[blk_base], &buffer[blk_base], bytes2bits(2), BLOCK_SIZE, aos, blk_base); 100 } 101 if(bitblock::any(groups[blk].ends_3)) { 102 do_block<SYMBOL, hash_table <identity_strategy_t<uint16_t,3>, hash_strategy_t<3>, ALLOCATOR> >(hash_table_3, groups[blk].ends_3, &buffer[blk_base], 3, &buffer[blk_base], &buffer[blk_base], bytes2bits(3), 
BLOCK_SIZE, aos, blk_base); 103 } 104 if(bitblock::any(groups[blk].ends_4)) { 105 do_block<SYMBOL, hash_table <identity_strategy_t<uint32_t,4>, hash_strategy_t<4>, ALLOCATOR> >(hash_table_4, groups[blk].ends_4, &buffer[blk_base], 4, &buffer[blk_base], &buffer[blk_base], bytes2bits(4), BLOCK_SIZE, aos, blk_base); 106 } 107 if(bitblock::any(groups[blk].ends_5)) { 108 do_block<SYMBOL, hash_table <identity_strategy_t<uint32_t,5>, hash_strategy_t<5>, ALLOCATOR> >(hash_table_5, groups[blk].ends_5, &buffer[blk_base], 5, &buffer[blk_base], &buffer[blk_base], bytes2bits(5), BLOCK_SIZE, aos, blk_base); 109 } 110 if(bitblock::any(groups[blk].ends_6)) { 111 do_block<SYMBOL, hash_table <identity_strategy_t<uint32_t,6>, hash_strategy_t<6>, ALLOCATOR> >(hash_table_6, groups[blk].ends_6, &buffer[blk_base], 6, &buffer[blk_base], &buffer[blk_base], bytes2bits(6), BLOCK_SIZE, aos, blk_base); 112 } 113 if(bitblock::any(groups[blk].ends_7)) { 114 do_block<SYMBOL, hash_table <identity_strategy_t<uint32_t,7>, hash_strategy_t<7>, ALLOCATOR> >(hash_table_7, groups[blk].ends_7, &buffer[blk_base], 7, &buffer[blk_base], &buffer[blk_base], bytes2bits(7), BLOCK_SIZE, aos, blk_base); 115 } 116 /////////////////////////////////////////////////////////////////////////////// 117 // Bit Space Hash 118 /////////////////////////////////////////////////////////////////////////////// 119 if(bitblock::any(groups[blk].ends_8)) { 120 do_block<SYMBOL, hash_table <identity_strategy_t<uint64_t,8>, hash_strategy_d, ALLOCATOR> >(hash_table_8, groups[blk].ends_8, &buffer[blk_base], 8, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 8, BLOCK_SIZE/8, aos, blk_base); 121 } 122 if(bitblock::any(groups[blk].ends_9)) { 123 do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,9>, hash_strategy_d, ALLOCATOR> >(hash_table_9, groups[blk].ends_9, &buffer[blk_base], 9, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 9, BLOCK_SIZE/8, aos, blk_base); 124 } 125 if(bitblock::any(groups[blk].ends_10)) { 126 do_block<SYMBOL, 
hash_table<identity_strategy_t<uint64_t,10>, hash_strategy_d, ALLOCATOR> >(hash_table_10, groups[blk].ends_10, &buffer[blk_base], 10, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 10, BLOCK_SIZE/8, aos, blk_base); 127 } 128 if(bitblock::any(groups[blk].ends_11)) { 129 do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,11>, hash_strategy_d, ALLOCATOR> >(hash_table_11, groups[blk].ends_11, &buffer[blk_base], 11, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 11, BLOCK_SIZE/8, aos, blk_base); 130 } 131 if(bitblock::any(groups[blk].ends_12)) { 132 do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,12>, hash_strategy_d, ALLOCATOR> >(hash_table_12, groups[blk].ends_12, &buffer[blk_base], 12, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 12, BLOCK_SIZE/8, aos, blk_base); 133 } 134 if(bitblock::any(groups[blk].ends_13)) { 135 do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,13>, hash_strategy_d, ALLOCATOR> >(hash_table_13, groups[blk].ends_13, &buffer[blk_base], 13, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 13, BLOCK_SIZE/8, aos, blk_base); 136 } 137 if(bitblock::any(groups[blk].ends_14)) { 138 do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,14>, hash_strategy_d, ALLOCATOR> >(hash_table_14, groups[blk].ends_14, &buffer[blk_base], 14, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 14, BLOCK_SIZE/8, aos, blk_base); 139 } 140 if(bitblock::any(groups[blk].ends_15)) { 141 do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,15>, hash_strategy_d, ALLOCATOR> >(hash_table_15, groups[blk].ends_15, &buffer[blk_base], 15, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 15, BLOCK_SIZE/8, aos, blk_base); 142 } 143 if(bitblock::any(groups[blk].ends_16)) { 144 do_block<SYMBOL, hash_table<identity_strategy_t<BitBlock,16>, hash_strategy_d, ALLOCATOR> >(hash_table_16, groups[blk].ends_16, &buffer[blk_base], 16, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 16, BLOCK_SIZE/8, aos, blk_base); 145 } 146 if(bitblock::any(ends_gte_17[blk])) { 147 do_block<SYMBOL, 
hash_table<identity_strategy_d, hash_strategy_d, ALLOCATOR> >(hash_table_gte_17, &starts[blk], &ends_gte_17[blk], &buffer[blk_base], (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], BLOCK_SIZE/8, aos, blk_base); 148 } 149 150 } 102 // if(bitblock::any(groups[blk].ends_2)) { 103 // do_block<SYMBOL, hash_table <identity_strategy_t<uint16_t,2>, hash_strategy_t<2>, ALLOCATOR> >(hash_table_2, groups[blk].ends_2, &buffer[blk], 2, &buffer[blk], &buffer[blk], bytes2bits(2), BLOCK_SIZE, symbols); 104 // } 105 // if(bitblock::any(groups[blk].ends_3)) { 106 // do_block<SYMBOL, hash_table <identity_strategy_t<uint16_t,3>, hash_strategy_t<3>, ALLOCATOR> >(hash_table_3, groups[blk].ends_3, &buffer[blk], 3, &buffer[blk], &buffer[blk], bytes2bits(3), BLOCK_SIZE, symbols); 107 // } 108 // if(bitblock::any(groups[blk].ends_4)) { 109 // do_block<SYMBOL, hash_table <identity_strategy_t<uint32_t,4>, hash_strategy_t<4>, ALLOCATOR> >(hash_table_4, groups[blk].ends_4, &buffer[blk], 4, &buffer[blk], &buffer[blk], bytes2bits(4), BLOCK_SIZE, symbols); 110 // } 111 // if(bitblock::any(groups[blk].ends_5)) { 112 // do_block<SYMBOL, hash_table <identity_strategy_t<uint32_t,5>, hash_strategy_t<5>, ALLOCATOR> >(hash_table_5, groups[blk].ends_5, &buffer[blk], 5, &buffer[blk], &buffer[blk], bytes2bits(5), BLOCK_SIZE, symbols); 113 // } 114 // if(bitblock::any(groups[blk].ends_6)) { 115 // do_block<SYMBOL, hash_table <identity_strategy_t<uint32_t,6>, hash_strategy_t<6>, ALLOCATOR> >(hash_table_6, groups[blk].ends_6, &buffer[blk], 6, &buffer[blk], &buffer[blk], bytes2bits(6), BLOCK_SIZE, symbols); 116 // } 117 // if(bitblock::any(groups[blk].ends_7)) { 118 // do_block<SYMBOL, hash_table <identity_strategy_t<uint32_t,7>, hash_strategy_t<7>, ALLOCATOR> >(hash_table_7, groups[blk].ends_7, &buffer[blk], 7, &buffer[blk], &buffer[blk], bytes2bits(7), BLOCK_SIZE, symbols); 119 // } 120 // /////////////////////////////////////////////////////////////////////////////// 121 // // Bit Space Hash 122 // 
/////////////////////////////////////////////////////////////////////////////// 123 // if(bitblock::any(groups[blk].ends_8)) { 124 // do_block<SYMBOL, hash_table <identity_strategy_t<uint64_t,8>, hash_strategy_d, ALLOCATOR> >(hash_table_8, groups[blk].ends_8, &buffer[blk], 8, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 8, BLOCK_SIZE/8, symbols); 125 // } 126 // if(bitblock::any(groups[blk].ends_9)) { 127 // do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,9>, hash_strategy_d, ALLOCATOR> >(hash_table_9, groups[blk].ends_9, &buffer[blk], 9, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 9, BLOCK_SIZE/8, symbols); 128 // } 129 // if(bitblock::any(groups[blk].ends_10)) { 130 // do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,10>, hash_strategy_d, ALLOCATOR> >(hash_table_10, groups[blk].ends_10, &buffer[blk], 10, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 10, BLOCK_SIZE/8, symbols); 131 // } 132 // if(bitblock::any(groups[blk].ends_11)) { 133 // do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,11>, hash_strategy_d, ALLOCATOR> >(hash_table_11, groups[blk].ends_11, &buffer[blk], 11, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 11, BLOCK_SIZE/8, symbols); 134 // } 135 // if(bitblock::any(groups[blk].ends_12)) { 136 // do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,12>, hash_strategy_d, ALLOCATOR> >(hash_table_12, groups[blk].ends_12, &buffer[blk], 12, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 12, BLOCK_SIZE/8, symbols); 137 // } 138 // if(bitblock::any(groups[blk].ends_13)) { 139 // do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,13>, hash_strategy_d, ALLOCATOR> >(hash_table_13, groups[blk].ends_13, &buffer[blk], 13, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 13, BLOCK_SIZE/8, symbols); 140 // } 141 // if(bitblock::any(groups[blk].ends_14)) { 142 // do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,14>, hash_strategy_d, ALLOCATOR> >(hash_table_14, groups[blk].ends_14, &buffer[blk], 14, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 14, 
BLOCK_SIZE/8, symbols); 143 // } 144 // if(bitblock::any(groups[blk].ends_15)) { 145 // do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,15>, hash_strategy_d, ALLOCATOR> >(hash_table_15, groups[blk].ends_15, &buffer[blk], 15, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 15, BLOCK_SIZE/8, symbols); 146 // } 147 // if(bitblock::any(groups[blk].ends_16)) { 148 // do_block<SYMBOL, hash_table<identity_strategy_t<BitBlock,16>, hash_strategy_d, ALLOCATOR> >(hash_table_16, groups[blk].ends_16, &buffer[blk], 16, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 16, BLOCK_SIZE/8, symbols); 149 // } 150 // if(bitblock::any(ends_gte_17[blk])) { 151 // do_block<SYMBOL, hash_table<identity_strategy_d, hash_strategy_d, ALLOCATOR> >(hash_table_gte_17, &starts[blk], &ends_gte_17[blk], &buffer[blk], (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], BLOCK_SIZE/8, symbols); 152 // } 153 151 154 } 152 155 … … 180 183 /* NOTE: C++ template code and Pablo generated length groups must coincide. */ 181 184 182 // Fixed Lengths - REVERSE SCAN LOGIC - Scan each BLOCK MSB to LSB (high to low memory address)185 // Fixed Lengths - REVERSE SCAN LOGIC - Scan each BLOCK MSB to LSB 183 186 template<class SYMBOL, class HASH_TABLE> 184 IDISA_ALWAYS_INLINE void do_block(HASH_TABLE & h_table, BitBlock ends, uint8_t buffer [], const uint32_t lgth, 185 uint8_t h0 [], uint8_t h1 [], const uint32_t h_lgth, const uint32_t h_block_size, 186 SYMBOL * aos , const int32_t block_base){ 187 187 void do_block(uint32_t blk_offset, 188 HASH_TABLE & h_table, 189 BitBlock ends, 190 uint8_t buffer [], const uint32_t lgth, 191 uint8_t h0 [], uint8_t h1 [], const uint32_t h_lgth, const uint32_t h_block_size, 192 SYMBOL & symbols) { 193 194 gid_type gid; 188 195 int32_t spos; 189 ReverseScanner<BitBlock, scanword_t> rscanner(&ends); 190 191 rscanner.scan_to_next(); 192 spos = (rscanner.get_pos() - lgth); 193 194 while(!rscanner.is_done() && (spos >= 0)) { 195 196 // Comment out LOOKBACK_SIZE . 
Nigel's code *should* not produce this case. 197 aos[LOOKBACK_SIZE + block_base + spos].gid = h_table.lookup_or_insert(buffer, spos, lgth, h0, h1, h_lgth); 198 aos[LOOKBACK_SIZE + block_base + spos].lgth = lgth; 199 200 #ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG 201 print_symbol_debug(buffer, spos, rscanner.get_pos(), lgth); 202 #endif 203 rscanner.scan_to_next(); 204 spos = (rscanner.get_pos() - lgth); 205 } 206 207 // Comment out. Nigel's code *should* not produce this case. 208 if(!rscanner.is_done() && (spos < 0)) { // block boundary case. 209 210 if(lgth > (LOOKBACK_SIZE)) { 211 cerr << "Fatal Error."; 212 cerr << " Symbol length exceeds " << (LOOKBACK_SIZE) << " bytes."; 213 cerr << " Symbol tail : "; 214 cerr << string((char *)&(buffer[rscanner.get_pos()-(LOOKBACK_SIZE+BLOCK_SIZE)]), LOOKBACK_SIZE+BLOCK_SIZE) << endl; 215 abort(); 216 } 196 int32_t epos; 197 ForwardScanner<BitBlock, scanword_t> fscanner(&ends); 198 199 fscanner.scan_to_next(); 200 epos = fscanner.get_pos(); 201 spos = (epos - lgth); 202 203 if(!fscanner.is_done() && (spos < 0)) { // block boundary case 217 204 218 205 uint8_t * lb_buffer = buffer - ((lgth / BLOCK_SIZE) + 1)*BLOCK_SIZE; … … 222 209 uint8_t * lb_h1 = h1 - ((lgth / BLOCK_SIZE) + 1)*h_block_size; 223 210 224 // Comment out LOOKBACK_SIZE . Nigel's code *should* not produce this case. 
225 aos[LOOKBACK_SIZE + block_base + lb_spos].gid = h_table.lookup_or_insert(lb_buffer, lb_spos, lgth, lb_h0, lb_h1, h_lgth); 226 aos[LOOKBACK_SIZE + block_base + lb_spos].lgth = lgth; 211 gid = h_table.lookup_or_insert(lb_buffer, lb_spos, lgth, 212 lb_h0, lb_h1, h_lgth); 213 214 symbols.gids[blk_offset + spos] = gid; 215 216 epos = fscanner.scan_to_next(); 217 spos = (epos - lgth); 218 219 } 220 221 while(!fscanner.is_done() && (spos >= 0)) { 222 223 gid = h_table.lookup_or_insert(buffer, spos, lgth, h0, h1, h_lgth); 224 //symbols.gids[epos] = gid; 225 symbols.gids[blk_offset + spos] = gid; 227 226 228 227 #ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG 229 print_symbol_debug(buffer, spos, rscanner.get_pos(), lgth); 230 #endif 231 232 } 228 print_symbol_debug(buffer, spos, epos, lgth); 229 #endif 230 fscanner.scan_to_next(); 231 epos = fscanner.get_pos(); 232 spos = (epos - lgth); 233 } 234 233 235 } 234 236 … … 255 257 lgth = ends_rscanner.get_pos() - spos; 256 258 // Comment out LOOKBACK_SIZE . Nigel's code *should* not produce this case. 257 aos[LOOKBACK_SIZE + block_base + spos].gid = h_table.lookup_or_insert(buffer, spos, lgth, h0, h1, lgth); 258 aos[LOOKBACK_SIZE + block_base + spos].lgth = lgth; 259 // aos[LOOKBACK_SIZE + block_base + spos].gid = h_table.lookup_or_insert(buffer, spos, lgth, h0, h1, lgth); 260 // aos[LOOKBACK_SIZE + block_base + spos].lgth = lgth; 261 262 259 263 260 264 #ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG … … 304 308 305 309 // Comment out LOOKBACK_SIZE. Nigel's code *should* not produce this case. 306 aos[LOOKBACK_SIZE + block_base + lb_spos].gid = h_table.lookup_or_insert(lb_buffer, lb_spos, lgth, lb_h0, lb_h1, lgth);307 aos[LOOKBACK_SIZE + block_base + lb_spos].lgth = lgth;310 // aos[LOOKBACK_SIZE + block_base + lb_spos].gid = h_table.lookup_or_insert(lb_buffer, lb_spos, lgth, lb_h0, lb_h1, lgth); 311 // aos[LOOKBACK_SIZE + block_base + lb_spos].lgth = lgth; 308 312 309 313 if((lb_spos) < 0) { -
trunk/symbol_table/src/symbol_table.hpp
r1995 r2001 11 11 #include "../lib/bitblock.hpp" 12 12 #include "../lib/byte_pool.hpp" 13 #include "hash_table.hpp" 13 14 15 #include <vector> 14 16 #include <iostream> 15 17 using namespace std; … … 18 20 19 21 /////////////////////////////////////////////////////////////////////////// 20 // Symbol Type Definitions - Warning: No bounds checks.22 // Symbol Type - Array of gid_type 21 23 /////////////////////////////////////////////////////////////////////////// 24 22 25 class Symbol { 23 26 public: 24 // IDISA_ALWAYS_INLINE void add_symbol(gid_type gid, uint8_t * raw_data, uint32_t lgth, uint32_t idx=0); 25 // IDISA_ALWAYS_INLINE gid_type get_gid(uint32_t idx=0) const; 26 // IDISA_ALWAYS_INLINE uint8_t * get_raw_data(uint32_t idx=0) const; 27 // IDISA_ALWAYS_INLINE uint32_t get_lgth(uint32_t idx=0) const; 27 Symbol (uint32_t n) { 28 init(n); 29 } 30 31 void init(uint32_t n) { 32 gids.reserve(n); 33 //gids_idx.reserve((n/BLOCK_SIZE) + 1); 34 } 35 36 vector<gid_type> gids; 37 //vector<BitBlock> gids_idx; // gids index 28 38 }; 29 39 30 class AoS_symbol: public Symbol { // Xerces31 public:32 33 // IDISA_ALWAYS_INLINE void add_symbol(gid_type gid, uint8_t * raw_data, uint32_t lgth, uint32_t idx=0) {34 // this->gid = gid;35 // this->raw_data = raw_data;36 // this->lgth = lgth;37 // }38 39 // IDISA_ALWAYS_INLINE gid_type get_gid(uint32_t idx=0) const { return gid; }40 // IDISA_ALWAYS_INLINE uint8_t * get_raw_data(uint32_t idx=0) const { return raw_data; }41 // IDISA_ALWAYS_INLINE uint32_t get_lgth(uint32_t idx=0) const { return lgth; }42 43 //private:44 gid_type gid;45 uint8_t * raw_data;46 uint32_t lgth;47 };48 49 //class SoA_symbol: public Symbol { // Regex50 //public:51 52 // IDISA_ALWAYS_INLINE void add_symbol(gid_type gid, uint8_t * raw_data, uint32_t lgth, uint32_t idx=0) {53 // this->gid[idx] = gid;54 // this->raw_data[idx] = raw_data;55 // this->lgth[idx] = lgth;56 // }57 58 // IDISA_ALWAYS_INLINE gid_type get_gid(uint32_t idx=0) const { return this->gid[idx]; 
}59 // IDISA_ALWAYS_INLINE uint8_t * get_raw_data(uint32_t idx=0) const { return this->raw_data[idx]; }60 // IDISA_ALWAYS_INLINE uint32_t get_lgth(uint32_t idx=0) const { return this->lgth[idx]; }61 62 //private:63 // gid_type * gid;64 // uint8_t ** raw_data;65 // uint32_t * lgth;66 //};67 68 40 /////////////////////////////////////////////////////////////////////////// 69 // Symbol Table Definition41 // GID indexed POD array 70 42 /////////////////////////////////////////////////////////////////////////// 71 43 … … 73 45 public: 74 46 void resolve(uint8_t buffer [], Groups groups [], BitBlock starts [], BitBlock ends_gte_17 [], 75 BitBlock h0 [], BitBlock h1 [], uint32_t blocks, SYMBOL * aos /*, const uint32_t symbols*/);47 BitBlock h0 [], BitBlock h1 [], uint32_t blocks, SYMBOL & symbols /*, const uint32_t symbols*/); 76 48 77 49 //void resolve(uint8_t buffer [], Groups groups [], BitBlock starts [], BitBlock ends_gte_17 [], 78 // BitBlock h0 [], BitBlock h1 [], uint32_t blocks, SoA_symbol & soa/*, const uint32_t symbols*/); 50 // BitBlock h0 [], BitBlock h1 [], uint32_t blocks, SoA_symbol & soa/*, const uint32_t symbols*/); 51 52 IDISA_ALWAYS_INLINE uint8_t * get_raw_data(uint32_t idx) const { return gid_data::get_raw_bytes(idx); } 53 IDISA_ALWAYS_INLINE uint32_t get_lgth(uint32_t idx) const { return gid_data::get_bytes_lgth(idx); } 79 54 80 55 protected: 81 56 symbol_table() {} 82 57 ~symbol_table() {} 58 83 59 }; 84 60 85 template<class HASH_TABLE> 86 IDISA_ALWAYS_INLINE void do_block(HASH_TABLE & h_table, BitBlock ends, uint8_t buffer [], const uint32_t lgth, 87 uint8_t h0 [], uint8_t h1 [], const uint32_t h_lgth, const uint32_t h_block_size); 61 template<class SYMBOL, class HASH_TABLE> 62 IDISA_ALWAYS_INLINE void do_block(uint32_t blk_offset, 63 HASH_TABLE & h_table, 64 BitBlock ends, 65 uint8_t buffer [], const uint32_t lgth, 66 uint8_t h0 [], uint8_t h1 [], const uint32_t h_lgth, const uint32_t h_block_size, 67 SYMBOL & symbols); 88 68 89 template<class 
HASH_TABLE> 90 IDISA_ALWAYS_INLINE void do_block(HASH_TABLE & h_table, BitBlock starts [], BitBlock ends [], uint8_t buffer [], 91 uint8_t h0 [], uint8_t h1 [], const uint32_t h_block_size); 69 70 //template<class HASH_TABLE> 71 //IDISA_ALWAYS_INLINE void do_block(uint32_t blk, HASH_TABLE & h_table, BitBlock starts [], BitBlock ends [], uint8_t buffer [], 72 // uint8_t h0 [], uint8_t h1 [], const uint32_t h_block_size); 92 73 93 74 #endif // SYMBOL_TABLE_HPP
