Changeset 2052
- Timestamp:
- Apr 24, 2012, 7:37:05 PM (7 years ago)
- Location:
- trunk/symbol_table
- Files:
-
- 1 deleted
- 5 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/symbol_table/main_template.cpp
r2029 r2052 32 32 #include "transpose.hpp" 33 33 #include "buffer.hpp" 34 #include "gid.hpp" 34 35 #include "../lib/bitblock.hpp" 35 36 #include "../lib/allocator.hpp" … … 40 41 #include "hash_strms.hpp" // GENERATED HEADER 41 42 #include "id_group_strms.hpp" // GENERATED HEADER 42 #include " id_symbol_table.hpp"43 #include "symbol_table.hpp" 43 44 #include <string> 44 45 #include <iostream> … … 127 128 128 129 Symbol symbols(SYMBOL_COUNT); 129 id_symbol_table<Symbol, fast_pool_allocator<1024> > symbol_table;130 symbol_table<Symbol, fast_pool_allocator<1024> > st; 130 131 131 132 is.read ((char *)raw_buffer, SEGMENT_SIZE); … … 152 153 153 154 PERF_SEC_START(parser_timer); 154 s ymbol_table.resolve(raw_buffer, groups, starts, ends_gte_17, h0, h1, SEGMENT_BLOCKS, symbols /*, SYMBOL_COUNT*/);155 st.resolve(raw_buffer, groups, starts, ends_gte_17, h0, h1, SEGMENT_BLOCKS, symbols /*, SYMBOL_COUNT*/); 155 156 PERF_SEC_END(parser_timer, SEGMENT_SIZE); 156 157 … … 179 180 while(!fscanner.is_done()) { 180 181 gid = symbols.gids[fscanner.get_pos() + blk_offset]; 181 cout << string((char *)s ymbol_table.get_raw_data(gid), symbol_table.get_lgth(gid)) << ",";182 cout << string((char *)st.get_raw_data(gid), st.get_lgth(gid)) << ","; 182 183 fscanner.scan_to_next(); 183 184 } … … 221 222 222 223 //PERF_SEC_START(parser_timer); 223 s ymbol_table.resolve(raw_buffer, groups, starts, ends_gte_17, h0, h1, segment_size, symbols/*, SYMBOL_COUNT*/);224 st.resolve(raw_buffer, groups, starts, ends_gte_17, h0, h1, segment_size, symbols/*, SYMBOL_COUNT*/); 224 225 //PERF_SEC_END(parser_timer, chars_avail+1); 225 226 … … 233 234 while(!fscanner.is_done()) { 234 235 gid = symbols.gids[fscanner.get_pos() + blk_offset]; 235 cout << string((char *)s ymbol_table.get_raw_data(gid), symbol_table.get_lgth(gid)) << ",";236 cout << string((char *)st.get_raw_data(gid), st.get_lgth(gid)) << ","; 236 237 fscanner.scan_to_next(); 237 238 } -
trunk/symbol_table/src/Makefile
r2040 r2052 15 15 TEST_ROOT=../test 16 16 17 all: basis_bits.hpp buffer.hpp byte_pool.hpp hash_strms.hpp hash_table.hpp id_group_strms.hpp id_symbol_table.hpp main.cpp Makefile marker_strms.hpp symbol_table.hpp transpose.hpp17 all: basis_bits.hpp buffer.hpp byte_pool.hpp hash_strms.hpp hash_table.hpp id_group_strms.hpp symbol_table.hpp main.cpp Makefile marker_strms.hpp symbol_table.hpp transpose.hpp 18 18 $(CC) -o main main.cpp $(AFLAGS) #-DID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG #-DHASH_TABLE_HPP_DEBUG # -DBUFFER_PROFILING 19 19 -
trunk/symbol_table/src/hash_table.hpp
r2045 r2052 91 91 } 92 92 93 IDISA_ALWAYS_INLINE gid_type lookup_or_insert(uint8_t * raw_bytes, const uint32_t idx, const uint32_t raw_byte_lgth, 94 uint8_t * h0, uint8_t * h1, const uint32_t hash_bit_lgth, 95 GIDFactory & gid_factory, GIDData & gid_data) { 96 97 uint64_t bucket = get_bucket(h0,h1,idx,hash_bit_lgth); 98 99 node * crt = table[bucket]; 100 node * prev = crt; 101 102 /* lookup */ 103 while(NULL != crt) { 104 if(compare_strategy.compare(&raw_bytes[idx], crt->raw_bytes, raw_byte_lgth)) { 105 return crt->gid; 106 } 107 prev = crt; 108 crt = crt->next; 109 #ifdef HASH_TABLE_HPP_DEBUG 110 collisions++; 111 #endif 112 } 113 114 /* insert */ 115 gid_type gid = gid_factory.next(); 116 117 uint64_t x0 = bit_slice(h0, idx, hash_bit_lgth); 118 uint64_t x1 = bit_slice(h1, idx, hash_bit_lgth); 119 120 uint8_t * data_pool_raw_bytes = raw_data_pool.insert(&raw_bytes[idx],raw_byte_lgth); // persist 121 122 insert( bucket, 123 data_pool_raw_bytes, 124 raw_byte_lgth, 125 raw_data_pool.insert((uint8_t *)&x0, bits2bytes(hash_bit_lgth)), 126 raw_data_pool.insert((uint8_t *)&x1, bits2bytes(hash_bit_lgth)), 127 hash_bit_lgth, 128 gid); 129 130 if(expansion_test(table[bucket])) { 131 132 #ifdef HASH_TABLE_HPP_DEBUG 133 this->table_expansions++; 134 #endif 135 136 expand(); 137 } 138 139 #ifdef HASH_TABLE_HPP_DEBUG 140 elements++; 141 #endif 142 143 gid_data.add_data(data_pool_raw_bytes,raw_byte_lgth); 144 145 return gid; 146 } 93 // IDISA_ALWAYS_INLINE gid_type lookup_or_insert(uint8_t * raw_bytes, const uint32_t idx, const uint32_t raw_byte_lgth, 94 // uint8_t * h0, uint8_t * h1, const uint32_t hash_bit_lgth, 95 // GIDFactory & gid_factory, GIDData & gid_data); 147 96 148 97 void print_table() const { … … 206 155 } 207 156 208 pr ivate:157 protected: 209 158 // ----- Members ----- 210 159 uint32_t table_size; // power of 2 … … 326 275 } 327 276 } 328 277 } 278 }; 279 280 template<class COMPARE_STRATEGY, class HASH_STRATEGY, class ALLOCATOR> 281 class id_hash_table : public hash_table<COMPARE_STRATEGY, HASH_STRATEGY, ALLOCATOR> { 282 283 public: 284 285 IDISA_ALWAYS_INLINE gid_type lookup_or_insert(uint8_t * raw_bytes, const uint32_t idx, const uint32_t raw_byte_lgth, 286 uint8_t * h0, uint8_t * h1, const uint32_t hash_bit_lgth, 287 GIDFactory & gid_factory, GIDData & gid_data) { 288 289 uint64_t bucket = this->get_bucket(h0,h1,idx,hash_bit_lgth); 290 291 node * crt = this->table[bucket]; 292 node * prev = crt; 293 294 /* lookup */ 295 while(NULL != crt) { 296 if(this->compare_strategy.compare(&raw_bytes[idx], crt->raw_bytes, raw_byte_lgth)) { 297 return crt->gid; 298 } 299 prev = crt; 300 crt = crt->next; 301 #ifdef HASH_TABLE_HPP_DEBUG 302 collisions++; 303 #endif 304 } 305 306 /* insert */ 307 gid_type gid = gid_factory.next(); 308 309 uint64_t x0 = bit_slice(h0, idx, hash_bit_lgth); 310 uint64_t x1 = bit_slice(h1, idx, hash_bit_lgth); 311 312 uint8_t * data_pool_raw_bytes = this->raw_data_pool.insert(&raw_bytes[idx],raw_byte_lgth); // persist 313 314 insert( bucket, 315 data_pool_raw_bytes, 316 raw_byte_lgth, 317 this->raw_data_pool.insert((uint8_t *)&x0, bits2bytes(hash_bit_lgth)), 318 this->raw_data_pool.insert((uint8_t *)&x1, bits2bytes(hash_bit_lgth)), 319 hash_bit_lgth, 320 gid); 321 322 if(this->expansion_test(this->table[bucket])) { 323 324 #ifdef HASH_TABLE_HPP_DEBUG 325 this->table_expansions++; 326 #endif 327 328 this->expand(); 329 } 330 331 #ifdef HASH_TABLE_HPP_DEBUG 332 elements++; 333 #endif 334 335 gid_data.add_data(data_pool_raw_bytes,raw_byte_lgth); 336 337 return gid; 329 338 } 330 339 -
trunk/symbol_table/src/symbol_table.hpp
r2029 r2052 1 1 /* 2 * id_symbol_table.hpp 2 3 * Created on: 18-December-2011 3 4 * Author: Ken Herdy 4 5 * 5 * Segment-at-a-time symbol table. 6 * BitBlock type arguments must adhere to the 'full-block invariant' 7 * and mask partial block with null bytes. 8 * 9 * Number of length groups must coincide with the 10 * number compiler generated length groups. 6 11 * 7 12 */ 8 #ifndef SYMBOL_TABLE_HPP 9 #define SYMBOL_TABLE_HPP 10 11 #include "../lib/bitblock.hpp" 12 #include "../lib/byte_pool.hpp" 13 #ifndef ID_SYMBOL_TABLE_TEMPLATE_HPP 14 #define ID_SYMBOL_TABLE_TEMPLATE_HPP 15 16 #include "buffer.hpp" 13 17 #include "gid.hpp" 14 18 #include "hash_table.hpp" 15 19 #include "../lib/carryQ.hpp" 20 #include "../lib/bitblock_iterator.hpp" 21 #include "../lib/bitblock_scan.hpp" 22 #include <cstdlib> 16 23 #include <vector> 17 #include <iostream>18 24 using namespace std; 19 25 26 #ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG 27 static void print_symbol_debug(gid_type gid, const uint8_t buffer [], const int32_t spos, const uint32_t epos, const uint32_t lgth) { 28 cout << "{Symbol:{"; 29 cout << "GID:" << gid; 30 cout << ",Length:" << lgth; 31 cout << ",Value:'" << string((char *)&(buffer[spos]), lgth) << "'"; 32 cout << ",Start:" << spos; 33 cout << ",End:" << epos; 34 cout << "}}" << endl; 35 } 36 #endif 37 20 38 /////////////////////////////////////////////////////////////////////////// 21 // Symbol Type - Array gids.39 // Symbol Type - do_block() 22 40 /////////////////////////////////////////////////////////////////////////// 23 24 class Symbol {25 public:26 Symbol (uint32_t n) {27 init(n);28 }29 30 void init(uint32_t n) {31 gids.reserve(n);32 //gids_idx.reserve((n/BLOCK_SIZE) + 1);33 }34 35 vector<gid_type> gids;36 //vector<BitBlock> gids_idx; // gids index37 };38 39 template <class SYMBOL> class symbol_table{40 public:41 void resolve(uint8_t buffer [], Groups groups [], BitBlock starts [], BitBlock ends_gte_17 [],42 BitBlock h0 [], BitBlock h1 [], uint32_t blocks, SYMBOL & symbols /*, const uint32_t symbols*/);43 44 IDISA_ALWAYS_INLINE uint8_t * get_raw_data(uint32_t idx) const { return gid_data.get_raw_bytes(idx); }45 IDISA_ALWAYS_INLINE uint32_t get_lgth(uint32_t idx) const { return gid_data.get_bytes_lgth(idx); }46 47 protected:48 symbol_table() {}49 ~symbol_table() {}50 51 GIDFactory gid_factory;52 GIDData gid_data;53 };54 55 41 template<class SYMBOL, class HASH_TABLE> 56 42 void do_block(uint32_t blk_offset, … … 69 55 SYMBOL & symbols, GIDFactory & gid_factory, GIDData & gid_data); 70 56 71 #endif // SYMBOL_TABLE_HPP 72 57 /////////////////////////////////////////////////////////////////////////// 58 // Symbol Type - Array gids. 59 /////////////////////////////////////////////////////////////////////////// 60 61 class Symbol { 62 public: 63 Symbol (uint32_t n) { 64 init(n); 65 } 66 67 void init(uint32_t n) { 68 gids.reserve(n); 69 //gids_idx.reserve((n/BLOCK_SIZE) + 1); 70 } 71 72 vector<gid_type> gids; 73 //vector<BitBlock> gids_idx; // gids index 74 }; 75 76 77 // TODO - Refactor as a single mixed symbol table class composed of Id, Div2, Log2 hash tables. 78 template<class SYMBOL, class ALLOCATOR> 79 class symbol_table { 80 public: 81 symbol_table()/*:hash_table_1(256)*/{} 82 ~symbol_table() { 83 // hash_table_1.print_table(); 84 // hash_table_2.print_table(); 85 // hash_table_3.print_table(); 86 // hash_table_4.print_table(); 87 // hash_table_5.print_table(); 88 // hash_table_6.print_table(); 89 // hash_table_7.print_table(); 90 // hash_table_8.print_table(); 91 // hash_table_9.print_table(); 92 // hash_table_10.print_table(); 93 // hash_table_11.print_table(); 94 // hash_table_12.print_table(); 95 // hash_table_13.print_table(); 96 // hash_table_14.print_table(); 97 // hash_table_15.print_table(); 98 // hash_table_16.print_table(); 99 // hash_table_gte_17.print_table(); 100 #ifdef HASH_TABLE_HPP_DEBUG 101 hash_table_1.print_diagnostics(); 102 hash_table_2.print_diagnostics(); 103 hash_table_3.print_diagnostics(); 104 hash_table_4.print_diagnostics(); 105 hash_table_5.print_diagnostics(); 106 hash_table_6.print_diagnostics(); 107 hash_table_7.print_diagnostics(); 108 hash_table_8.print_diagnostics(); 109 hash_table_9.print_diagnostics(); 110 hash_table_10.print_diagnostics(); 111 hash_table_11.print_diagnostics(); 112 hash_table_12.print_diagnostics(); 113 hash_table_13.print_diagnostics(); 114 hash_table_14.print_diagnostics(); 115 hash_table_15.print_diagnostics(); 116 hash_table_16.print_diagnostics(); 117 hash_table_gte_17.print_diagnostics(); 118 #endif 119 } 120 121 // Groups & groups 122 void resolve(uint8_t buffer [], Groups groups [], BitBlock starts [], BitBlock ends_gte_17 [], 123 BitBlock h0 [], BitBlock h1 [], uint32_t blocks, SYMBOL & symbols) { 124 125 for(uint32_t blk = 0; blk < blocks; blk++) { 126 const uint32_t blk_offset = blk * BLOCKSIZE; 127 resolve(blk_offset, &buffer[blk_offset], groups[blk], &starts[blk], &h0[blk], &h1[blk], symbols); 128 } 129 } 130 131 // Groups & groups 132 IDISA_ALWAYS_INLINE 133 void resolve(uint32_t blk_offset, uint8_t buffer [], Groups & groups, BitBlock starts[], 134 BitBlock * h0, BitBlock * h1, SYMBOL & symbols) { 135 136 /////////////////////////////////////////////////////////////////////////////// 137 // Byte Space Hash 138 /////////////////////////////////////////////////////////////////////////////// 139 #define BYTE_HASH(LGTH, TYPE) \ 140 if(bitblock::any(groups.ends_##LGTH)) { \ 141 do_block<SYMBOL, id_hash_table <identity_strategy_t<LGTH,TYPE>, hash_strategy_t<LGTH>, ALLOCATOR> > \ 142 (blk_offset, \ 143 hash_table_##LGTH, \ 144 groups.ends_##LGTH, \ 145 buffer, LGTH, /* buffer, symbol length */ \ 146 buffer, buffer, bytes2bits(LGTH), BLOCK_SIZE, /* h0, h1, hash lgth (bits), hash block size (bits) */ \ 147 symbols, this->gid_factory, this->gid_data); \ 148 } 149 150 BYTE_HASH(1, uint8_t); 151 BYTE_HASH(2, uint16_t); 152 BYTE_HASH(3, uint16_t); 153 BYTE_HASH(4, uint32_t); 154 BYTE_HASH(5, uint32_t); 155 BYTE_HASH(6, uint32_t); 156 BYTE_HASH(7, uint32_t); 157 158 #undef BYTE_HASH 159 160 /////////////////////////////////////////////////////////////////////////////// 161 // Bit Space Hash 162 /////////////////////////////////////////////////////////////////////////////// 163 #define BIT_HASH(LGTH, TYPE) \ 164 if(bitblock::any(groups.ends_##LGTH)) { \ 165 do_block<SYMBOL, id_hash_table <identity_strategy_t<LGTH,TYPE>, hash_strategy_t<LGTH>, ALLOCATOR> > \ 166 (blk_offset, \ 167 hash_table_##LGTH, \ 168 groups.ends_##LGTH, \ 169 buffer, LGTH, \ 170 (uint8_t *)h0, (uint8_t *)h1, LGTH, (BLOCK_SIZE / 8), \ 171 symbols, this->gid_factory, this->gid_data); \ 172 } 173 174 BIT_HASH(8, uint64_t); 175 BIT_HASH(9, uint64_t); 176 BIT_HASH(10, uint64_t); 177 BIT_HASH(11, uint64_t); 178 BIT_HASH(12, uint64_t); 179 BIT_HASH(13, uint64_t); 180 BIT_HASH(14, uint64_t); 181 BIT_HASH(15, uint64_t); 182 BIT_HASH(16, BitBlock); 183 184 #undef BIT_HASH 185 186 if(bitblock::any(groups.ends_gte_17)) { 187 do_block<SYMBOL, id_hash_table<identity_strategy_d, hash_strategy_d, ALLOCATOR> > 188 (blk_offset, 189 hash_table_gte_17, 190 starts, &groups.ends_gte_17, 191 buffer, 192 (uint8_t *)h0, (uint8_t *)h1, 17, BLOCK_SIZE/8, 193 symbols, this->gid_factory, this->gid_data); 194 } 195 } 196 197 IDISA_ALWAYS_INLINE uint8_t * get_raw_data(uint32_t idx) const { return gid_data.get_raw_bytes(idx); } 198 IDISA_ALWAYS_INLINE uint32_t get_lgth(uint32_t idx) const { return gid_data.get_bytes_lgth(idx); } 199 200 private: 201 202 GIDFactory gid_factory; 203 GIDData gid_data; 204 205 /////////////////////////////////////////////////////////////////////////////// 206 // Byte Space Hash 207 /////////////////////////////////////////////////////////////////////////////// 208 id_hash_table<identity_strategy_t<1, uint8_t>, hash_strategy_t<1>, ALLOCATOR> hash_table_1; 209 id_hash_table<identity_strategy_t<2, uint16_t>, hash_strategy_t<2>, ALLOCATOR> hash_table_2; 210 id_hash_table<identity_strategy_t<3, uint16_t>, hash_strategy_t<3>, ALLOCATOR> hash_table_3; 211 id_hash_table<identity_strategy_t<4, uint32_t>, hash_strategy_t<4>, ALLOCATOR> hash_table_4; 212 id_hash_table<identity_strategy_t<5, uint32_t>, hash_strategy_t<5>, ALLOCATOR> hash_table_5; 213 id_hash_table<identity_strategy_t<6, uint32_t>, hash_strategy_t<6>, ALLOCATOR> hash_table_6; 214 id_hash_table<identity_strategy_t<7, uint32_t>, hash_strategy_t<7>, ALLOCATOR> hash_table_7; 215 // /////////////////////////////////////////////////////////////////////////////// 216 // // Bit Space Hash 217 // /////////////////////////////////////////////////////////////////////////////// 218 id_hash_table<identity_strategy_t<8, uint64_t>, hash_strategy_t<8>, ALLOCATOR> hash_table_8; 219 id_hash_table<identity_strategy_t<9, uint64_t>, hash_strategy_t<9>, ALLOCATOR> hash_table_9; 220 id_hash_table<identity_strategy_t<10, uint64_t>, hash_strategy_t<10>, ALLOCATOR> hash_table_10; 221 id_hash_table<identity_strategy_t<11, uint64_t>, hash_strategy_t<11>, ALLOCATOR> hash_table_11; 222 id_hash_table<identity_strategy_t<12, uint64_t>, hash_strategy_t<12>, ALLOCATOR> hash_table_12; 223 id_hash_table<identity_strategy_t<13, uint64_t>, hash_strategy_t<13>, ALLOCATOR> hash_table_13; 224 id_hash_table<identity_strategy_t<14, uint64_t>, hash_strategy_t<14>, ALLOCATOR> hash_table_14; 225 id_hash_table<identity_strategy_t<15, uint64_t>, hash_strategy_t<15>, ALLOCATOR> hash_table_15; 226 id_hash_table<identity_strategy_t<16, BitBlock>, hash_strategy_t<16>, ALLOCATOR> hash_table_16; 227 id_hash_table<identity_strategy_d, hash_strategy_d, ALLOCATOR> hash_table_gte_17; 228 }; 229 230 /* NOTE: C++ template code and Pablo generated length groups must coincide. */ 231 232 // Fixed Lengths - REVERSE SCAN LOGIC - Scan each BLOCK MSB to LSB 233 template<class SYMBOL, class HASH_TABLE> 234 void do_block(uint32_t blk_offset, 235 HASH_TABLE & h_table, 236 BitBlock ends, 237 uint8_t buffer [], const uint32_t lgth, 238 uint8_t h0 [], uint8_t h1 [], const uint32_t h_lgth, const uint32_t h_block_size, 239 SYMBOL & symbols, GIDFactory & gid_factory, GIDData & gid_data) { 240 241 uint8_t * buffer_base = buffer; 242 uint8_t * h0_base = h0; 243 uint8_t * h1_base = h1; 244 245 gid_type gid; 246 int32_t epos; 247 int32_t spos; 248 uint32_t blk_count; 249 250 ReverseScanner<BitBlock, scanword_t> rscanner(&ends); 251 252 rscanner.scan_to_next(); 253 epos = rscanner.get_pos(); 254 255 while(!rscanner.is_done()) { 256 257 spos = epos - lgth; 258 259 if(spos < 0) { // boundary case 260 spos = (BLOCK_SIZE - (-1 * spos)) & (BLOCK_SIZE - 1); 261 blk_count = (lgth/BLOCK_SIZE)+1; 262 buffer_base -= (BLOCK_SIZE * blk_count); 263 h0_base -= (h_block_size * blk_count); 264 h1_base -= (h_block_size * blk_count); 265 } 266 267 assert (spos >= 0); 268 269 gid = h_table.lookup_or_insert(buffer_base, spos, lgth, h0_base, h1_base, h_lgth, gid_factory, gid_data); // WARNING: spos must be >= 0 270 271 #ifdef ID_SYMBOL_STORE_SYMBOL_GIDS_AT_END_POSITION 272 symbols.gids[blk_offset + epos] = gid; 273 #else 274 symbols.gids[blk_offset + epos - lgth] = gid; 275 #endif 276 277 #ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG 278 print_symbol_debug(gid, buffer_base, spos, epos, lgth); 279 #endif 280 281 rscanner.scan_to_next(); 282 epos = rscanner.get_pos(); 283 } 284 } 285 286 287 // Variable Lengths, reverse scanner logic 288 // Precondition: A symbol end is marked iff a symbol start is marked within a buffer segment. 289 template<class SYMBOL, class HASH_TABLE> 290 void do_block(uint32_t blk_offset, 291 HASH_TABLE & h_table, 292 BitBlock starts [], BitBlock ends [], 293 uint8_t buffer [], 294 uint8_t h0 [], uint8_t h1 [], const uint32_t h_lgth, const uint32_t h_block_size, 295 SYMBOL & symbols, GIDFactory & gid_factory, GIDData & gid_data) { 296 297 BitBlock * starts_base = starts; 298 uint8_t * buffer_base = buffer; 299 uint8_t * h0_base = h0; 300 uint8_t * h1_base = h1; 301 302 gid_type gid; 303 int32_t epos; 304 int32_t spos; 305 uint32_t lgth; 306 uint32_t blk_count = 0; 307 308 ReverseScanner<BitBlock, scanword_t> ends_rscanner(ends); 309 ReverseScanner<BitBlock, scanword_t> starts_rscanner(starts); 310 311 ends_rscanner.scan_to_next(); 312 epos = ends_rscanner.get_pos(); 313 314 while(!ends_rscanner.is_done()) { 315 316 starts_rscanner.move_to(epos); 317 starts_rscanner.scan_to_next(); 318 spos = starts_rscanner.get_pos(); 319 lgth = epos - spos; 320 321 while(starts_rscanner.is_done()) { // boundary case 322 starts_base--; 323 324 blk_count++; 325 326 starts_rscanner.init(starts_base); 327 starts_rscanner.scan_to_next(); 328 329 if(!starts_rscanner.is_done()) { // found start 330 lgth = epos + (BLOCK_SIZE - starts_rscanner.get_pos()) + (BLOCK_SIZE * (blk_count-1)); 331 // spos = (BLOCK_SIZE - (-1 * spos)) & (BLOCK_SIZE - 1); 332 333 // buffer_base -= (BLOCK_SIZE * blk_count); 334 //spos = epos - lgth; 335 spos = starts_rscanner.get_pos(); 336 337 buffer_base -= (BLOCK_SIZE * blk_count); 338 h0_base -= (h_block_size * blk_count); 339 h1_base -= (h_block_size * blk_count); 340 break; 341 } 342 343 } 344 345 gid = h_table.lookup_or_insert(buffer_base, spos, lgth, h0_base, h1_base, h_lgth, gid_factory, gid_data); // WARNING: spos must be >= 0 346 347 #ifdef ID_SYMBOL_STORE_SYMBOL_GIDS_AT_END_POSITION 348 symbols.gids[blk_offset + epos] = gid; 349 #else 350 symbols.gids[blk_offset + epos - lgth] = gid; 351 #endif 352 353 #ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG 354 //print_symbol_debug(gid, buffer, spos, epos, lgth); 355 print_symbol_debug(gid, buffer_base, spos, epos, lgth); 356 #endif 357 358 ends_rscanner.scan_to_next(); 359 epos = ends_rscanner.get_pos(); 360 } 361 } 362 363 #endif // ID_SYMBOL_TABLE_TEMPLATE_HPP 364 365 // 73 366 /* 74 template<class SYMBOL, class ALLOCATOR> 75 class div2: public symbol_table_strategy<SYMBOL, ALLOCATOR> { 76 77 public: 78 div2() {} 79 ~div2() {} 80 81 void resolve(...) { 82 cout << "div2 Strategy" << endl; 83 } 84 //void resolve(uint8_t * segment, uint32_t segment_blocks, BitBlock * starts, BitBlock * ends, SoA_symbol & symbols); 85 }; 367 void do_block(uint32_t blk_offset, 368 HASH_TABLE & h_table, 369 BitBlock ends, 370 uint8_t buffer [], const uint32_t lgth, 371 uint8_t h0 [], uint8_t h1 [], const uint32_t h_lgth, const uint32_t h_block_size, 372 SYMBOL & symbols, GIDFactory & gid_factory, GIDData & gid_data) { 373 374 gid_type gid; 375 int32_t spos; 376 int32_t epos; 377 ForwardScanner<BitBlock, scanword_t> fscanner(&ends); 378 379 fscanner.scan_to_next(); 380 epos = fscanner.get_pos(); 381 spos = (epos - lgth); 382 383 if(!fscanner.is_done() && (spos < 0) ) { // block boundary case 384 385 //////////////////////////////////////////////////////////////////// 386 // Start - Review boundary logic 387 //////////////////////////////////////////////////////////////////// 388 uint8_t * lb_buffer = buffer - ((lgth / BLOCK_SIZE) + 1)*BLOCK_SIZE; 389 int32_t lb_spos = (BLOCK_SIZE - (-1*spos)) & (BLOCK_SIZE-1); 390 391 uint8_t * lb_h0 = h0 - ((lgth / BLOCK_SIZE) + 1)*h_block_size; 392 uint8_t * lb_h1 = h1 - ((lgth / BLOCK_SIZE) + 1)*h_block_size; 393 394 gid = h_table.lookup_or_insert(lb_buffer, lb_spos, lgth, lb_h0, lb_h1, h_lgth, gid_factory, gid_data); 395 396 symbols.gids[blk_offset + spos] = gid; 397 //////////////////////////////////////////////////////////////////// 398 // End 399 //////////////////////////////////////////////////////////////////// 400 401 #ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG 402 print_symbol_debug(gid, lb_buffer, lb_spos, epos, lgth); 403 #endif 404 405 fscanner.scan_to_next(); 406 epos = fscanner.get_pos(); 407 spos = (epos - lgth); 408 409 } 410 411 while(!fscanner.is_done()) { 412 413 gid = h_table.lookup_or_insert(buffer, spos, lgth, h0, h1, h_lgth, gid_factory, gid_data); 414 symbols.gids[blk_offset + spos] = gid; 415 416 #ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG 417 print_symbol_debug(gid, buffer, spos, epos, lgth); 418 #endif 419 420 fscanner.scan_to_next(); 421 epos = fscanner.get_pos(); 422 spos = (epos - lgth); 423 } 424 425 } 86 426 */ 87 427 88 /*89 template<class SYMBOL, class ALLOCATOR>90 class log2: public symbol_table_strategy<SYMBOL, ALLOCATOR> {91 92 public:93 log2() {}94 ~log2() {}95 96 void resolve(...) {97 cout << "log2 Strategy" << endl;98 }99 //void resolve(uint8_t * segment, uint32_t segment_blocks, BitBlock * starts, BitBlock * ends, SoA_symbol & symbols);100 };101 */102 -
trunk/symbol_table/symbol_table.pro
r2045 r2052 62 62 hash_table.hpp \ 63 63 src/transpose.hpp \ 64 src/symbol_table.hpp \65 64 src/marker_strms.hpp \ 66 src/id_symbol_table.hpp \67 65 src/id_group_strms.hpp \ 68 66 src/hash_table.hpp \ … … 76 74 lib/hash.hpp \ 77 75 src/gid.hpp \ 78 lib/byte_compare.hpp 76 lib/byte_compare.hpp \ 77 lib/allocator.hpp \ 78 src/symbol_table.hpp
Note: See TracChangeset
for help on using the changeset viewer.