Changeset 2034 for trunk/symbol_table/src/id_symbol_table.hpp
 Timestamp:
 Apr 16, 2012, 5:49:46 PM (7 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

trunk/symbol_table/src/id_symbol_table.hpp
r2032 r2034 23 23 #ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG 24 24 static void print_symbol_debug(gid_type gid, const uint8_t buffer [], const int32_t spos, const uint32_t epos, const uint32_t lgth) { 25 26 27 28 29 30 31 25 cout << "{Symbol:{"; 26 cout << "GID:" << gid; 27 cout << ",Length:" << lgth; 28 cout << ",Value:'" << string((char *)&(buffer[spos]), lgth) << "'"; 29 cout << ",Start:" << spos; 30 cout << ",End:" << epos; 31 cout << "}}" << endl; 32 32 } 33 33 #endif … … 37 37 class id_symbol_table: public symbol_table<SYMBOL> { 38 38 public: 39 40 39 id_symbol_table()/*:hash_table_1(256)*/{} 40 ~id_symbol_table() { 41 41 // hash_table_1.print_table(); 42 42 // hash_table_2.print_table(); … … 76 76 hash_table_gte_17.print_diagnostics(); 77 77 #endif 78 } 79 80 // Groups & groups 81 void resolve(uint8_t buffer [], Groups groups [], BitBlock starts [], BitBlock ends_gte_17 [], 82 BitBlock h0 [], BitBlock h1 [], uint32_t blocks, SYMBOL & symbols) { 83 84 uint32_t blk_offset; 85 86 for(uint32_t blk=0;blk<blocks;blk++) { 87 88 blk_offset = blk * BLOCKSIZE; 89 /////////////////////////////////////////////////////////////////////////////// 90 // Byte Space Hash 91 /////////////////////////////////////////////////////////////////////////////// 92 if(bitblock::any(groups[blk].ends_1)) { 93 do_block<SYMBOL, hash_table <identity_strategy_t<uint8_t,1>, hash_strategy_t<1>, ALLOCATOR> > 94 (blk_offset, 95 hash_table_1, 96 groups[blk].ends_1, 97 &buffer[blk_offset], 1, /* buffer, symbol length */ 98 &buffer[blk_offset], &buffer[blk_offset], bytes2bits(1), BLOCK_SIZE, /* h0, h1, hash lgth (bits), hash block size (bits) */ 78 } 79 80 // Groups & groups 81 void resolve(uint8_t buffer [], Groups groups [], BitBlock starts [], BitBlock ends_gte_17 [], 82 BitBlock h0 [], BitBlock h1 [], uint32_t blocks, SYMBOL & symbols) { 83 84 for(uint32_t blk = 0; blk < blocks; blk++) { 85 const uint32_t blk_offset = blk * BLOCKSIZE; 86 resolve(blk_offset, &buffer[blk_offset], groups[blk], 
&starts[blk], &ends_gte_17[blk], h0[blk], h1[blk], symbols); 87 } 88 } 89 90 // Groups & groups 91 IDISA_ALWAYS_INLINE 92 void resolve(uint32_t blk_offset, uint8_t buffer [], Groups groups, BitBlock starts [], BitBlock ends_gte_17[], 93 BitBlock h0, BitBlock h1, SYMBOL & symbols) { 94 95 /////////////////////////////////////////////////////////////////////////////// 96 // Byte Space Hash 97 /////////////////////////////////////////////////////////////////////////////// 98 #define BYTE_HASH(LENGTH_GROUP, COMPARISON_TYPE) \ 99 if(bitblock::any(groups.ends_##LENGTH_GROUP)) { \ 100 do_block<SYMBOL, hash_table <identity_strategy_t<COMPARISON_TYPE,LENGTH_GROUP>, hash_strategy_t<LENGTH_GROUP>, ALLOCATOR> > \ 101 (blk_offset, \ 102 hash_table_##LENGTH_GROUP, \ 103 groups.ends_##LENGTH_GROUP, \ 104 buffer, LENGTH_GROUP, /* buffer, symbol length */ \ 105 buffer, buffer, bytes2bits(LENGTH_GROUP), BLOCK_SIZE, /* h0, h1, hash lgth (bits), hash block size (bits) */ \ 106 symbols, this>gid_factory, this>gid_data); \ 107 } 108 109 BYTE_HASH(1, uint8_t); 110 BYTE_HASH(2, uint16_t); 111 BYTE_HASH(3, uint16_t); 112 BYTE_HASH(4, uint32_t); 113 BYTE_HASH(5, uint32_t); 114 BYTE_HASH(6, uint32_t); 115 BYTE_HASH(7, uint32_t); 116 117 #undef BYTE_HASH 118 119 /////////////////////////////////////////////////////////////////////////////// 120 // Bit Space Hash 121 /////////////////////////////////////////////////////////////////////////////// 122 if(bitblock::any(groups.ends_8)) { 123 do_block<SYMBOL, hash_table <identity_strategy_t<uint64_t,8>, hash_strategy_d, ALLOCATOR> > 124 (blk_offset, 125 hash_table_8, 126 groups.ends_8, buffer, 8, 127 (uint8_t *)&h0, (uint8_t *)&h1, 8, BLOCK_SIZE/8, 128 symbols, this>gid_factory, this>gid_data); 129 } 130 if(bitblock::any(groups.ends_9)) { 131 do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,9>, hash_strategy_d, ALLOCATOR> > 132 (blk_offset, 133 hash_table_9, 134 groups.ends_9, buffer, 9, 135 (uint8_t *)&h0, (uint8_t *)&h1, 9, BLOCK_SIZE/8, 
136 symbols, this>gid_factory, this>gid_data); 137 } 138 if(bitblock::any(groups.ends_10)) { 139 do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,10>, hash_strategy_d, ALLOCATOR> > 140 (blk_offset, 141 hash_table_10, 142 groups.ends_10, buffer, 10, 143 (uint8_t *)&h0, (uint8_t *)&h1, 10, BLOCK_SIZE/8, 144 symbols, this>gid_factory, this>gid_data); 145 } 146 if(bitblock::any(groups.ends_11)) { 147 do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,11>, hash_strategy_d, ALLOCATOR> > 148 (blk_offset, 149 hash_table_11, 150 groups.ends_11, buffer, 11, 151 (uint8_t *)&h0, (uint8_t *)&h1, 11, BLOCK_SIZE/8, 152 symbols, this>gid_factory, this>gid_data); 153 } 154 if(bitblock::any(groups.ends_12)) { 155 do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,12>, hash_strategy_d, ALLOCATOR> > 156 (blk_offset, 157 hash_table_12, 158 groups.ends_12, buffer, 12, 159 (uint8_t *)&h0, (uint8_t *)&h1, 12, BLOCK_SIZE/8, 160 symbols, this>gid_factory, this>gid_data); 161 } 162 if(bitblock::any(groups.ends_13)) { 163 do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,13>, hash_strategy_d, ALLOCATOR> > 164 (blk_offset, 165 hash_table_13, 166 groups.ends_13, buffer, 13, 167 (uint8_t *)&h0, (uint8_t *)&h1, 13, BLOCK_SIZE/8, 168 symbols, this>gid_factory, this>gid_data); 169 } 170 if(bitblock::any(groups.ends_14)) { 171 do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,14>, hash_strategy_d, ALLOCATOR> > 172 (blk_offset, 173 hash_table_14, 174 groups.ends_14, buffer, 14, 175 (uint8_t *)&h0, (uint8_t *)&h1, 14, BLOCK_SIZE/8, 176 symbols, this>gid_factory, this>gid_data); 177 } 178 if(bitblock::any(groups.ends_15)) { 179 do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,15>, hash_strategy_d, ALLOCATOR> > 180 (blk_offset, 181 hash_table_15, 182 groups.ends_15, buffer, 15, 183 (uint8_t *)&h0, (uint8_t *)&h1, 15, BLOCK_SIZE/8, 184 symbols, this>gid_factory, this>gid_data); 185 } 186 if(bitblock::any(groups.ends_16)) { 187 do_block<SYMBOL, 
hash_table<identity_strategy_t<BitBlock,16>, hash_strategy_d, ALLOCATOR> > 188 (blk_offset, 189 hash_table_16, 190 groups.ends_16, buffer, 16, 191 (uint8_t *)&h0, (uint8_t *)&h1, 16, BLOCK_SIZE/8, 192 symbols, this>gid_factory, this>gid_data); 193 } 194 if(bitblock::any(groups.ends_gte_17)) { 195 do_block<SYMBOL, hash_table<identity_strategy_d, hash_strategy_d, ALLOCATOR> > 196 (blk_offset, 197 hash_table_gte_17, 198 starts, ends_gte_17, 199 buffer, 200 (uint8_t *)&h0, (uint8_t *)&h1, 17, BLOCK_SIZE/8, 99 201 symbols, this>gid_factory, this>gid_data); 100 } 101 if(bitblock::any(groups[blk].ends_2)) { 102 do_block<SYMBOL, hash_table <identity_strategy_t<uint16_t,2>, hash_strategy_t<2>, ALLOCATOR> > 103 (blk_offset, 104 hash_table_2, 105 groups[blk].ends_2, 106 &buffer[blk_offset], 2, 107 &buffer[blk_offset], &buffer[blk_offset], bytes2bits(2), BLOCK_SIZE, 108 symbols, this>gid_factory, this>gid_data); 109 } 110 if(bitblock::any(groups[blk].ends_3)) { 111 do_block<SYMBOL, hash_table <identity_strategy_t<uint16_t,3>, hash_strategy_t<3>, ALLOCATOR> > 112 (blk_offset, 113 hash_table_3, 114 groups[blk].ends_3, 115 &buffer[blk_offset], 3, 116 &buffer[blk_offset], &buffer[blk_offset], bytes2bits(3), BLOCK_SIZE, 117 symbols, this>gid_factory, this>gid_data); 118 } 119 if(bitblock::any(groups[blk].ends_4)) { 120 do_block<SYMBOL, hash_table <identity_strategy_t<uint32_t,4>, hash_strategy_t<4>, ALLOCATOR> > 121 (blk_offset, 122 hash_table_4, 123 groups[blk].ends_4, 124 &buffer[blk_offset], 4, 125 &buffer[blk_offset], &buffer[blk_offset], bytes2bits(4), BLOCK_SIZE, 126 symbols, this>gid_factory, this>gid_data); 127 } 128 if(bitblock::any(groups[blk].ends_5)) { 129 do_block<SYMBOL, hash_table <identity_strategy_t<uint32_t,5>, hash_strategy_t<5>, ALLOCATOR> > 130 (blk_offset, 131 hash_table_5, 132 groups[blk].ends_5, 133 &buffer[blk_offset], 5, 134 &buffer[blk_offset], &buffer[blk_offset], bytes2bits(5), BLOCK_SIZE, 135 symbols, this>gid_factory, this>gid_data); 136 } 137 
if(bitblock::any(groups[blk].ends_6)) { 138 do_block<SYMBOL, hash_table <identity_strategy_t<uint32_t,6>, hash_strategy_t<6>, ALLOCATOR> > 139 (blk_offset, 140 hash_table_6, 141 groups[blk].ends_6, 142 &buffer[blk_offset], 6, 143 &buffer[blk_offset], &buffer[blk_offset], bytes2bits(6), BLOCK_SIZE, 144 symbols, this>gid_factory, this>gid_data); 145 } 146 if(bitblock::any(groups[blk].ends_7)) { 147 do_block<SYMBOL, hash_table <identity_strategy_t<uint32_t,7>, hash_strategy_t<7>, ALLOCATOR> > 148 (blk_offset, 149 hash_table_7, 150 groups[blk].ends_7, 151 &buffer[blk_offset], 7, 152 &buffer[blk_offset], &buffer[blk_offset], bytes2bits(7), BLOCK_SIZE, 153 symbols, this>gid_factory, this>gid_data); 154 } 155 /////////////////////////////////////////////////////////////////////////////// 156 // Bit Space Hash 157 /////////////////////////////////////////////////////////////////////////////// 158 if(bitblock::any(groups[blk].ends_8)) { 159 do_block<SYMBOL, hash_table <identity_strategy_t<uint64_t,8>, hash_strategy_d, ALLOCATOR> > 160 (blk_offset, 161 hash_table_8, 162 groups[blk].ends_8, &buffer[blk_offset], 8, 163 (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 8, BLOCK_SIZE/8, 164 symbols, this>gid_factory, this>gid_data); 165 } 166 if(bitblock::any(groups[blk].ends_9)) { 167 do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,9>, hash_strategy_d, ALLOCATOR> > 168 (blk_offset, 169 hash_table_9, 170 groups[blk].ends_9, &buffer[blk_offset], 9, 171 (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 9, BLOCK_SIZE/8, 172 symbols, this>gid_factory, this>gid_data); 173 } 174 if(bitblock::any(groups[blk].ends_10)) { 175 do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,10>, hash_strategy_d, ALLOCATOR> > 176 (blk_offset, 177 hash_table_10, 178 groups[blk].ends_10, &buffer[blk_offset], 10, 179 (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 10, BLOCK_SIZE/8, 180 symbols, this>gid_factory, this>gid_data); 181 } 182 if(bitblock::any(groups[blk].ends_11)) { 183 do_block<SYMBOL, 
hash_table<identity_strategy_t<uint64_t,11>, hash_strategy_d, ALLOCATOR> > 184 (blk_offset, 185 hash_table_11, 186 groups[blk].ends_11, &buffer[blk_offset], 11, 187 (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 11, BLOCK_SIZE/8, 188 symbols, this>gid_factory, this>gid_data); 189 } 190 if(bitblock::any(groups[blk].ends_12)) { 191 do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,12>, hash_strategy_d, ALLOCATOR> > 192 (blk_offset, 193 hash_table_12, 194 groups[blk].ends_12, &buffer[blk_offset], 12, 195 (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 12, BLOCK_SIZE/8, 196 symbols, this>gid_factory, this>gid_data); 197 } 198 if(bitblock::any(groups[blk].ends_13)) { 199 do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,13>, hash_strategy_d, ALLOCATOR> > 200 (blk_offset, 201 hash_table_13, 202 groups[blk].ends_13, &buffer[blk_offset], 13, 203 (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 13, BLOCK_SIZE/8, 204 symbols, this>gid_factory, this>gid_data); 205 } 206 if(bitblock::any(groups[blk].ends_14)) { 207 do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,14>, hash_strategy_d, ALLOCATOR> > 208 (blk_offset, 209 hash_table_14, 210 groups[blk].ends_14, &buffer[blk_offset], 14, 211 (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 14, BLOCK_SIZE/8, 212 symbols, this>gid_factory, this>gid_data); 213 } 214 if(bitblock::any(groups[blk].ends_15)) { 215 do_block<SYMBOL, hash_table<identity_strategy_t<uint64_t,15>, hash_strategy_d, ALLOCATOR> > 216 (blk_offset, 217 hash_table_15, 218 groups[blk].ends_15, &buffer[blk_offset], 15, 219 (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 15, BLOCK_SIZE/8, 220 symbols, this>gid_factory, this>gid_data); 221 } 222 if(bitblock::any(groups[blk].ends_16)) { 223 do_block<SYMBOL, hash_table<identity_strategy_t<BitBlock,16>, hash_strategy_d, ALLOCATOR> > 224 (blk_offset, 225 hash_table_16, 226 groups[blk].ends_16, &buffer[blk_offset], 16, 227 (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 16, BLOCK_SIZE/8, 228 symbols, this>gid_factory, this>gid_data); 229 } 
230 if(bitblock::any(groups[blk].ends_gte_17)) { 231 do_block<SYMBOL, hash_table<identity_strategy_d, hash_strategy_d, ALLOCATOR> > 232 (blk_offset, 233 hash_table_gte_17, 234 &starts[blk], &ends_gte_17[blk], 235 &buffer[blk_offset], 236 (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 17, BLOCK_SIZE/8, 237 symbols, this>gid_factory, this>gid_data); 238 } 239 } 240 } 202 } 203 } 241 204 242 205 private: 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 206 /////////////////////////////////////////////////////////////////////////////// 207 // Byte Space Hash 208 /////////////////////////////////////////////////////////////////////////////// 209 hash_table<identity_strategy_t<uint8_t,1>, hash_strategy_t<1>, ALLOCATOR> hash_table_1; 210 hash_table<identity_strategy_t<uint16_t,2>, hash_strategy_t<2>, ALLOCATOR> hash_table_2; 211 hash_table<identity_strategy_t<uint16_t,3>, hash_strategy_t<3>, ALLOCATOR> hash_table_3; 212 hash_table<identity_strategy_t<uint32_t,4>, hash_strategy_t<4>, ALLOCATOR> hash_table_4; 213 hash_table<identity_strategy_t<uint32_t,5>, hash_strategy_t<5>, ALLOCATOR> hash_table_5; 214 hash_table<identity_strategy_t<uint32_t,6>, hash_strategy_t<6>, ALLOCATOR> hash_table_6; 215 hash_table<identity_strategy_t<uint32_t,7>, hash_strategy_t<7>, ALLOCATOR> hash_table_7; 216 /////////////////////////////////////////////////////////////////////////////// 217 // Bit Space Hash 218 /////////////////////////////////////////////////////////////////////////////// 219 hash_table<identity_strategy_t<uint64_t,8>, hash_strategy_d, ALLOCATOR> hash_table_8; 220 hash_table<identity_strategy_t<uint64_t,9>, hash_strategy_d, ALLOCATOR> hash_table_9; 221 hash_table<identity_strategy_t<uint64_t,10>, hash_strategy_d, ALLOCATOR> hash_table_10; 222 hash_table<identity_strategy_t<uint64_t,11>, hash_strategy_d, ALLOCATOR> hash_table_11; 223 hash_table<identity_strategy_t<uint64_t,12>, hash_strategy_d, ALLOCATOR> hash_table_12; 224 
hash_table<identity_strategy_t<uint64_t,13>, hash_strategy_d, ALLOCATOR> hash_table_13; 225 hash_table<identity_strategy_t<uint64_t,14>, hash_strategy_d, ALLOCATOR> hash_table_14; 226 hash_table<identity_strategy_t<uint64_t,15>, hash_strategy_d, ALLOCATOR> hash_table_15; 227 hash_table<identity_strategy_t<BitBlock,16>, hash_strategy_d, ALLOCATOR> hash_table_16; 228 hash_table<identity_strategy_d, hash_strategy_d, ALLOCATOR> hash_table_gte_17; 266 229 }; 267 230 … … 271 234 template<class SYMBOL, class HASH_TABLE> 272 235 void do_block(uint32_t blk_offset, 273 274 275 276 277 236 HASH_TABLE & h_table, 237 BitBlock ends, 238 uint8_t buffer [], const uint32_t lgth, 239 uint8_t h0 [], uint8_t h1 [], const uint32_t h_lgth, const uint32_t h_block_size, 240 SYMBOL & symbols, GIDFactory & gid_factory, GIDData & gid_data) { 278 241 279 242 uint8_t * buffer_base = buffer; … … 286 249 uint32_t blk_count; 287 250 288 289 290 291 292 293 while(!rscanner.is_done()) { 294 295 251 ReverseScanner<BitBlock, scanword_t> rscanner(&ends); 252 253 rscanner.scan_to_next(); 254 epos = rscanner.get_pos(); 255 256 while(!rscanner.is_done()) { 257 258 spos = epos  lgth; 296 259 297 260 if(spos < 0) { // boundary case 298 261 spos = (BLOCK_SIZE  (1 * spos)) & (BLOCK_SIZE  1); 299 blk_count = (lgth/BLOCK_SIZE)+1; 300 buffer_base = (BLOCK_SIZE * blk_count); 262 blk_count = (lgth/BLOCK_SIZE)+1; 263 buffer_base = (BLOCK_SIZE * blk_count); 301 264 h0_base = (h_block_size * blk_count); 302 265 h1_base = (h_block_size * blk_count); … … 311 274 312 275 rscanner.scan_to_next(); 313 276 epos = rscanner.get_pos(); 314 277 } 315 278 } … … 346 309 347 310 while(!ends_rscanner.is_done()) { 348 311 349 312 starts_rscanner.move_to(epos); 350 313 starts_rscanner.scan_to_next(); … … 355 318 starts_base; 356 319 357 358 359 360 361 362 320 blk_count++; 321 322 starts_rscanner.init(starts_base); 323 starts_rscanner.scan_to_next(); 324 325 if(!starts_rscanner.is_done()) { // found start 363 326 lgth = epos + 
(BLOCK_SIZE - starts_rscanner.get_pos()) + (BLOCK_SIZE * (blk_count-1)); 364 365 366 // buffer_base -= (BLOCK_SIZE * blk_count); 327 // spos = (BLOCK_SIZE - (-1 * spos)) & (BLOCK_SIZE - 1); 328 329 // buffer_base -= (BLOCK_SIZE * blk_count); 367 330 //spos = epos - lgth; 368 331 spos = starts_rscanner.get_pos(); … … 372 335 h1_base -= (h_block_size * blk_count); 373 336 break; 374 337 } 375 338 376 339 } … … 394 357 /* 395 358 void do_block(uint32_t blk_offset, 396 397 398 399 400 401 402 403 404 405 406 407 408 409 359 HASH_TABLE & h_table, 360 BitBlock ends, 361 uint8_t buffer [], const uint32_t lgth, 362 uint8_t h0 [], uint8_t h1 [], const uint32_t h_lgth, const uint32_t h_block_size, 363 SYMBOL & symbols, GIDFactory & gid_factory, GIDData & gid_data) { 364 365 gid_type gid; 366 int32_t spos; 367 int32_t epos; 368 ForwardScanner<BitBlock, scanword_t> fscanner(&ends); 369 370 fscanner.scan_to_next(); 371 epos = fscanner.get_pos(); 372 spos = (epos - lgth); 410 373 411 374 if(!fscanner.is_done() && (spos < 0) ) { // block boundary case … … 435 398 spos = (epos - lgth); 436 399 437 400 } 438 401 439 402 while(!fscanner.is_done()) { … … 449 412 epos = fscanner.get_pos(); 450 413 spos = (epos - lgth); 451 414 } 452 415 453 416 }
Note: See TracChangeset for help on using the changeset viewer.