source: trunk/symbol_table/src/id_symbol_table.hpp @ 1980

Last change on this file since 1980 was 1980, checked in by ksherdy, 7 years ago

Added debug information.

File size: 14.4 KB
Line 
1/*
2 * id_symbol_table.hpp - Identity Symbol Table Pablo Template
3 * Created on: 18-December-2011
4 * Author: Ken Herdy
5 *
6 * BitBlock type arguments must adhere to the 'full-block invariant'
7 * and mask partial block with null bytes.
8 *
9 * Number of length groups MUST coincide with the
10 * number compiler generated length groups.
11 *
12 */
13#ifndef ID_SYMBOL_TABLE_TEMPLATE_HPP
14#define ID_SYMBOL_TABLE_TEMPLATE_HPP
15
16//#define ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG
17#ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG
18static void print_symbol_debug(const uint8_t buffer [], const uint32_t spos, const uint32_t epos, const uint32_t lgth) {
19    cout << "Symbol(";
20    cout << "Start: " << spos;
21    cout << ",End: " << epos;
22    cout << ",Lgth: " << lgth;
23    cout << ",Value:'" << string((char *)&(buffer[spos]), lgth);
24    cout << "')" << endl;
25}
26#endif
27
28#include "symbol_table.hpp"
29#include "hash_table.hpp"
30#include "buffer.hpp"
31#include "../lib/carryQ.hpp"
32#include "../lib/bitblock_iterator.hpp"
33#include "../lib/bitblock_scan.hpp"
34#include <cstdlib>
35
36template<class ALLOCATOR>
37class id_symbol_table:public symbol_table {
38public:
39    id_symbol_table()/*:hash_table_1(256)*/{}
40    ~id_symbol_table() {
41#ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG
42        hash_table_1.print_table();
43        hash_table_2.print_table();
44        hash_table_3.print_table();
45        hash_table_4.print_table();
46        hash_table_5.print_table();
47        hash_table_6.print_table();
48        hash_table_7.print_table();
49        hash_table_8.print_table();
50        hash_table_9.print_table();
51        hash_table_10.print_table();
52        hash_table_11.print_table();
53        hash_table_12.print_table();
54        hash_table_13.print_table();
55        hash_table_14.print_table();
56        hash_table_15.print_table();
57        hash_table_16.print_table();
58        hash_table_gte_17.print_table();
59#endif
60    }
61
62    // Groups & groups
63    void resolve(uint8_t buffer [], Groups groups [],  BitBlock starts [], BitBlock ends_gte_17 [],
64                 BitBlock h0 [], BitBlock h1 [], uint32_t blocks, AoS_symbol aos [], const uint32_t symbols) {
65
66        for(uint32_t blk=0;blk<blocks;blk++) {
67        ///////////////////////////////////////////////////////////////////////////////
68        // Byte Space Hash
69        ///////////////////////////////////////////////////////////////////////////////
70        if(bitblock::any(groups[blk].ends_1)) {
71        do_block<hash_table <identity_strategy_t<uint8_t,1>, hash_strategy_t<1>, ALLOCATOR> >(hash_table_1, groups[blk].ends_1, &buffer[blk*BLOCK_SIZE], 1, &buffer[blk*BLOCK_SIZE], &buffer[blk*BLOCK_SIZE], bytes2bits(1), BLOCK_SIZE);
72        }
73        if(bitblock::any(groups[blk].ends_2)) {
74        do_block<hash_table <identity_strategy_t<uint16_t,2>, hash_strategy_t<2>, ALLOCATOR> >(hash_table_2, groups[blk].ends_2, &buffer[blk*BLOCK_SIZE], 2, &buffer[blk*BLOCK_SIZE], &buffer[blk*BLOCK_SIZE], bytes2bits(2), BLOCK_SIZE);
75        }
76        if(bitblock::any(groups[blk].ends_3)) {
77        do_block<hash_table <identity_strategy_t<uint16_t,3>, hash_strategy_t<3>, ALLOCATOR> >(hash_table_3, groups[blk].ends_3, &buffer[blk*BLOCK_SIZE], 3, &buffer[blk*BLOCK_SIZE], &buffer[blk*BLOCK_SIZE], bytes2bits(3), BLOCK_SIZE);
78        }
79        if(bitblock::any(groups[blk].ends_4)) {
80        do_block<hash_table <identity_strategy_t<uint32_t,4>, hash_strategy_t<4>, ALLOCATOR> >(hash_table_4, groups[blk].ends_4, &buffer[blk*BLOCK_SIZE], 4, &buffer[blk*BLOCK_SIZE], &buffer[blk*BLOCK_SIZE], bytes2bits(4), BLOCK_SIZE);
81        }
82        if(bitblock::any(groups[blk].ends_5)) {
83        do_block<hash_table <identity_strategy_t<uint32_t,5>, hash_strategy_t<5>, ALLOCATOR> >(hash_table_5, groups[blk].ends_5, &buffer[blk*BLOCK_SIZE], 5, &buffer[blk*BLOCK_SIZE], &buffer[blk*BLOCK_SIZE], bytes2bits(5), BLOCK_SIZE);
84        }
85        if(bitblock::any(groups[blk].ends_6)) {
86        do_block<hash_table <identity_strategy_t<uint32_t,6>, hash_strategy_t<6>, ALLOCATOR> >(hash_table_6, groups[blk].ends_6, &buffer[blk*BLOCK_SIZE], 6, &buffer[blk*BLOCK_SIZE], &buffer[blk*BLOCK_SIZE], bytes2bits(6), BLOCK_SIZE);
87        }
88        if(bitblock::any(groups[blk].ends_7)) {
89        do_block<hash_table <identity_strategy_t<uint32_t,7>, hash_strategy_t<7>, ALLOCATOR> >(hash_table_7, groups[blk].ends_7, &buffer[blk*BLOCK_SIZE], 7, &buffer[blk*BLOCK_SIZE], &buffer[blk*BLOCK_SIZE], bytes2bits(7), BLOCK_SIZE);
90        }
91        ///////////////////////////////////////////////////////////////////////////////
92        // Bit Space Hash
93        ///////////////////////////////////////////////////////////////////////////////
94        if(bitblock::any(groups[blk].ends_8)) {
95        do_block<hash_table <identity_strategy_t<uint64_t,8>, hash_strategy_t<8>, ALLOCATOR> >(hash_table_8, groups[blk].ends_8, &buffer[blk*BLOCK_SIZE], 8, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 8, BLOCK_SIZE/8);
96        }
97        if(bitblock::any(groups[blk].ends_9)) {
98        do_block<hash_table<identity_strategy_t<uint64_t,9>, hash_strategy_d, ALLOCATOR> >(hash_table_9, groups[blk].ends_9, &buffer[blk*BLOCK_SIZE], 9, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 9, BLOCK_SIZE/8);
99        }
100        if(bitblock::any(groups[blk].ends_10)) {
101        do_block<hash_table<identity_strategy_t<uint64_t,10>, hash_strategy_d, ALLOCATOR> >(hash_table_10, groups[blk].ends_10, &buffer[blk*BLOCK_SIZE], 10, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 10, BLOCK_SIZE/8);
102        }
103        if(bitblock::any(groups[blk].ends_11)) {
104        do_block<hash_table<identity_strategy_t<uint64_t,11>, hash_strategy_d, ALLOCATOR> >(hash_table_11, groups[blk].ends_11, &buffer[blk*BLOCK_SIZE], 11, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 11, BLOCK_SIZE/8);
105        }
106        if(bitblock::any(groups[blk].ends_12)) {
107        do_block<hash_table<identity_strategy_t<uint64_t,12>, hash_strategy_d, ALLOCATOR> >(hash_table_12, groups[blk].ends_12, &buffer[blk*BLOCK_SIZE], 12, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 12, BLOCK_SIZE/8);
108        }
109        if(bitblock::any(groups[blk].ends_13)) {
110        do_block<hash_table<identity_strategy_t<uint64_t,13>, hash_strategy_d, ALLOCATOR> >(hash_table_13, groups[blk].ends_13, &buffer[blk*BLOCK_SIZE], 13, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 13, BLOCK_SIZE/8);
111        }
112        if(bitblock::any(groups[blk].ends_14)) {
113        do_block<hash_table<identity_strategy_t<uint64_t,14>, hash_strategy_d, ALLOCATOR> >(hash_table_14, groups[blk].ends_14, &buffer[blk*BLOCK_SIZE], 14, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 14, BLOCK_SIZE/8);
114        }
115        if(bitblock::any(groups[blk].ends_15)) {
116        do_block<hash_table<identity_strategy_t<uint64_t,15>, hash_strategy_d, ALLOCATOR> >(hash_table_15, groups[blk].ends_15, &buffer[blk*BLOCK_SIZE], 15, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 15, BLOCK_SIZE/8);
117        }
118        if(bitblock::any(groups[blk].ends_16)) {
119        do_block<hash_table<identity_strategy_t<BitBlock,16>, hash_strategy_d, ALLOCATOR> >(hash_table_16, groups[blk].ends_16, &buffer[blk*BLOCK_SIZE], 16, (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], 16, BLOCK_SIZE/8);
120        }
121        if(bitblock::any(ends_gte_17[blk])) {
122            print_register("s",starts[blk]);
123            print_register("e",ends_gte_17[blk]);
124        do_block<hash_table<identity_strategy_d, hash_strategy_d, ALLOCATOR> >(hash_table_gte_17, &starts[blk], &ends_gte_17[blk], &buffer[blk*BLOCK_SIZE], (uint8_t *)&h0[blk], (uint8_t *)&h1[blk], BLOCK_SIZE/8);
125        }
126
127        }
128    }
129
130private:
131    ///////////////////////////////////////////////////////////////////////////////
132    // Byte Space Hash
133    ///////////////////////////////////////////////////////////////////////////////
134    hash_table<identity_strategy_t<uint8_t,1>, hash_strategy_t<1>, ALLOCATOR> hash_table_1;
135    hash_table<identity_strategy_t<uint16_t,2>, hash_strategy_t<2>, ALLOCATOR> hash_table_2;
136    hash_table<identity_strategy_t<uint16_t,3>, hash_strategy_t<3>, ALLOCATOR> hash_table_3;
137    hash_table<identity_strategy_t<uint32_t,4>, hash_strategy_t<4>, ALLOCATOR> hash_table_4;
138    hash_table<identity_strategy_t<uint32_t,5>, hash_strategy_t<5>, ALLOCATOR> hash_table_5;
139    hash_table<identity_strategy_t<uint32_t,6>, hash_strategy_t<6>, ALLOCATOR> hash_table_6;
140    hash_table<identity_strategy_t<uint32_t,7>, hash_strategy_t<7>, ALLOCATOR> hash_table_7;
141    ///////////////////////////////////////////////////////////////////////////////
142    // Bit Space Hash
143    ///////////////////////////////////////////////////////////////////////////////
144    hash_table<identity_strategy_t<uint64_t,8>, hash_strategy_t<8>, ALLOCATOR> hash_table_8;
145    hash_table<identity_strategy_t<uint64_t,9>, hash_strategy_d, ALLOCATOR> hash_table_9;
146    hash_table<identity_strategy_t<uint64_t,10>, hash_strategy_d, ALLOCATOR> hash_table_10;
147    hash_table<identity_strategy_t<uint64_t,11>, hash_strategy_d, ALLOCATOR> hash_table_11;
148    hash_table<identity_strategy_t<uint64_t,12>, hash_strategy_d, ALLOCATOR> hash_table_12;
149    hash_table<identity_strategy_t<uint64_t,13>, hash_strategy_d, ALLOCATOR> hash_table_13;
150    hash_table<identity_strategy_t<uint64_t,14>, hash_strategy_d, ALLOCATOR> hash_table_14;
151    hash_table<identity_strategy_t<uint64_t,15>, hash_strategy_d, ALLOCATOR> hash_table_15;
152    hash_table<identity_strategy_t<BitBlock,16>, hash_strategy_d, ALLOCATOR> hash_table_16;
153    hash_table<identity_strategy_d, hash_strategy_d, ALLOCATOR> hash_table_gte_17;
154
155};
156
157/* NOTE: C++ template code and Pablo generated length groups must coincide. */
158
159// Fixed Lengths - REVERSE SCAN LOGIC - Scan each BLOCK MSB to LSB (high to low memory address)
160template<class HASH_TABLE>
161IDISA_ALWAYS_INLINE void do_block(HASH_TABLE & h_table, BitBlock ends, uint8_t buffer [], const uint32_t lgth,
162                                  uint8_t h0 [], uint8_t h1 [], const uint32_t h_lgth, const uint32_t h_block_size) {
163
164    int32_t spos;
165    ReverseScanner<BitBlock, scanword_t> rscanner(&ends);
166
167    rscanner.scan_to_next();
168    spos = (rscanner.get_pos() - lgth);
169
170    while(!rscanner.is_done() && (spos >= 0)) {
171
172        h_table.lookup_or_insert(buffer, spos, lgth,
173                                 h0, h1, h_lgth);
174
175#ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG
176        print_symbol_debug(buffer, spos, rscanner.get_pos(), lgth);
177#endif
178        rscanner.scan_to_next();
179        spos = (rscanner.get_pos() - lgth);
180    }
181
182    if(!rscanner.is_done() && (spos < 0)) { // block boundary case
183
184        assert(lgth <= (LOOKBACK_SIZE+BLOCK_SIZE)); // segment boundary
185        if(lgth > (LOOKBACK_SIZE+BLOCK_SIZE)) {
186            cerr << "Fatal Error.";
187            cerr << " Symbol length exceeds " << (LOOKBACK_SIZE+BLOCK_SIZE) << " bytes.";
188            cerr << " Symbol tail : ";
189            cerr << string((char *)&(buffer[rscanner.get_pos()-(LOOKBACK_SIZE+BLOCK_SIZE)]), LOOKBACK_SIZE+BLOCK_SIZE) << endl;
190            abort();
191        }
192
193        uint8_t * lb_buffer = buffer - ((lgth / BLOCK_SIZE) + 1)*BLOCK_SIZE;
194        int32_t lb_spos = BLOCK_SIZE - ((-1*spos) & (BLOCK_SIZE-1));
195
196        uint8_t * lb_h0 = h0 - ((lgth / BLOCK_SIZE) + 1)*h_block_size;
197        uint8_t * lb_h1 = h1 - ((lgth / BLOCK_SIZE) + 1)*h_block_size;
198
199        h_table.lookup_or_insert(lb_buffer, lb_spos, lgth,
200                                 lb_h0, lb_h1, h_lgth);
201
202#ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG
203        print_symbol_debug(buffer, spos, rscanner.get_pos(), lgth);
204#endif
205
206    }
207}
208
209// Variable Lengths - REVERSE SCAN LOGIC - Scan each BLOCK MSB to LSB (high to low memory address)
210template<class HASH_TABLE>
211IDISA_ALWAYS_INLINE void do_block(HASH_TABLE & h_table, BitBlock starts [], BitBlock ends [], uint8_t buffer [],
212                                  uint8_t h0 [], uint8_t h1 [], const uint32_t h_block_size) {
213
214    uint32_t lgth;
215    ReverseScanner<BitBlock, scanword_t> ends_rscanner(ends);
216    ReverseScanner<BitBlock, scanword_t> starts_rscanner(starts);
217
218    cout << ends_rscanner.scan_to_next() << ",";
219    cout << starts_rscanner.scan_to_next() << endl;
220
221    while(!starts_rscanner.is_done() && (starts_rscanner.get_pos() > ends_rscanner.get_pos())) { // synchronize
222         cout << starts_rscanner.scan_to_next() << endl;
223    }
224
225    while((!ends_rscanner.is_done()) && (!starts_rscanner.is_done())) {
226        lgth = ends_rscanner.get_pos() - starts_rscanner.get_pos();
227        h_table.lookup_or_insert(buffer, starts_rscanner.get_pos(), lgth, h0, h1, lgth);
228
229#ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG
230    print_symbol_debug(buffer, starts_rscanner.get_pos(), ends_rscanner.get_pos(), lgth);
231#endif
232
233
234
235        cout << ends_rscanner.scan_to_next() << ",";
236        cout << starts_rscanner.scan_to_next() << endl;
237
238
239    }
240
241    if((!ends_rscanner.is_done()) && (starts_rscanner.is_done())) { // block boundary case, previous block
242
243        uint32_t lb_blk = 0;
244        while(1) {
245            starts_rscanner.init(--starts);
246            starts_rscanner.scan_to_next();
247
248            if(!starts_rscanner.is_done()) {
249                break;
250            }
251
252            if (lb_blk > LOOKBACK_BLOCKS) {
253                cerr << lb_blk << endl;
254                cerr << "Fatal Error.";
255                cerr << " Symbol length exceeds " << (LOOKBACK_SIZE+BLOCK_SIZE) << " bytes." << endl;
256                cerr << " Symbol tail : ";
257                cerr << string((char *)&(buffer[starts_rscanner.get_pos()-(LOOKBACK_SIZE+BLOCK_SIZE)]), LOOKBACK_SIZE+BLOCK_SIZE) << endl;
258                abort();
259            }
260            lb_blk++;
261        }
262
263        lgth = ends_rscanner.get_pos() + (BLOCK_SIZE - starts_rscanner.get_pos()) + lb_blk * BLOCK_SIZE;
264
265        uint8_t * lb_buffer = buffer - ((lgth / BLOCK_SIZE) + 1)*BLOCK_SIZE;
266        int32_t lb_spos = BLOCK_SIZE - ((-1*starts_rscanner.get_pos()) & (BLOCK_SIZE-1));
267
268        uint8_t * lb_h0 = h0 - ((lgth / BLOCK_SIZE) + 1)*h_block_size;
269        uint8_t * lb_h1 = h1 - ((lgth / BLOCK_SIZE) + 1)*h_block_size;
270
271        h_table.lookup_or_insert(lb_buffer, lb_spos, lgth, lb_h0, lb_h1, lgth);
272
273#ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG
274        print_symbol_debug(lb_buffer, lb_spos, ends_rscanner.get_pos(), lgth);
275#endif
276
277    }
278}
279
280// Fixed Lengths - FORWARD SCAN LOGIC - Scan each BLOCK LSB to MSB (low to high memory address)
281//template<class HASH_TABLE>
282//IDISA_ALWAYS_INLINE void do_block(HASH_TABLE & h_table, BitBlock ends, uint8_t buffer [], const uint32_t lgth,
283//                                uint8_t h0 [], uint8_t h1 [], const uint32_t h_lgth, const uint32_t h_block_size) {
284
285//    int32_t spos;
286//    int32_t epos;
287//    ForwardScanner<BitBlock, scanword_t> fscanner(&ends);
288
289//    epos = fscanner.scan_to_next();
290//    spos = (epos - lgth);
291
292//    if((epos != -1) && (spos < 0)) { // block boundary case
293
294//      assert(lgth<=LOOKBACK_SIZE); // segment boundary
295
296//      uint8_t * lb_buffer = buffer - ((lgth / BLOCK_SIZE) + 1)*BLOCK_SIZE;
297//      int32_t lb_spos = BLOCK_SIZE - ((-1*spos) & (BLOCK_SIZE-1));
298
299//      uint8_t * lb_h0 = h0 - ((lgth / BLOCK_SIZE) + 1)*h_block_size;
300//      uint8_t * lb_h1 = h1 - ((lgth / BLOCK_SIZE) + 1)*h_block_size;
301
302//      h_table.lookup_or_insert(lb_buffer, lb_spos, lgth,
303//                               lb_h0, lb_h1, h_lgth);
304
305//#ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG
306//      cout << "Symbol(";
307//      cout << "End:" << epos;
308//      cout << ",Start:" << lb_spos;
309//      cout << ",Text:'" << string((char *)&(lb_buffer[lb_spos]), lgth);
310//      cout << "')" << endl;
311//#endif
312//      epos = fscanner.scan_to_next();
313//      spos = (epos - lgth);
314
315//    }
316
317//    while(epos != -1) {
318
319//      h_table.lookup_or_insert(buffer, spos, lgth,
320//                               h0, h1, h_lgth);
321
322//#ifdef ID_SYMBOL_TABLE_TEMPLATE_HPP_DEBUG
323//      cout << "Symbol(";
324//      cout << "End:" << epos;
325//      cout << ",Start:" << spos;
326//      cout << ",Text:'" << string((char *)&(buffer[spos]), lgth);
327//      cout << "')" << endl;
328//#endif
329//      epos = fscanner.scan_to_next();
330//      spos = (epos - lgth);
331
332//    }
333//}
334
335#endif // ID_SYMBOL_TABLE_TEMPLATE_HPP
336
337
Note: See TracBrowser for help on using the repository browser.