source: trunk/symbol_table/main_template.cpp @ 2092

Last change on this file since 2092 was 2092, checked in by ksherdy, 7 years ago

Refactored id,div2,log2. All tests pass.

File size: 8.6 KB
Line 
1/*
2 * Created on: 18-December-2011
3 * Author: Ken Herdy
4 *
5 * A simple test driver that performs an 'identity' translation of CSV (single) separated symbol values
6 * and prints results to standard output.
7 *
8 * Length sorted symbol table main.
9 *
10 * Lookahead versus Lookback
11 *
12 * The current implementation applies bit stream length grouping based on 'end' markers.
13 * In a sense, 'end' markers are precomputed 'lookahead'.
14 * True 'lookahead' would compute the current block and a number of 'lookahead' positions,
15 * support 'shift back', and mark the 'start' rather than the 'end' positions of lexical items.
16 *
17 * In any case, the current implementation 'expects' that the previous block will be located in a contiguous
18 * memory location that may be indexed as some negative offset of the base address of the current
19 * block.
20 *
21 * Further, to reduce complexity in processing, although structs of BitBlock types are not stored
22 * contiguously in memory, BitBlock struct members are copied into contiguous memory positions.
23 *
24 * Design Considerations.
25 *
26 * (1) Hash table memory allocation.
27 * (2) Negative shift values.
28 * (3) Whether to specialize Hash/Symbol classes on symbol length...
29 *
30 */
31
32#include "transpose.hpp"
33#include "buffer.hpp"
34#include "../lib/bitblock.hpp"
35#include "../lib/allocator.hpp"
36#include "../lib/s2p.hpp"
37#include "../lib/perflib/perfsec.h"
38#include "../lib/bitblock_scan.hpp"
39#include "marker_strms.hpp"     // GENERATED HEADER
40#include "hash_strms.hpp"       // GENERATED HEADER
41#include "group_strms.hpp"      // GENERATED HEADER
42#include "symbol_table.hpp"
43#include <string>
44#include <iostream>
45#include <fstream>
46#include <sstream>
47using namespace std;
48
// Global timer handle handed to the PERF_SEC_* macros in main(); its type is chosen at compile time by the profiling mode.
49#ifdef BUFFER_PROFILING
50    BOM_Table * parser_timer;   // BUFFER_PROFILING build: the handle is a BOM_Table pointer
51#elif CODE_CLOCKER
52    #define NUM_EVENTS 1
53    int Events[NUM_EVENTS] = {PAPI_TOT_CYC};   // event list passed to the CC constructor below
54    //int Events[NUM_EVENTS] = {PAPI_L2_DCM};
55    //int Events[NUM_EVENTS] = {PAPI_TOT_CYC, PAPI_BR_MSP};
56    int cal_size = 20;   // third CC constructor argument; presumably a calibration size -- TODO confirm against CC
57    CC * parser_timer = new CC(Events,NUM_EVENTS,cal_size);   // CODE_CLOCKER build: the handle is a heap-allocated CC
58#else
59    void * parser_timer;   // no profiling mode selected: untyped placeholder handle
60#endif
61
62int main(int argc, char * argv[]) {
63
64    if (argc < 2) {
65            cout << "Usage: " << argv[0] << " <filename>" << endl;
66            exit(-1);
67    }
68
69    stringstream filename;
70    filename << argv[1];
71
72    ifstream is;
73    is.open (filename.str().c_str(), ios::binary);
74
75    if (!is) {
76        cerr << "Error: " << filename << endl;
77        abort();
78    }
79
80    // PERF_SEC_BIND(1);
81    PERF_SEC_INIT(parser_timer);
82
83    ///////////////////////////////////////////////////////////////////////////
84    // Stream Definitions
85    ///////////////////////////////////////////////////////////////////////////
86
87    // Byte Segments - Raw byte streams - With lookback.
88    BitBlock aligned_buffer[SEGMENT_ALLOC_SIZE];
89    uint8_t * lookback = (uint8_t *)aligned_buffer;
90    memset(lookback,0,LOOKBACK_SIZE);
91    uint8_t * raw_buffer = &lookback[LOOKBACK_SIZE];
92
93    // Bit Segments - Hash bit streams - With lookback.
94
95    // hash 0
96    BitBlock aligned_h0[SEGMENT_ALLOC_SIZE/8];
97    BitBlock * lookback_h0 = (BitBlock *) aligned_h0;
98    memset(lookback_h0,0,LOOKBACK_SIZE/BLOCK_SIZE);
99    BitBlock * h0 = &lookback_h0[LOOKBACK_SIZE/BLOCK_SIZE];
100
101    // hash 1
102    BitBlock aligned_h1[SEGMENT_ALLOC_SIZE/8];
103    BitBlock * lookback_h1 = (BitBlock *) aligned_h1;
104    memset(lookback_h1,0,LOOKBACK_SIZE/BLOCK_SIZE);
105    BitBlock * h1 = &lookback_h1[LOOKBACK_SIZE/BLOCK_SIZE];
106
107    // starts
108    BitBlock aligned_starts[SEGMENT_ALLOC_SIZE/8];
109    BitBlock * lookback_starts = (BitBlock *) aligned_starts;
110    memset(lookback_starts,0,LOOKBACK_SIZE/BLOCK_SIZE);
111    BitBlock * starts = &lookback_starts[LOOKBACK_SIZE/BLOCK_SIZE];
112
113    // ends_gte_17
114    BitBlock aligned_ends_gte_17[SEGMENT_ALLOC_SIZE/8];
115    BitBlock * lookback_ends_gte_17 = (BitBlock *) aligned_ends_gte_17;
116    memset(lookback_ends_gte_17,0,LOOKBACK_SIZE/BLOCK_SIZE);
117    BitBlock * ends_gte_17 = &lookback_ends_gte_17[LOOKBACK_SIZE/BLOCK_SIZE];
118
119    // BitSteams - Without lookback
120    Basis_bits basis_bits[SEGMENT_BLOCKS];
121    Markers markers[SEGMENT_BLOCKS];
122    Hash hash[SEGMENT_BLOCKS];
123    Groups groups[SEGMENT_BLOCKS];
124
125    // Symbol Table
126    gid<SEGMENT_SIZE> gids;
127    symbol_table<gid<SEGMENT_SIZE>, fast_pool_allocator<1024> > st;
128
129    is.read ((char *)raw_buffer, SEGMENT_SIZE);
130    uint32_t chars_avail = is.gcount();
131
132    ///////////////////////////////////////////////////////////////////////////
133    // Full Segments
134    ///////////////////////////////////////////////////////////////////////////
135    while (chars_avail >= SEGMENT_SIZE) {
136        uint32_t blk;
137        for(blk=0;blk<SEGMENT_BLOCKS;blk++) {
138        s2p_do_block((BytePack *) &raw_buffer[blk*BLOCK_SIZE], basis_bits[blk]);    // transpose
139        markers_do_block(basis_bits[blk], markers[blk]);                            // gen symbol spans, mark starts & follows
140        hash_strms_do_block(basis_bits[blk], hash[blk]);                            // gen hash bit streams
141        identity_group_do_block(markers[blk], groups[blk]);                         // sort marker bit stream (identity)
142    }
143
144    for(int blk=0;blk<SEGMENT_BLOCKS;blk++) {   // write contiguous bit streams
145        h0[blk] = hash[blk].h0;
146        h1[blk] = hash[blk].h1;
147        starts[blk] = groups[blk].starts;
148        ends_gte_17[blk] = groups[blk].ends_gte_17;
149    }
150
151    PERF_SEC_START(parser_timer);
152    st.resolve(raw_buffer, groups, starts, ends_gte_17, h0, h1, SEGMENT_BLOCKS, gids);
153    PERF_SEC_END(parser_timer, SEGMENT_SIZE);
154
155    // copy loopback bytes
156    memmove(lookback,&raw_buffer[SEGMENT_SIZE-LOOKBACK_SIZE],LOOKBACK_SIZE);
157    // copy loopback bits
158    memmove(lookback_h0,&((uint8_t *)h0)[(SEGMENT_SIZE-LOOKBACK_SIZE)/8],LOOKBACK_SIZE/8);
159    memmove(lookback_h1,&((uint8_t *)h1)[(SEGMENT_SIZE-LOOKBACK_SIZE)/8],LOOKBACK_SIZE/8);
160
161    memmove(lookback_starts,&((uint8_t *)starts)[(SEGMENT_SIZE-LOOKBACK_SIZE)/8],LOOKBACK_SIZE/8);
162    memmove(lookback_ends_gte_17,&((uint8_t *)ends_gte_17)[(SEGMENT_SIZE-LOOKBACK_SIZE)/8],LOOKBACK_SIZE/8);
163
164    //lookback_h0[0] = h0[SEGMENT_BLOCKS-1];
165    //lookback_h1[0] = h1[SEGMENT_BLOCKS-1];
166    is.read ((char *)(raw_buffer), SEGMENT_SIZE);
167    chars_avail = is.gcount();
168
169    // test
170    #ifdef IDENTITY_TEST
171    uint32_t blk_offset;
172    for(int blk=0;blk<SEGMENT_BLOCKS;blk++) {
173        blk_offset = blk * BLOCKSIZE;
174        gid_type gid;
175        ForwardScanner<BitBlock, scanword_t> fscanner(&(groups[blk].starts));
176
177        fscanner.scan_to_next();
178        while(!fscanner.is_done()) {
179            gid = gids.at[fscanner.get_pos() + blk_offset];
180            cout << string((char *)st.get_raw_data(gid), st.get_lgth(gid)) << ",";
181            fscanner.scan_to_next();
182        }
183    }
184    #endif
185    }
186
187    /* Resolve Partial Segments */
188    uint32_t remaining = chars_avail;
189
190    ///////////////////////////////////////////////////////////////////////////
191    // Full blocks
192    ///////////////////////////////////////////////////////////////////////////
193    uint32_t blk = 0;
194    while (remaining >= BLOCK_SIZE) {
195        s2p_do_block((BytePack *) &raw_buffer[blk*BLOCK_SIZE], basis_bits[blk]);
196        markers_do_block(basis_bits[blk], markers[blk]);
197        hash_strms_do_block(basis_bits[blk], hash[blk]);
198        identity_group_do_block(markers[blk], groups[blk]);
199        blk++;
200        remaining -= BLOCK_SIZE;
201    }
202
203    ///////////////////////////////////////////////////////////////////////////
204    // Final partial block or any carry
205    ///////////////////////////////////////////////////////////////////////////
206    if (remaining > 0 || @marker_strms_any_carry || @group_strms_any_carry /*|| hash_strms_any_carry*/) {
207        BitBlock EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-remaining));
208        s2p_do_final_block((BytePack *) &raw_buffer[blk*BLOCK_SIZE], basis_bits[blk], EOF_mask);
209        markers_do_final_block(basis_bits[blk], markers[blk], EOF_mask);
210        hash_strms_do_final_block(basis_bits[blk], hash[blk], EOF_mask);
211        identity_group_do_final_block(markers[blk], groups[blk], EOF_mask);
212        blk++;
213    }
214
215    uint32_t segment_blocks = blk;
216    for(int blk=0;blk<segment_blocks;blk++) { // write contiguous hash bit streams
217        h0[blk] = hash[blk].h0;
218        h1[blk] = hash[blk].h1;
219        starts[blk] = groups[blk].starts;
220        ends_gte_17[blk] = groups[blk].ends_gte_17;
221    }
222
223    //PERF_SEC_START(parser_timer);
224    st.resolve(raw_buffer, groups, starts, ends_gte_17, h0, h1, segment_blocks, gids);
225    //PERF_SEC_END(parser_timer, chars_avail+1);
226
227    #ifdef IDENTITY_TEST
228    uint32_t blk_offset;
229    for(int blk=0;blk<segment_blocks;blk++) {
230        blk_offset = blk * BLOCKSIZE;
231        gid_type gid;
232        ForwardScanner<BitBlock, scanword_t> fscanner(&(groups[blk].starts));
233
234        fscanner.scan_to_next();
235        while(!fscanner.is_done()) {
236                gid = gids.at[fscanner.get_pos() + blk_offset];
237                cout << string((char *)st.get_raw_data(gid), st.get_lgth(gid)) << ",";
238                fscanner.scan_to_next();
239        }
240    }
241    #endif
242
243    #ifdef GID_TEST
244        cout << st.get_max_gid() << endl;
245    #endif
246
247    PERF_SEC_DUMP(parser_timer);
248    PERF_SEC_DESTROY(parser_timer);
249
250    is.close();
251
252    return 1;
253}
254
255
Note: See TracBrowser for help on using the repository browser.