source: trunk/symbol_table/src/main.cpp @ 2106

Last change on this file since 2106 was 2106, checked in by ksherdy, 7 years ago

Added bit / byte strategy. Added length test to hash table.

File size: 8.8 KB
Line 
1/*
2 * Created on: 18-December-2011
3 * Author: Ken Herdy
4 *
5 * A simple test driver that performs an 'identity' translation of CSV (single) separated symbol values
6 * and prints results to standard output.
7 *
8 * Length sorted symbol table main.
9 *
10 * Lookahead versus Lookback
11 *
12 * The current implementation applies bit stream length grouping based on 'end' markers.
13 * In a sense, 'end' markers are precomputed 'lookahead'.
14 * True 'lookahead' would compute the current block and the number of 'lookahead' positions,
15 * support 'shift back', and mark the 'start' rather than the 'end' positions of lexical items.
16 *
17 * In any case, the current implementation 'expects' that the previous block will be located in a contiguous
18 * memory location that may be indexed as some negative offset of the base address of the current
19 * block.
20 *
21 * Further, to reduce complexity in processing, although structs of BitBlock types are not stored
22 * contiguously in memory, BitBlock struct members are copied into contiguous memory positions.
23 *
24 * Design Considerations.
25 *
26 * (1) Hash table memory allocation.
27 * (2) Negative shift values.
28 * (3) Whether to specialize Hash/Symbol classes on symbol length...
29 *
30 */
31
32#undef IDISA_ALWAYS_INLINE
33
34#include "transpose.hpp"
35#include "buffer.hpp"
36#include "../lib/bitblock.hpp"
37#include "../lib/allocator.hpp"
38#include "../lib/s2p.hpp"
39#include "../lib/perflib/perfsec.h"
40#include "../lib/bitblock_scan.hpp"
41#include "marker_strms.hpp"     // GENERATED HEADER
42#include "hash_strms.hpp"       // GENERATED HEADER
43#include "group_strms.hpp"      // GENERATED HEADER
44#include "symbol_table.hpp"
45#include <string>
46#include <iostream>
47#include <fstream>
48#include <sstream>
49using namespace std;
50
// Global timer handle handed to the PERF_SEC_* macros used in main().
// Its concrete type is chosen at compile time by the profiling build flags.
#if defined(BUFFER_PROFILING)
    BOM_Table * parser_timer;
#elif CODE_CLOCKER
    #define NUM_EVENTS 1
    // Alternative event sets kept for reference:
    //   int Events[NUM_EVENTS] = {PAPI_L2_DCM};
    //   int Events[NUM_EVENTS] = {PAPI_TOT_CYC, PAPI_BR_MSP};
    int Events[NUM_EVENTS] = { PAPI_TOT_CYC };
    int cal_size = 20;
    CC * parser_timer = new CC(Events, NUM_EVENTS, cal_size);
#else
    // Neither profiling flag is set: only an untyped placeholder is declared.
    void * parser_timer;
#endif
63
64int main(int argc, char * argv[]) {
65
66    if (argc < 2) {
67            cout << "Usage: " << argv[0] << " <filename>" << endl;
68            exit(-1);
69    }
70
71    stringstream filename;
72    filename << argv[1];
73
74    ifstream is;
75    is.open (filename.str().c_str(), ios::binary);
76
77    if (!is) {
78        cerr << "Error: " << filename << endl;
79        abort();
80    }
81
82    // PERF_SEC_BIND(1);
83    PERF_SEC_INIT(parser_timer);
84
85    ///////////////////////////////////////////////////////////////////////////
86    // Stream Definitions
87    ///////////////////////////////////////////////////////////////////////////
88
89    // Byte Segments - Raw byte streams - With lookback.
90    BitBlock aligned_buffer[SEGMENT_ALLOC_SIZE];
91    uint8_t * lookback = (uint8_t *)aligned_buffer;
92    memset(lookback,0,LOOKBACK_SIZE);
93    uint8_t * raw_buffer = &lookback[LOOKBACK_SIZE];
94
95    // Bit Segments - Hash bit streams - With lookback.
96
97    // hash 0
98    BitBlock aligned_h0[SEGMENT_ALLOC_SIZE/8];
99    BitBlock * lookback_h0 = (BitBlock *) aligned_h0;
100    memset(lookback_h0,0,LOOKBACK_SIZE/BLOCK_SIZE);
101    BitBlock * h0 = &lookback_h0[LOOKBACK_SIZE/BLOCK_SIZE];
102
103    // hash 1
104    BitBlock aligned_h1[SEGMENT_ALLOC_SIZE/8];
105    BitBlock * lookback_h1 = (BitBlock *) aligned_h1;
106    memset(lookback_h1,0,LOOKBACK_SIZE/BLOCK_SIZE);
107    BitBlock * h1 = &lookback_h1[LOOKBACK_SIZE/BLOCK_SIZE];
108
109    // starts
110    BitBlock aligned_starts[SEGMENT_ALLOC_SIZE/8];
111    BitBlock * lookback_starts = (BitBlock *) aligned_starts;
112    memset(lookback_starts,0,LOOKBACK_SIZE/BLOCK_SIZE);
113    BitBlock * starts = &lookback_starts[LOOKBACK_SIZE/BLOCK_SIZE];
114
115    // follows_0 - Arbitrary length symbols
116    BitBlock aligned_follows_0[SEGMENT_ALLOC_SIZE/8];
117    BitBlock * lookback_follows_0 = (BitBlock *) aligned_follows_0;
118    memset(lookback_follows_0,0,LOOKBACK_SIZE/BLOCK_SIZE);
119    BitBlock * follows_0 = &lookback_follows_0[LOOKBACK_SIZE/BLOCK_SIZE];
120
121    // BitSteams - Without lookback
122    Basis_bits basis_bits[SEGMENT_BLOCKS];
123    Markers markers[SEGMENT_BLOCKS];
124    Hash hash[SEGMENT_BLOCKS];
125    Groups groups[SEGMENT_BLOCKS];
126
127    // Symbol Table
128    gid<SEGMENT_SIZE> gids;
129    symbol_table<gid<SEGMENT_SIZE>, fast_pool_allocator<1024> > st;
130
131    is.read ((char *)raw_buffer, SEGMENT_SIZE);
132    uint32_t chars_avail = is.gcount();
133
134    #if IDENTITY_TEST
135        cout << ","; // prepend delimeter
136    #endif
137
138    ///////////////////////////////////////////////////////////////////////////
139    // Full Segments
140    ///////////////////////////////////////////////////////////////////////////
141    while (chars_avail >= SEGMENT_SIZE) {
142        uint32_t blk;
143        for(blk=0;blk<SEGMENT_BLOCKS;blk++) {
144        s2p_do_block((BytePack *) &raw_buffer[blk*BLOCK_SIZE], basis_bits[blk]);    // transpose
145        markers_do_block(basis_bits[blk], markers[blk]);                            // gen symbol spans, mark starts & follows
146        hash_strms_do_block(basis_bits[blk], hash[blk]);                            // gen hash bit streams
147        identity_group_do_block(markers[blk], groups[blk]);                         // sort marker bit stream (identity)
148    }
149
150    for(int blk=0;blk<SEGMENT_BLOCKS;blk++) {   // write contiguous bit streams
151        h0[blk] = hash[blk].h0;
152        h1[blk] = hash[blk].h1;
153        starts[blk] = groups[blk].starts;
154        follows_0[blk] = groups[blk].follows_0;
155    }
156
157    PERF_SEC_START(parser_timer);
158    st.resolve(raw_buffer, groups, starts, follows_0, h0, h1, SEGMENT_BLOCKS, gids);
159    PERF_SEC_END(parser_timer, SEGMENT_SIZE);
160
161    // copy loopback bytes
162    memmove(lookback,&raw_buffer[SEGMENT_SIZE-LOOKBACK_SIZE],LOOKBACK_SIZE);
163    // copy loopback bits
164    memmove(lookback_h0,&((uint8_t *)h0)[(SEGMENT_SIZE-LOOKBACK_SIZE)/8],LOOKBACK_SIZE/8);
165    memmove(lookback_h1,&((uint8_t *)h1)[(SEGMENT_SIZE-LOOKBACK_SIZE)/8],LOOKBACK_SIZE/8);
166
167    memmove(lookback_starts,&((uint8_t *)starts)[(SEGMENT_SIZE-LOOKBACK_SIZE)/8],LOOKBACK_SIZE/8);
168    memmove(lookback_follows_0,&((uint8_t *)follows_0)[(SEGMENT_SIZE-LOOKBACK_SIZE)/8],LOOKBACK_SIZE/8);
169
170    //lookback_h0[0] = h0[SEGMENT_BLOCKS-1];
171    //lookback_h1[0] = h1[SEGMENT_BLOCKS-1];
172    is.read ((char *)(raw_buffer), SEGMENT_SIZE);
173    chars_avail = is.gcount();
174
175    // test
176    #ifdef IDENTITY_TEST
177
178    cout << "," << endl; // prepend delimeter
179
180    uint32_t blk_offset;
181    for(int blk=0;blk<SEGMENT_BLOCKS;blk++) {
182        blk_offset = blk * BLOCKSIZE;
183        gid_type gid;
184        ForwardScanner<BitBlock, scanword_t> fscanner(&(groups[blk].starts));
185
186        fscanner.scan_to_next();
187        while(!fscanner.is_done()) {
188            gid = gids.at[fscanner.get_pos() + blk_offset];
189            cout << string((char *)st.get_raw_data(gid), st.get_lgth(gid)) << ",";
190            fscanner.scan_to_next();
191        }
192    }
193    #endif
194    }
195
196    /* Resolve Partial Segments */
197    uint32_t remaining = chars_avail;
198
199    ///////////////////////////////////////////////////////////////////////////
200    // Full blocks
201    ///////////////////////////////////////////////////////////////////////////
202    uint32_t blk = 0;
203    while (remaining >= BLOCK_SIZE) {
204        s2p_do_block((BytePack *) &raw_buffer[blk*BLOCK_SIZE], basis_bits[blk]);
205        markers_do_block(basis_bits[blk], markers[blk]);
206        hash_strms_do_block(basis_bits[blk], hash[blk]);
207        identity_group_do_block(markers[blk], groups[blk]);
208        blk++;
209        remaining -= BLOCK_SIZE;
210    }
211
212    ///////////////////////////////////////////////////////////////////////////
213    // Final partial block or any carry
214    ///////////////////////////////////////////////////////////////////////////
215    if (remaining > 0 || generate_markers.carryQ.CarryTest(0, 2) || gen_lgth_groups.carryQ.CarryTest(0, 9) /*|| hash_strms_any_carry*/) {
216        BitBlock EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-remaining));
217        s2p_do_final_block((BytePack *) &raw_buffer[blk*BLOCK_SIZE], basis_bits[blk], EOF_mask);
218        markers_do_final_block(basis_bits[blk], markers[blk], EOF_mask);
219        hash_strms_do_final_block(basis_bits[blk], hash[blk], EOF_mask);
220        identity_group_do_final_block(markers[blk], groups[blk], EOF_mask);
221        blk++;
222    }
223
224    uint32_t segment_blocks = blk;
225    for(int blk=0;blk<segment_blocks;blk++) { // write contiguous hash bit streams
226        h0[blk] = hash[blk].h0;
227        h1[blk] = hash[blk].h1;
228        starts[blk] = groups[blk].starts;
229        follows_0[blk] = groups[blk].follows_0;
230    }
231
232    // PERF_SEC_BIND(0);
233    PERF_SEC_START(parser_timer);
234    st.resolve(raw_buffer, groups, starts, follows_0, h0, h1, segment_blocks, gids);
235    PERF_SEC_END(parser_timer, chars_avail+1);
236
237    #ifdef IDENTITY_TEST
238    uint32_t blk_offset;
239    for(int blk=0;blk<segment_blocks;blk++) {
240        blk_offset = blk * BLOCKSIZE;
241        gid_type gid;
242        ForwardScanner<BitBlock, scanword_t> fscanner(&(groups[blk].starts));
243
244        fscanner.scan_to_next();
245        while(!fscanner.is_done()) {
246                gid = gids.at[fscanner.get_pos() + blk_offset];
247                cout << string((char *)st.get_raw_data(gid), st.get_lgth(gid)) << ",";
248                fscanner.scan_to_next();
249        }
250    }
251    #endif
252
253    #ifdef GID_TEST
254        cout << st.get_max_gid() << endl;
255    #endif
256
257    PERF_SEC_DUMP(parser_timer);
258    PERF_SEC_DESTROY(parser_timer);
259
260    is.close();
261
262    return 1;
263}
264
265
Note: See TracBrowser for help on using the repository browser.