source: trunk/symbol_table/main_template.cpp @ 2026

Last change on this file since 2026 was 2010, checked in by ksherdy, 7 years ago

Refactor fixed lengths to sync ICXML buffer model. Report GID on start pos, scan forward support file diff QA.

File size: 9.2 KB
Line 
1/*
2 * Created on: 18-December-2011
3 * Author: Ken Herdy
4 *
5 * Length sorted symbol table main.
6 *
7 * Lookahead versus Lookback
8 *
9 * The current implementation applies bit stream length grouping based on 'end' markers.
10 * In a sense, 'end' markers are precomputed 'lookahead'.
11 * True 'lookahead' would process the current block plus some number of 'lookahead' positions,
12 * support 'shift back', and mark the 'start' rather than the 'end' positions of lexical items.
13 *
14 * In any case, the current implementation 'expects' that the previous block will be located in a contiguous
15 * memory location that may be indexed as some negative offset of the base address of the current
16 * block.
17 *
18 * Further, to reduce complexity in processing, although structs of BitBlock types are not stored
19 * contiguously in memory, BitBlock struct members are copied into contiguous memory positions.
20 *
21 * Design Issues
22 *
23 * (1) Max hash table size.
24 * (2) Negative shift values.
25 * (3) Template classes on Length L.
26 *
27 */
28
29#include "transpose.hpp"
30#include "buffer.hpp"
31
32#include "../lib/bitblock.hpp"
33#include "../lib/allocator.hpp"
34#include "../lib/s2p.hpp"
35#include "../lib/perflib/perfsec.h"
36#include "../lib/bitblock_scan.hpp"
37#include "marker_strms.hpp"     // GENERATED HEADER
38#include "hash_strms.hpp"       // GENERATED HEADER
39#include "id_group_strms.hpp"   // GENERATED HEADER
40#include "id_symbol_table.hpp"
41#include <string>
42#include <iostream>
43#include <fstream>
44#include <sstream>
45using namespace std;
46
// Global timer handle passed to the PERF_SEC_* macros in main(). Its type is
// selected by the profiling mode chosen at build time:
//   BUFFER_PROFILING -> BOM_Table *
//   CODE_CLOCKER     -> CC * (constructed here with the event list below)
//   neither          -> an opaque void * placeholder (zero-initialized)
#ifdef BUFFER_PROFILING
    BOM_Table * parser_timer;
#elif defined(CODE_CLOCKER)   // 'defined' also accepts an empty -DCODE_CLOCKER and avoids -Wundef noise
    #define NUM_EVENTS 1
    int Events[NUM_EVENTS] = {PAPI_TOT_CYC};
    //int Events[NUM_EVENTS] = {PAPI_L2_DCM};
    //int Events[NUM_EVENTS] = {PAPI_TOT_CYC, PAPI_BR_MSP};
    int cal_size = 20;
    CC * parser_timer = new CC(Events,NUM_EVENTS,cal_size);
#else
    void * parser_timer;
#endif
59
60int main(int argc, char * argv[]) {
61
62    if (argc < 2) {
63            cout << "Usage: " << argv[0] << " <filename>" << endl;
64            exit(-1);
65    }
66
67    stringstream filename;
68    filename << argv[1];
69
70    ifstream is;
71    is.open (filename.str().c_str(), ios::binary);
72
73    if (!is) {
74        cerr << "Error: " << filename << endl;
75        abort();
76    }
77
78    // PERF_SEC_BIND(1);
79    PERF_SEC_INIT(parser_timer);
80
81    ///////////////////////////////////////////////////////////////////////////
82    // Stream Definitions
83    ///////////////////////////////////////////////////////////////////////////
84
85    // Byte Segments - Raw byte streams - With lookback.
86    BitBlock aligned_buffer[SEGMENT_ALLOC_SIZE];
87    uint8_t * lookback = (uint8_t *)aligned_buffer;
88    memset(lookback,0,LOOKBACK_SIZE);
89    uint8_t * raw_buffer = &lookback[LOOKBACK_SIZE];
90
91    // Bit Segments - Hash bit streams - With lookback.
92
93    // hash 0
94    BitBlock aligned_h0[SEGMENT_ALLOC_SIZE/8];
95    BitBlock * lookback_h0 = (BitBlock *) aligned_h0;
96    memset(lookback_h0,0,LOOKBACK_SIZE/BLOCK_SIZE);
97    BitBlock * h0 = &lookback_h0[LOOKBACK_SIZE/BLOCK_SIZE];
98
99    // hash 1
100    BitBlock aligned_h1[SEGMENT_ALLOC_SIZE/8];
101    BitBlock * lookback_h1 = (BitBlock *) aligned_h1;
102    memset(lookback_h1,0,LOOKBACK_SIZE/BLOCK_SIZE);
103    BitBlock * h1 = &lookback_h1[LOOKBACK_SIZE/BLOCK_SIZE];
104
105    // starts
106    BitBlock aligned_starts[SEGMENT_ALLOC_SIZE/8];
107    BitBlock * lookback_starts = (BitBlock *) aligned_starts;
108    memset(lookback_starts,0,LOOKBACK_SIZE/BLOCK_SIZE);
109    BitBlock * starts = &lookback_starts[LOOKBACK_SIZE/BLOCK_SIZE];
110
111    // ends_gte_17
112    BitBlock aligned_ends_gte_17[SEGMENT_ALLOC_SIZE/8];
113    BitBlock * lookback_ends_gte_17 = (BitBlock *) aligned_ends_gte_17;
114    memset(lookback_ends_gte_17,0,LOOKBACK_SIZE/BLOCK_SIZE);
115    BitBlock * ends_gte_17 = &lookback_ends_gte_17[LOOKBACK_SIZE/BLOCK_SIZE];
116
117    // BitSteams - Without lookback
118    Basis_bits basis_bits[SEGMENT_BLOCKS];
119    Markers markers[SEGMENT_BLOCKS];
120    Hash hash[SEGMENT_BLOCKS];
121    Groups groups[SEGMENT_BLOCKS];
122
123    // Symbol Table
124    const uint32_t SYMBOL_COUNT = SEGMENT_SIZE;
125
126    Symbol symbols(SYMBOL_COUNT);
127    id_symbol_table<Symbol, fast_pool_allocator<1024> > symbol_table;
128
129    is.read ((char *)raw_buffer, SEGMENT_SIZE);
130    uint32_t chars_avail = is.gcount();
131
132    ///////////////////////////////////////////////////////////////////////////
133    // Full Segments
134    ///////////////////////////////////////////////////////////////////////////
135    while (chars_avail >= SEGMENT_SIZE) {
136
137      uint32_t blk;
138      for(blk=0;blk<SEGMENT_BLOCKS;blk++) {
139        s2p_do_block((BytePack *) &raw_buffer[blk*BLOCK_SIZE], basis_bits[blk]);    // transpose
140        markers_do_block(basis_bits[blk], markers[blk]);                            // gen symbol spans, mark starts & follows
141        hash_strms_do_block(basis_bits[blk], hash[blk]);                            // gen hash bit streams
142        identity_group_do_block(markers[blk], groups[blk]);                         // sort marker bit stream (identity)
143      }
144
145//    for(int k=0;k<SEGMENT_BLOCKS;k++) {
146//        cout << "RAW " << string((((char*)&raw_buffer[0])+k*BLOCK_SIZE),BLOCK_SIZE) << endl;
147//    }
148
149      for(int blk=0;blk<SEGMENT_BLOCKS;blk++) { // write contiguous bit streams
150        h0[blk] = hash[blk].h0;
151        h1[blk] = hash[blk].h1;
152        starts[blk] = groups[blk].starts;
153        ends_gte_17[blk] = groups[blk].ends_gte_17;
154      }
155
156      PERF_SEC_START(parser_timer);
157      symbol_table.resolve(raw_buffer, groups, starts, ends_gte_17, h0, h1, SEGMENT_BLOCKS, symbols /*, SYMBOL_COUNT*/);
158      PERF_SEC_END(parser_timer, SEGMENT_SIZE);
159
160      // copy loopback bytes
161      memmove(lookback,&raw_buffer[SEGMENT_SIZE-LOOKBACK_SIZE],LOOKBACK_SIZE);
162      // copy loopback bits
163      memmove(lookback_h0,&((uint8_t *)h0)[(SEGMENT_SIZE-LOOKBACK_SIZE)/8],LOOKBACK_SIZE/8);
164      memmove(lookback_h1,&((uint8_t *)h1)[(SEGMENT_SIZE-LOOKBACK_SIZE)/8],LOOKBACK_SIZE/8);
165
166      memmove(lookback_starts,&((uint8_t *)starts)[(SEGMENT_SIZE-LOOKBACK_SIZE)/8],LOOKBACK_SIZE/8);
167      memmove(lookback_ends_gte_17,&((uint8_t *)ends_gte_17)[(SEGMENT_SIZE-LOOKBACK_SIZE)/8],LOOKBACK_SIZE/8);
168
169      //lookback_h0[0] = h0[SEGMENT_BLOCKS-1];
170      //lookback_h1[0] = h1[SEGMENT_BLOCKS-1];
171      is.read ((char *)(raw_buffer), SEGMENT_SIZE);
172      chars_avail = is.gcount();
173
174      // test
175      uint32_t blk_offset;
176      for(int blk=0;blk<SEGMENT_BLOCKS;blk++) {
177          blk_offset = blk * BLOCKSIZE;
178          gid_type gid;
179          ForwardScanner<BitBlock, scanword_t> fscanner(&(groups[blk].starts));
180
181          fscanner.scan_to_next();
182          while(!fscanner.is_done()) {
183              gid = symbols.gids[fscanner.get_pos() + blk_offset];
184  //        cout <<"Symbol[" << fscanner.get_pos() << "] = "
185  //                << "(gid:" << gid << ",raw:"
186  //                << string((char *)symbol_table.get_raw_data(gid), symbol_table.get_lgth(gid))<< ")" << endl;
187              cout << string((char *)symbol_table.get_raw_data(gid), symbol_table.get_lgth(gid)) << ",";
188
189              fscanner.scan_to_next();
190
191          }
192      }
193    }
194    /* Resolve Partial Segments */
195    uint32_t remaining = chars_avail;
196
197    ///////////////////////////////////////////////////////////////////////////
198    // Full blocks
199    ///////////////////////////////////////////////////////////////////////////
200    uint32_t blk = 0;
201    while (remaining >= BLOCK_SIZE) {
202        s2p_do_block((BytePack *) &raw_buffer[blk*BLOCK_SIZE], basis_bits[blk]);
203        markers_do_block(basis_bits[blk], markers[blk]);
204        hash_strms_do_block(basis_bits[blk], hash[blk]);
205        identity_group_do_block(markers[blk], groups[blk]);
206        blk++;
207        remaining -= BLOCK_SIZE;
208    }
209
210    ///////////////////////////////////////////////////////////////////////////
211    // Final partial block or any carry
212    ///////////////////////////////////////////////////////////////////////////
213    if (remaining > 0 || @marker_strms_any_carry /*|| hash_strms_any_carry*/) {
214          BitBlock EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-remaining));
215          s2p_do_final_block((BytePack *) &raw_buffer[blk*BLOCK_SIZE], basis_bits[blk], EOF_mask);
216          markers_do_final_block(basis_bits[blk], markers[blk], EOF_mask);
217          hash_strms_do_final_block(basis_bits[blk], hash[blk], EOF_mask);
218          identity_group_do_final_block(markers[blk], groups[blk], EOF_mask);
219          blk++;
220    }
221
222//    for(int k=0;k<blk;k++) {
223//      cout << "RAW " << string((((char*)&raw_buffer[0])+k*BLOCK_SIZE),BLOCK_SIZE) << endl;
224//    }
225
226    uint32_t segment_size = blk;
227    for(int blk=0;blk<segment_size;blk++) { // write contiguous hash bit streams
228        h0[blk] = hash[blk].h0;
229        h1[blk] = hash[blk].h1;
230        starts[blk] = groups[blk].starts;
231        ends_gte_17[blk] = groups[blk].ends_gte_17;
232    }
233
234    //PERF_SEC_START(parser_timer);
235    symbol_table.resolve(raw_buffer, groups, starts, ends_gte_17, h0, h1, segment_size, symbols/*, SYMBOL_COUNT*/);
236    //PERF_SEC_END(parser_timer, chars_avail+1);
237
238    uint32_t blk_offset;
239    for(int blk=0;blk<segment_size;blk++) {
240        blk_offset = blk * BLOCKSIZE;
241        gid_type gid;
242        ForwardScanner<BitBlock, scanword_t> fscanner(&(groups[blk].starts));
243
244        fscanner.scan_to_next();
245        while(!fscanner.is_done()) {
246            gid = symbols.gids[fscanner.get_pos() + blk_offset];
247//          cout <<"Symbol[" << fscanner.get_pos() << "] = "
248//                  << "(gid:" << gid << ",raw:"
249//                  << string((char *)symbol_table.get_raw_data(gid), symbol_table.get_lgth(gid))<< ")" << endl;
250            cout << string((char *)symbol_table.get_raw_data(gid), symbol_table.get_lgth(gid)) << ",";
251
252            fscanner.scan_to_next();
253        }
254
255    }
256
257    PERF_SEC_DUMP(parser_timer);
258    PERF_SEC_DESTROY(parser_timer);
259
260    is.close();
261
262    return 1;
263}
264
265
Note: See TracBrowser for help on using the repository browser.