source: trunk/symbol_table/main_template.cpp @ 1960

Last change on this file since 1960 was 1960, checked in by ksherdy, 7 years ago

Standalone symbol table - initial check in.

File size: 7.7 KB
Line 
1/*
2 * Created on: 18-December-2011
3 * Author: Ken Herdy
4 *
5 * Quick and dirty.
6 *
7 */
8
9#include "transpose.hpp"
10#include "../lib/bitblock.hpp"
11#include "../lib/allocator.hpp"
12#include "../lib/s2p.hpp"
13#include "../lib/perflib/perfsec.h"
14
15// GENERATED
16#include "marker_strms.hpp"
17// GENERATED
18#include "hash_strms.hpp"
19// GENERATED
20#include "id_group_strms.hpp"
21
22#include "id_symbol_table.hpp"
23
24#include <string>
25#include <iostream>
26#include <fstream>
27#include <sstream>
28using namespace std;
29
/*
 * Global timer handle passed to the PERF_SEC_* macros in main().
 * Its type depends on which profiling mode, if any, is selected at build time.
 */
#if defined(BUFFER_PROFILING)
    /* Buffer profiling build: the handle is a BOM_Table. */
    BOM_Table * parser_timer;
#elif CODE_CLOCKER
    /* Code clocker build: the handle is a CC configured with the event list below. */
    #define NUM_EVENTS 1
    int Events[NUM_EVENTS] = { PAPI_TOT_CYC };
    /* Alternative event selections (adjust NUM_EVENTS to match the list length): */
    //int Events[NUM_EVENTS] = {PAPI_L2_DCM};
    //int Events[NUM_EVENTS] = {PAPI_TOT_CYC, PAPI_BR_MSP};
    int cal_size = 20;
    CC * parser_timer = new CC(Events, NUM_EVENTS, cal_size);
#else
    /* No profiling selected: an untyped placeholder keeps the PERF_SEC_* call sites valid. */
    void * parser_timer;
#endif
42
43typedef struct symbol: public AoS_symbol
44{
45    string name;
46    char d;
47} symbol;
48
/*
 * Buffer geometry. The *_BLOCKS macros are block counts; the *_SIZE macros are the
 * corresponding sizes obtained by multiplying with BLOCK_SIZE (defined by the library
 * headers).
 *
 * Every expansion is fully parenthesized so that each macro behaves as a single value
 * inside a larger expression (e.g. `n / SEGMENT_SIZE` or `n % SEGMENT_SIZE`).
 */
#define PADDING_BLOCKS 1
#define PADDING_SIZE ((BLOCK_SIZE) * (PADDING_BLOCKS))
#define LOOKBACK_BLOCKS 1
#define LOOKBACK_SIZE ((BLOCK_SIZE) * (LOOKBACK_BLOCKS))
#define SEGMENT_BLOCKS 8
#define SEGMENT_SIZE ((BLOCK_SIZE) * (SEGMENT_BLOCKS)) // multiple of BLOCK_SIZE (bytes)
55
56int main(int argc, char * argv[]) {
57
58    if (argc < 2) {
59            cout << "Usage: " << argv[0] << " <filename>" << endl;
60            exit(-1);
61    }
62
63    stringstream filename;
64    filename << argv[1];
65
66    ifstream is;
67    is.open (filename.str().c_str(), ios::binary);
68
69    if (!is) {
70        cerr << "Error: " << filename << endl;
71        abort();
72    }
73
74    // PERF_SEC_BIND(1);
75    PERF_SEC_INIT(parser_timer);
76
77    /* Byte Buffer */
78    BitBlock aligned_buffer[(LOOKBACK_SIZE + SEGMENT_SIZE + PADDING_SIZE) / sizeof(BitBlock)];
79    uint8_t * lookback = (uint8_t *)aligned_buffer;
80    memset(lookback,0,LOOKBACK_SIZE);
81    uint8_t * raw_buffer = lookback + LOOKBACK_SIZE;
82
83    /* Bit Stream Hash Buffers */
84
85    // TODO - Verify
86
87    /* h0 */
88    BitBlock aligned_h0[(LOOKBACK_SIZE + SEGMENT_SIZE + PADDING_SIZE) / sizeof(BitBlock)];
89    BitBlock * lookback_h0 = (BitBlock *) aligned_h0;
90    memset(lookback_h0,0,sizeof(BitBlock));
91    BitBlock * h0 = (BitBlock *)(lookback_h0 + 1); // TODO - Correct
92
93    /* h1 */
94    BitBlock aligned_h1[(LOOKBACK_SIZE + SEGMENT_SIZE + PADDING_SIZE) / sizeof(BitBlock)];
95    BitBlock * lookback_h1 = (BitBlock *) aligned_h1;
96    memset(lookback_h1,0,sizeof(BitBlock));
97    BitBlock * h1 = (BitBlock *)(lookback_h1 + 1); // TODO - Correct
98
99    /* BitSteams */
100    Basis_bits basis_bits[SEGMENT_BLOCKS];
101    Markers markers[SEGMENT_BLOCKS];
102    Hash hash[SEGMENT_BLOCKS];
103    Groups groups[SEGMENT_BLOCKS];
104
105    /* Symbol Table */
106    const uint32_t SYMBOL_COUNT = SEGMENT_SIZE;
107    symbol symbol_ary[SYMBOL_COUNT];
108    id_symbol_table<fast_pool_allocator<1024> > symbol_table;
109
110    is.read ((char *)raw_buffer, SEGMENT_SIZE);
111    uint32_t chars_avail = is.gcount();
112
113    while (chars_avail >= SEGMENT_SIZE) {
114
115      uint32_t blk;
116      for(blk=0;blk<SEGMENT_BLOCKS;blk++) {
117          s2p_do_block((BytePack *) &raw_buffer[blk*BLOCK_SIZE], basis_bits[blk]);
118          markers_do_block(basis_bits[blk], markers[blk]);
119          hash_strms_do_block(basis_bits[blk], hash[blk]);
120          identity_group_do_block(markers[blk], groups[blk]);
121      }
122
123//      for(int k=0;k<SEGMENT_BLOCKS;k++) {
124//        cout << "RAW " << string((((char*)&raw_buffer[0])+k*BLOCK_SIZE),BLOCK_SIZE) << endl;
125//      }
126
127      /* Write contiguous hash bit streams */
128      for(int blk=0;blk<SEGMENT_BLOCKS;blk++) {
129          h0[blk] = hash[blk].h0;
130          h1[blk] = hash[blk].h1;
131      }
132
133      PERF_SEC_START(parser_timer);
134      symbol_table.resolve(raw_buffer, groups, h0, h1, SEGMENT_BLOCKS, symbol_ary, SYMBOL_COUNT);
135      PERF_SEC_END(parser_timer, SEGMENT_SIZE);
136
137      // print_register("h1[S]",h1[SEGMENT_BLOCKS-1]);
138
139      memmove(lookback,raw_buffer+SEGMENT_SIZE-LOOKBACK_SIZE,LOOKBACK_SIZE); /* copy final block to lookback */
140      //memmove(lookback_h0,((uint8_t *)h0)+((SEGMENT_SIZE-LOOKBACK_SIZE)/sizeof(BitBlock)),sizeof(BitBlock)); /* copy final block to lookback */
141      //memmove(lookback_h1,((uint8_t *)h1)+((SEGMENT_SIZE-LOOKBACK_SIZE)/sizeof(BitBlock)),sizeof(BitBlock)); /* copy final block to lookback */
142
143      lookback_h0[0] = h0[SEGMENT_BLOCKS-1];
144      lookback_h1[0] = h1[SEGMENT_BLOCKS-1];
145      //print_register<BitBlock>("h1[S]",*(BitBlock *)&lookback_h1[0]);
146
147      //exit(1);
148
149      is.read ((char *)(raw_buffer), SEGMENT_SIZE);
150      chars_avail = is.gcount();
151
152    }
153
154    //PERF_SEC_START(parser_timer);
155    /* Partial Segments */
156
157    uint32_t remaining = chars_avail;
158
159    /* Full Blocks */
160    uint32_t blk = 0;
161    while (remaining >= BLOCK_SIZE) {
162        s2p_do_block((BytePack *) &raw_buffer[blk*BLOCK_SIZE], basis_bits[blk]);
163        markers_do_block(basis_bits[blk], markers[blk]);
164        hash_strms_do_block(basis_bits[blk], hash[blk]);
165        identity_group_do_block(markers[blk], groups[blk]);
166        blk++;
167        remaining -= BLOCK_SIZE;
168    }
169
170    /* Partial Block or carry */
171    if (remaining > 0 || @marker_strms_any_carry /*|| hash_strms_any_carry*/) {
172          BitBlock EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-remaining)); /* null padding byte */
173          s2p_do_final_block((BytePack *) &raw_buffer[blk*BLOCK_SIZE], basis_bits[blk], EOF_mask);
174          markers_do_final_block(basis_bits[blk], markers[blk], EOF_mask);
175          hash_strms_do_final_block(basis_bits[blk], hash[blk], EOF_mask);
176          identity_group_do_final_block(markers[blk], groups[blk], EOF_mask);
177          blk++;
178    }
179
180//    for(int k=0;k<blk;k++) {
181//      cout << "RAW " << string((((char*)&raw_buffer[0])+k*BLOCK_SIZE),BLOCK_SIZE) << endl;
182//    }
183
184    /* Write contiguous hash bit streams */
185    uint32_t segment_size = blk;
186
187    for(int blk=0;blk<segment_size;blk++) {
188        h0[blk] = hash[blk].h0;
189        h1[blk] = hash[blk].h1;
190    }
191
192    PERF_SEC_START(parser_timer);
193    symbol_table.resolve(raw_buffer, groups, h0, h1, blk, symbol_ary, SYMBOL_COUNT);
194    PERF_SEC_END(parser_timer, chars_avail+1);
195
196    /* WARNING */
197    // if(remaining==0 && @markers_any_cary) {
198    // @ _any_carry - equivalent to single byte look ahead
199    // Any case in which we have a partial block, we need to know the boundary of the partial block to know when to store carry information.
200    // Any case in which we must evaluate a bit value at position 'one past a boudary' can be handled within final block logic.
201    // }
202
203    // PBS Modules
204    // Pablo block processing structure.
205    // Four Cases --- Move to xml.dnsdojo.com
206    // do_init_block(), do_block(), do_final_block(), do_all()
207    // Four cases
208    // - do_init_block() restrict to initialization only, do_block() does not execute, do_final_block() executes
209
210    // Process full segments in sub modules, ie. sizeof(BitBlock) * 8 bytes,
211    // do_segment()
212    // - do_segment(uint8_t * buffer, BitBlock * strm_1, ..., BitBlock * strm_k, uint32_t byte_count)
213    //
214    // Handle 'while(segments)', while(full blocks), if(partial or carry) on the main application module.
215
216    // Lookahead/Lookback
217    // Current implementation bit stream length grouping strategy leverage 'end' markers.
218    // 'end' markers in a sense are precomputed 'lookahead'
219    // True 'lookahead' would compute the current block and number of 'lookahead' position to
220    // support 'shift back' and the mark the 'start' rather than the 'end' positions of lexical items.
221
222    // The current implementation 'expects' that the previous block will be located in a contiguous
223    // memory location that may be indexed as some negative offset of the base address of the current
224    // block.
225
226    // Max hash table size, negative shift values.
227    // Both in symbols that cross boundaries as well as in hash_strategy classes, hard coding.
228    // Template parameters on Length L,
229    // Bits vs. Bytes?
230
231    PERF_SEC_DUMP(parser_timer);
232    PERF_SEC_DESTROY(parser_timer);
233
234    is.close();
235
236    return 1;
237}
238
239
Note: See TracBrowser for help on using the repository browser.