1 | #include <stdio.h> |
---|
2 | #include <stdlib.h> |
---|
3 | #include <errno.h> |
---|
4 | #include <string.h> |
---|
5 | #include <sys/types.h> |
---|
6 | #include <sys/stat.h> |
---|
7 | #include <unistd.h> |
---|
8 | #include <sched.h> |
---|
9 | |
---|
10 | // Test Repetitions |
---|
11 | #define RUNS 1 |
---|
12 | |
---|
13 | // stream2runs versions |
---|
14 | //#define BRANCH_REDUCTION |
---|
15 | #include "../stream2runs/src/stream2runs.h" |
---|
16 | |
---|
17 | // Performance Measurement |
---|
18 | #ifdef PAPI |
---|
19 | #include "lib/cclib/clocker/cc.h" |
---|
20 | #include "lib/cclib/clocker/cc.cxx" |
---|
21 | CC * code_clocker; |
---|
22 | #endif |
---|
23 | |
---|
24 | // Parabix 1 Symbol Table implementation (STL Hash Map) |
---|
25 | #include "symtab.h" |
---|
26 | |
---|
27 | // Length Sorted Symbol Table |
---|
28 | |
---|
29 | #include "library_conversion.h" |
---|
30 | #include "ls_symbol_table.h" |
---|
31 | |
---|
32 | #include "symtab.h" |
---|
33 | #include "ls_symbol_table_util.h" |
---|
34 | |
---|
35 | #define SIMD_REGISTER_BIT_WIDTH (sizeof(SIMD_type) << 3) |
---|
36 | #define REGISTER_BIT_WIDTH (sizeof(void *) << 3) |
---|
37 | |
---|
/*
 * s2p_step(s0, s1, hi_mask, shift, p0, p1)
 *
 * One step of the serial-to-parallel transposition used by s2p_bytepack().
 * Packs the high halves and the low halves of the 16-bit fields of s0/s1
 * into two temporaries, then blends them under hi_mask:
 *   p0 = simd_if(hi_mask, packed_high, packed_low >> shift)
 *   p1 = simd_if(hi_mask, packed_high << shift, packed_low)
 *
 * s0, s1, hi_mask : input blocks (s0 and s1 are each expanded twice, so pass
 *                   side-effect-free expressions).
 * shift           : shift distance; must be a compile-time constant in the
 *                   TEMPLATED_SIMD_LIB variant because it is used as a
 *                   template argument.
 * p0, p1          : lvalues that receive the two results.
 *
 * The body is wrapped in do { } while (0) so that "s2p_step(...);" is a single
 * statement and remains valid inside an unbraced if/else. The temporaries
 * carry an s2p_ prefix so they cannot capture an argument named t0 or t1.
 */
#ifdef TEMPLATED_SIMD_LIB
#define s2p_step(s0, s1, hi_mask, shift, p0, p1) \
  do {\
    BitBlock s2p_t0_, s2p_t1_;\
    s2p_t0_ = simd<16>::pack<h,h>((s0), (s1));\
    s2p_t1_ = simd<16>::pack<l,l>((s0), (s1));\
    (p0) = simd_if((hi_mask), s2p_t0_, simd<16>::srli<shift>(s2p_t1_));\
    (p1) = simd_if((hi_mask), simd<16>::slli<shift>(s2p_t0_), s2p_t1_);\
  } while (0)
#else
#define s2p_step(s0, s1, hi_mask, shift, p0, p1) \
  do {\
    BitBlock s2p_t0_, s2p_t1_;\
    s2p_t0_ = simd_pack_16_hh((s0), (s1));\
    s2p_t1_ = simd_pack_16_ll((s0), (s1));\
    (p0) = simd_if((hi_mask), s2p_t0_, simd_srli_16(s2p_t1_, (shift)));\
    (p1) = simd_if((hi_mask), simd_slli_16(s2p_t0_, (shift)), s2p_t1_);\
  } while (0)
#endif
---|
58 | |
---|
59 | static inline void s2p_bytepack(BytePack s[], BitBlock p[]) { |
---|
60 | #ifdef TEMPLATED_SIMD_LIB |
---|
61 | BitBlock mask_2 = simd<2>::himask(); |
---|
62 | BitBlock mask_4 = simd<4>::himask(); |
---|
63 | BitBlock mask_8 = simd<8>::himask(); |
---|
64 | #endif |
---|
65 | #ifndef TEMPLATED_SIMD_LIB |
---|
66 | BitBlock mask_2 = simd_himask_2; |
---|
67 | BitBlock mask_4 = simd_himask_4; |
---|
68 | BitBlock mask_8 = simd_himask_8; |
---|
69 | #endif |
---|
70 | BitBlock bit00224466_0, bit00224466_1, bit00224466_2, bit00224466_3; |
---|
71 | BitBlock bit11335577_0, bit11335577_1, bit11335577_2, bit11335577_3; |
---|
72 | BitBlock bit00004444_0, bit22226666_0, bit00004444_1, bit22226666_1; |
---|
73 | BitBlock bit11115555_0, bit33337777_0, bit11115555_1, bit33337777_1; |
---|
74 | #if (BYTE_ORDER == BIG_ENDIAN) |
---|
75 | s2p_step(s[0], s[1], mask_2, 1, bit00224466_0, bit11335577_0); |
---|
76 | s2p_step(s[2], s[3], mask_2, 1, bit00224466_1, bit11335577_1); |
---|
77 | s2p_step(s[4], s[5], mask_2, 1, bit00224466_2, bit11335577_2); |
---|
78 | s2p_step(s[6], s[7], mask_2, 1, bit00224466_3, bit11335577_3); |
---|
79 | #endif |
---|
80 | #if (BYTE_ORDER == LITTLE_ENDIAN) |
---|
81 | s2p_step(s[7], s[6], mask_2, 1, bit00224466_0, bit11335577_0); |
---|
82 | s2p_step(s[5], s[4], mask_2, 1, bit00224466_1, bit11335577_1); |
---|
83 | s2p_step(s[3], s[2], mask_2, 1, bit00224466_2, bit11335577_2); |
---|
84 | s2p_step(s[1], s[0], mask_2, 1, bit00224466_3, bit11335577_3); |
---|
85 | #endif |
---|
86 | s2p_step(bit00224466_0, bit00224466_1, mask_4, 2, bit00004444_0, bit22226666_0); |
---|
87 | s2p_step(bit00224466_2, bit00224466_3, mask_4, 2, bit00004444_1, bit22226666_1); |
---|
88 | s2p_step(bit11335577_0, bit11335577_1, mask_4, 2, bit11115555_0, bit33337777_0); |
---|
89 | s2p_step(bit11335577_2, bit11335577_3, mask_4, 2, bit11115555_1, bit33337777_1); |
---|
90 | s2p_step(bit00004444_0, bit00004444_1, mask_8, 4, p[0], p[4]); |
---|
91 | s2p_step(bit11115555_0, bit11115555_1, mask_8, 4, p[1], p[5]); |
---|
92 | s2p_step(bit22226666_0, bit22226666_1, mask_8, 4, p[2], p[6]); |
---|
93 | s2p_step(bit33337777_0, bit33337777_1, mask_8, 4, p[3], p[7]); |
---|
94 | } |
---|
95 | |
---|
/*
 * double_int64_adc(x1, x2, y1, y2, rslt1, rslt2, carry)
 *
 * Two chained x86 add-with-carry instructions (GCC extended asm, AT&T
 * operand order):
 *   rslt1 = x1 + y1 + carry-in
 *   rslt2 = x2 + y2 + carry out of the first addition
 *
 * `carry` lives in the accumulator register (constraint "a"): SAHF copies AH
 * into the flags before the additions and LAHF copies the flags back into AH
 * afterwards, so the carry flag travels in bit 8 of `carry`. The other
 * status flags captured by LAHF are returned in the same byte.
 *
 * rslt1, rslt2 and carry must be lvalues; x1/x2 and carry-in are tied to the
 * output registers through matching constraints.
 */
#define double_int64_adc(x1, x2, y1, y2, rslt1, rslt2, carry) \
  __asm__ ( \
      "sahf\n\t" \
      "adc %[e1], %[z1]\n\t" \
      "adc %[e2], %[z2]\n\t" \
      "lahf\n\t" \
      : [z1] "=r" (rslt1), \
        [z2] "=r" (rslt2), \
        [carryflag] "=a" (carry) \
      : "[z1]" (x1), \
        "[z2]" (x2), \
        [e1] "r" (y1), \
        [e2] "r" (y2), \
        "[carryflag]" (carry) \
      : "cc")
---|
106 | |
---|
/*
 * adc128(first, second, carry, sum)
 *
 * Adds two __m128i values as a pair of 64-bit lanes with carry propagation
 * from lane 0 into lane 1, by viewing each operand through a union and
 * handing the lanes to double_int64_adc(). `carry` is passed straight
 * through to double_int64_adc() as both carry-in and carry-out; `sum`
 * receives the result block.
 *
 * The locals are named rslt, x and y; do not pass arguments with those names.
 */
#define adc128(first, second, carry, sum) \
do\
{\
  union {__m128i bitblock;\
         uint64_t int64[2];} rslt, x, y;\
\
  x.bitblock = first;\
  y.bitblock = second;\
\
  double_int64_adc(x.int64[0], x.int64[1],\
                   y.int64[0], y.int64[1],\
                   rslt.int64[0], rslt.int64[1],\
                   carry);\
  sum = rslt.bitblock;\
}while(0)
---|
125 | |
---|
/*
 * double_int64_sbb(x1, x2, y1, y2, rslt1, rslt2, carry)
 *
 * Two chained x86 subtract-with-borrow instructions (GCC extended asm, AT&T
 * operand order):
 *   rslt1 = x1 - y1 - borrow-in
 *   rslt2 = x2 - y2 - borrow out of the first subtraction
 *
 * `carry` lives in the accumulator register (constraint "a"): SAHF copies AH
 * into the flags before the subtractions and LAHF copies the flags back into
 * AH afterwards, so the borrow (carry flag) travels in bit 8 of `carry`.
 *
 * rslt1, rslt2 and carry must be lvalues; x1/x2 and the incoming borrow are
 * tied to the output registers through matching constraints.
 */
#define double_int64_sbb(x1, x2, y1, y2, rslt1, rslt2, carry) \
  __asm__ ( \
      "sahf\n\t" \
      "sbb %[e1], %[z1]\n\t" \
      "sbb %[e2], %[z2]\n\t" \
      "lahf\n\t" \
      : [z1] "=r" (rslt1), \
        [z2] "=r" (rslt2), \
        [carryflag] "=a" (carry) \
      : "[z1]" (x1), \
        "[z2]" (x2), \
        [e1] "r" (y1), \
        [e2] "r" (y2), \
        "[carryflag]" (carry) \
      : "cc")
---|
136 | |
---|
/*
 * sbb128(first, second, carry, sum)
 *
 * Subtracts `second` from `first` as a pair of 64-bit lanes with borrow
 * propagation from lane 0 into lane 1, by viewing each __m128i operand
 * through a union and handing the lanes to double_int64_sbb(). `carry` is
 * passed straight through to double_int64_sbb() as both borrow-in and
 * borrow-out; `sum` receives the result block.
 *
 * The locals are named rslt, x and y; do not pass arguments with those names.
 */
#define sbb128(first, second, carry, sum) \
do\
{\
  union {__m128i bitblock;\
         uint64_t int64[2];} rslt, x, y;\
\
  x.bitblock = first;\
  y.bitblock = second;\
\
  double_int64_sbb(x.int64[0], x.int64[1],\
                   y.int64[0], y.int64[1],\
                   rslt.int64[0], rslt.int64[1],\
                   carry);\
  sum = rslt.bitblock;\
}while(0)
---|
155 | |
---|
156 | // Mark Comma bit positions |
---|
157 | static inline BitBlock bytepack2bitblock(BytePack U8[]); |
---|
158 | static inline BitBlock bytepack2bitblock(BytePack U8[]) { |
---|
159 | |
---|
160 | BitBlock result; |
---|
161 | BitBlock array_u8bit__5_; |
---|
162 | //BitBlock AllOne = simd_const_1(1); |
---|
163 | //BitBlock AllZero = simd_const_1(0); |
---|
164 | BitBlock array_u8bit__2_; |
---|
165 | BitBlock array_u8bit__3_; |
---|
166 | BitBlock array_u8bit__4_; |
---|
167 | BitBlock _strct_s2iclass__classify_bytes__temp4; |
---|
168 | BitBlock _strct_s2iclass__classify_bytes__temp5; |
---|
169 | BitBlock _strct_s2iclass__classify_bytes__temp2; |
---|
170 | BitBlock _strct_s2iclass__classify_bytes__temp3; |
---|
171 | BitBlock array_u8bit__6_; |
---|
172 | BitBlock _strct_s2iclass__classify_bytes__temp1; |
---|
173 | BitBlock array_u8bit__0_; |
---|
174 | BitBlock array_u8bit__1_; |
---|
175 | BitBlock array_u8bit__7_; |
---|
176 | |
---|
177 | BitBlock u8[8]; |
---|
178 | |
---|
179 | s2p_bytepack(U8,u8); |
---|
180 | array_u8bit__0_ = u8[0]; |
---|
181 | array_u8bit__1_ = u8[1]; |
---|
182 | array_u8bit__2_ = u8[2]; |
---|
183 | array_u8bit__3_ = u8[3]; |
---|
184 | array_u8bit__4_ = u8[4]; |
---|
185 | array_u8bit__5_ = u8[5]; |
---|
186 | array_u8bit__6_ = u8[6]; |
---|
187 | array_u8bit__7_ = u8[7]; |
---|
188 | |
---|
189 | _strct_s2iclass__classify_bytes__temp1 = simd_or(array_u8bit__0_,array_u8bit__1_); |
---|
190 | _strct_s2iclass__classify_bytes__temp2 = simd_and(array_u8bit__2_,array_u8bit__3_); |
---|
191 | _strct_s2iclass__classify_bytes__temp3 = simd_andc(_strct_s2iclass__classify_bytes__temp2,_strct_s2iclass__classify_bytes__temp1); |
---|
192 | _strct_s2iclass__classify_bytes__temp4 = simd_or(array_u8bit__5_,array_u8bit__6_); |
---|
193 | _strct_s2iclass__classify_bytes__temp5 = simd_and(array_u8bit__4_,_strct_s2iclass__classify_bytes__temp4); |
---|
194 | result = simd_andc(_strct_s2iclass__classify_bytes__temp3,_strct_s2iclass__classify_bytes__temp5); |
---|
195 | |
---|
196 | return result; |
---|
197 | } |
---|
198 | |
---|
/*
 * Write exactly `length` bytes starting at `str` to stdout, followed by a
 * newline. The data does not need to be NUL-terminated.
 *
 * str    : first byte to print; must be readable for `length` bytes.
 * length : number of bytes to print (0 prints just the newline).
 */
void print_chars(const char * str, size_t length);
void print_chars(const char * str, size_t length) {

    // size_t index: matches the type of `length`, so no signed/unsigned
    // comparison and no int overflow for very long spans.
    for (size_t pos = 0; pos < length; pos++) {
        putchar(str[pos]);
    }
    putchar('\n');

}
---|
208 | |
---|
/*
 * Read the calling process's CPU affinity mask, print its first word, write
 * the (currently unmodified) mask back, and print it again.
 *
 * The CPU_CLR lines are left as commented-out switches for restricting the
 * process to particular CPUs before the mask is re-applied.
 *
 * If the mask cannot be read, the error is reported with perror() and the
 * function returns without calling sched_setaffinity(), so an indeterminate
 * mask is never printed or applied.
 */
void SetCPUAffinity();
void SetCPUAffinity() {

    printf("Setting CPU Affinity...\n");

    cpu_set_t mask;
    CPU_ZERO(&mask);
    const size_t len = sizeof(mask);

    if (sched_getaffinity(0, len, &mask) < 0) {
        perror("sched_getaffinity");
        return;
    }

    // __bits is the C library's internal storage for cpu_set_t; only its
    // first word is shown.
    printf("Original CPU Affinity Mask: %08lx\n", (unsigned long) mask.__bits[0]);

    //CPU_CLR(0, &mask); // (CPU 1)
    //CPU_CLR(1, &mask); // (CPU 0)

    if (sched_setaffinity(0, len, &mask) < 0) {
        perror("sched_setaffinity");
    }

    printf("Modified CPU Affinity Mask: %08lx\n", (unsigned long) mask.__bits[0]);
}
---|
231 | |
---|
232 | int main(int argc, char * argv[]) { |
---|
233 | |
---|
234 | if (argc < 2) { |
---|
235 | printf("Usage: %s <filename> [<outputfile>]\n", argv[0]); |
---|
236 | exit(-1); |
---|
237 | } |
---|
238 | char * filename = argv[1]; |
---|
239 | |
---|
240 | FILE *infile, *outfile; |
---|
241 | infile = fopen(filename, "rb"); |
---|
242 | if (!infile) { |
---|
243 | fprintf(stderr, "Error: cannot open %s for input.\n", filename); |
---|
244 | exit(-1); |
---|
245 | } |
---|
246 | |
---|
247 | if (argc < 3) outfile = stdout; |
---|
248 | else { |
---|
249 | outfile = fopen(argv[2], "wb"); |
---|
250 | if (!outfile) { |
---|
251 | fprintf(stderr, "Error: cannot open %s for writing.\n", argv[2]); |
---|
252 | exit(-1); |
---|
253 | } |
---|
254 | } |
---|
255 | |
---|
256 | #ifdef PAPI |
---|
257 | SetCPUAffinity(); |
---|
258 | char * src_filename = argv[1]; |
---|
259 | char * cmdline = new char[strlen(argv[0]) + strlen(argv[1]) +1 +1]; |
---|
260 | strcat(cmdline, argv[0]); |
---|
261 | strcat(cmdline," "); |
---|
262 | strcat(cmdline,argv[1]); |
---|
263 | |
---|
264 | #define NUM_EVENTS 2 |
---|
265 | int Events[NUM_EVENTS] = {PAPI_TOT_CYC, PAPI_BR_MSP}; |
---|
266 | int cal_size = 1000; |
---|
267 | code_clocker = new CC(Events,NUM_EVENTS,cal_size); |
---|
268 | code_clocker->set_cmd(cmdline); |
---|
269 | |
---|
270 | #endif |
---|
271 | |
---|
272 | struct stat st; |
---|
273 | stat(filename, &st); |
---|
274 | int filesize = st.st_size; |
---|
275 | size_t bytes = filesize; |
---|
276 | |
---|
277 | bytes += sizeof(SIMD_type); |
---|
278 | |
---|
279 | // allocate byte buffer and pad with trailing zeroes |
---|
280 | unsigned char * byte_buffer = (unsigned char *)simd_new(bytes); |
---|
281 | |
---|
282 | |
---|
283 | // slurp file |
---|
284 | int chars_read = fread(byte_buffer, sizeof(char), filesize, infile); |
---|
285 | while(chars_read > 0) { |
---|
286 | chars_read = fread(byte_buffer+chars_read, sizeof(char), filesize, infile); |
---|
287 | } |
---|
288 | |
---|
289 | // mask trailing zeroes |
---|
290 | memset(byte_buffer + filesize, 0, sizeof(SIMD_type)); |
---|
291 | |
---|
292 | // allocate bit stream buffer |
---|
293 | int simd_packs = bytes/sizeof(SIMD_type); |
---|
294 | |
---|
295 | #ifdef BRANCH_REDUCTION |
---|
296 | BitBlock * bit_stream_buffer = simd_new(simd_packs + 1); // pad at least an additional general width of bytes |
---|
297 | #else |
---|
298 | BitBlock * bit_stream_buffer = simd_new(simd_packs); |
---|
299 | #endif |
---|
300 | |
---|
301 | if(bit_stream_buffer == NULL) { |
---|
302 | fprintf(stderr, "Error: out of memory.\n"); |
---|
303 | exit(-1); |
---|
304 | } |
---|
305 | |
---|
306 | cout << "Source Bytes: " << filesize << endl; |
---|
307 | cout << "Allocated Bytes: " << bytes << endl; |
---|
308 | cout << "SIMD Pack Count: " << simd_packs << endl; |
---|
309 | cout << "SIMD Pack Bytes: " << simd_packs * sizeof(SIMD_type) << endl << endl; |
---|
310 | |
---|
311 | #ifdef PAPI |
---|
312 | code_clocker->start_interval(); |
---|
313 | #endif |
---|
314 | |
---|
315 | for(int i=0;i<RUNS;i++) { |
---|
316 | |
---|
317 | #ifdef PAPI |
---|
318 | code_clocker->start_interval(); |
---|
319 | #endif |
---|
320 | |
---|
321 | // convert to bit streams |
---|
322 | for(int i=0,j=0;i<simd_packs;i++,j+=SIMD_REGISTER_BIT_WIDTH) { |
---|
323 | bit_stream_buffer[i] = bytepack2bitblock((BytePack *)(&byte_buffer[j])); |
---|
324 | } |
---|
325 | |
---|
326 | #ifdef BRANCH_REDUCTION |
---|
327 | bit_stream_buffer[simd_packs] = simd<8>::constant<0>(); // initialize the final block |
---|
328 | #endif |
---|
329 | |
---|
330 | #ifdef PAPI |
---|
331 | code_clocker->end_interval(bytes); |
---|
332 | #endif |
---|
333 | |
---|
334 | } |
---|
335 | |
---|
336 | size_t max_span_count = simd_packs * SIMD_REGISTER_BIT_WIDTH/2; |
---|
337 | size_t * starts = new size_t[max_span_count]; |
---|
338 | size_t * lengths = new size_t[max_span_count]; |
---|
339 | size_t span_count = 0; |
---|
340 | |
---|
341 | size_t bit_stream_buffer_length; |
---|
342 | |
---|
343 | //#ifdef PAPI |
---|
344 | // code_clocker->start_interval(); |
---|
345 | //#endif |
---|
346 | |
---|
347 | #ifdef BRANCH_REDUCTION |
---|
348 | bit_stream_buffer_length = simd_packs * sizeof(SIMD_type); |
---|
349 | stream2runs((unsigned char *)bit_stream_buffer, bit_stream_buffer_length, starts, lengths, &span_count); |
---|
350 | #else |
---|
351 | bit_stream_buffer_length = simd_packs * SIMD_REGISTER_BIT_WIDTH / REGISTER_BIT_WIDTH; |
---|
352 | stream2runs((size_t *)bit_stream_buffer, bit_stream_buffer_length, starts, lengths, &span_count); |
---|
353 | #endif |
---|
354 | |
---|
355 | //#ifdef PAPI |
---|
356 | // code_clocker->end_interval(bytes); |
---|
357 | //#endif |
---|
358 | |
---|
359 | // for(int i =0; i<span_count; i++) { |
---|
360 | // printf("Value (%zu,%zu) = ", starts[i], lengths[i]); |
---|
361 | // print_chars(((char *) byte_buffer) + starts[i], lengths[i]); |
---|
362 | // } |
---|
363 | |
---|
364 | // for(int i=0;i<RUNS;i++) { |
---|
365 | // |
---|
366 | // #ifdef PAPI |
---|
367 | // code_clocker->start_interval(); |
---|
368 | // #endif |
---|
369 | |
---|
370 | SymbolTable symbol_table; |
---|
371 | |
---|
372 | int * gids = new int[span_count]; |
---|
373 | |
---|
374 | int gid = 0; |
---|
375 | for(int i =0; i<span_count; i++) { |
---|
376 | gids[i] = symbol_table.Lookup_or_Insert_Name((char *)(byte_buffer + starts[i]), lengths[i]); |
---|
377 | } |
---|
378 | |
---|
379 | cout << "STL Symbol Table: " << endl; |
---|
380 | cout << "DOC_IDX_GID[] => "; |
---|
381 | |
---|
382 | for(int i=0;i<span_count;i++) { |
---|
383 | cout << gids[i] << " "; |
---|
384 | } |
---|
385 | cout << endl << endl; |
---|
386 | |
---|
387 | delete [] gids; |
---|
388 | |
---|
389 | // #ifdef PAPI |
---|
390 | // code_clocker->end_interval(bytes); |
---|
391 | // #endif |
---|
392 | // |
---|
393 | // } |
---|
394 | |
---|
395 | // for(int i=0;i<RUNS;i++) { |
---|
396 | |
---|
397 | |
---|
398 | LSSymbolTable ls_symbol_table; |
---|
399 | for(int i=0; i<span_count; i++) { |
---|
400 | ls_symbol_table.put(byte_buffer + starts[i], lengths[i]); |
---|
401 | } |
---|
402 | |
---|
403 | // #ifdef PAPI |
---|
404 | // code_clocker->start_interval(); |
---|
405 | // #endif |
---|
406 | |
---|
407 | ls_symbol_table.bind(); |
---|
408 | ls_symbol_table.finalize(); |
---|
409 | |
---|
410 | // #ifdef PAPI |
---|
411 | // code_clocker->end_interval(bytes); |
---|
412 | // #endif |
---|
413 | |
---|
414 | cout << "Length Sorted Symbol Table:" << endl; |
---|
415 | ls_symbol_table.display_flattened_symbol_values(); |
---|
416 | ls_symbol_table.display_flattened_gids(); |
---|
417 | ls_symbol_table.clear(); |
---|
418 | // } |
---|
419 | |
---|
420 | delete [] starts; |
---|
421 | delete [] lengths; |
---|
422 | |
---|
423 | if(byte_buffer != NULL) { |
---|
424 | simd_delete((SIMD_type *)byte_buffer); |
---|
425 | } |
---|
426 | |
---|
427 | if(bit_stream_buffer != NULL) { |
---|
428 | simd_delete(bit_stream_buffer); |
---|
429 | } |
---|
430 | |
---|
431 | if(infile != NULL) { |
---|
432 | fclose(infile); |
---|
433 | } |
---|
434 | |
---|
435 | if(argc > 3) { |
---|
436 | fclose(outfile); |
---|
437 | } |
---|
438 | |
---|
439 | #ifdef PAPI |
---|
440 | code_clocker->write_xml_file(); |
---|
441 | code_clocker->display_system_info(); |
---|
442 | code_clocker->display_raw_event_data(); |
---|
443 | delete code_clocker; |
---|
444 | #endif |
---|
445 | |
---|
446 | fprintf(stdout, "Done.\n"); |
---|
447 | |
---|
448 | return(0); |
---|
449 | } |
---|