source: proto/s2k/trunk/framework/input/templates/cpplang/grep.template @ 4025

Last change on this file since 4025 was 4025, checked in by ksherdy, 5 years ago

Reverted grep template.

File size: 14.0 KB
Line 
1/**
2 * grep static fixed string search C++ template.
3 *
4 * Author:   Ken Herdy
5 *
6 * Usage: ./grep <infile> [-o outfile]
7 *
8 * Description: Segment-at-a-time template. Implements a line-at-a-time copyback processing model.
9 *
10 * '_offset' => 0-based byte offsets
11 * '_pos'    => 1-based bytes offsets
12 *
13 * Future Work:
14 *
15 * a. Lookahead.
16 * b. Support multiple segments a.k.a. buffer-at-a-time.
17 * c. Compile in s2k. <--
18 * d. s2k iterators for (start, follow) in (starts_stream, follows_stream) do { }
19 *
20 * Segment-at-a-time Processing Issues:
21 *
22 * 1. Start-of-stream or equivalently start-of-segment clear()...
23 *    The ScanTo and EOF_mask processing ensure a 'fence post' at the
24 *    end of each segment as well as at the end of file.
25 *
26 * 'grep.py' and 'grep_template.cpp' are tightly coupled on a number of variables:
27 *
28 * 1. Tranpose.do_block expects 'byte_data' and 'basis_bits'.
29 * 2. All StreamFunction.do_final_block methods expect 'EOF_mask'.
30 * 3. Sequential iterators (scanners) expect 'output.matches'.
31 * 4. Sequential iterators (scanners) expect 'lex.LF'.
32 *
33 **/
34
35#define DEBUG 0
36
37#define DEBUG 0
38
39// Runtime directives
40#define BASIS_BITS
41
42// Runtime libraries
43#include <simd-lib/bitblock.hpp>
44#include <simd-lib/runtime.hpp>
45#include <simd-lib/pabloSupport.hpp>
46#include <simd-lib/bitblock_iterator.hpp>
47#include <simd-lib/transpose.hpp>
48
49// C/C++ libraries
50#include <stdio.h>
51#include <stdlib.h>
52#include <string>
53#include <iostream>
54using namespace std;
55
56
57// Fixed pattern.
58const char * fixed_pattern  = "apple";
59const int pattern_size      = strlen(fixed_pattern);
60
61// Platform dependent definitions. // Example only, defined in simd-lib/builtins.hpp.
62typedef __m128i BitBlock;
63typedef BitBlock BytePack;
64typedef uint32_t ScanWord;
65
66// Segment-at-a-time buffered stream processing parameters.
67const int SCANBLOCK_SIZE   = sizeof(ScanWord) * 8;
68const int SCANFIELD_SIZE   = sizeof(ScanWord) * 8;
69// const int BLOCK_SIZE       =  sizeof(BitBlock) * 8;
70const int SEGMENT_BLOCKS   = SCANBLOCK_SIZE * SCANFIELD_SIZE / BLOCK_SIZE;
71const int SEGMENT_SIZE     = SEGMENT_BLOCKS * BLOCK_SIZE;
72
73//const int CACHE_SIZE       = 32768;
74//const int BUFFER_SEGMENTS  = CACHE_SIZE / SEGMENT_SIZE;
75//const int BUFFER_SIZE      = BUFFER_SEGMENTS * SEGMENT_SIZE; // SEGMENT_SIZE; //
76
77// @ global - Parameter replaced with C++ translation of stream structs
78//            and struct functions definitions
79###@global ###
80
81// @ decl - Replaced with a set of C++ stream struct declarations.
82###@struct_decls ###
83
84  // S2K Generated
85###@filter_decls ###
86
87// @ stream_stmts - Replaced with C++ stream functions declarations.
88//@ stream_stmts
89
90// Segment-at-a-time parameters.
91int bytes_read              = 0;
92int bytes_avail             = 0;  // wrt current segment
93int bytes_remaining         = 0;
94
95int copy_back_size          = 0;
96int copy_back_offset        = 0;
97
98int block_index             = 0;
99int block_base              = 0;
100
101//int segment_index           = 0; // segment index wrt current buffer  // unused
102//int segment_base            = 0; // segment offset wrt current buffer // unused
103
104int stream_base             = 0;
105
106int match_offset            = 0; // 0-based
107int line_start_offset       = 0; 
108int line_end_offset         = 0;
109
110int line_final_start_offset = 0;
111int line_final_end_offset   = 0;
112
113int main(int argc, char * argv[]) {
114   
115    char * infilename, * outfilename;
116    FILE * infile, * outfile;
117       
118    int opt_code;
119    int byte_offset             = 0;
120    int count_only_option       = 0;
121    int only_matching           = 0;
122    int print_version_option    = 0;
123   
124    while ((opt_code = getopt(argc, argv, "bcov?")) != -1) {
125        switch (opt_code) {
126        case 'b':
127            byte_offset = 1;
128            break;
129        case 'c':
130            count_only_option = 1;
131            printf("Not implemented.");
132            break;
133        case 'o':
134            only_matching = 1;
135            break;           
136        case 'v':
137            print_version_option = 1;
138            break;
139        case '?':
140            printf("Usage: %s [-c] [-v] <inputfile> [<outputfile>]\n", argv[0]);
141            exit(-1);
142            break;
143        default:
144            printf ("Invalid option: %c\n", opt_code);
145            printf("Usage: %s [-c] [-v] <inputfile> [<outputfile>]\n", argv[0]);
146            exit(-1);
147        }
148    }
149   
150  if (optind >= argc) {
151    printf ("Too few arguments\n");
152    printf("Usage: %s [-c] [-v] <regex> <inputfile> [<outputfile>]\n", argv[0]);
153    exit(-1);
154  }
155     
156  infilename = argv[optind++];
157  infile = fopen(infilename, "rb");
158  if (!infile) {
159      fprintf(stderr, "Error: cannot open %s.\n", infilename);
160      exit(-1);
161  }
162
163  if(optind >= argc) {
164      outfile = stdout;
165  } else {
166      outfilename = argv[optind++];
167      if (optind != argc) {
168          printf ("Too many arguments\n");
169          printf("Usage: %s [-c] [-v] <regex> <inputfile> [<outputfile>]\n", argv[0]);
170          exit(-1);
171      }
172      outfile = fopen(outfilename, "wb");
173      if (!outfile) {
174          fprintf(stderr, "Error: cannot open %s.\n", outfilename);
175          exit(-1);
176      }
177  }
178
179  if(print_version_option) {
180      fprintf(outfile, "grep static fixed-string parallel bit streams.: March 2014\n");
181  }
182
183  ATTRIBUTE_SIMD_ALIGN char buffer[SEGMENT_SIZE];
184  // Pablo transpose.do_block(), transpose.do_final_block()
185  // expect 'byte_data' and 'basis' names as input and output arguments.
186  char * byte_data = buffer;
187
188  // Scanners
189  BitStreamScanner<BitBlock, ScanWord, ScanWord, SEGMENT_BLOCKS> matches_scanner;
190  BitStreamScanner<BitBlock, ScanWord, ScanWord, SEGMENT_BLOCKS> line_starts_scanner;
191  BitStreamScanner<BitBlock, ScanWord, ScanWord, SEGMENT_BLOCKS> line_ends_scanner;
192
193  // Segment-at-a-time processing.
194  while(!feof(infile)) {
195    // Read Stream in SEGMENT_SIZE - strlen("needle") byte chunks.
196    bytes_read      = fread(buffer + copy_back_size, 1, SEGMENT_SIZE - copy_back_size, infile);
197    bytes_avail     = bytes_read + copy_back_size;
198    bytes_remaining = bytes_avail;
199   
200//    if(feof(infile))
201//        && (0 == bytes_remaining)) {
202//        if(infile) { fclose(infile); infile=NULL;}
203//        if(outfile) { fclose(outfile); outfile=NULL;}
204//        break;
205//    }
206   
207    if(ferror(infile)) { perror( "io error" ); exit(1); }
208
209    // Process full segment.
210   
211    //assert(("fread exceeded segment size.", bytes_avail <= SEGMENT_SIZE));
212   
213    if (bytes_remaining == SEGMENT_SIZE) { // (bytes_remaining >= SEGMENT_SIZE)
214   
215      block_base      = 0;     
216       
217      if(only_matching) { 
218        matches_scanner.init();
219      }
220     
221      if(!only_matching) {
222        line_starts_scanner.init();
223        line_ends_scanner.init();
224      }
225
226      for(block_index = 0;
227          block_index < SEGMENT_BLOCKS;
228          block_index++, block_base+=BLOCK_SIZE) {
229
230        byte_data = &buffer[block_base];
231       
232        //Compiled to 'do_block()' calls.
233        ###@filter_do_block ###
234
235        if(only_matching) { 
236          matches_scanner.load_block(output.match_follows, block_index);
237        }
238
239        if(!only_matching) {
240            line_starts_scanner.load_block(output.line_starts, block_index);
241            line_ends_scanner.load_block(output.line_ends, block_index);
242        }
243      }
244   
245      if(only_matching) {
246        while(matches_scanner.has_next()) {
247          match_offset = matches_scanner.scan_to_next() - pattern_size;
248          if(byte_offset) {
249              int match_stream_offset = stream_base + match_offset;
250              fprintf(outfile, "%d:", match_stream_offset);
251          }
252         
253          // KH: Lookahead.
254          fwrite(&buffer[match_offset], 1, pattern_size, outfile);
255         
256          //fprintf(outfile, "%s\n", fixed_pattern);
257          fprintf(outfile, "\n");
258        }
259       
260        copy_back_size      = pattern_size + 1;           
261        copy_back_offset    = bytes_avail - copy_back_size;           
262      }
263 
264      if(!only_matching) {
265
266        assert(("Line length exceeds segment size.", line_ends_scanner.has_next() && line_starts_scanner.has_next())); 
267         
268        //if(has_line_start) {
269            line_final_start_offset = line_starts_scanner.get_final_pos();
270        //}
271        //if(has_line_end) {
272            line_final_end_offset = line_ends_scanner.get_final_pos();
273        //}
274           
275        // if(!has_line_start && !has_line_end) {/* Set flag to buffer entire segment. */;}
276         
277        while(line_starts_scanner.has_next() && line_ends_scanner.has_next()) {
278             
279          line_start_offset  = line_starts_scanner.scan_to_next();
280          line_end_offset    = line_ends_scanner.scan_to_next();
281             
282          if(byte_offset) {
283            fprintf(outfile, "%d:", stream_base + line_start_offset);
284          }
285             
286          fwrite(&buffer[line_start_offset], 1, line_end_offset - line_start_offset + 1, outfile);
287        }
288       
289        copy_back_offset   = (line_final_start_offset > line_final_end_offset) ? line_final_start_offset : (line_final_end_offset + 1) ;
290        copy_back_size     = bytes_avail - copy_back_offset;   
291
292        assert(("copy_back_offset", (copy_back_offset >= 0)));
293        assert(("copy_back_offset", (copy_back_offset <= bytes_avail)));           
294        assert(("copy_back_size", (copy_back_size >= 0)));
295        assert(("copy_back_size", (copy_back_size < SEGMENT_SIZE)));
296       
297      }
298     
299      bytes_remaining -= SEGMENT_SIZE;
300    }
301   
302    // Process a partial segment.
303    if(bytes_remaining > 0) {
304               
305        block_index = 0;   
306        block_base  = 0;   
307       
308        if(only_matching) {
309          matches_scanner.init();
310        }
311       
312        if(!only_matching) {
313          line_starts_scanner.init();
314          line_ends_scanner.init();
315        }
316       
317        // Process full blocks.
318        while (bytes_remaining >= BLOCK_SIZE) {
319          byte_data = &buffer[block_base];
320         
321          // Compiler 'do_block()' calls.
322          ###@filter_do_block ###
323   
324          if(only_matching) {
325            matches_scanner.load_block(output.match_follows, block_index);
326          }       
327         
328          if(!only_matching) {
329            line_starts_scanner.load_block(output.line_starts, block_index);
330            line_ends_scanner.load_block(output.line_ends, block_index);
331          }
332   
333          block_base += BLOCK_SIZE;
334          bytes_remaining -= BLOCK_SIZE;
335          block_index++;
336       }
337
338       // Process a partial final block. // KH: Not required.     
339       BitBlock EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE - bytes_remaining));   
340       byte_data = &buffer[block_base];
341         
342       // Compiler 'do_final_block()' calls.
343       ###@filter_do_final_block ###
344       
345       if(only_matching) {
346          matches_scanner.load_block(output.match_follows & EOF_mask, block_index);
347       }
348         
349       if(!only_matching) {   
350         line_starts_scanner.load_block(output.line_starts & EOF_mask, block_index);
351         line_ends_scanner.load_block(output.line_ends & EOF_mask, block_index);           
352       }
353
354       if(only_matching) {
355          while(matches_scanner.has_next()) {
356            match_offset = matches_scanner.scan_to_next() - pattern_size;
357            if(byte_offset) {
358                int match_stream_offset = stream_base + match_offset;
359                fprintf(outfile, "%d:", match_stream_offset);
360            }
361           
362            // KH: Lookahead.
363            fwrite(&buffer[match_offset], 1, pattern_size, outfile);
364            fprintf(outfile, "\n");
365          }
366         
367          copy_back_size      = pattern_size + 1;           
368          copy_back_offset    = bytes_avail - copy_back_size;             
369        }
370     
371        if(!only_matching) {
372           
373            assert(("Line length exceeds segment size.", line_ends_scanner.has_next() && line_starts_scanner.has_next())); 
374                       
375            //if(has_line_start) {
376                line_final_start_offset = line_starts_scanner.get_final_pos();
377            //}
378            //if(has_line_end) {
379                line_final_end_offset = line_ends_scanner.get_final_pos();
380            //}
381               
382            // if(!has_line_start && !has_line_end) {/* Set flag to buffer entire segment. */;}
383   
384            while(line_starts_scanner.has_next() && line_ends_scanner.has_next()) {
385               
386                line_start_offset  = line_starts_scanner.scan_to_next();
387                line_end_offset    = line_ends_scanner.scan_to_next();
388               
389                if(byte_offset) {
390                    fprintf(outfile, "%d:", stream_base + line_start_offset);
391                }
392               
393                fwrite(&buffer[line_start_offset], 1, line_end_offset - line_start_offset + 1, outfile);
394            }
395
396            copy_back_offset   = (line_final_start_offset > line_final_end_offset) ? line_final_start_offset : (line_final_end_offset + 1) ;
397            copy_back_size     = bytes_avail - copy_back_offset;
398           
399            assert(("copy_back_offset", (copy_back_offset >= 0)));
400            assert(("copy_back_offset", (copy_back_offset <= bytes_avail)));           
401            assert(("copy_back_size", (copy_back_size >= 0)));
402            assert(("copy_back_size", (copy_back_size < SEGMENT_SIZE)));
403           
404        }       
405    }
406   
407    if(DEBUG) {
408        printf("bytes_avail: %d\n", bytes_avail);
409        printf("bytes_remaining: %d\n", bytes_remaining);
410        printf("copy_back_offset: %d\n", copy_back_offset);
411        printf("copy_back_size: %d\n", copy_back_size);       
412        printf("final_line_starts_offset: %d\n", line_final_start_offset);
413        printf("final_line_ends_offset: %d\n", line_final_end_offset);
414    }
415   
416    memmove(&buffer[0], &buffer[copy_back_offset], copy_back_size);
417   
418    // pablo.ScanToFirst() must clear carry-in at the start of each segment
419    classifyBytes.clear();
420    match.clear();
421    matchLines.clear();
422   
423    stream_base += bytes_avail;
424    stream_base -= copy_back_size;
425
426  }
427
428  if(infile) { fclose(infile); infile=NULL;}
429  if(outfile) { fclose(outfile); outfile=NULL;}
430 
431  return 0;
432}
Note: See TracBrowser for help on using the repository browser.