source: proto/s2k/trunk/framework/input/templates/cpplang/grep.template @ 4031

Last change on this file since 4031 was 4031, checked in by ksherdy, 5 years ago

Refactored grep template for bit-space / byte-space compilation strategy.

File size: 13.6 KB
Line 
1/*
2 * grep exact-string search C++ in \s2k{}.
3 *
4 * Usage:    grep [OPTIONS] <infile> [outfile]
5 *
6 * Warning:
7 *
8 * Template implementation limits line length to SEGMENT_SIZE bytes, i.e.,
9 * 1K on 32-bit architecture with 128-bit SIMD registers, and
10 * 4K on 64-bit with 128-bit SIMD registers.
11 *
12 * Program Description:   
13 *
14 * The \s2k{} demo grep program searches a file for lines containing matches
15 * to the fixed target pattern, `apple'.
16 * The default behaviour of \s2k{} grep is to print all matching lines.
17 *
18 * The flag -o specifies that only 
19 * the matched parts of a matching line are produced, with each
20 * such part on a separate output line.
21 *
22 * The flag -b, specifies that the 0-based byte offset within
23 * the input file before each line of output.
24 
25 * If -b is specified with -o, \s2k{} grep prints the offset of
26 * each match, formatted as `offset:apple', where `apple' is the target
27 * pattern read from memory and offset is the zero-index offset
28 * of the first character the match.
29 *
30 * For example, given an input file containing three occurrences of `apple'
31 * at zero-indexed offsets 47,107, and 189, the \s2k{} grep
32 * application writes the following results to standard output.
33 *
34 * 47:apple
35 * 107:apple
36 * 189:apple
37 *
38 * This behaviour is equivalent to the following GNU grep utility command.
39 *
40 * grep needle filename --only-matching --byte-offset
41 *
42 */
43/*
44 * Template Attributes:
45 *
46 * The \s2k{} compiler and substitutes generated code fragments for
47 * the following predefined template attributes bracked with
48 * hashhashhashamp and hashhashash.
49 *
50 * @global                - Stream structure and filter definitions.
51 *
52 * @struct_decls          - Stream structure declarations.
53 *
54 * @filter_decls          - Stream filter declarations.
55 *
56 * @filter_do_block       - Stream filter `do_block()' calls.
57 *
58 * @filter_do_final_block - Stream filter `do_final_block()' calls.
59 *
60 * @filter_clear          - Stream filter `clear()' calls.
61 */
62 
63/*
64 * ###@warningComment ###
65 */
66 
67#define DEBUG 0
68
69// Runtime directives
70#define BASIS_BITS
71
72// Runtime libraries
73#include <simd-lib/bitblock.hpp>
74#include <simd-lib/runtime.hpp>
75#include <simd-lib/pabloSupport.hpp>
76#include <simd-lib/bitblock_iterator.hpp>
77#include <simd-lib/transpose.hpp>
78
79// C/C++ libraries
80#include <stdio.h>
81#include <stdlib.h>
82#include <string>
83#include <iostream>
84using namespace std;
85
86// S2K Generated
87###@global ###
88
89// Segment-at-a-time buffered stream processing parameters.
90const int SCANBLOCK_SIZE     = sizeof(ScanWord) * 8;
91const int SCANFIELD_SIZE     = sizeof(ScanWord) * 8;
92const int SEGMENT_BLOCKS     = SCANBLOCK_SIZE * SCANFIELD_SIZE / BLOCK_SIZE;
93const int SEGMENT_SIZE       = SEGMENT_BLOCKS * BLOCK_SIZE;
94
95const char * fixed_pattern   = "apple";
96const int pattern_size       = strlen(fixed_pattern);
97
98int main(int argc, char * argv[]) {
99
100  char * infilename, * outfilename;
101  FILE * infile, * outfile;
102
103  int opt_code;
104  int byte_offset             = 0;
105  int pattern_only_matching   = 0;
106  int line_matching           = 1;   
107
108  while ((opt_code = getopt(argc, argv, "bo?")) != -1) {
109    switch (opt_code) {
110      case 'b':
111        byte_offset = 1;
112        break;
113      case 'o':
114        pattern_only_matching = 1;
115        line_matching = 0;
116        break;
117      case '?':
118      default:
119        printf ("Invalid option: %c\n", opt_code);
120        printf("Usage: %s [OPTION] <inputfile> [<outputfile>]\n", argv[0]);
121        printf("\t-b,\tprint the byte offset with output lines\n");
122        printf("\t-o,\tshow only the part of a line matching PATTERN\n");
123        printf("\t-V,\tprint version information and exit");           
124        exit(-1);
125    }
126  }
127 
128  if (optind >= argc) {
129    printf ("Too few arguments\n");
130    printf("Usage: %s [-c] [-v] <regex> <inputfile> [<outputfile>]\n", argv[0]);
131    exit(-1);
132  }
133
134  infilename = argv[optind++];
135  infile = fopen(infilename, "rb");
136  if (!infile) {
137    fprintf(stderr, "Error: cannot open %s.\n", infilename);
138    exit(-1);
139  }
140
141  if(optind >= argc) {
142      outfile = stdout;
143  } else {
144    outfilename = argv[optind++];
145    if (optind != argc) {
146      printf ("Too many arguments\n");
147      printf("Usage: %s [-c] [-v] <regex> <inputfile> [<outputfile>]\n", argv[0]);
148      exit(-1);
149    }
150    outfile = fopen(outfilename, "wb");
151    if (!outfile) {
152      fprintf(stderr, "Error: cannot open %s.\n", outfilename);
153      exit(-1);
154    }
155  }
156
157  // { // ___loop_preheader___
158
159
160// S2K Generated
161###@struct_decls ###
162
163// Initialize struct members
164basis_bits.bit_7 = simd<1>::constant<0>();
165basis_bits.bit_6 = simd<1>::constant<0>();
166basis_bits.bit_5 = simd<1>::constant<0>();
167basis_bits.bit_4 = simd<1>::constant<0>();
168basis_bits.bit_3 = simd<1>::constant<0>();
169basis_bits.bit_2 = simd<1>::constant<0>();
170basis_bits.bit_1 = simd<1>::constant<0>();
171basis_bits.bit_0 = simd<1>::constant<0>();
172
173lex.a = simd<1>::constant<0>();
174lex.p = simd<1>::constant<0>();
175lex.l = simd<1>::constant<0>();
176lex.e = simd<1>::constant<0>();
177lex.LF = simd<1>::constant<0>();
178
179lex.a = simd<1>::constant<0>();
180lex.p = simd<1>::constant<0>();
181lex.l = simd<1>::constant<0>();
182lex.e = simd<1>::constant<0>();
183lex.LF = simd<1>::constant<0>();
184
185output.match_follows = simd<1>::constant<0>();
186output.lines= simd<1>::constant<0>();
187output.line_starts= simd<1>::constant<0>();
188output.line_ends= simd<1>::constant<0>();
189
190// S2K Generated
191###@filter_decls ###
192
193  // \s2k{} transpose.do_block() / transpose.do_final_block()
194  // bind to `char * byte_data' and `struct Basis_bits 'basis' argument names.
195  ATTRIBUTE_SIMD_ALIGN char buffer[SEGMENT_SIZE];
196  char * byte_data = buffer;
197
198  // Iterators
199  BitStreamScanner<BitBlock, ScanWord, ScanWord, SEGMENT_BLOCKS> matches_scanner;
200  BitStreamScanner<BitBlock, ScanWord, ScanWord, SEGMENT_BLOCKS> line_starts_scanner;
201  BitStreamScanner<BitBlock, ScanWord, ScanWord, SEGMENT_BLOCKS> line_ends_scanner;
202
203  // Segment-at-a-time variables
204  int bytes_read              = 0;
205  int bytes_avail             = 0;
206  int bytes_remaining         = 0;
207
208  int copy_back_size          = 0;
209  int copy_back_offset        = 0;
210
211  // Segment-at-a-time offset
212  int block_index             = 0;
213  int segment_base            = 0;
214
215  // Iterator offset
216  int match_offset            = 0;
217  int line_start_offset       = 0;
218  int line_end_offset         = 0;
219
220  int line_final_start_offset = 0;
221  int line_final_end_offset   = 0;
222
223  // { // ___loop_preheader___
224
225
226  while(!feof(infile)) {
227 
228    // __loop_header__ {
229    block_index = 0;
230
231    // initialize scanners
232    matches_scanner.init();
233    line_starts_scanner.init();
234    line_ends_scanner.init(); 
235 
236    bytes_read      = fread(buffer + copy_back_size, 1,
237                        SEGMENT_SIZE - copy_back_size, infile);
238    bytes_avail     = bytes_read + copy_back_size;
239    bytes_remaining = bytes_avail;
240
241    if(ferror(infile)) { perror( "io error" ); exit(1); }
242    // } __loop_header__
243
244
245    // __loop_body__ {
246   
247    // Process full segment
248    if (bytes_remaining == SEGMENT_SIZE) {
249
250      for(block_index = 0; block_index < SEGMENT_BLOCKS; block_index++) {
251
252        byte_data = &buffer[block_index * BLOCK_SIZE];
253       
254        // S2K Generated
255        ###@filter_do_block ###
256
257        if(pattern_only_matching) {
258          matches_scanner.load_block(output.match_follows, block_index);
259        }
260
261        if(line_matching) {
262          line_starts_scanner.load_block(output.line_starts, block_index);
263          line_ends_scanner.load_block(output.line_ends, block_index);
264        }
265      }
266
267      if(pattern_only_matching) {
268        while(matches_scanner.has_next()) {
269          match_offset = matches_scanner.scan_to_next() - pattern_size;
270          if(byte_offset) {
271            int match_stream_offset = segment_base + match_offset;
272            fprintf(outfile, "%d:", match_stream_offset);
273          }
274
275          fwrite(&buffer[match_offset], 1, pattern_size, outfile); // lookahead
276          //fprintf(outfile, "%s\n", fixed_pattern);
277          fprintf(outfile, "\n");
278        }
279
280        copy_back_size      = pattern_size + 1;
281        copy_back_offset    = bytes_avail - copy_back_size;
282      }
283 
284      if(line_matching) {
285
286        assert(("Input line length exceeds segment size.",
287          line_ends_scanner.has_next() && line_starts_scanner.has_next()));
288        line_final_start_offset = line_starts_scanner.get_final_pos();
289        line_final_end_offset = line_ends_scanner.get_final_pos();
290
291        while(line_starts_scanner.has_next() && line_ends_scanner.has_next()) {
292
293          line_start_offset  = line_starts_scanner.scan_to_next();
294          line_end_offset    = line_ends_scanner.scan_to_next();
295
296          if(byte_offset) {
297            fprintf(outfile, "%d:", segment_base + line_start_offset);
298          }
299
300          fwrite(&buffer[line_start_offset], 1,
301            line_end_offset - line_start_offset + 1, outfile);
302        }
303
304        copy_back_offset = (line_final_start_offset > line_final_end_offset)
305                         ? line_final_start_offset : (line_final_end_offset + 1);
306        copy_back_size   = bytes_avail - copy_back_offset;
307
308        assert(("copy_back_offset", (copy_back_offset >= 0)));
309        assert(("copy_back_offset", (copy_back_offset <= bytes_avail)));
310        assert(("copy_back_size", (copy_back_size >= 0)));
311        assert(("copy_back_size", (copy_back_size < SEGMENT_SIZE)));
312      }
313     
314      bytes_remaining -= SEGMENT_SIZE;
315    }
316   
317    // Process a partial segment.
318    if(bytes_remaining > 0) {
319
320      while (bytes_remaining >= BLOCK_SIZE) {
321        byte_data = &buffer[block_index * BLOCK_SIZE];
322       
323        // Compiler 'do_block()' calls.
324        ###@filter_do_block ###
325 
326        if(pattern_only_matching) {
327          matches_scanner.load_block(output.match_follows, block_index);
328        }       
329       
330        if(line_matching) {
331          line_starts_scanner.load_block(output.line_starts, block_index);
332          line_ends_scanner.load_block(output.line_ends, block_index);
333        }
334 
335        bytes_remaining -= BLOCK_SIZE;
336        block_index++;
337       }   
338
339       // Process a partial block.
340       BitBlock EOF_mask = bitblock::srl(simd<1>::constant<1>(),
341                                        convert(BLOCK_SIZE - bytes_remaining));
342       byte_data = &buffer[block_index * BLOCK_SIZE];
343         
344       // Compiler 'do_final_block()' calls.
345       ###@filter_do_final_block ###
346       
347       if(pattern_only_matching) {
348          matches_scanner.load_block(output.match_follows & EOF_mask, block_index);
349       }
350
351       if(line_matching) {   
352         line_starts_scanner.load_block(output.line_starts & EOF_mask, block_index);
353         line_ends_scanner.load_block(output.line_ends & EOF_mask, block_index);           
354       }
355 
356       if(pattern_only_matching) {
357         while(matches_scanner.has_next()) {
358           match_offset = matches_scanner.scan_to_next() - pattern_size;
359           if(byte_offset) {
360             int match_stream_offset = segment_base + match_offset;
361             fprintf(outfile, "%d:", match_stream_offset);
362           }
363
364           fwrite(&buffer[match_offset], 1, pattern_size, outfile); // lookahead
365           fprintf(outfile, "\n");
366         }
367   
368          copy_back_size      = pattern_size + 1;
369          copy_back_offset    = bytes_avail - copy_back_size;
370       }
371       
372       if(line_matching) {
373         assert(("Input line exceeds segment size.",
374                  line_ends_scanner.has_next() &&
375                  line_starts_scanner.has_next()));
376         line_final_start_offset = line_starts_scanner.get_final_pos();
377         line_final_end_offset = line_ends_scanner.get_final_pos();
378       
379        while(line_starts_scanner.has_next() && line_ends_scanner.has_next()) {
380          line_start_offset  = line_starts_scanner.scan_to_next();
381          line_end_offset    = line_ends_scanner.scan_to_next();
382   
383          if(byte_offset) {
384            fprintf(outfile, "%d:", segment_base + line_start_offset,
385                     segment_base + line_end_offset);
386          }
387   
388          fwrite(&buffer[line_start_offset], 1,
389                 line_end_offset - line_start_offset + 1, outfile);
390        }
391   
392        copy_back_offset   = (line_final_start_offset > line_final_end_offset)
393                              ? line_final_start_offset
394                              : (line_final_end_offset + 1);
395                             
396        copy_back_size     = bytes_avail - copy_back_offset;
397   
398        assert(("copy_back_offset", (copy_back_offset >= 0)));
399        assert(("copy_back_offset", (copy_back_offset <= bytes_avail)));
400        assert(("copy_back_size", (copy_back_size >= 0)));
401        assert(("copy_back_size", (copy_back_size < SEGMENT_SIZE)));           
402      }
403    }
404
405
406    // __loop_tail__ {
407    // S2K Generated 'clear()' calls.
408    ###@filter_clear ###
409
410    memmove(&buffer[0], &buffer[copy_back_offset], copy_back_size);
411
412    // segment_base()
413    segment_base += bytes_avail;
414    segment_base -= copy_back_size;
415    // } __loop_tail__
416
417
418  }
419
420
421  // __loop_exit__ {
422  if(infile) { fclose(infile); infile=NULL;}
423  if(outfile) { fclose(outfile); outfile=NULL;}
424  // } __loop_exit__
425
426
427  return 0;
428}
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458/*
459 * Segment-at-a-time Processing Issues:
460 *
461 * 1. Start-of-stream or equivalently start-of-segment clear()...
462 *    The ScanTo and EOF_mask processing ensure a 'fence post' at the
463 *    end of each segment as well as at the end of file.
464 *
465 * 'grep.py' and 'grep_template.cpp' are tightly coupled on a number of variables:
466 *
467 * 1. Tranpose.do_block expects 'byte_data' and 'basis_bits'.
468 * 2. All StreamFunction.do_final_block methods expect 'EOF_mask'.
469 * 3. Sequential iterators (scanners) expect 'output.matches'.
470 * 4. Sequential iterators (scanners) expect 'lex.LF'.
471 *
472 */
Note: See TracBrowser for help on using the repository browser.