source: proto/s2k/trunk/framework/input/templates/cpplang/push_grep.template @ 4043

Last change on this file since 4043 was 4043, checked in by ksherdy, 5 years ago

Initial check-in. Needs clean-up.

File size: 12.0 KB
Line 
1/*
2 * \s2k{} push model template.
3 *
4 * grep exact-string search C++ in \s2k{}.
5 *
6 * Usage:    grep [OPTIONS] <infile> [outfile]
7 *
8 * Warning:
9 *
10 * Template implementation limits line length to SEGMENT_SIZE bytes, i.e.,
11 * 1K on 32-bit architecture with 128-bit SIMD registers, and
12 * 4K on 64-bit with 128-bit SIMD registers.
13 *
14 * Program Description:   
15 *
16 * The \s2k{} demo grep program searches a file for lines containing matches
17 * to the fixed target pattern, `apple'.
18 * The default behaviour of \s2k{} grep is to print all matching lines.
19 *
20 * The flag -o specifies that only 
21 * the matched parts of a matching line are produced, with each
22 * such part on a separate output line.
23 *
24 * The flag -b, specifies that the 0-based byte offset within
25 * the input file before each line of output.
26 
27 * If -b is specified with -o, \s2k{} grep prints the offset of
28 * each match, formatted as `offset:apple', where `apple' is the target
29 * pattern read from memory and offset is the zero-index offset
30 * of the first character the match.
31 *
32 * For example, given an input file containing three occurrences of `apple'
33 * at zero-indexed offsets 47,107, and 189, the \s2k{} grep
34 * application writes the following results to standard output.
35 *
36 * 47:apple
37 * 107:apple
38 * 189:apple
39 *
40 * This behaviour is equivalent to the following GNU grep utility command.
41 *
42 * grep needle filename --only-matching --byte-offset
43 *
44 */
45/*
46 * Template Attributes:
47 *
48 * The \s2k{} compiler and substitutes generated code fragments for
49 * the following predefined template attributes bracked with
50 * hashhashhashamp and hashhashash.
51 *
52 * @global                - Stream structure and filter definitions.
53 *
54 * @struct_decls          - Stream structure declarations.
55 *
56 * @filter_decls          - Stream filter declarations.
57 *
58 * @filter_do_block       - Stream filter `do_block()' calls.
59 *
60 * @filter_do_final_block - Stream filter `do_final_block()' calls.
61 *
62 * @filter_clear          - Stream filter `clear()' calls.
63 */
64 
65/*
66 * ###@warningComment ###
67 */
68 
69#define DEBUG 0
70
71// Runtime directives
72#define BASIS_BITS
73
74// Runtime libraries
75#include <simd-lib/bitblock.hpp>
76#include <simd-lib/runtime.hpp>
77#include <simd-lib/pabloSupport.hpp>
78#include <simd-lib/transpose.hpp>
79#include <simd-lib/mmalloc.hpp>
80
81// C/C++ libraries
82#include <stdio.h>
83#include <stdlib.h>
84#include <string>
85#include <iostream>
86#include <sys/stat.h>
87#include <sys/mman.h>
88
89// mmap
90#include <fcntl.h>
91using namespace std;
92
93// -------------------
94
95template <class bitblock_t, class scanblock_t>
96class BitBlockStreamScanner {
97
98public:
99
100BitBlockStreamScanner():  bitblock_ary(NULL),
101                          bitblock_cnt(-1),
102                          bitblock_idx(-1),
103                          scanblock_idx(-1)
104{
105  remaining._bitblock = simd<1>::constant<0>();
106}
107
108void init(bitblock_t *s, uint32_t size) {
109  bitblock_ary           = s;
110  bitblock_cnt           = size;
111  bitblock_idx           = 0;
112  scanblock_idx          = 0;
113  remaining._bitblock    = bitblock_ary[0];
114}
115
116int scan_to_next() {
117
118  while(1) {
119   
120    for(scanblock_idx; scanblock_idx < (sizeof(bitblock_t)/sizeof(scanblock_t)); scanblock_idx++) {
121      scanblock_t scan_item = remaining._scanblock[scanblock_idx];
122      if(scan_item != 0) {
123        int bit_pos = scan_forward_zeroes(scan_item);
124        scan_item = scan_item & (scan_item - 1);
125        remaining._scanblock[scanblock_idx] = scan_item;
126        return (bitblock_idx * (BLOCK_SIZE)) + (scanblock_idx * sizeof(scanblock_t)*8) + bit_pos;
127      }
128    }
129   
130    scanblock_idx = 0;
131    bitblock_idx++;
132
133    if(bitblock_idx >= bitblock_cnt) {
134      return -1;
135    }
136    remaining._bitblock = bitblock_ary[bitblock_idx];
137  }
138
139}
140private:
141
142  union {  bitblock_t   _bitblock;
143           scanblock_t  _scanblock[sizeof(bitblock_t)/sizeof(scanblock_t)];} remaining;
144
145  bitblock_t * bitblock_ary;
146  uint32_t bitblock_cnt;
147  uint32_t bitblock_idx;
148  uint32_t scanblock_idx; 
149 
150};
151
152// -------------------
153
154// S2K Generated
155###@global ###
156
157// Segment-at-a-time buffered stream processing parameters.
158const int SCANBLOCK_SIZE     = sizeof(ScanWord) * 8;
159const int SCANFIELD_SIZE     = sizeof(ScanWord) * 8;
160const int SEGMENT_BLOCKS     = SCANBLOCK_SIZE * SCANFIELD_SIZE / BLOCK_SIZE;
161const int SEGMENT_SIZE       = SEGMENT_BLOCKS * BLOCK_SIZE;
162
163// // \s2k{} bind
164// ATTRIBUTE_SIMD_ALIGN char buffer[SEGMENT_SIZE];
165// ATTRIBUTE_SIMD_ALIGN BitBlock line_starts[SEGMENT_SIZE/8];
166// ATTRIBUTE_SIMD_ALIGN BitBlock line_ends[SEGMENT_SIZE/8];
167// ATTRIBUTE_SIMD_ALIGN BitBlock match_follows[SEGMENT_SIZE/8];
168
169char * buffer;
170BitBlock * line_starts;
171BitBlock * line_ends;
172BitBlock * match_follows;
173
174
175
176struct pipeline {
177 
178  public:
179 
180  pipeline()
181  {
182    // Initialize struct members
183    basis_bits.bit_7 = simd<1>::constant<0>();
184    basis_bits.bit_6 = simd<1>::constant<0>();
185    basis_bits.bit_5 = simd<1>::constant<0>();
186    basis_bits.bit_4 = simd<1>::constant<0>();
187    basis_bits.bit_3 = simd<1>::constant<0>();
188    basis_bits.bit_2 = simd<1>::constant<0>();
189    basis_bits.bit_1 = simd<1>::constant<0>();
190    basis_bits.bit_0 = simd<1>::constant<0>();
191   
192    lex.a   = simd<1>::constant<0>();
193    lex.p   = simd<1>::constant<0>();
194    lex.l   = simd<1>::constant<0>();
195    lex.e   = simd<1>::constant<0>();
196    lex.LF  = simd<1>::constant<0>();
197   
198    output.match_follows  = simd<1>::constant<0>();
199    output.lines          = simd<1>::constant<0>();
200    output.line_starts    = simd<1>::constant<0>();
201    output.line_ends      = simd<1>::constant<0>();
202  }
203 
204  ~pipeline() { }
205
206  void process(int buffer_size)
207  {
208      int bytes_remaining = buffer_size;
209      int block_index     = 0;
210     
211      while (bytes_remaining >= BLOCK_SIZE) {
212        byte_data = &buffer[block_index * BLOCK_SIZE];
213       
214        // \s2k{} 'do_block()' calls.
215        ###@filter_do_block ###
216 
217         // \s2k{} writes
218        match_follows[block_index]  = output.match_follows;
219        line_starts[block_index]    = output.line_starts;
220        line_ends[block_index]      = output.line_ends;
221         
222        bytes_remaining -= BLOCK_SIZE;
223        block_index++;
224      }   
225
226      // Process a partial block.
227      BitBlock EOF_mask = bitblock::srl(simd<1>::constant<1>(),
228                                        convert(BLOCK_SIZE - bytes_remaining));
229      byte_data = &buffer[block_index * BLOCK_SIZE];
230         
231      // \s2k{} 'do_final_block()' calls.
232      ###@filter_do_final_block ###
233     
234      // \s2k{} writes
235      match_follows[block_index]  = output.match_follows & EOF_mask;
236      line_starts[block_index]    = output.line_starts & EOF_mask;
237      line_ends[block_index]      = output.line_ends & EOF_mask;
238
239  }
240 
241  private:
242 
243  char * byte_data; /* fix, bind buffer on byte_data */
244   
245// S2K Generated
246###@struct_decls ###
247
248// S2K Generated
249###@filter_decls ###   
250
251};
252
253struct grep {
254
255  public:
256 
257  grep():infile(stdin), outfile(stdout),
258         pattern_size(sizeof("apple")-1),
259         byte_offset(0),
260         pattern_only_matching(0),
261         line_matching(1) {
262  }
263 
264  ~grep() {
265    destroy();
266  }
267 
268  void open_infile(char * infilename) {
269
270    fdSrc = open(infilename, O_RDONLY);
271    if (fdSrc == -1) {
272        fprintf(stderr, "Error: cannot open %s for processing.\n", infilename);
273        exit(-1);
274    }
275    if (fstat(fdSrc, &infile_sb) == -1) {
276        fprintf(stderr, "Error: cannot stat %s for processing.\n", infilename);
277        exit(-1);
278    }
279
280    buffer = (char *) mmap(NULL, infile_sb.st_size, PROT_READ, MAP_PRIVATE, fdSrc, 0);
281    if (infile == MAP_FAILED) {
282        fprintf(stderr, "Error: mmap of %s failure.\n", infilename);
283        exit(-1);
284    }
285  }
286 
287  int read() {
288 
289    int buffer_size = 0;
290 
291    if((buffer_size = infile_sb.st_size) > 0 ) {
292      allocate();
293      return buffer_size;
294    };
295   
296    return buffer_size;
297  }
298 
299  void open_outfile(char * outfilename) {
300    outfile = fopen(outfilename, "wb");
301    if (!outfile) {
302      fprintf(stderr, "Error: cannot open %s.\n", outfilename);
303      exit(-1);
304    } 
305  }
306 
307  void close_infile() {
308    close(fdSrc);
309  }
310 
311  void close_outfile() {
312    if(outfile) { fclose(outfile); outfile=NULL;}
313  }
314 
315  void do_byte_offset() {
316    byte_offset = 1;
317  }
318 
319  void do_line_matching() {
320    pattern_only_matching = 0;
321    line_matching         = 1;
322  }
323 
324  void do_pattern_only_matching() {
325    pattern_only_matching = 1;
326    line_matching         = 0;
327  }
328 
329  void process(int buffer_size) {
330   
331    // Segment-at-a-time variables
332    int bytes_remaining = buffer_size;
333    int block_index     = 0;
334    int block_base      = 0;
335    int block_count     = ((bytes_remaining % BLOCK_SIZE) == 0) // rounddown
336                          ? (bytes_remaining/BLOCK_SIZE)
337                          : (bytes_remaining/BLOCK_SIZE) + 1;
338   
339    BitBlockStreamScanner<BitBlock,ScanWord> matches_scanner;
340    BitBlockStreamScanner<BitBlock,ScanWord> line_starts_scanner;
341    BitBlockStreamScanner<BitBlock,ScanWord> line_ends_scanner;
342   
343    matches_scanner.init(match_follows, block_count);
344    line_starts_scanner.init(line_starts, block_count);
345    line_ends_scanner.init(line_ends, block_count);
346     
347     if(pattern_only_matching) {
348       int match_offset = 0;
349       while((match_offset = matches_scanner.scan_to_next()) > -1) {
350         match_offset -= pattern_size;
351         if(byte_offset) {
352           int match_stream_offset = block_base + match_offset;
353           fprintf(outfile, "%d:", match_stream_offset);
354         }
355
356         fwrite(&buffer[match_offset], 1, pattern_size, outfile); // lookahead
357         fprintf(outfile, "\n");
358   
359       }
360     } 
361
362     if(line_matching) {
363       int line_start_offset = 0;
364       int line_end_offset = 0;
365       
366       while( ((line_start_offset = line_starts_scanner.scan_to_next()) > -1)
367            &&
368              ((line_end_offset = line_ends_scanner.scan_to_next()) > -1)
369              ) {
370             
371         if(byte_offset) {
372           fprintf(outfile, "%d:", block_base + line_start_offset,
373                    block_base + line_end_offset);
374         }
375 
376        fwrite(&buffer[line_start_offset], 1,
377               line_end_offset - line_start_offset + 1, outfile);
378       }
379   
380     }
381     
382  }
383 
384  private:
385
386  // \s2k bind helpers
387  void allocate() {
388    line_starts   = simd_malloc<BitBlock>(infile_sb.st_size/8);
389    line_ends     = simd_malloc<BitBlock>(infile_sb.st_size/8);
390    match_follows = simd_malloc<BitBlock>(infile_sb.st_size/8);
391  }
392 
393  void destroy() {
394    simd_free<BitBlock>(line_starts);
395    simd_free<BitBlock>(line_ends);
396    simd_free<BitBlock>(match_follows);
397  }
398
399  int fdSrc;
400  struct stat infile_sb;
401 
402  FILE *infile, * outfile;
403 
404  int pattern_size;
405  int byte_offset;
406  int pattern_only_matching;
407  int line_matching;
408
409};
410
411int main(int argc, char * argv[]) {
412
413  struct pipeline pipeline;
414  struct grep grep;
415
416  int opt_code;
417  while ((opt_code = getopt(argc, argv, "bo?")) != -1) {
418    switch (opt_code) {
419      case 'b':
420        grep.do_byte_offset();
421        break;
422      case 'o':
423        grep.do_pattern_only_matching();
424        break;
425      case '?':
426      default:
427        printf ("Invalid option: %c\n", opt_code);
428        printf("Usage: %s [OPTION] <inputfile> [<outputfile>]\n", argv[0]);
429        printf("\t-b,\tprint the byte offset with output lines\n");
430        printf("\t-o,\tshow only the part of a line matching PATTERN\n");
431        printf("\t-V,\tprint version information and exit");           
432        exit(-1);
433    }
434  }
435 
436  if (optind >= argc) {
437    printf ("Too few arguments\n");
438    printf("Usage: %s [-c] [-v] <regex> <inputfile> [<outputfile>]\n", argv[0]);
439    exit(-1);
440  }
441
442  char * infilename = argv[optind++];
443  grep.open_infile(infilename);
444
445  if(!(optind >= argc)) {
446    char * outfilename = argv[optind++];
447    if (optind != argc) {
448      printf ("Too many arguments\n");
449      printf("Usage: %s [-c] [-v] <regex> <inputfile> [<outputfile>]\n", argv[0]);
450      exit(-1);
451    }
452    grep.open_outfile(outfilename);
453  }
454
455
456  grep.open_infile(infilename);
457  int segment_bytes = 0;
458  if( (segment_bytes = grep.read()) > 0) {
459    pipeline.process(segment_bytes);
460    grep.process(segment_bytes);
461  }
462  grep.close_infile();
463 
464  return 0;
465}
Note: See TracBrowser for help on using the repository browser.