source: proto/s2k/trunk/framework/input/templates/cpplang/grep.template @ 3796

Last change on this file since 3796 was 3796, checked in by ksherdy, 5 years ago

Clean up.

File size: 12.7 KB
Line 
1//
2// grep static string search C++ template.
3//
4// Author:  Ken Herdy
5//
6// Usage: ./grep [-b] [-c] [-o] [-v] <inputfile> [<outputfile>]
7//
8// Description: Segment-at-a-time template.
9//
10// Implements a line-at-a-time or fixed length match copyback processing model.
11//
12// This solution is limited. It does not propagate information across
13// inter-segment or EOF boundaries. A more complete processing model must
14// address this limitation using any_carry() or by correctly handling the
15// maximum scanner line length.
16//
17// (c) 2014 Ken Herdy
18// All rights reserved.
19// Licensed to International Characters, Inc. under Academic Free License 3.0
20//
21// ###@warningComment ###
22//
23//////////////////////////////////////////////////////////////////////////////////////////
24
25#define DEBUG 0
26
27// runtime directives
28#define BASIS_BITS
29
30// runtime libraries
31#include <simd-lib/bitblock.hpp>
32#include <simd-lib/carrySet.hpp>
33#include <simd-lib/pabloSupport.hpp>
34#include <simd-lib/bitblock_iterator.hpp>
35#include <simd-lib/transpose.hpp>
36
37// C/C++
38#include <stdio.h>
39#include <stdlib.h>
40#include <string>
41#include <iostream>
42using namespace std;
43
44// S2K Generated
45###@global ###
46
47// Segment-at-a-time buffered stream processing parameters.
48const int SCANBLOCK_SIZE   = sizeof(ScanWord) * 8;
49const int SCANFIELD_SIZE   = sizeof(ScanWord) * 8;
50const int SEGMENT_BLOCKS   = SCANBLOCK_SIZE * SCANFIELD_SIZE / BLOCK_SIZE;
51const int SEGMENT_SIZE     = SEGMENT_BLOCKS * BLOCK_SIZE;
52//const int CACHE_SIZE       = 32768;
53//const int BUFFER_SEGMENTS  = CACHE_SIZE / SEGMENT_SIZE;
54//const int BUFFER_SIZE      = BUFFER_SEGMENTS * SEGMENT_SIZE; // SEGMENT_SIZE; //
55
56const char * fixed_pattern  = "apple";
57const int pattern_size      = strlen(fixed_pattern);
58
59int main(int argc, char * argv[]) {
60
61    char * infilename, * outfilename;
62    FILE * infile, * outfile;
63
64    int opt_code;
65    int byte_offset             = 0;
66    int count_only_option       = 0;
67    int only_matching           = 0;
68    int print_version_option    = 0;
69
70    while ((opt_code = getopt(argc, argv, "bcov?")) != -1) {
71        switch (opt_code) {
72        case 'b':
73            byte_offset = 1;
74            break;
75        case 'c':
76            count_only_option = 1;
77            printf("Not implemented.");
78            break;
79        case 'o':
80            only_matching = 1;
81            break;
82        case 'v':
83            print_version_option = 1;
84            break;
85        case '?':
86            printf("Usage: %s [-c] [-v] <inputfile> [<outputfile>]\n", argv[0]);
87            exit(-1);
88            break;
89        default:
90            printf ("Invalid option: %c\n", opt_code);
91            printf("Usage: %s [-c] [-v] <inputfile> [<outputfile>]\n", argv[0]);
92            exit(-1);
93        }
94    }
95
96  if (optind >= argc) {
97    printf ("Too few arguments\n");
98    printf("Usage: %s [-c] [-v] <regex> <inputfile> [<outputfile>]\n", argv[0]);
99    exit(-1);
100  }
101
102  infilename = argv[optind++];
103  infile = fopen(infilename, "rb");
104  if (!infile) {
105      fprintf(stderr, "Error: cannot open %s.\n", infilename);
106      exit(-1);
107  }
108
109  if(optind >= argc) {
110      outfile = stdout;
111  } else {
112      outfilename = argv[optind++];
113      if (optind != argc) {
114          printf ("Too many arguments\n");
115          printf("Usage: %s [-c] [-v] <regex> <inputfile> [<outputfile>]\n", argv[0]);
116          exit(-1);
117      }
118      outfile = fopen(outfilename, "wb");
119      if (!outfile) {
120          fprintf(stderr, "Error: cannot open %s.\n", outfilename);
121          exit(-1);
122      }
123  }
124
125  if(print_version_option) {
126      fprintf(outfile, "grep static fixed-string parallel bit streams.: March 2014\n");
127  }
128
129  // loop_preheader()
130  // {
131
132  // Pablo transpose.do_block(), transpose.do_final_block()
133  // expect char * 'byte_data' and struct Basis_bits 'basis'
134  // names as input and output arguments.
135  ATTRIBUTE_SIMD_ALIGN char buffer[SEGMENT_SIZE];
136  char * byte_data = buffer;
137
138  // Input / Output Streams or Buffers // AST Decls SOURCE SINK
139  //
140  //
141
142  // Stream struct declarations. // AST Decls
143
144// S2K Generated
145###@struct_decls ###
146
147// S2K Generated
148###@kernel_decls ###
149
150  // Iterators. // AST Decls ?
151  BitStreamScanner<BitBlock, ScanWord, ScanWord, SEGMENT_BLOCKS> matches_scanner;
152  BitStreamScanner<BitBlock, ScanWord, ScanWord, SEGMENT_BLOCKS> line_starts_scanner;
153  BitStreamScanner<BitBlock, ScanWord, ScanWord, SEGMENT_BLOCKS> line_ends_scanner;
154
155  // Segment-at-a-time control
156  int bytes_read              = 0;
157  int bytes_avail             = 0;
158  int bytes_remaining         = 0;
159
160  int copy_back_size          = 0;
161  int copy_back_offset        = 0;
162
163  // Segment-at-a-time offset
164  // int block_base_offset    = 0;
165  int block_index             = 0;
166  int segment_base            = 0;
167
168  // Iterator offset
169  int match_offset            = 0;
170  int line_start_offset       = 0;
171  int line_end_offset         = 0;
172
173  int line_final_start_offset = 0;
174  int line_final_end_offset   = 0;
175
176  // } // end loop_preheader()
177
178  // Segment-at-a-time processing.
179  while(!feof(infile)) {
180
181        // loop_header()
182        // {
183
184        block_index = 0;
185
186        // scanners
187        matches_scanner.init();
188        line_starts_scanner.init();
189        line_ends_scanner.init();
190
191        // ReadStream(), BufferSource(), BufferSink(). Read input stream segment-at-a-time.
192        bytes_read      = fread(buffer + copy_back_size, 1, SEGMENT_SIZE - copy_back_size, infile);
193        bytes_avail     = bytes_read + copy_back_size;
194        bytes_remaining = bytes_avail;
195
196
197
198        if(ferror(infile)) { perror( "io error" ); exit(1); }
199        assert(("fread exceeded segment size.", bytes_avail <= SEGMENT_SIZE));
200
201        // } // end loop_header()
202
203        // loop_body()
204
205        // Process full segment.
206        if (bytes_remaining == SEGMENT_SIZE) { // (bytes_remaining >= SEGMENT_SIZE)
207
208          for(block_index = 0;
209              block_index < SEGMENT_BLOCKS;
210              block_index++) {
211
212            byte_data = &buffer[block_index * BLOCK_SIZE];
213
214//Compiled to 'do_block()' calls.
215###@kernel_do_block ###
216
217              if(only_matching) {
218                matches_scanner.load_block(output.match_follows, block_index);
219              }
220
221              if(!only_matching) {
222                  line_starts_scanner.load_block(output.line_starts, block_index);
223                  line_ends_scanner.load_block(output.line_ends, block_index);
224              }
225          }
226
227          if(only_matching) {
228            while(matches_scanner.has_next()) {
229              match_offset = matches_scanner.scan_to_next() - pattern_size;
230              if(byte_offset) {
231                  int match_stream_offset = segment_base + match_offset;
232                  fprintf(outfile, "%d:", match_stream_offset);
233              }
234
235              // KH: Lookahead.
236              fwrite(&buffer[match_offset], 1, pattern_size, outfile);
237              //fprintf(outfile, "%s\n", fixed_pattern);
238              fprintf(outfile, "\n");
239            }
240
241            copy_back_size      = pattern_size + 1;
242            copy_back_offset    = bytes_avail - copy_back_size;
243          }
244
245          if(!only_matching) {
246
247            assert(("Line length exceeds segment size.", line_ends_scanner.has_next() && line_starts_scanner.has_next()));
248            line_final_start_offset = line_starts_scanner.get_final_pos();
249            line_final_end_offset = line_ends_scanner.get_final_pos();
250            // if(!has_line_start && !has_line_end) {/* Set flag to buffer entire segment. */;}
251
252            while(line_starts_scanner.has_next() && line_ends_scanner.has_next()) {
253
254              line_start_offset  = line_starts_scanner.scan_to_next();
255              line_end_offset    = line_ends_scanner.scan_to_next();
256
257              if(byte_offset) {
258                fprintf(outfile, "%d:", segment_base + line_start_offset);
259              }
260
261              fwrite(&buffer[line_start_offset], 1, line_end_offset - line_start_offset + 1, outfile);
262            }
263
264            copy_back_offset   = (line_final_start_offset > line_final_end_offset) ? line_final_start_offset : (line_final_end_offset + 1) ;
265            copy_back_size     = bytes_avail - copy_back_offset;
266
267            assert(("copy_back_offset", (copy_back_offset >= 0)));
268            assert(("copy_back_offset", (copy_back_offset <= bytes_avail)));
269            assert(("copy_back_size", (copy_back_size >= 0)));
270            assert(("copy_back_size", (copy_back_size < SEGMENT_SIZE)));
271
272          }
273
274          bytes_remaining -= SEGMENT_SIZE;
275        }
276
277        // if(bytes_remaining > 0) { // KH: @ any_carry
278
279            // Process a partial segment.
280            while (bytes_remaining >= BLOCK_SIZE) {
281                  byte_data = &buffer[block_index * BLOCK_SIZE];
282
283// Compiler 'do_block()' calls.
284###@kernel_do_block ###
285
286                  if(only_matching) {
287                    matches_scanner.load_block(output.match_follows, block_index);
288                  }
289
290                  if(!only_matching) {
291                    line_starts_scanner.load_block(output.line_starts, block_index);
292                    line_ends_scanner.load_block(output.line_ends, block_index);
293                  }
294
295                  bytes_remaining -= BLOCK_SIZE;
296                  block_index++;
297            }
298
299
300        if(bytes_remaining > 0) { // KH: any_carry
301
302            BitBlock EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE - bytes_remaining));
303            byte_data = &buffer[block_index * BLOCK_SIZE];
304
305// Compiler 'do_final_block()' calls.
306###@kernel_do_final_block ###
307
308            if(only_matching) {
309              matches_scanner.load_block(output.match_follows & EOF_mask, block_index);
310            }
311
312            if(!only_matching) {
313              line_starts_scanner.load_block(output.line_starts & EOF_mask, block_index);
314              line_ends_scanner.load_block(output.line_ends & EOF_mask, block_index);
315            }
316
317            if(only_matching) {
318              while(matches_scanner.has_next()) {
319                match_offset = matches_scanner.scan_to_next() - pattern_size;
320                if(byte_offset) {
321                    int match_stream_offset = segment_base + match_offset;
322                    fprintf(outfile, "%d:", match_stream_offset);
323                }
324
325                // KH: Lookahead.
326                fwrite(&buffer[match_offset], 1, pattern_size, outfile);
327                fprintf(outfile, "\n");
328              }
329
330              copy_back_size      = pattern_size + 1;
331              copy_back_offset    = bytes_avail - copy_back_size;
332            }
333
334            if(!only_matching) {
335
336                assert(("Line length exceeds segment size.", line_ends_scanner.has_next() && line_starts_scanner.has_next()));
337
338                line_final_start_offset = line_starts_scanner.get_final_pos();
339                line_final_end_offset = line_ends_scanner.get_final_pos();
340                // if(!has_line_start && !has_line_end) {/* Set flag to buffer entire segment. */;}
341
342                while(line_starts_scanner.has_next() && line_ends_scanner.has_next()) {
343
344                    line_start_offset  = line_starts_scanner.scan_to_next();
345                    line_end_offset    = line_ends_scanner.scan_to_next();
346
347                    if(byte_offset) {
348                        fprintf(outfile, "%d:", segment_base + line_start_offset);
349                    }
350
351                    fwrite(&buffer[line_start_offset], 1, line_end_offset - line_start_offset + 1, outfile);
352                }
353
354                copy_back_offset   = (line_final_start_offset > line_final_end_offset) ? line_final_start_offset : (line_final_end_offset + 1) ;
355                copy_back_size     = bytes_avail - copy_back_offset;
356
357                assert(("copy_back_offset", (copy_back_offset >= 0)));
358                assert(("copy_back_offset", (copy_back_offset <= bytes_avail)));
359                assert(("copy_back_size", (copy_back_size >= 0)));
360                assert(("copy_back_size", (copy_back_size < SEGMENT_SIZE)));
361
362              }
363        } // remaining > 0
364
365        if(DEBUG) {
366            printf("bytes_avail: %d\n", bytes_avail);
367            printf("bytes_remaining: %d\n", bytes_remaining);
368            printf("copy_back_offset: %d\n", copy_back_offset);
369            printf("copy_back_size: %d\n", copy_back_size);
370            printf("final_line_starts_offset: %d\n", line_final_start_offset);
371            printf("final_line_ends_offset: %d\n", line_final_end_offset);
372        }
373        // end loop_body()
374
375        // loop_tail()
376
377// Compiler 'clear()' calls.
378###@kernel_clear ###
379
380        // copy_back
381        // copy_back_offset = max ?
382        // copy_back_size   = max ?
383
384        memmove(&buffer[0], &buffer[copy_back_offset], copy_back_size);
385
386        // segment_base()
387        segment_base += bytes_avail;
388        segment_base -= copy_back_size;
389
390        // end loop_tail()
391
392  }
393
394  // loop_exit()
395  // {
396  if(infile) { fclose(infile); infile=NULL;}
397  if(outfile) { fclose(outfile); outfile=NULL;}
398
399  // end loop_exit()
400
401  return 0;
402}
403
Note: See TracBrowser for help on using the repository browser.