Changeset 4031 for proto


Ignore:
Timestamp:
Aug 16, 2014, 10:59:07 PM (5 years ago)
Author:
ksherdy
Message:

Refactored grep template for bit-space / byte-space compilation strategy.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/s2k/trunk/framework/input/templates/cpplang/grep.template

    r4029 r4031  
    1 /**
    2  * grep static fixed string search C++ template.
    3  *
    4  * Author:   Ken Herdy
    5  *
    6  * Usage: ./grep <infile> [-o outfile]
    7  *
    8  * Description: Segment-at-a-time template. Implements a line-at-a-time copyback processing model.
    9  *
    10  * '_offset' => 0-based byte offsets
    11  * '_pos'    => 1-based bytes offsets
    12  *
    13  * Future Work:
    14  *
    15  * a. Lookahead.
    16  * b. Support multiple segments a.k.a. buffer-at-a-time.
    17  * c. Compile in s2k. <--
    18  * d. s2k iterators for (start, follow) in (starts_stream, follows_stream) do { }
    19  *
    20  * Segment-at-a-time Processing Issues:
    21  *
    22  * 1. Start-of-stream or equivalently start-of-segment clear()...
    23  *    The ScanTo and EOF_mask processing ensure a 'fence post' at the
    24  *    end of each segment as well as at the end of file.
    25  *
    26  * 'grep.py' and 'grep_template.cpp' are tightly coupled on a number of variables:
    27  *
    28  * 1. Tranpose.do_block expects 'byte_data' and 'basis_bits'.
    29  * 2. All StreamFunction.do_final_block methods expect 'EOF_mask'.
    30  * 3. Sequential iterators (scanners) expect 'output.matches'.
    31  * 4. Sequential iterators (scanners) expect 'lex.LF'.
    32  *
    33  **/
    34 
    35 #define DEBUG 0
    36 
     1/*
     2 * grep exact-string search C++ in \s2k{}.
     3 *
     4 * Usage:    grep [OPTIONS] <infile> [outfile]
     5 *
     6 * Warning:
     7 *
     8 * Template implementation limits line length to SEGMENT_SIZE bytes, i.e.,
     9 * 1K on 32-bit architecture with 128-bit SIMD registers, and
     10 * 4K on 64-bit with 128-bit SIMD registers.
     11 *
     12 * Program Description:   
     13 *
     14 * The \s2k{} demo grep program searches a file for lines containing matches
     15 * to the fixed target pattern, `apple'.
     16 * The default behaviour of \s2k{} grep is to print all matching lines.
     17 *
     18 * The flag -o specifies that only 
     19 * the matched parts of a matching line are produced, with each
     20 * such part on a separate output line.
     21 *
     22 * The flag -b specifies that the 0-based byte offset within
     23 * the input file is printed before each line of output.
     24 
     25 * If -b is specified with -o, \s2k{} grep prints the offset of
     26 * each match, formatted as `offset:apple', where `apple' is the target
     27 * pattern read from memory and offset is the zero-indexed offset
     28 * of the first character of the match.
     29 *
     30 * For example, given an input file containing three occurrences of `apple'
     31 * at zero-indexed offsets 47,107, and 189, the \s2k{} grep
     32 * application writes the following results to standard output.
     33 *
     34 * 47:apple
     35 * 107:apple
     36 * 189:apple
     37 *
     38 * This behaviour is equivalent to the following GNU grep utility command.
     39 *
     40 * grep apple filename --only-matching --byte-offset
     41 *
     42 */
     43/*
     44 * Template Attributes:
     45 *
     46 * The \s2k{} compiler substitutes generated code fragments for
     47 * the following predefined template attributes, bracketed with
     48 * hashhashhashamp and hashhashhash.
     49 *
     50 * @global                - Stream structure and filter definitions.
     51 *
     52 * @struct_decls          - Stream structure declarations.
     53 *
     54 * @filter_decls          - Stream filter declarations.
     55 *
     56 * @filter_do_block       - Stream filter `do_block()' calls.
     57 *
     58 * @filter_do_final_block - Stream filter `do_final_block()' calls.
     59 *
     60 * @filter_clear          - Stream filter `clear()' calls.
     61 */
     62 
     63/*
     64 * ###@warningComment ###
     65 */
     66 
    3767#define DEBUG 0
    3868
     
    5484using namespace std;
    5585
    56 
    57 // Fixed pattern.
    58 const char * fixed_pattern  = "apple";
    59 const int pattern_size      = strlen(fixed_pattern);
    60 
    61 // Platform dependent definitions. // Example only, defined in simd-lib/builtins.hpp.
    62 //typedef __m128i BitBlock;
    63 //typedef BitBlock BytePack;
    64 //typedef uint32_t ScanWord;
     86// S2K Generated
     87###@global ###
    6588
    6689// Segment-at-a-time buffered stream processing parameters.
    67 const int SCANBLOCK_SIZE   = sizeof(ScanWord) * 8;
    68 const int SCANFIELD_SIZE   = sizeof(ScanWord) * 8;
    69 // const int BLOCK_SIZE       =  sizeof(BitBlock) * 8;
    70 const int SEGMENT_BLOCKS   = SCANBLOCK_SIZE * SCANFIELD_SIZE / BLOCK_SIZE;
    71 const int SEGMENT_SIZE     = SEGMENT_BLOCKS * BLOCK_SIZE;
    72 
    73 //const int CACHE_SIZE       = 32768;
    74 //const int BUFFER_SEGMENTS  = CACHE_SIZE / SEGMENT_SIZE;
    75 //const int BUFFER_SIZE      = BUFFER_SEGMENTS * SEGMENT_SIZE; // SEGMENT_SIZE; //
    76 
    77 // @ global - Parameter replaced with C++ translation of stream structs
    78 //            and struct functions definitions
    79 ###@global ###
    80 
    81 // @ decl - Replaced with a set of C++ stream struct declarations.
    82 ###@struct_decls ###
    83 
    84   // S2K Generated
    85 ###@filter_decls ###
    86 
    87 // @ stream_stmts - Replaced with C++ stream functions declarations.
    88 //@ stream_stmts
    89 
    90 // Segment-at-a-time parameters.
    91 int bytes_read              = 0;
    92 int bytes_avail             = 0;  // wrt current segment
    93 int bytes_remaining         = 0;
    94 
    95 int copy_back_size          = 0;
    96 int copy_back_offset        = 0;
    97 
    98 int block_index             = 0;
    99 int block_base              = 0;
    100 
    101 //int segment_index           = 0; // segment index wrt current buffer  // unused
    102 //int segment_base            = 0; // segment offset wrt current buffer // unused
    103 
    104 int stream_base             = 0;
    105 
    106 int match_offset            = 0; // 0-based
    107 int line_start_offset       = 0; 
    108 int line_end_offset         = 0;
    109 
    110 int line_final_start_offset = 0;
    111 int line_final_end_offset   = 0;
     90const int SCANBLOCK_SIZE     = sizeof(ScanWord) * 8;
     91const int SCANFIELD_SIZE     = sizeof(ScanWord) * 8;
     92const int SEGMENT_BLOCKS     = SCANBLOCK_SIZE * SCANFIELD_SIZE / BLOCK_SIZE;
     93const int SEGMENT_SIZE       = SEGMENT_BLOCKS * BLOCK_SIZE;
     94
     95const char * fixed_pattern   = "apple";
     96const int pattern_size       = strlen(fixed_pattern);
    11297
    11398int main(int argc, char * argv[]) {
    114    
    115     char * infilename, * outfilename;
    116     FILE * infile, * outfile;
    117        
    118     int opt_code;
    119     int byte_offset             = 0;
    120     int count_only_option       = 0;
    121     int only_matching           = 0;
    122     int print_version_option    = 0;
    123    
    124     while ((opt_code = getopt(argc, argv, "bcov?")) != -1) {
    125         switch (opt_code) {
    126         case 'b':
    127             byte_offset = 1;
    128             break;
    129         case 'c':
    130             count_only_option = 1;
    131             printf("Not implemented.");
    132             break;
    133         case 'o':
    134             only_matching = 1;
    135             break;           
    136         case 'v':
    137             print_version_option = 1;
    138             break;
    139         case '?':
    140             printf("Usage: %s [-c] [-v] <inputfile> [<outputfile>]\n", argv[0]);
    141             exit(-1);
    142             break;
    143         default:
    144             printf ("Invalid option: %c\n", opt_code);
    145             printf("Usage: %s [-c] [-v] <inputfile> [<outputfile>]\n", argv[0]);
    146             exit(-1);
    147         }
     99
     100  char * infilename, * outfilename;
     101  FILE * infile, * outfile;
     102
     103  int opt_code;
     104  int byte_offset             = 0;
     105  int pattern_only_matching   = 0;
     106  int line_matching           = 1;   
     107
     108  while ((opt_code = getopt(argc, argv, "bo?")) != -1) {
     109    switch (opt_code) {
     110      case 'b':
     111        byte_offset = 1;
     112        break;
     113      case 'o':
     114        pattern_only_matching = 1;
     115        line_matching = 0;
     116        break;
     117      case '?':
     118      default:
     119        printf ("Invalid option: %c\n", opt_code);
     120        printf("Usage: %s [OPTION] <inputfile> [<outputfile>]\n", argv[0]);
     121        printf("\t-b,\tprint the byte offset with output lines\n");
     122        printf("\t-o,\tshow only the part of a line matching PATTERN\n");
     123        printf("\t-V,\tprint version information and exit");           
     124        exit(-1);
    148125    }
    149    
     126  }
     127 
    150128  if (optind >= argc) {
    151129    printf ("Too few arguments\n");
     
    153131    exit(-1);
    154132  }
    155      
     133
    156134  infilename = argv[optind++];
    157135  infile = fopen(infilename, "rb");
    158136  if (!infile) {
    159       fprintf(stderr, "Error: cannot open %s.\n", infilename);
    160       exit(-1);
     137    fprintf(stderr, "Error: cannot open %s.\n", infilename);
     138    exit(-1);
    161139  }
    162140
     
    164142      outfile = stdout;
    165143  } else {
    166       outfilename = argv[optind++];
    167       if (optind != argc) {
    168           printf ("Too many arguments\n");
    169           printf("Usage: %s [-c] [-v] <regex> <inputfile> [<outputfile>]\n", argv[0]);
    170           exit(-1);
    171       }
    172       outfile = fopen(outfilename, "wb");
    173       if (!outfile) {
    174           fprintf(stderr, "Error: cannot open %s.\n", outfilename);
    175           exit(-1);
    176       }
     144    outfilename = argv[optind++];
     145    if (optind != argc) {
     146      printf ("Too many arguments\n");
     147      printf("Usage: %s [-c] [-v] <regex> <inputfile> [<outputfile>]\n", argv[0]);
     148      exit(-1);
     149    }
     150    outfile = fopen(outfilename, "wb");
     151    if (!outfile) {
     152      fprintf(stderr, "Error: cannot open %s.\n", outfilename);
     153      exit(-1);
     154    }
    177155  }
    178156
    179   if(print_version_option) {
    180       fprintf(outfile, "grep static fixed-string parallel bit streams.: March 2014\n");
    181   }
    182 
     157  // { // ___loop_preheader___
     158
     159
     160// S2K Generated
     161###@struct_decls ###
     162
     163// Initialize struct members
     164basis_bits.bit_7 = simd<1>::constant<0>();
     165basis_bits.bit_6 = simd<1>::constant<0>();
     166basis_bits.bit_5 = simd<1>::constant<0>();
     167basis_bits.bit_4 = simd<1>::constant<0>();
     168basis_bits.bit_3 = simd<1>::constant<0>();
     169basis_bits.bit_2 = simd<1>::constant<0>();
     170basis_bits.bit_1 = simd<1>::constant<0>();
     171basis_bits.bit_0 = simd<1>::constant<0>();
     172
     173lex.a = simd<1>::constant<0>();
     174lex.p = simd<1>::constant<0>();
     175lex.l = simd<1>::constant<0>();
     176lex.e = simd<1>::constant<0>();
     177lex.LF = simd<1>::constant<0>();
     178
     179lex.a = simd<1>::constant<0>();
     180lex.p = simd<1>::constant<0>();
     181lex.l = simd<1>::constant<0>();
     182lex.e = simd<1>::constant<0>();
     183lex.LF = simd<1>::constant<0>();
     184
     185output.match_follows = simd<1>::constant<0>();
     186output.lines= simd<1>::constant<0>();
     187output.line_starts= simd<1>::constant<0>();
     188output.line_ends= simd<1>::constant<0>();
     189
     190// S2K Generated
     191###@filter_decls ###
     192
     193  // \s2k{} transpose.do_block() / transpose.do_final_block()
     194  // bind to `char * byte_data' and `struct Basis_bits 'basis' argument names.
    183195  ATTRIBUTE_SIMD_ALIGN char buffer[SEGMENT_SIZE];
    184   // Pablo transpose.do_block(), transpose.do_final_block()
    185   // expect 'byte_data' and 'basis' names as input and output arguments.
    186196  char * byte_data = buffer;
    187197
    188   // Scanners
     198  // Iterators
    189199  BitStreamScanner<BitBlock, ScanWord, ScanWord, SEGMENT_BLOCKS> matches_scanner;
    190200  BitStreamScanner<BitBlock, ScanWord, ScanWord, SEGMENT_BLOCKS> line_starts_scanner;
    191201  BitStreamScanner<BitBlock, ScanWord, ScanWord, SEGMENT_BLOCKS> line_ends_scanner;
    192202
    193   // Segment-at-a-time processing.
     203  // Segment-at-a-time variables
     204  int bytes_read              = 0;
     205  int bytes_avail             = 0;
     206  int bytes_remaining         = 0;
     207
     208  int copy_back_size          = 0;
     209  int copy_back_offset        = 0;
     210
     211  // Segment-at-a-time offset
     212  int block_index             = 0;
     213  int segment_base            = 0;
     214
     215  // Iterator offset
     216  int match_offset            = 0;
     217  int line_start_offset       = 0;
     218  int line_end_offset         = 0;
     219
     220  int line_final_start_offset = 0;
     221  int line_final_end_offset   = 0;
     222
     223  // { // ___loop_preheader___
     224
     225
    194226  while(!feof(infile)) {
    195     // Read Stream in SEGMENT_SIZE - strlen("needle") byte chunks.
    196     bytes_read      = fread(buffer + copy_back_size, 1, SEGMENT_SIZE - copy_back_size, infile);
     227 
     228    // __loop_header__ {
     229    block_index = 0;
     230
     231    // initialize scanners
     232    matches_scanner.init();
     233    line_starts_scanner.init();
     234    line_ends_scanner.init(); 
     235 
     236    bytes_read      = fread(buffer + copy_back_size, 1,
     237                        SEGMENT_SIZE - copy_back_size, infile);
    197238    bytes_avail     = bytes_read + copy_back_size;
    198239    bytes_remaining = bytes_avail;
    199    
    200 //    if(feof(infile))
    201 //        && (0 == bytes_remaining)) {
    202 //        if(infile) { fclose(infile); infile=NULL;}
    203 //        if(outfile) { fclose(outfile); outfile=NULL;}
    204 //        break;
    205 //    }
    206    
     240
    207241    if(ferror(infile)) { perror( "io error" ); exit(1); }
    208 
    209     // Process full segment.
    210    
    211     //assert(("fread exceeded segment size.", bytes_avail <= SEGMENT_SIZE));
    212    
    213     if (bytes_remaining == SEGMENT_SIZE) { // (bytes_remaining >= SEGMENT_SIZE)
    214    
    215       block_base      = 0;     
     242    // } __loop_header__
     243
     244
     245    // __loop_body__ {
     246   
     247    // Process full segment
     248    if (bytes_remaining == SEGMENT_SIZE) {
     249
     250      for(block_index = 0; block_index < SEGMENT_BLOCKS; block_index++) {
     251
     252        byte_data = &buffer[block_index * BLOCK_SIZE];
    216253       
    217       if(only_matching) { 
    218         matches_scanner.init();
     254        // S2K Generated
     255        ###@filter_do_block ###
     256
     257        if(pattern_only_matching) {
     258          matches_scanner.load_block(output.match_follows, block_index);
     259        }
     260
     261        if(line_matching) {
     262          line_starts_scanner.load_block(output.line_starts, block_index);
     263          line_ends_scanner.load_block(output.line_ends, block_index);
     264        }
    219265      }
    220      
    221       if(!only_matching) {
    222         line_starts_scanner.init();
    223         line_ends_scanner.init();
    224       }
    225 
    226       for(block_index = 0;
    227           block_index < SEGMENT_BLOCKS;
    228           block_index++, block_base+=BLOCK_SIZE) {
    229 
    230         byte_data = &buffer[block_base];
    231        
    232         //Compiled to 'do_block()' calls.
    233         ###@filter_do_block ###
    234 
    235         if(only_matching) { 
    236           matches_scanner.load_block(output.match_follows, block_index);
    237         }
    238 
    239         if(!only_matching) {
    240             line_starts_scanner.load_block(output.line_starts, block_index);
    241             line_ends_scanner.load_block(output.line_ends, block_index);
    242         }
    243       }
    244    
    245       if(only_matching) {
     266
     267      if(pattern_only_matching) {
    246268        while(matches_scanner.has_next()) {
    247269          match_offset = matches_scanner.scan_to_next() - pattern_size;
    248270          if(byte_offset) {
    249               int match_stream_offset = stream_base + match_offset;
    250               fprintf(outfile, "%d:", match_stream_offset);
    251           }
    252          
    253           // KH: Lookahead.
    254           fwrite(&buffer[match_offset], 1, pattern_size, outfile);
    255          
     271            int match_stream_offset = segment_base + match_offset;
     272            fprintf(outfile, "%d:", match_stream_offset);
     273          }
     274
     275          fwrite(&buffer[match_offset], 1, pattern_size, outfile); // lookahead
    256276          //fprintf(outfile, "%s\n", fixed_pattern);
    257277          fprintf(outfile, "\n");
    258278        }
    259        
    260         copy_back_size      = pattern_size + 1;           
    261         copy_back_offset    = bytes_avail - copy_back_size;           
     279
     280        copy_back_size      = pattern_size + 1;
     281        copy_back_offset    = bytes_avail - copy_back_size;
    262282      }
    263283 
    264       if(!only_matching) {
    265 
    266         assert(("Line length exceeds segment size.", line_ends_scanner.has_next() && line_starts_scanner.has_next())); 
    267          
    268         //if(has_line_start) {
    269             line_final_start_offset = line_starts_scanner.get_final_pos();
    270         //}
    271         //if(has_line_end) {
    272             line_final_end_offset = line_ends_scanner.get_final_pos();
    273         //}
    274            
    275         // if(!has_line_start && !has_line_end) {/* Set flag to buffer entire segment. */;}
    276          
     284      if(line_matching) {
     285
     286        assert(("Input line length exceeds segment size.",
     287          line_ends_scanner.has_next() && line_starts_scanner.has_next()));
     288        line_final_start_offset = line_starts_scanner.get_final_pos();
     289        line_final_end_offset = line_ends_scanner.get_final_pos();
     290
    277291        while(line_starts_scanner.has_next() && line_ends_scanner.has_next()) {
    278              
     292
    279293          line_start_offset  = line_starts_scanner.scan_to_next();
    280294          line_end_offset    = line_ends_scanner.scan_to_next();
    281              
     295
    282296          if(byte_offset) {
    283             fprintf(outfile, "%d:", stream_base + line_start_offset);
    284           }
    285              
    286           fwrite(&buffer[line_start_offset], 1, line_end_offset - line_start_offset + 1, outfile);
    287         }
    288        
    289         copy_back_offset   = (line_final_start_offset > line_final_end_offset) ? line_final_start_offset : (line_final_end_offset + 1) ;
    290         copy_back_size     = bytes_avail - copy_back_offset;   
     297            fprintf(outfile, "%d:", segment_base + line_start_offset);
     298          }
     299
     300          fwrite(&buffer[line_start_offset], 1,
     301            line_end_offset - line_start_offset + 1, outfile);
     302        }
     303
     304        copy_back_offset = (line_final_start_offset > line_final_end_offset)
     305                         ? line_final_start_offset : (line_final_end_offset + 1);
     306        copy_back_size   = bytes_avail - copy_back_offset;
    291307
    292308        assert(("copy_back_offset", (copy_back_offset >= 0)));
    293         assert(("copy_back_offset", (copy_back_offset <= bytes_avail)));           
     309        assert(("copy_back_offset", (copy_back_offset <= bytes_avail)));
    294310        assert(("copy_back_size", (copy_back_size >= 0)));
    295311        assert(("copy_back_size", (copy_back_size < SEGMENT_SIZE)));
    296        
    297312      }
    298313     
     
    302317    // Process a partial segment.
    303318    if(bytes_remaining > 0) {
    304                
    305         block_index = 0;   
    306         block_base  = 0;   
     319
     320      while (bytes_remaining >= BLOCK_SIZE) {
     321        byte_data = &buffer[block_index * BLOCK_SIZE];
    307322       
    308         if(only_matching) {
    309           matches_scanner.init();
    310         }
     323        // Compiler 'do_block()' calls.
     324        ###@filter_do_block ###
     325 
     326        if(pattern_only_matching) {
     327          matches_scanner.load_block(output.match_follows, block_index);
     328        }       
    311329       
    312         if(!only_matching) {
    313           line_starts_scanner.init();
    314           line_ends_scanner.init();
    315         }
    316        
    317         // Process full blocks.
    318         while (bytes_remaining >= BLOCK_SIZE) {
    319           byte_data = &buffer[block_base];
    320          
    321           // Compiler 'do_block()' calls.
    322           ###@filter_do_block ###
    323    
    324           if(only_matching) {
    325             matches_scanner.load_block(output.match_follows, block_index);
    326           }       
    327          
    328           if(!only_matching) {
    329             line_starts_scanner.load_block(output.line_starts, block_index);
    330             line_ends_scanner.load_block(output.line_ends, block_index);
    331           }
    332    
    333           block_base += BLOCK_SIZE;
    334           bytes_remaining -= BLOCK_SIZE;
    335           block_index++;
    336        }
    337 
    338        // Process a partial final block. // KH: Not required.     
    339        BitBlock EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE - bytes_remaining));   
    340        byte_data = &buffer[block_base];
     330        if(line_matching) {
     331          line_starts_scanner.load_block(output.line_starts, block_index);
     332          line_ends_scanner.load_block(output.line_ends, block_index);
     333        }
     334 
     335        bytes_remaining -= BLOCK_SIZE;
     336        block_index++;
     337       }   
     338
     339       // Process a partial block.
     340       BitBlock EOF_mask = bitblock::srl(simd<1>::constant<1>(),
     341                                        convert(BLOCK_SIZE - bytes_remaining));
     342       byte_data = &buffer[block_index * BLOCK_SIZE];
    341343         
    342344       // Compiler 'do_final_block()' calls.
    343345       ###@filter_do_final_block ###
    344346       
    345        if(only_matching) {
     347       if(pattern_only_matching) {
    346348          matches_scanner.load_block(output.match_follows & EOF_mask, block_index);
    347349       }
    348          
    349        if(!only_matching) {   
     350
     351       if(line_matching) {   
    350352         line_starts_scanner.load_block(output.line_starts & EOF_mask, block_index);
    351353         line_ends_scanner.load_block(output.line_ends & EOF_mask, block_index);           
    352354       }
    353 
    354        if(only_matching) {
    355           while(matches_scanner.has_next()) {
    356             match_offset = matches_scanner.scan_to_next() - pattern_size;
    357             if(byte_offset) {
    358                 int match_stream_offset = stream_base + match_offset;
    359                 fprintf(outfile, "%d:", match_stream_offset);
    360             }
    361            
    362             // KH: Lookahead.
    363             fwrite(&buffer[match_offset], 1, pattern_size, outfile);
    364             fprintf(outfile, "\n");
     355 
     356       if(pattern_only_matching) {
     357         while(matches_scanner.has_next()) {
     358           match_offset = matches_scanner.scan_to_next() - pattern_size;
     359           if(byte_offset) {
     360             int match_stream_offset = segment_base + match_offset;
     361             fprintf(outfile, "%d:", match_stream_offset);
     362           }
     363
     364           fwrite(&buffer[match_offset], 1, pattern_size, outfile); // lookahead
     365           fprintf(outfile, "\n");
     366         }
     367   
     368          copy_back_size      = pattern_size + 1;
     369          copy_back_offset    = bytes_avail - copy_back_size;
     370       }
     371       
     372       if(line_matching) {
     373         assert(("Input line exceeds segment size.",
     374                  line_ends_scanner.has_next() &&
     375                  line_starts_scanner.has_next()));
     376         line_final_start_offset = line_starts_scanner.get_final_pos();
     377         line_final_end_offset = line_ends_scanner.get_final_pos();
     378       
     379        while(line_starts_scanner.has_next() && line_ends_scanner.has_next()) {
     380          line_start_offset  = line_starts_scanner.scan_to_next();
     381          line_end_offset    = line_ends_scanner.scan_to_next();
     382   
     383          if(byte_offset) {
     384            fprintf(outfile, "%d:", segment_base + line_start_offset,
     385                     segment_base + line_end_offset);
    365386          }
    366          
    367           copy_back_size      = pattern_size + 1;           
    368           copy_back_offset    = bytes_avail - copy_back_size;             
    369         }
    370      
    371         if(!only_matching) {
    372            
    373             assert(("Line length exceeds segment size.", line_ends_scanner.has_next() && line_starts_scanner.has_next())); 
    374                        
    375             //if(has_line_start) {
    376                 line_final_start_offset = line_starts_scanner.get_final_pos();
    377             //}
    378             //if(has_line_end) {
    379                 line_final_end_offset = line_ends_scanner.get_final_pos();
    380             //}
    381                
    382             // if(!has_line_start && !has_line_end) {/* Set flag to buffer entire segment. */;}
    383    
    384             while(line_starts_scanner.has_next() && line_ends_scanner.has_next()) {
    385                
    386                 line_start_offset  = line_starts_scanner.scan_to_next();
    387                 line_end_offset    = line_ends_scanner.scan_to_next();
    388                
    389                 if(byte_offset) {
    390                     fprintf(outfile, "%d:", stream_base + line_start_offset);
    391                 }
    392                
    393                 fwrite(&buffer[line_start_offset], 1, line_end_offset - line_start_offset + 1, outfile);
    394             }
    395 
    396             copy_back_offset   = (line_final_start_offset > line_final_end_offset) ? line_final_start_offset : (line_final_end_offset + 1) ;
    397             copy_back_size     = bytes_avail - copy_back_offset;
    398            
    399             assert(("copy_back_offset", (copy_back_offset >= 0)));
    400             assert(("copy_back_offset", (copy_back_offset <= bytes_avail)));           
    401             assert(("copy_back_size", (copy_back_size >= 0)));
    402             assert(("copy_back_size", (copy_back_size < SEGMENT_SIZE)));
    403            
    404         }       
     387   
     388          fwrite(&buffer[line_start_offset], 1,
     389                 line_end_offset - line_start_offset + 1, outfile);
     390        }
     391   
     392        copy_back_offset   = (line_final_start_offset > line_final_end_offset)
     393                              ? line_final_start_offset
     394                              : (line_final_end_offset + 1);
     395                             
     396        copy_back_size     = bytes_avail - copy_back_offset;
     397   
     398        assert(("copy_back_offset", (copy_back_offset >= 0)));
     399        assert(("copy_back_offset", (copy_back_offset <= bytes_avail)));
     400        assert(("copy_back_size", (copy_back_size >= 0)));
     401        assert(("copy_back_size", (copy_back_size < SEGMENT_SIZE)));           
     402      }
    405403    }
    406    
    407     if(DEBUG) {
    408         printf("bytes_avail: %d\n", bytes_avail);
    409         printf("bytes_remaining: %d\n", bytes_remaining);
    410         printf("copy_back_offset: %d\n", copy_back_offset);
    411         printf("copy_back_size: %d\n", copy_back_size);       
    412         printf("final_line_starts_offset: %d\n", line_final_start_offset);
    413         printf("final_line_ends_offset: %d\n", line_final_end_offset);
    414     }
    415    
     404
     405
     406    // __loop_tail__ {
     407    // S2K Generated 'clear()' calls.
     408    ###@filter_clear ###
     409
    416410    memmove(&buffer[0], &buffer[copy_back_offset], copy_back_size);
    417    
    418     // pablo.ScanToFirst() must clear carry-in at the start of each segment
    419     classifyBytes.clear();
    420     match.clear();
    421     matchLines.clear();
    422    
    423     stream_base += bytes_avail;
    424     stream_base -= copy_back_size;
     411
     412    // segment_base()
     413    segment_base += bytes_avail;
     414    segment_base -= copy_back_size;
     415    // } __loop_tail__
     416
    425417
    426418  }
    427419
     420
     421  // __loop_exit__ {
    428422  if(infile) { fclose(infile); infile=NULL;}
    429423  if(outfile) { fclose(outfile); outfile=NULL;}
    430  
     424  // } __loop_exit__
     425
     426
    431427  return 0;
    432428}
     429
     430
     431
     432
     433
     434
     435
     436
     437
     438
     439
     440
     441
     442
     443
     444
     445
     446
     447
     448
     449
     450
     451
     452
     453
     454
     455
     456
     457
     458/*
     459 * Segment-at-a-time Processing Issues:
     460 *
     461 * 1. Start-of-stream or equivalently start-of-segment clear()...
     462 *    The ScanTo and EOF_mask processing ensure a 'fence post' at the
     463 *    end of each segment as well as at the end of file.
     464 *
     465 * 'grep.py' and 'grep_template.cpp' are tightly coupled on a number of variables:
     466 *
     467 * 1. Transpose.do_block expects 'byte_data' and 'basis_bits'.
     468 * 2. All StreamFunction.do_final_block methods expect 'EOF_mask'.
     469 * 3. Sequential iterators (scanners) expect 'output.matches'.
     470 * 4. Sequential iterators (scanners) expect 'lex.LF'.
     471 *
     472 */
Note: See TracChangeset for help on using the changeset viewer.