Ignore:
Timestamp:
Dec 8, 2014, 10:09:03 AM (5 years ago)
Author:
cameron
Message:

Factor out grep execution engine to do_grep.h, do_grep.cpp

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/icgrep.cpp

    r4323 r4324  
    88#include "utf_encoding.h"
    99#include "compiler.h"
     10#include "pablo/pablo_compiler.h"
     11#include "do_grep.h"
    1012
    1113#include <fstream>
     
    2123#include <sys/types.h>
    2224#include <sys/stat.h>
    23 
    24 #include "include/simd-lib/bitblock.hpp"
    25 #include "include/simd-lib/carryQ.hpp"
    26 #include "include/simd-lib/pabloSupport.hpp"
    27 #include "include/simd-lib/s2p.hpp"
    28 #include "include/simd-lib/buffer.hpp"
    29 #include "include/simd-lib/bitblock_iterator.hpp"
    30 #include "include/simd-lib/transpose.hpp"
    31 
    32 #include "hrtime.h"
    33 
    34 // mmap system
    35 #include <sys/mman.h>
    36 #include <fcntl.h>
    37 
    38 #if (BLOCK_SIZE == 128)
    39 #define SEGMENT_BLOCKS 7
    40 #endif
    41 
    42 #if (BLOCK_SIZE == 256)
    43 #define SEGMENT_BLOCKS 15
    44 #endif
    45 
    46 #define SEGMENT_SIZE (BLOCK_SIZE * SEGMENT_BLOCKS)
    47 
    48 #define BUFFER_SEGMENTS 15
    49 #define BUFFER_SIZE (BUFFER_SEGMENTS * SEGMENT_SIZE)
    50 
    51 #define BitBlock_declare(name)  BitBlock name
    52 
    53 #define ubitblock_declare(name, n) \
    54   ubitblock name[n];\
    55   do {int i;\
    56       for (i = 0; i < n; i++) name[i]._128 = simd<1>::constant<0>();\
    57      }\
    58   while (0)
    59 
    60 BitBlock EOF_mask = simd<1>::constant<1>();
    61 
    62 struct Output {
    63     BitBlock matches;
    64     BitBlock LF;
    65 };
    66 
    67 using namespace std;
    68 
    69 typedef void (*process_block_fcn)(const Basis_bits &basis_bits, BitBlock carry_q[], BitBlock advance_q[], Output &output);
    70 
    71 
    72 void do_process(char * infile_buffer, size_t infile_size, FILE *outfile, int count_only_option, int carry_count, int advance_count, process_block_fcn process_block);
    73 
    74 
    75 BitBlock get_category(Basis_bits &basis_bits, const char* category);
    7625
    7726int main(int argc, char *argv[])
     
    9241    bool enable_multiplexing = 0;
    9342    bool print_usage = 0;
    94 
    95     unsigned long long cycles = 0;
    9643
    9744
     
    172119    infilename = argv[optind++];
    173120
    174     if (optind >= argc) {
    175         outfile = stdout;
    176     }
    177     else {
    178         outfilename = argv[optind++];
    179         if (optind != argc)
    180         {
    181             printf("Too many arguments\n");
    182             printf("Usage: %s [-a] [-c] [-f] [-m] [-t] [-v] <regex|regexfile> <inputfile> [<outputfile>]\n", argv[0]);
    183             exit(-1);
    184         }
    185         outfile = fopen(outfilename, "wb");
    186         if (!outfile)
    187         {
    188             fprintf(stderr, "Error: cannot open %s for writing.\n", outfilename);
    189             exit(-1);
    190         }
    191     }
    192 
    193     fdSrc = open(infilename, O_RDONLY);
    194     if (fdSrc == -1) {
    195         fprintf(stderr, "Error: cannot open %s for processing.\n", infilename);
    196         exit(-1);
    197     }
    198     if (fstat(fdSrc, &infile_sb) == -1) {
    199         fprintf(stderr, "Error: cannot stat %s for processing.\n", infilename);
    200         exit(-1);
    201     }
    202     if (infile_sb.st_size == 0) {
    203         if (count_only_option) fprintf(outfile, "Matching Lines: %d\n", 0);
    204         exit(0);
    205     }
    206     infile_buffer = (char *) mmap(NULL, infile_sb.st_size, PROT_READ, MAP_PRIVATE, fdSrc, 0);
    207     if (infile_buffer == MAP_FAILED) {
    208         fprintf(stderr, "Error: mmap of %s failure.\n", infilename);
    209         exit(-1);
    210     }
    211121
    212122    if (print_version_option)
     
    216126
    217127    Encoding encoding(ascii_only_option ? Encoding::Type::ASCII : Encoding::Type::UTF_8, 8);
    218     if (compile_time_option)
     128    const auto llvm_codegen = icgrep::compile(encoding, (regex_from_file_option ? fileregex : inregex), false, enable_multiplexing);
     129
     130    if (llvm_codegen.process_block_fptr != 0)
     131     
    219132    {
    220         cycles = get_hrcycles();
    221     }
    222     const auto llvm_codegen = icgrep::compile(encoding, (regex_from_file_option ? fileregex : inregex), compile_time_option, enable_multiplexing);
    223 
    224     if (compile_time_option)
    225     {
    226         cycles = get_hrcycles() - cycles;
    227         std::cout << "Total compile time - cycles:       " << cycles << std::endl;
     133        void (*FP)(const Basis_bits &basis_bits, BitBlock carry_q[], BitBlock advance_q[], Output &output) =
     134           (void (*)(const Basis_bits &basis_bits, BitBlock carry_q[], BitBlock advance_q[], Output &output))(void*)llvm_codegen.process_block_fptr;
     135        GrepExecutor grepEngine = GrepExecutor(llvm_codegen.carry_q_size, llvm_codegen.advance_q_size, FP);
     136        grepEngine.setCountOnlyOption(count_only_option);
     137        grepEngine.doGrep(infilename);
    228138    }
    229139
    230     if (llvm_codegen.process_block_fptr != 0)
    231     {
    232         void (*FP)(const Basis_bits &basis_bits, BitBlock carry_q[], BitBlock advance_q[], Output &output) = (void (*)(const Basis_bits &basis_bits, BitBlock carry_q[], BitBlock advance_q[], Output &output))(void*)llvm_codegen.process_block_fptr;
    233         do_process(infile_buffer, infile_sb.st_size, outfile, count_only_option, llvm_codegen.carry_q_size, llvm_codegen.advance_q_size, FP);
    234     }
    235 
    236     munmap((void *) infile_buffer, infile_sb.st_size);
    237     close(fdSrc);
    238     fclose(outfile);
    239140    if (regex_from_file_option) free(fileregex);
    240141
     
    242143}
    243144
    244 #if (BLOCK_SIZE == 256)
    245 typedef BitStreamScanner<BitBlock, uint64_t, uint64_t, SEGMENT_BLOCKS> ScannerT;
    246 #endif
    247 
    248 #if (BLOCK_SIZE == 128)
    249 typedef BitStreamScanner<BitBlock, uint32_t, uint32_t, SEGMENT_BLOCKS> ScannerT;
    250 #endif
    251 
    252 //
    253 // Write matched lines from a buffer to an output file, given segment
    254 // scanners for line ends and matches (where matches are a subset of line ends).
    255 // The buffer pointer must point to the first byte of the segment
    256 // corresponding to the scanner indexes.   The first_line_start is the
    257 // start position of the first line relative to the buffer start position.
    258 // It must be zero or negative;  if negative, the buffer must permit negative
    259 // indexing so that the part of the line before the buffer start position can also be printed.
    260 // The start position of the final line in the processed segment is returned.
    261 //
    262 
    263 ssize_t write_matches(FILE * outfile, ScannerT line_scanner, ScannerT match_scanner, char * buffer, ssize_t first_line_start) {
    264 
    265   ssize_t line_start = first_line_start;
    266   size_t match_pos;
    267   size_t line_end;
    268   while (match_scanner.has_next()) {
    269     match_pos = match_scanner.scan_to_next();
    270     // If we found a match, it must be at a line end.
    271     line_end = line_scanner.scan_to_next();
    272     while (line_end < match_pos) {
    273       line_start = line_end + 1;
    274       line_end = line_scanner.scan_to_next();
    275     }
    276     fwrite(&buffer[line_start], 1, line_end - line_start + 1, outfile);
    277     line_start = line_end + 1;
    278 
    279   }
    280   while(line_scanner.has_next()) {
    281     line_end = line_scanner.scan_to_next();
    282     line_start = line_end+1;
    283   }
    284   return line_start;
    285 }
    286 
    287 
    288 
    289 void do_process(char * infile_buffer, size_t infile_size, FILE *outfile, int count_only_option, int carry_count, int advance_count, process_block_fcn process_block) {
    290 
    291     struct Basis_bits basis_bits;
    292     struct Output output;
    293     BitBlock match_vector;
    294     BitBlock carry_q[carry_count];
    295     BitBlock advance_q[advance_count];
    296     int match_count=0;
    297     int blk = 0;
    298     int block_base  = 0;
    299     int block_pos   = 0;
    300     int buffer_pos  = 0;
    301     int chars_avail = 0;
    302     int chars_read  = 0;
    303     int line_start = 0;
    304     int line_end = 0;
    305     int match_pos = 0;
    306     int line_no = 0;
    307 
    308     ScannerT LF_scanner;
    309     ScannerT match_scanner;
    310 
    311     match_vector = simd<1>::constant<0>();
    312     memset (carry_q, 0, sizeof(BitBlock) * carry_count);
    313     memset (advance_q, 0, sizeof(BitBlock) * advance_count);
    314 
    315     char * buffer_ptr;
    316     int segment = 0;
    317     int segment_base = 0;
    318     chars_avail = infile_size;
    319 
    320 //////////////////////////////////////////////////////////////////////////////////////////
    321 // Full Segments
    322 //////////////////////////////////////////////////////////////////////////////////////////
    323 
    324 
    325 
    326     while (chars_avail >= SEGMENT_SIZE) {
    327 
    328         segment_base = segment * SEGMENT_SIZE;
    329         LF_scanner.init();
    330         match_scanner.init();
    331 
    332         for (blk = 0; blk < SEGMENT_BLOCKS; blk++) {
    333             block_base = blk*BLOCK_SIZE + segment_base;
    334             s2p_do_block((BytePack *) &infile_buffer[block_base], basis_bits);
    335             process_block(basis_bits, carry_q, advance_q, output);
    336 
    337             LF_scanner.load_block(output.LF, blk);
    338             match_scanner.load_block(output.matches, blk);
    339             if (count_only_option){
    340                 if (bitblock::any(output.matches))
    341                 {
    342                     if (bitblock::any(simd_and(match_vector, output.matches))){
    343                         match_count += bitblock::popcount(match_vector);
    344                         match_vector = output.matches;
    345                     }
    346                     else
    347                     {
    348                         match_vector = simd_or(match_vector, output.matches);
    349                     }
    350                 }
    351             }
    352         }
    353 
    354     buffer_ptr = &infile_buffer[segment_base];
    355 
    356         if (!count_only_option) {
    357           line_start = write_matches(outfile, LF_scanner, match_scanner, buffer_ptr, line_start);
    358         }
    359         segment++;
    360         line_start -= SEGMENT_SIZE;  /* Will be negative offset for use within next segment. */
    361         chars_avail -= SEGMENT_SIZE;
    362     }
    363 
    364 
    365 //////////////////////////////////////////////////////////////////////////////////////////
    366 // For the Final Partial Segment.
    367 //////////////////////////////////////////////////////////////////////////////////////////
    368 
    369     segment_base = segment * SEGMENT_SIZE;
    370     int remaining = chars_avail;
    371 
    372     LF_scanner.init();
    373     match_scanner.init();
    374 
    375     /* Full Blocks */
    376     blk = 0;
    377     while (remaining >= BLOCK_SIZE) {
    378     //fprintf(outfile, "Remaining = %i\n", remaining);
    379         block_base = block_pos + segment_base;
    380         s2p_do_block((BytePack *) &infile_buffer[block_base], basis_bits);
    381         process_block(basis_bits, carry_q, advance_q, output);
    382 
    383         LF_scanner.load_block(output.LF, blk);
    384         match_scanner.load_block(output.matches, blk);
    385         if (count_only_option)
    386         {
    387             if (bitblock::any(output.matches))
    388             {
    389                 if (bitblock::any(simd_and(match_vector, output.matches)))
    390                 {
    391                     match_count += bitblock::popcount(match_vector);
    392                     match_vector = output.matches;
    393                 }
    394                 else
    395                 {
    396                     match_vector = simd_or(match_vector, output.matches);
    397                 }
    398             }
    399         }
    400 
    401         block_pos += BLOCK_SIZE;
    402         remaining -= BLOCK_SIZE;
    403         blk++;
    404     }
    405     block_base = block_pos;
    406     //fprintf(stderr, "Remaining = %i\n", remaining);
    407 
    408     //For the last partial block, or for any carry.
    409     EOF_mask = bitblock::srl(simd<1>::constant<1>(), convert(BLOCK_SIZE-remaining));
    410      block_base = block_pos + segment_base;
    411      s2p_do_final_block((BytePack *) &infile_buffer[block_base], basis_bits, EOF_mask);
    412     process_block(basis_bits, carry_q, advance_q, output);
    413 
    414     if (count_only_option)
    415     {
    416         match_count += bitblock::popcount(match_vector);
    417         if (bitblock::any(output.matches))
    418         {
    419             match_count += bitblock::popcount(output.matches);
    420         }
    421         fprintf(outfile, "Matching Lines:%d\n", match_count);
    422     }
    423     else
    424     {
    425         LF_scanner.load_block(output.LF, blk);
    426         match_scanner.load_block(output.matches, blk);
    427         blk++;
    428         for (int i = blk; i < SEGMENT_BLOCKS; i++)
    429         {
    430             LF_scanner.load_block(simd<1>::constant<0>(), i);
    431             match_scanner.load_block(simd<1>::constant<0>(), i);
    432         }
    433         buffer_ptr = &infile_buffer[segment_base];
    434         line_start = write_matches(outfile, LF_scanner, match_scanner, buffer_ptr, line_start);
    435     }
    436 
    437     buffer_pos += chars_avail;
    438 }
    439 
    440 
    441 
Note: See TracChangeset for help on using the changeset viewer.