Changeset 4325


Ignore:
Timestamp:
Dec 8, 2014, 1:17:21 PM (4 years ago)
Author:
cameron
Message:

New icgrep option parser based on LLVM CommandLine system.

Location:
icGREP/icgrep-devel/icgrep
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/do_grep.cpp

    r4324 r4325  
    2525#include "include/simd-lib/s2p.hpp"
    2626#include "include/simd-lib/buffer.hpp"
    27 #include "include/simd-lib/bitblock_iterator.hpp"
    2827
    2928// mmap system
     
    3130#include <fcntl.h>
    3231
    33 #if (BLOCK_SIZE == 128)
    34 #define SEGMENT_BLOCKS 7
    35 #endif
    36 
    37 #if (BLOCK_SIZE == 256)
    38 #define SEGMENT_BLOCKS 15
    39 #endif
    40 
    41 #define SEGMENT_SIZE (BLOCK_SIZE * SEGMENT_BLOCKS)
    4232
    4333#define BUFFER_SEGMENTS 15
     
    5444
    5545BitBlock EOF_mask = simd<1>::constant<1>();
    56 
    57 
    58 #if (BLOCK_SIZE == 256)
    59 typedef BitStreamScanner<BitBlock, uint64_t, uint64_t, SEGMENT_BLOCKS> ScannerT;
    60 #endif
    61 
    62 #if (BLOCK_SIZE == 128)
    63 typedef BitStreamScanner<BitBlock, uint32_t, uint32_t, SEGMENT_BLOCKS> ScannerT;
    64 #endif
    6546
    6647//
     
    7556//
    7657
    77 ssize_t write_matches(FILE * outfile, ScannerT line_scanner, ScannerT match_scanner, char * buffer, ssize_t first_line_start) {
     58ssize_t GrepExecutor::write_matches(char * buffer, ssize_t first_line_start) {
    7859
    7960  ssize_t line_start = first_line_start;
     
    8364    match_pos = match_scanner.scan_to_next();
    8465    // If we found a match, it must be at a line end.
    85     line_end = line_scanner.scan_to_next();
     66    line_end = LF_scanner.scan_to_next();
    8667    while (line_end < match_pos) {
    8768      line_start = line_end + 1;
    88       line_end = line_scanner.scan_to_next();
     69      line_end = LF_scanner.scan_to_next();
     70    }
     71    if (mShowFileNameOption) {
     72      std::cout << currentFileName;
    8973    }
    9074    fwrite(&buffer[line_start], 1, line_end - line_start + 1, outfile);
     
    9276
    9377  }
    94   while(line_scanner.has_next()) {
    95     line_end = line_scanner.scan_to_next();
     78  while(LF_scanner.has_next()) {
     79    line_end = LF_scanner.scan_to_next();
    9680    line_start = line_end+1;
    9781  }
     
    10185
    10286
    103 void GrepExecutor::doGrep(char * infilename) {
     87void GrepExecutor::doGrep(const std::string infilename) {
    10488
    10589    struct Basis_bits basis_bits;
     
    10892    BitBlock carry_q[mCarries];
    10993    BitBlock advance_q[mAdvances];
     94   
     95   
     96    currentFileName = infilename + ":";
     97   
    11098    int match_count=0;
    11199    int blk = 0;
     
    120108    int line_no = 0;
    121109
    122     ScannerT LF_scanner;
    123     ScannerT match_scanner;
    124 
    125110    match_vector = simd<1>::constant<0>();
    126111    memset (carry_q, 0, sizeof(BitBlock) * mCarries);
    127112    memset (advance_q, 0, sizeof(BitBlock) * mAdvances);
    128113   
    129     FILE * outfile = stdout;
    130 
    131114    int fdSrc;
    132115    struct stat infile_sb;
    133116    char * infile_buffer;
    134     fdSrc = open(infilename, O_RDONLY);
     117    fdSrc = open(infilename.c_str(), O_RDONLY);
    135118    if (fdSrc == -1) {
    136         fprintf(stderr, "Error: cannot open %s for processing.\n", infilename);
     119        std::cerr << "Error: cannot open " << infilename << " for processing.\n";
    137120        exit(-1);
    138121    }
    139122    if (fstat(fdSrc, &infile_sb) == -1) {
    140         fprintf(stderr, "Error: cannot stat %s for processing.\n", infilename);
     123        std::cerr << "Error: cannot stat " << infilename << " for processing.\n";
    141124        exit(-1);
    142125    }
    143126    if (infile_sb.st_size == 0) {
    144         if (mCountOnlyOption) fprintf(outfile, "Matching Lines: %d\n", 0);
     127        if (mShowFileNameOption) {
     128            std::cout << currentFileName;
     129        }
     130        if (mCountOnlyOption) fprintf(outfile, "%d\n", 0);
    145131        exit(0);
    146132    }
    147133    infile_buffer = (char *) mmap(NULL, infile_sb.st_size, PROT_READ, MAP_PRIVATE, fdSrc, 0);
    148134    if (infile_buffer == MAP_FAILED) {
    149         fprintf(stderr, "Error: mmap of %s failure.\n", infilename);
     135        std::cerr << "Error: mmap of " << infilename << "failed.\n";
    150136        exit(-1);
    151137    }
     
    156142    int segment_base = 0;
    157143    chars_avail = infile_sb.st_size;
    158 
     144   
    159145//////////////////////////////////////////////////////////////////////////////////////////
    160146// Full Segments
    161147//////////////////////////////////////////////////////////////////////////////////////////
    162 
    163 
    164148
    165149    while (chars_avail >= SEGMENT_SIZE) {
     
    191175        }
    192176
    193     buffer_ptr = &infile_buffer[segment_base];
     177        buffer_ptr = &infile_buffer[segment_base];
    194178
    195179        if (!mCountOnlyOption) {
    196           line_start = write_matches(outfile, LF_scanner, match_scanner, buffer_ptr, line_start);
     180          line_start = write_matches(buffer_ptr, line_start);
    197181        }
    198182        segment++;
     
    200184        chars_avail -= SEGMENT_SIZE;
    201185    }
    202 
    203186
    204187//////////////////////////////////////////////////////////////////////////////////////////
     
    258241            match_count += bitblock::popcount(output.matches);
    259242        }
    260         fprintf(outfile, "Matching Lines:%d\n", match_count);
     243        if (mShowFileNameOption) {
     244            std::cout << currentFileName;
     245        }
     246        fprintf(outfile, "%d\n", match_count);
    261247    }
    262248    else
     
    271257        }
    272258        buffer_ptr = &infile_buffer[segment_base];
    273         line_start = write_matches(outfile, LF_scanner, match_scanner, buffer_ptr, line_start);
     259        line_start = write_matches(buffer_ptr, line_start);
    274260    }
    275261   
    276262    munmap((void *) infile_buffer, infile_sb.st_size);
    277263    close(fdSrc);
    278     fclose(outfile);
    279264   
    280265}
    281 
    282 
    283 
  • icGREP/icgrep-devel/icgrep/do_grep.h

    r4324 r4325  
    1515#include "include/simd-lib/bitblock.hpp"
    1616#include "include/simd-lib/transpose.hpp"
     17#include "include/simd-lib/bitblock_iterator.hpp"
    1718
    1819struct Output {
     
    2021    BitBlock LF;
    2122};
     23
     24#if (BLOCK_SIZE == 128)
     25#define SEGMENT_BLOCKS 7
     26#endif
     27
     28#if (BLOCK_SIZE == 256)
     29#define SEGMENT_BLOCKS 15
     30#endif
     31
     32#define SEGMENT_SIZE (BLOCK_SIZE * SEGMENT_BLOCKS)
     33
     34
     35#if (BLOCK_SIZE == 256)
     36typedef BitStreamScanner<BitBlock, uint64_t, uint64_t, SEGMENT_BLOCKS> ScannerT;
     37#endif
     38
     39#if (BLOCK_SIZE == 128)
     40typedef BitStreamScanner<BitBlock, uint32_t, uint32_t, SEGMENT_BLOCKS> ScannerT;
     41#endif
     42
    2243
    2344typedef void (*process_block_fcn)(const Basis_bits &basis_bits, BitBlock carry_q[], BitBlock advance_q[], Output &output);
     
    3051    mCarries(carry_count), mAdvances(advance_count),
    3152    mCountOnlyOption(false), mShowFileNameOption(false), mShowLineNumberingOption(false),
    32     mProcessBlockFcn(process_block)
     53    mProcessBlockFcn(process_block), outfile(stdout)
    3354    {}
    3455   
     
    3758    void setShowLineNumberOption(bool showN = true) {mShowLineNumberingOption = showN;}
    3859   
    39     void doGrep(char * fileName);
     60    void doGrep(std::string fileName);
    4061private:
     62    ssize_t write_matches(char * buffer, ssize_t first_line_start);
     63
    4164    bool mCountOnlyOption;
    4265    bool mShowFileNameOption;
     
    4669    process_block_fcn mProcessBlockFcn;
    4770   
     71    std::string currentFileName;
     72    FILE * outfile;
     73    ScannerT LF_scanner;
     74    ScannerT match_scanner;
    4875};
    4976
  • icGREP/icgrep-devel/icgrep/icgrep.cpp

    r4324 r4325  
    2424#include <sys/stat.h>
    2525
    26 int main(int argc, char *argv[])
    27 {
    28     char * inregex, * fileregex, * infilename, * outfilename;
    29     FILE *infile, *outfile, *regexfile;
    30 
    31     int fdSrc;
    32     struct stat infile_sb;
    33     char * infile_buffer;
    34 
    35     int opt_code;
    36     bool count_only_option = 0;
    37     bool print_version_option = 0;
    38     bool regex_from_file_option = 0;
    39     bool ascii_only_option = 0;
    40     bool compile_time_option = 0;
    41     bool enable_multiplexing = 0;
    42     bool print_usage = 0;
     26#include "llvm/Support/CommandLine.h"
    4327
    4428
    45     long lSize = 0;
     29   
     30int main(int argc, char *argv[]) {
     31    static cl::opt<std::string> regexp1(cl::Positional, cl::Required, cl::desc("<regexp>"));
     32    static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore);
     33   
     34    static cl::opt<bool>CountOnly("c", cl::desc("Count and display the matching lines per file only."));
     35    cl::alias CountOnlyLong("-count", cl::desc("Alias for -c"), cl::aliasopt(CountOnly));
     36   
     37    static cl::opt<bool>ShowFileNames("H", cl::desc("Show the file name with each matching line."));
     38    cl::alias ShowFileNamesLong("-with-filename", cl::desc("Alias for -H"), cl::aliasopt(ShowFileNames));
     39   
     40    static cl::opt<bool>ShowLineNumbers("n", cl::desc("Show the line number with each matching line."));
     41    cl::alias ShowLineNumbersLong("-line-number", cl::desc("Alias for -n"), cl::aliasopt(ShowLineNumbers));
     42   
     43    //cl::opt<std::string> RegexFilename("f", cl::desc("Take regular expressions (one per line) from a file"), cl::value_desc("<filename>"));
     44 
     45    // Does -b mean the byte offset of the line, or the byte offset of the match start within the line?
     46    //static cl::opt<bool>ShowByteOffsets("b", cl::desc("Show the byte offset with each matching line."));
     47    //cl::alias ShowByteOffsetsLong("-byte-offset", cl::desc("Alias for -b"), cl::aliasopt(ShowByteOffsets));
    4648
    47     size_t result;
     49    Encoding encoding(Encoding::Type::UTF_8, 8);
    4850
    49     while ((opt_code = getopt(argc, argv, "cvftam")) != -1)
    50     {
    51         switch (opt_code)
    52         {
    53         case 'c':
    54             count_only_option = 1;
    55             break;
    56         case 'v':
    57             print_version_option = 1;
    58             break;
    59         case 'f':
    60             regex_from_file_option = 1;
    61             break;
    62         case 't':
    63             compile_time_option = 1;
    64             break;
    65         case 'a':
    66             ascii_only_option = 1;
    67             break;
    68         case 'm':
    69             enable_multiplexing = 1;
    70             break;
    71         case '?':
    72             break;
    73         default:
    74             printf ("Invalid option: %c\n", opt_code);
    75             print_usage = 1;
    76         }
    77     }
     51    cl::ParseCommandLineOptions(argc, argv);
    7852
    79     if (optind >= argc)
    80     {
    81         printf ("Too few arguments\n");
    82         print_usage = 1;
    83     }
    84 
    85     if (print_usage) {
    86         printf("Usage: %s [-a] [-c] [-f] [-m] [-t] [-v] <regex|regexfile> <inputfile> [<outputfile>]\n", argv[0]);
    87         exit(-1);
    88     }
    89 
    90     inregex = argv[optind++];
    91     if (inregex == 0)
    92     {
    93         fprintf(stderr, "Error: cannot read the regular expression.\n");
    94         exit(-1);
    95     }
    96 
    97     if (regex_from_file_option)
    98     {
    99         regexfile = fopen(inregex, "rb");
    100         if (!regexfile){
    101             fprintf(stderr, "Error: cannot open %s for processing.\n", inregex);
    102             exit(-1);
    103         }
    104 
    105         fseek (regexfile , 0 , SEEK_END);
    106         lSize = ftell (regexfile);
    107         rewind (regexfile);
    108 
    109         fileregex = (char*) malloc (sizeof(char)*lSize);
    110         if (fileregex == NULL) {fputs ("Memory error",stderr); exit (2);}
    111 
    112         result = fread (fileregex, 1, lSize, regexfile);
    113         if (result != lSize) {fputs ("Reading error",stderr); exit (3);}
    114         fclose(regexfile);
    115 
    116         if (fileregex[lSize - 1] == '\n') fileregex[lSize - 1] = '\0';
    117     }
    118 
    119     infilename = argv[optind++];
    120 
    121 
    122     if (print_version_option)
    123     {
    124         fprintf(outfile, "Parabix icgrep implementation: August 2014\n");
    125     }
    126 
    127     Encoding encoding(ascii_only_option ? Encoding::Type::ASCII : Encoding::Type::UTF_8, 8);
    128     const auto llvm_codegen = icgrep::compile(encoding, (regex_from_file_option ? fileregex : inregex), false, enable_multiplexing);
     53    const auto llvm_codegen = icgrep::compile(encoding, regexp1, false, false);
    12954
    13055    if (llvm_codegen.process_block_fptr != 0)
     
    13459           (void (*)(const Basis_bits &basis_bits, BitBlock carry_q[], BitBlock advance_q[], Output &output))(void*)llvm_codegen.process_block_fptr;
    13560        GrepExecutor grepEngine = GrepExecutor(llvm_codegen.carry_q_size, llvm_codegen.advance_q_size, FP);
    136         grepEngine.setCountOnlyOption(count_only_option);
    137         grepEngine.doGrep(infilename);
     61        grepEngine.setCountOnlyOption(CountOnly);
     62        if (inputFiles.size() > 1) grepEngine.setShowFileNameOption();
     63        for (unsigned i = 0; i != inputFiles.size(); ++i) {
     64            grepEngine.doGrep(inputFiles[i].c_str());
     65        }
    13866    }
    139 
    140     if (regex_from_file_option) free(fileregex);
    14167
    14268    return 0;
Note: See TracChangeset for help on using the changeset viewer.