Ignore:
Timestamp:
May 24, 2017, 10:42:09 AM (2 years ago)
Author:
cameron
Message:

Initial support for -q, -l, -L modes, grep options reorganization

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/icgrep.cpp

    r5464 r5473  
    3838                                       "These are standard grep options intended for compatibility with typical grep usage.");
    3939
    40 #ifdef FUTURE
    41 static cl::OptionCategory RegexpOptions("Regular Expression Interpretation", "These options control regular expression interpretation");
    42 static cl::opt<re::RE_Syntax> RegexpSyntax(cl::desc("Regular expression syntax:"),
    43     cl::values(
    44         clEnumValN(re::RE_Syntax::FixedStrings, "F", "Fixed strings, separated by newlines"),
    45         clEnumValN(re::RE_Syntax::BRE, "G", "Posix basic regular expression (BRE) syntax"),
    46         clEnumValN(re::RE_Syntax::ERE, "E", "Posix extended regular expression (ERE) syntax"),
    47         clEnumValN(re::RE_Syntax::PROSITE, "PRO", "PROSITE protein patterns syntax"),
    48         clEnumValN(re::RE_Syntax::PCRE, "P", "Perl-compatible regular expression (PCRE) syntax - default"),
    49                clEnumValEnd), cl::cat(LegacyGrepOptions), cl::Grouping, cl::init(re::RE_Syntax::PCRE));
    50 #endif
    51 
    52 static cl::opt<bool> EntireLineMatching("x", cl::desc("Require that entire lines be matched."), cl::cat(LegacyGrepOptions), cl::Grouping);
    53 static cl::alias  EntireLineMatchingAlias("line-regexp", cl::desc("Alias for -x"), cl::aliasopt(EntireLineMatching));
    54 
    55 static cl::opt<bool> WholeWordMatching("w", cl::desc("Require that whole words be matched."), cl::cat(LegacyGrepOptions), cl::Grouping);
    56 static cl::alias WholeWordMatchingAlias("word-regexp", cl::desc("Alias for -w"), cl::aliasopt(WholeWordMatching));
    5740
    5841static cl::opt<bool> UTF_16("UTF-16", cl::desc("Regular expressions over the UTF-16 representation of Unicode."), cl::cat(LegacyGrepOptions));
     
    6043                                       "These are additional options for icgrep functionality and performance.");
    6144
    62 static cl::opt<bool> FileNamesOnly("l", cl::desc("Display only the names of matching files."), cl::cat(LegacyGrepOptions), cl::Grouping);
    63 static cl::alias FileNamesAlias("files-with-matches", cl::desc("Alias for -l"), cl::aliasopt(FileNamesOnly));
    64 
    65 static cl::opt<bool> NonMatchingFileNamesOnly("L", cl::desc("Display only the names of nonmatching files."), cl::cat(LegacyGrepOptions), cl::Grouping);
    66 static cl::alias NonMatchingFileNamesAlias("files-without-match", cl::desc("Alias for -L"), cl::aliasopt(NonMatchingFileNamesOnly));
    67 
    68 
    69 static cl::opt<bool> CountOnly("c", cl::desc("Count and display the matching lines per file only."), cl::cat(LegacyGrepOptions), cl::Grouping);
    70 static cl::alias CountOnlyLong("count", cl::desc("Alias for -c"), cl::aliasopt(CountOnly));
    71 
    7245
    7346static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<regex> <input file ...>"), cl::OneOrMore);
    74 
    75 static cl::opt<bool> EnterDirectoriesRecursively("r", cl::desc("Recursively process files within directories, (but follow only top-level symlinks unless -R)."), cl::cat(LegacyGrepOptions), cl::Grouping);
    76 static cl::opt<bool> FollowSubdirectorySymlinks("R", cl::desc("Recursively process files within directories, following symlinks at all levels."), cl::cat(LegacyGrepOptions), cl::Grouping);
    77 static cl::opt<bool> CaseInsensitive("i", cl::desc("Ignore case distinctions in the pattern and the file."), cl::cat(LegacyGrepOptions), cl::Grouping);
    78 static cl::alias CaseInsensitiveAlisas("ignore-case", cl::desc("Ignore case distinctions in the pattern and the file."), cl::aliasopt(CaseInsensitive));
    7947
    8048static cl::list<std::string> regexVector("e", cl::desc("Regular expression"), cl::ZeroOrMore, cl::cat(LegacyGrepOptions));
    8149static cl::opt<std::string> RegexFilename("f", cl::desc("Take regular expressions (one per line) from a file"), cl::value_desc("regex file"), cl::init(""), cl::cat(LegacyGrepOptions));
    82 static cl::opt<std::string> IRFileName("precompiled", cl::desc("Use precompiled regular expression"), cl::value_desc("LLVM IR file"), cl::init(""));
    8350
    8451static cl::opt<int> Threads("t", cl::desc("Total number of threads."), cl::init(1));
    85 
    86 static cl::opt<bool> GrepSupport("gs", cl::desc("Grep support. Pipe the output of icgrep into grep. \
    87          Gives you colored output + back-referencing capability."), cl::cat(EnhancedGrepOptions));
    8852
    8953static cl::opt<bool> MultiGrepKernels("enable-multigrep-kernels", cl::desc("Construct separated kernels for each regular expression"), cl::cat(EnhancedGrepOptions));
     
    13498        inputFiles.erase(inputFiles.begin());
    13599    }
    136     if (CaseInsensitive) {
     100    if (grep::IgnoreCaseFlag) {
    137101        globalFlags |= re::CASE_INSENSITIVE_MODE_FLAG;
    138102    }
     
    140104    std::vector<re::RE *> REs;
    141105    for (unsigned i = 0; i < regexVector.size(); i++) {
    142 #ifdef FUTURE
    143         re::RE * re_ast = re::RE_Parser::parse(regexVector[i], globalFlags, RegexpSyntax);
    144 #else
    145         re::RE * re_ast = re::RE_Parser::parse(regexVector[i], globalFlags);
    146 #endif
     106        re::RE * re_ast = re::RE_Parser::parse(regexVector[i], globalFlags, grep::RegexpSyntax);
    147107        REs.push_back(re_ast);
    148108    }
     
    169129
    170130    for (re::RE *& re_ast : REs) {
    171         if (WholeWordMatching) {
     131        if (grep::WordRegexpFlag) {
    172132            re_ast = re::makeSeq({re::makeWordBoundary(), re_ast, re::makeWordBoundary()});
    173133        }
    174         if (EntireLineMatching) {
     134        if (grep::LineRegexpFlag) {
    175135            re_ast = re::makeSeq({re::makeStart(), re_ast, re::makeEnd()});
    176136        }
     
    180140}
    181141
    182 std::vector<size_t> total_CountOnly;
     142std::vector<size_t> total_Count;
    183143std::mutex count_mutex;
    184144size_t fileCount;
     
    186146{
    187147    size_t fileIdx;
    188     GrepEngine * grepEngine = (GrepEngine *)args;
     148    grep::GrepEngine * grepEngine = (grep::GrepEngine *)args;
    189149
    190150    count_mutex.lock();
     
    194154
    195155    while (fileIdx < allFiles.size()) {
    196         total_CountOnly[fileIdx] = grepEngine->doGrep(allFiles[fileIdx], fileIdx);
     156        total_Count[fileIdx] = grepEngine->doGrep(allFiles[fileIdx], fileIdx);
    197157       
    198158        count_mutex.lock();
     
    206166
    207167
    208 // Returns true if the command line argument shouldn't be passed to icGrep or Grep.
    209 bool isArgUnwantedForAll(char *argument) {
    210     std::vector<std::string> unwantedFlags = {"-gs"};
    211     for (unsigned i = 0; i < unwantedFlags.size(); ++i){
    212         if (strcmp(argument, unwantedFlags[i].c_str()) == 0) {
    213             return true;
    214         }
    215     }
    216     return false;
    217 }
    218 // Filters out the command line strings that shouldn't be passed on to Grep
    219 bool isArgUnwantedForGrep(char *argument) {
    220 #ifdef FUTURE
    221     std::vector<std::string> unwantedFlags = {"-n", "-P", "-G", "-E", "-PRO"};
    222 #else
    223     std::vector<std::string> unwantedFlags = {"-n"};
    224 #endif
    225 
    226     for (unsigned i = 0; i < unwantedFlags.size(); ++i){
    227         if (strcmp(argument, unwantedFlags[i].c_str()) == 0) {
    228             return true;
    229         }
    230     }
    231 
    232     for (unsigned i = 0; i < inputFiles.size(); ++i){    // filter out input content files.
    233         if (strcmp(argument, inputFiles[i].c_str()) == 0) {
    234             return true;
    235         }
    236     }
    237 
    238     return false;
    239 }
    240 // Filters out the command line strings that shouldn't be passed on to IcGrep
    241 bool isArgUnwantedForIcGrep(char *argument) {
    242     bool isUnwanted = false;
    243     std::vector<std::string> unwantedFlags = {"-c"};
    244 
    245     for (unsigned i = 0; i < unwantedFlags.size(); ++i){
    246         if (strcmp(argument, unwantedFlags[i].c_str()) == 0) {
    247             isUnwanted = true;
    248         }
    249     }
    250 
    251     return isUnwanted;
    252 }
    253 
    254 /*
    255 * Constructs a shell command that calls icgrep and then pipes the output to grep.
    256 * Then executes this shell command using the "system()" function.
    257 * This allows the output to be colored since all output is piped to grep.
    258 */
    259 void pipeIcGrepOutputToGrep(int argc, char *argv[]) {
    260     std::string icGrepArguments = "";
    261     std::string grepArguments = "";
    262 
    263     // Construct the shell arguments for icgrep and grep
    264     // by filtering out the command line arguments passed into this process.
    265     for (int i = 1; i < argc; i++) {
    266         if (!isArgUnwantedForAll(argv[i])) {
    267 
    268             if (!isArgUnwantedForIcGrep(argv[i])) {
    269                 // Wrap everything in quotes since the arguments passed into this program had them stripped by bash.
    270                 icGrepArguments.append("\"");       
    271                 icGrepArguments.append(argv[i]);
    272                 icGrepArguments.append("\" ");
    273             }
    274 
    275             if (!isArgUnwantedForGrep(argv[i])) {
    276                 grepArguments.append("\"");
    277                 grepArguments.append(argv[i]);
    278                 grepArguments.append("\" ");
    279             }
    280         }
    281     }
    282 
    283 #ifdef FUTURE
    284     switch (RegexpSyntax) {
    285         case re::RE_Syntax::BRE:
    286             grepArguments.append("\"-G\" ");
    287             break;
    288         case re::RE_Syntax::ERE:
    289             grepArguments.append("\"-E\" ");
    290             break;
    291         case re::RE_Syntax::PROSITE:
    292             grepArguments.append("\"-PRO\" ");
    293             break;
    294         case re::RE_Syntax::PCRE:
    295             grepArguments.append("\"-P\" ");
    296             break;
    297         default:
    298             //TODO: handle fix string
    299             break;
    300     }
    301 #endif
    302 
    303     std::string systemCall = argv[0];
    304     systemCall.append(" ");
    305     systemCall.append(icGrepArguments);
    306     systemCall.append(" ");
    307 #ifdef FUTURE
    308     systemCall.append(" | grep --color=always ");
    309 #else
    310     systemCall.append(" | grep --color=always -P ");
    311 #endif
    312     systemCall.append(grepArguments);
    313 
    314     const auto rc = system(systemCall.c_str());
    315     if (LLVM_UNLIKELY(rc < 0)) {
    316         throw std::runtime_error("Error calling grep: " + std::string(strerror(errno)));
    317     }
    318 }
    319 
    320 
    321168// This is a stub, to be expanded later.
    322169bool excludeDirectory(boost::filesystem::path dirpath) { return dirpath.filename() == ".svn";}
     
    324171std::vector<std::string> getFullFileList(cl::list<std::string> & inputFiles) {
    325172    using namespace boost::filesystem;
    326     symlink_option follow_symlink = FollowSubdirectorySymlinks ? symlink_option::recurse : symlink_option::none;
     173    symlink_option follow_symlink = grep::DereferenceRecursiveFlag ? symlink_option::recurse : symlink_option::none;
    327174    std::vector<std::string> expanded_paths;
    328175    boost::system::error_code errc;
    329     if (FollowSubdirectorySymlinks) {
    330         EnterDirectoriesRecursively = true;
     176    if (grep::DereferenceRecursiveFlag) {
     177        grep::RecursiveFlag = true;
    331178    }
    332179    for (const std::string & f : inputFiles) {
     
    335182//        }
    336183        path p(f);
    337         if (LLVM_UNLIKELY(EnterDirectoriesRecursively && is_directory(p))) {
     184        if (LLVM_UNLIKELY(grep::RecursiveFlag && is_directory(p))) {
    338185            if (!excludeDirectory(p)) {
    339186                recursive_directory_iterator di(p, follow_symlink, errc), end;
     
    367214
    368215int main(int argc, char *argv[]) {
     216   
    369217    llvm::install_fatal_error_handler(&icgrep_error_handler);
    370218    AddParabixVersionPrinter();
    371219#ifndef USE_LLVM_3_6
    372     cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&LegacyGrepOptions, &EnhancedGrepOptions, re::re_toolchain_flags(), pablo::pablo_toolchain_flags(), codegen::codegen_flags()});
     220    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&LegacyGrepOptions, &EnhancedGrepOptions, grep::grep_regexp_flags(), grep::grep_output_flags(), re::re_toolchain_flags(), pablo::pablo_toolchain_flags(), codegen::codegen_flags()});
    373221#endif
    374222    cl::ParseCommandLineOptions(argc, argv);
    375 #ifdef FUTURE
    376     if (RegexpSyntax == re::RE_Syntax::FixedStrings) {
     223    if (grep::RegexpSyntax == re::RE_Syntax::FixedStrings) {
    377224        llvm::report_fatal_error("Sorry, FixedStrings syntax is not fully supported\n.");
    378225    }
    379 #endif
    380226
    381227    const auto REs = readExpressions();
    382228
    383     if (GrepSupport) {  // Calls icgrep again on command line and passes output to grep.
    384         pipeIcGrepOutputToGrep(argc, argv);
    385         return 0;   // icgrep is called again, so we need to end this process.
    386     }
    387 
    388 
    389229    allFiles = getFullFileList(inputFiles);
    390230
    391     GrepEngine grepEngine;
     231    grep::GrepEngine grepEngine;
    392232
    393233    if (allFiles.empty()) {
    394234
    395         grepEngine.grepCodeGen(REs, CountOnly, UTF_16, GrepSource::StdIn);
     235        grepEngine.grepCodeGen(REs, grep::Mode, UTF_16, GrepSource::StdIn);
    396236        allFiles = { "-" };
    397         initFileResult(allFiles);
    398         total_CountOnly.resize(1);
    399         total_CountOnly[0] = grepEngine.doGrep(STDIN_FILENO, 0);
     237        grep::initFileResult(allFiles);
     238        total_Count.resize(1);
     239        total_Count[0] = grepEngine.doGrep(STDIN_FILENO, 0);
    400240
    401241    } else {
     
    404244       
    405245        if(codegen::NVPTX){
    406             grepEngine.grepCodeGen_nvptx(REs, CountOnly, UTF_16);
     246            grepEngine.grepCodeGen_nvptx(REs, grep::Mode, UTF_16);
    407247            for (unsigned i = 0; i != allFiles.size(); ++i) {
    408248                grepEngine.doGrep(allFiles[i]);
     
    411251        }
    412252        else{
    413             grepEngine.grepCodeGen(REs, CountOnly, UTF_16, GrepSource::File);
    414         }
    415 
    416         if (FileNamesOnly && NonMatchingFileNamesOnly) {
    417             // Strange request: print names of all matching files and all non-matching files: i.e., all of them.
    418             // (Although GNU grep prints nothing.)
    419             for (auto & f : allFiles) {
    420                 if (boost::filesystem::exists(f)) {
    421                     std::cout << f << "\n";
    422                 } else {
    423                     std::cerr << "Error: cannot open " << f << " for processing. Skipped.\n";
    424                 }
    425             }
    426             exit(0);
    427         }
    428 
    429         if (FileNamesOnly) {
    430             llvm::report_fatal_error("Sorry, -l/-files-with-matches not yet supported\n.");
    431         }
    432         if (NonMatchingFileNamesOnly) {
    433             llvm::report_fatal_error("Sorry, -L/-files-without-match not yet supported\n.");
    434         }
    435         initFileResult(allFiles);
    436         total_CountOnly.resize(allFiles.size());
     253            grepEngine.grepCodeGen(REs, grep::Mode, UTF_16, GrepSource::File);
     254        }
     255
     256        grep::initFileResult(allFiles);
     257        total_Count.resize(allFiles.size());
    437258
    438259        if (Threads <= 1) {
    439260            for (unsigned i = 0; i != allFiles.size(); ++i) {
    440                 total_CountOnly[i] = grepEngine.doGrep(allFiles[i], i);
     261                total_Count[i] = grepEngine.doGrep(allFiles[i], i);
    441262            }
    442263        } else if (Threads > 1) {
     
    461282    }
    462283   
    463     PrintResult(CountOnly, total_CountOnly);
     284    grep::PrintResult(grep::Mode, total_Count);
    464285   
    465286    return 0;
Note: See TracChangeset for help on using the changeset viewer.