source: icGREP/icgrep-devel/icgrep/icgrep.cpp @ 5161

Last change on this file since 5161 was 5161, checked in by cameron, 3 years ago

Override LLVM error_handler for return code 2; convert ParseFailure to LLVM fatal error.

File size: 10.7 KB
RevLine 
[3850]1/*
[4947]2 *  Copyright (c) 2016 International Characters.
[3850]3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
[4961]7#include <cstdio>
[5025]8#include <vector>
[4730]9#include <llvm/Support/CommandLine.h>
[5161]10#include <llvm/Support/ErrorHandling.h>
11#include <llvm/Support/Signals.h>
[4968]12#include <re/re_alt.h>
[4734]13#include <re/re_parser.h>
[4946]14#include <grep_engine.h>
[4968]15#include <fstream>
16#include <string>
[3850]17
[4961]18#include <boost/uuid/sha1.hpp>
[4967]19#include <toolchain.h>
[5030]20#include <re/re_toolchain.h>
[5031]21#include <pablo/pablo_toolchain.h>
[4972]22#include <mutex>
[4961]23
[5161]24
[5016]25#include <iostream> // MEEE
[5156]26
27#ifdef PRINT_TIMING_INFORMATION
28#include <hrtime.h>
29#include <util/papi_helper.hpp>
30#endif
31
[5026]32static cl::OptionCategory LegacyGrepOptions("A. Standard Grep Options",
33                                       "These are standard grep options intended for compatibility with typical grep usage.");
[5045]34static cl::opt<bool> UTF_16("UTF-16", cl::desc("Regular expressions over the UTF-16 representation of Unicode."), cl::cat(LegacyGrepOptions));
[5026]35static cl::OptionCategory EnhancedGrepOptions("B. Enhanced Grep Options",
36                                       "These are additional options for icgrep functionality and performance.");
37static cl::opt<bool> CountOnly("c", cl::desc("Count and display the matching lines per file only."), cl::cat(LegacyGrepOptions));
[5025]38static cl::alias CountOnlyLong("count", cl::desc("Alias for -c"), cl::aliasopt(CountOnly));
[5016]39
[4544]40static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<regex> <input file ...>"), cl::OneOrMore);
41
[5026]42static cl::opt<bool> CaseInsensitive("i", cl::desc("Ignore case distinctions in the pattern and the file."), cl::cat(LegacyGrepOptions));
[4544]43
[5016]44
[5026]45static cl::list<std::string> regexVector("e", cl::desc("Regular expression"), cl::ZeroOrMore, cl::cat(LegacyGrepOptions));
46static cl::opt<std::string> RegexFilename("f", cl::desc("Take regular expressions (one per line) from a file"), cl::value_desc("regex file"), cl::init(""), cl::cat(LegacyGrepOptions));
47static cl::opt<std::string> IRFileName("precompiled", cl::desc("Use precompiled regular expression"), cl::value_desc("LLVM IR file"), cl::init(""));
[4544]48
[4967]49static cl::opt<int> Threads("t", cl::desc("Total number of threads."), cl::init(1));
[4544]50
[5016]51static cl::opt<bool> GrepSupport("gs", cl::desc("Grep support. Pipe the output of icgrep into grep. \
[5026]52         Gives you colored output + back-referencing capability."), cl::cat(EnhancedGrepOptions));
[5016]53
[5161]54//
55// Handler for errors reported through llvm::report_fatal_error.  Report
56// and signal error code 2 (grep convention).
57//
58static void icgrep_error_handler(void *UserData, const std::string &Message,
59                             bool GenCrashDiag) {
[5016]60
[5161]61    // Modified from LLVM's internal report_fatal_error logic.
62    SmallVector<char, 64> Buffer;
63    raw_svector_ostream OS(Buffer);
64    OS << "icgrep ERROR: " << Message << "\n";
65    StringRef MessageStr = OS.str();
66    ssize_t written = ::write(2, MessageStr.data(), MessageStr.size());
67    (void)written; // If something went wrong, we deliberately just give up.
68
69    // Run the interrupt handlers to make sure any special cleanups get done, in
70    // particular that we remove files registered with RemoveFileOnSignal.
71    llvm::sys::RunInterruptHandlers();
72    exit(2);
73}
74
[4961]75static std::string allREs;
[4963]76static re::ModeFlagSet globalFlags = 0;
[4734]77
78re::RE * get_icgrep_RE() {
79 
80    //std::vector<std::string> regexVector;
81    if (RegexFilename != "") {
82        std::ifstream regexFile(RegexFilename.c_str());
83        std::string r;
84        if (regexFile.is_open()) {
85            while (std::getline(regexFile, r)) {
86                regexVector.push_back(r);
87            }
88            regexFile.close();
89        }
90    }
91   
92    // if there are no regexes specified through -e or -f, the first positional argument
93    // must be a regex, not an input file.
94   
95    if (regexVector.size() == 0) {
96        regexVector.push_back(inputFiles[0]);
[5015]97        inputFiles.erase(inputFiles.begin());
[4734]98    }
99    if (CaseInsensitive) globalFlags |= re::CASE_INSENSITIVE_MODE_FLAG;
100
101 
102    std::vector<re::RE *> REs;
103    re::RE * re_ast = nullptr;
[4750]104    for (unsigned i = 0; i < regexVector.size(); i++) {
[4868]105        re_ast = re::RE_Parser::parse(regexVector[i], globalFlags);
[4734]106        REs.push_back(re_ast);
[4961]107        allREs += regexVector[i] + "\n";
[4734]108    }
109    if (REs.size() > 1) {
110        re_ast = re::makeAlt(REs.begin(), REs.end());
111    }
112   
113    return re_ast;
114}
115
[4961]116std::string sha1sum(const std::string & str) {
117    char buffer[41];    // 40 hex-digits and the terminating null
118    unsigned int digest[5];     // 160 bits in total
[4775]119
[4961]120    boost::uuids::detail::sha1 sha1;
121    sha1.process_bytes(str.c_str(), str.size());
122    sha1.get_digest(digest);
123    snprintf(buffer, sizeof(buffer), "%.8x%.8x%.8x%.8x%.8x",
124             digest[0], digest[1], digest[2], digest[3], digest[4]);
125    return std::string(buffer);
126}
127
[5107]128std::vector<size_t> total_CountOnly;
[4972]129std::mutex count_mutex;
130size_t fileCount;
[4979]131void *DoGrep(void *args)
[4967]132{
[4972]133    size_t fileIdx;
[4979]134    GrepEngine * grepEngine = (GrepEngine *)args;
[4967]135
[4972]136    count_mutex.lock();
[5028]137    fileIdx = fileCount;
[4972]138    fileCount++;
139    count_mutex.unlock();
[4979]140
[4972]141    while (fileIdx < inputFiles.size()){
[5045]142        grepEngine->doGrep(inputFiles[fileIdx], fileIdx, CountOnly, total_CountOnly, UTF_16);
[4972]143       
144        count_mutex.lock();
[5028]145        fileIdx = fileCount;
[4972]146        fileCount++;
147        count_mutex.unlock();
148    }
149
[4967]150    pthread_exit(NULL);
151}
152
[5016]153
// Tells whether a command line argument must be dropped from both the icgrep
// and the grep command lines assembled by pipeIcGrepOutputToGrep.
// Returns true only for an exact match with one of the listed flags.
bool isArgUnwantedForAll(char *argument) {
    const std::vector<std::string> suppressed = {"-gs"};
    for (const std::string & flag : suppressed) {
        if (flag == argument) {
            return true;
        }
    }
    return false;
}
164// Filters out the command line strings that shouldn't be passed on to Grep
165bool isArgUnwantedForGrep(char *argument) {
166    std::vector<std::string> unwantedFlags = {"-n"};
167
[5037]168    for (unsigned i = 0; i < inputFiles.size(); ++i){
[5016]169        if (strcmp(argument, unwantedFlags[i].c_str()) == 0) {
[5037]170            return true;
[5016]171        }
172    }
173
[5037]174    for (unsigned i = 0; i < inputFiles.size(); ++i){    // filter out input content files.
[5016]175        if (strcmp(argument, inputFiles[i].c_str()) == 0) {
[5037]176            return true;
[5016]177        }
178    }
179
[5037]180    return false;
[5016]181}
// Filters out the command line strings that shouldn't be passed on to icgrep
// when it is re-invoked by pipeIcGrepOutputToGrep.
// Returns true only for an exact match with one of the listed flags.
bool isArgUnwantedForIcGrep(char *argument) {
    const std::vector<std::string> suppressed = {"-c"};

    for (const std::string & flag : suppressed) {
        if (flag == argument) {
            return true;
        }
    }

    return false;
}
195
196/*
197* Constructs a shell command that calls icgrep and then pipes the output to grep.
198* Then executs this shell command using the "system()" function.
199* This allows the output to be colored since all output is piped to grep.
200*/ 
201void pipeIcGrepOutputToGrep(int argc, char *argv[]) {
202    std::string icGrepArguments = "";
203    std::string grepArguments = "";
204
205    // Construct the shell arguments for icgrep and grep
206    // by filtering out the command line arguments passed into this process.
[5037]207    for (int i = 1; i < argc; i++) {
[5016]208        if (!isArgUnwantedForAll(argv[i])) {
209
210            if (!isArgUnwantedForIcGrep(argv[i])) {
[5138]211                // Wrap everything in quotes since the arguments passed into this program had them stripped by bash.
212                icGrepArguments.append("\"");       
[5016]213                icGrepArguments.append(argv[i]);
[5138]214                icGrepArguments.append("\" ");
[5016]215            }
216
217            if (!isArgUnwantedForGrep(argv[i])) {
[5138]218                grepArguments.append("\"");
[5016]219                grepArguments.append(argv[i]);
[5138]220                grepArguments.append("\" ");
[5016]221            }
222        }
223    }
224
[5154]225    std::string systemCall = argv[0];
226    systemCall.append(" ");
[5016]227    systemCall.append(icGrepArguments);
228    systemCall.append(" ");
229    systemCall.append(" | grep --color=always -P ");
230    systemCall.append(grepArguments);
[5138]231
[5016]232    system(systemCall.c_str());
233}
234
235
[4325]236int main(int argc, char *argv[]) {
[5161]237    llvm::install_fatal_error_handler(&icgrep_error_handler);
[5036]238    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&LegacyGrepOptions, &EnhancedGrepOptions, re::re_toolchain_flags(), pablo::pablo_toolchain_flags(), codegen::codegen_flags()});
[4544]239    cl::ParseCommandLineOptions(argc, argv);
[4353]240   
[4939]241    re::RE * re_ast = get_icgrep_RE();
[4963]242    std::string module_name = "grepcode:" + sha1sum(allREs) + ":" + std::to_string(globalFlags);
[5016]243
244    if (GrepSupport) {  // Calls icgrep again on command line and passes output to grep.
245        pipeIcGrepOutputToGrep(argc, argv);
246        return 0;   // icgrep is called again, so we need to end this process.
247    }
[4730]248   
[4979]249    GrepEngine grepEngine;
[5045]250    grepEngine.grepCodeGen(module_name, re_ast, CountOnly, UTF_16);
[5052]251    //std::cerr << "grepCodeGen complete";
[5087]252
253    releaseSlabAllocatorMemory();
[5025]254    initResult(inputFiles);
[5037]255    for (unsigned i=0; i<inputFiles.size(); ++i){
[5025]256        total_CountOnly.push_back(0);
257    }
[4967]258
[4968]259    if (Threads <= 1) {
[5156]260
261        #ifdef PRINT_TIMING_INFORMATION
262        // PAPI_RES_STL, PAPI_STL_CCY, PAPI_FUL_CCY, PAPI_MEM_WCY
263        // PAPI_RES_STL, PAPI_BR_MSP, PAPI_LST_INS, PAPI_L1_TCM
264        papi::PapiCounter<4> papiCounters({PAPI_RES_STL, PAPI_STL_CCY, PAPI_FUL_CCY, PAPI_MEM_WCY});
265        #endif
[5015]266        for (unsigned i = 0; i != inputFiles.size(); ++i) {
[5156]267            #ifdef PRINT_TIMING_INFORMATION
268            papiCounters.start();
269            const timestamp_t execution_start = read_cycle_counter();
270            #endif
[5045]271            grepEngine.doGrep(inputFiles[i], i, CountOnly, total_CountOnly, UTF_16);
[5156]272            #ifdef PRINT_TIMING_INFORMATION
273            const timestamp_t execution_end = read_cycle_counter();
274            papiCounters.stop();
275            std::cerr << "EXECUTION TIME: " << inputFiles[i] << ":" << "CYCLES|" << (execution_end - execution_start) << papiCounters << std::endl;
276            #endif
[4967]277        }       
[4968]278    } else if (Threads > 1) {
279        const unsigned numOfThreads = Threads; // <- convert the command line value into an integer to allow stack allocation
280        pthread_t threads[numOfThreads];
[4967]281
[4968]282        for(unsigned long i = 0; i < numOfThreads; ++i){
[4979]283            const int rc = pthread_create(&threads[i], NULL, DoGrep, (void *)&grepEngine);
[4968]284            if (rc) {
[5161]285                llvm::report_fatal_error("Failed to create thread: code " + std::to_string(rc));
[4968]286            }
[4947]287        }
[4967]288
[4968]289        for(unsigned i = 0; i < numOfThreads; ++i) {
290            void * status = nullptr;
291            const int rc = pthread_join(threads[i], &status);
[4967]292            if (rc) {
[5161]293                llvm::report_fatal_error("Failed to join thread: code " + std::to_string(rc));
[4967]294            }
295        }
[3850]296    }
[5063]297   
[5025]298    PrintResult(CountOnly, total_CountOnly);
[4327]299   
[3850]300    return 0;
301}
Note: See TracBrowser for help on using the repository browser.