source: icGREP/icgrep-devel/icgrep/icgrep.cpp @ 5186

Last change on this file since 5186 was 5186, checked in by faldebey, 3 years ago

LLVM-3.6 Support

File size: 14.4 KB
RevLine 
[3850]1/*
[4947]2 *  Copyright (c) 2016 International Characters.
[3850]3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
[4961]7#include <cstdio>
[5025]8#include <vector>
[4730]9#include <llvm/Support/CommandLine.h>
[5161]10#include <llvm/Support/ErrorHandling.h>
11#include <llvm/Support/Signals.h>
[5186]12#include <llvm/Config/config.h>
[4968]13#include <re/re_alt.h>
[4734]14#include <re/re_parser.h>
[4946]15#include <grep_engine.h>
[4968]16#include <fstream>
17#include <string>
[3850]18
[4961]19#include <boost/uuid/sha1.hpp>
[4967]20#include <toolchain.h>
[5030]21#include <re/re_toolchain.h>
[5031]22#include <pablo/pablo_toolchain.h>
[4972]23#include <mutex>
[5163]24#include <boost/filesystem.hpp>
[4961]25
[5016]26#include <iostream> // MEEE
[5156]27
28#ifdef PRINT_TIMING_INFORMATION
29#include <hrtime.h>
30#include <util/papi_helper.hpp>
31#endif
32
[5026]33static cl::OptionCategory LegacyGrepOptions("A. Standard Grep Options",
34                                       "These are standard grep options intended for compatibility with typical grep usage.");
[5167]35
36#ifdef FUTURE
37static cl::OptionCategory RegexpOptions("Regular Expression Interpretation", "These options control regular expression interpretation");
[5180]38static cl::opt<re::RE_Syntax> RegexpSyntax(cl::desc("Regular expression syntax:"),
[5167]39    cl::values(
[5180]40        clEnumValN(re::RE_Syntax::FixedStrings, "F", "Fixed strings, separated by newlines"),
41        clEnumValN(re::RE_Syntax::BRE, "G", "Posix basic regular expression (BRE) syntax"),
42        clEnumValN(re::RE_Syntax::ERE, "E", "Posix extended regular expression (ERE) syntax"),
43        clEnumValN(re::RE_Syntax::PCRE, "P", "Perl-compatible regular expression (PCRE) syntax - default"),
44               clEnumValEnd), cl::cat(LegacyGrepOptions), cl::Grouping, cl::init(re::RE_Syntax::PCRE));
[5167]45#endif
46
[5045]47static cl::opt<bool> UTF_16("UTF-16", cl::desc("Regular expressions over the UTF-16 representation of Unicode."), cl::cat(LegacyGrepOptions));
[5026]48static cl::OptionCategory EnhancedGrepOptions("B. Enhanced Grep Options",
49                                       "These are additional options for icgrep functionality and performance.");
[5163]50static cl::opt<bool> CountOnly("c", cl::desc("Count and display the matching lines per file only."), cl::cat(LegacyGrepOptions), cl::Grouping);
[5025]51static cl::alias CountOnlyLong("count", cl::desc("Alias for -c"), cl::aliasopt(CountOnly));
[5016]52
[4544]53static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<regex> <input file ...>"), cl::OneOrMore);
54
[5163]55static cl::opt<bool> EnterDirectoriesRecursively("r", cl::desc("Recursively process files within directories, (but follow only top-level symlinks unless -R)."), cl::cat(LegacyGrepOptions), cl::Grouping);
56static cl::opt<bool> FollowSubdirectorySymlinks("R", cl::desc("Recursively process files within directories, following symlinks at all levels."), cl::cat(LegacyGrepOptions), cl::Grouping);
57static cl::opt<bool> CaseInsensitive("i", cl::desc("Ignore case distinctions in the pattern and the file."), cl::cat(LegacyGrepOptions), cl::Grouping);
[4544]58
[5016]59
[5026]60static cl::list<std::string> regexVector("e", cl::desc("Regular expression"), cl::ZeroOrMore, cl::cat(LegacyGrepOptions));
61static cl::opt<std::string> RegexFilename("f", cl::desc("Take regular expressions (one per line) from a file"), cl::value_desc("regex file"), cl::init(""), cl::cat(LegacyGrepOptions));
62static cl::opt<std::string> IRFileName("precompiled", cl::desc("Use precompiled regular expression"), cl::value_desc("LLVM IR file"), cl::init(""));
[4544]63
[4967]64static cl::opt<int> Threads("t", cl::desc("Total number of threads."), cl::init(1));
[4544]65
[5016]66static cl::opt<bool> GrepSupport("gs", cl::desc("Grep support. Pipe the output of icgrep into grep. \
[5026]67         Gives you colored output + back-referencing capability."), cl::cat(EnhancedGrepOptions));
[5016]68
[5163]69
70static std::vector<std::string> allFiles;
[5161]71//
72// Handler for errors reported through llvm::report_fatal_error.  Report
73// and signal error code 2 (grep convention).
74//
75static void icgrep_error_handler(void *UserData, const std::string &Message,
76                             bool GenCrashDiag) {
[5016]77
[5161]78    // Modified from LLVM's internal report_fatal_error logic.
79    SmallVector<char, 64> Buffer;
80    raw_svector_ostream OS(Buffer);
81    OS << "icgrep ERROR: " << Message << "\n";
82    StringRef MessageStr = OS.str();
83    ssize_t written = ::write(2, MessageStr.data(), MessageStr.size());
84    (void)written; // If something went wrong, we deliberately just give up.
85
86    // Run the interrupt handlers to make sure any special cleanups get done, in
87    // particular that we remove files registered with RemoveFileOnSignal.
88    llvm::sys::RunInterruptHandlers();
89    exit(2);
90}
91
[4961]92static std::string allREs;
[4963]93static re::ModeFlagSet globalFlags = 0;
[4734]94
95re::RE * get_icgrep_RE() {
96 
97    //std::vector<std::string> regexVector;
98    if (RegexFilename != "") {
99        std::ifstream regexFile(RegexFilename.c_str());
100        std::string r;
101        if (regexFile.is_open()) {
102            while (std::getline(regexFile, r)) {
103                regexVector.push_back(r);
104            }
105            regexFile.close();
106        }
107    }
108   
109    // if there are no regexes specified through -e or -f, the first positional argument
110    // must be a regex, not an input file.
111   
112    if (regexVector.size() == 0) {
113        regexVector.push_back(inputFiles[0]);
[5015]114        inputFiles.erase(inputFiles.begin());
[4734]115    }
116    if (CaseInsensitive) globalFlags |= re::CASE_INSENSITIVE_MODE_FLAG;
117
118 
119    std::vector<re::RE *> REs;
120    re::RE * re_ast = nullptr;
[4750]121    for (unsigned i = 0; i < regexVector.size(); i++) {
[5180]122#ifdef FUTURE
123        re_ast = re::RE_Parser::parse(regexVector[i], globalFlags, RegexpSyntax);
124#else
[4868]125        re_ast = re::RE_Parser::parse(regexVector[i], globalFlags);
[5180]126#endif
[4734]127        REs.push_back(re_ast);
[4961]128        allREs += regexVector[i] + "\n";
[4734]129    }
130    if (REs.size() > 1) {
131        re_ast = re::makeAlt(REs.begin(), REs.end());
132    }
133   
134    return re_ast;
135}
136
[4961]137std::string sha1sum(const std::string & str) {
138    char buffer[41];    // 40 hex-digits and the terminating null
139    unsigned int digest[5];     // 160 bits in total
[4775]140
[4961]141    boost::uuids::detail::sha1 sha1;
142    sha1.process_bytes(str.c_str(), str.size());
143    sha1.get_digest(digest);
144    snprintf(buffer, sizeof(buffer), "%.8x%.8x%.8x%.8x%.8x",
145             digest[0], digest[1], digest[2], digest[3], digest[4]);
146    return std::string(buffer);
147}
148
[5107]149std::vector<size_t> total_CountOnly;
[4972]150std::mutex count_mutex;
151size_t fileCount;
[4979]152void *DoGrep(void *args)
[4967]153{
[4972]154    size_t fileIdx;
[4979]155    GrepEngine * grepEngine = (GrepEngine *)args;
[4967]156
[4972]157    count_mutex.lock();
[5028]158    fileIdx = fileCount;
[4972]159    fileCount++;
160    count_mutex.unlock();
[4979]161
[5163]162    while (fileIdx < allFiles.size()){
163        grepEngine->doGrep(allFiles[fileIdx], fileIdx, CountOnly, total_CountOnly, UTF_16);
[4972]164       
165        count_mutex.lock();
[5028]166        fileIdx = fileCount;
[4972]167        fileCount++;
168        count_mutex.unlock();
169    }
170
[4967]171    pthread_exit(NULL);
172}
173
[5016]174
// Tells whether a command line argument must be withheld from both the nested
// icgrep invocation and grep.
bool isArgUnwantedForAll(char *argument) {
    const std::string candidate(argument);
    for (const char * flag : {"-gs"}) {
        if (candidate == flag) {
            return true;
        }
    }
    return false;
}
185// Filters out the command line strings that shouldn't be passed on to Grep
186bool isArgUnwantedForGrep(char *argument) {
[5180]187#ifdef FUTURE
188    std::vector<std::string> unwantedFlags = {"-n", "-P", "-G", "-E"};
189#else
[5016]190    std::vector<std::string> unwantedFlags = {"-n"};
[5180]191#endif
[5016]192
[5180]193    for (unsigned i = 0; i < unwantedFlags.size(); ++i){
[5016]194        if (strcmp(argument, unwantedFlags[i].c_str()) == 0) {
[5037]195            return true;
[5016]196        }
197    }
198
[5037]199    for (unsigned i = 0; i < inputFiles.size(); ++i){    // filter out input content files.
[5016]200        if (strcmp(argument, inputFiles[i].c_str()) == 0) {
[5037]201            return true;
[5016]202        }
203    }
204
[5037]205    return false;
[5016]206}
// Tells whether a command line argument must be withheld from the nested
// icgrep invocation.
bool isArgUnwantedForIcGrep(char *argument) {
    const std::string candidate(argument);
    for (const char * flag : {"-c"}) {
        if (candidate == flag) {
            return true;
        }
    }
    return false;
}
220
221/*
222* Constructs a shell command that calls icgrep and then pipes the output to grep.
223* Then executs this shell command using the "system()" function.
224* This allows the output to be colored since all output is piped to grep.
225*/ 
226void pipeIcGrepOutputToGrep(int argc, char *argv[]) {
227    std::string icGrepArguments = "";
228    std::string grepArguments = "";
229
230    // Construct the shell arguments for icgrep and grep
231    // by filtering out the command line arguments passed into this process.
[5037]232    for (int i = 1; i < argc; i++) {
[5016]233        if (!isArgUnwantedForAll(argv[i])) {
234
235            if (!isArgUnwantedForIcGrep(argv[i])) {
[5138]236                // Wrap everything in quotes since the arguments passed into this program had them stripped by bash.
237                icGrepArguments.append("\"");       
[5016]238                icGrepArguments.append(argv[i]);
[5138]239                icGrepArguments.append("\" ");
[5016]240            }
241
242            if (!isArgUnwantedForGrep(argv[i])) {
[5138]243                grepArguments.append("\"");
[5016]244                grepArguments.append(argv[i]);
[5138]245                grepArguments.append("\" ");
[5016]246            }
247        }
248    }
249
[5180]250#ifdef FUTURE
251    switch (RegexpSyntax) {
252        case re::RE_Syntax::BRE:
253            grepArguments.append("\"-G\" ");
254            break;
255        case re::RE_Syntax::ERE:
256            grepArguments.append("\"-E\" ");
257            break;
258        case re::RE_Syntax::PCRE:
259            grepArguments.append("\"-P\" ");
260            break;
261        default:
262            //TODO: handle fix string
263            break;
264    }
265#endif
266
[5154]267    std::string systemCall = argv[0];
268    systemCall.append(" ");
[5016]269    systemCall.append(icGrepArguments);
270    systemCall.append(" ");
[5180]271#ifdef FUTURE
272    systemCall.append(" | grep --color=always ");
273#else
[5016]274    systemCall.append(" | grep --color=always -P ");
[5180]275#endif
[5016]276    systemCall.append(grepArguments);
[5138]277
[5016]278    system(systemCall.c_str());
279}
280
281
[5163]282// This is a stub, to be expanded later.
283bool excludeDirectory(boost::filesystem::path dirpath) { return dirpath.filename() == ".svn";}
284
285std::vector<std::string> getFullFileList(cl::list<std::string> & inputFiles) {
286    using namespace boost::filesystem;
287    symlink_option follow_symlink = FollowSubdirectorySymlinks ? symlink_option::recurse : symlink_option::none;
288    std::vector<std::string> expanded_paths;
289    boost::system::error_code errc;
290    if (FollowSubdirectorySymlinks) {
291        EnterDirectoriesRecursively = true;
292    }
293    for (auto & f : inputFiles) {
294        path p(f);
295        if (EnterDirectoriesRecursively && is_directory(p)) {
296            if (!excludeDirectory(p)) {
297                recursive_directory_iterator di(p, follow_symlink, errc), end;
298                if (errc) {
299                    // If we cannot enter the directory, keep it in the list of files.
300                    expanded_paths.push_back(f); 
301                    continue;
302                }
303                while (di != end) {
304                    auto & e = di->path();
305                    if (is_directory(e)) {
306                        if (excludeDirectory(e)) di.no_push();
307                    }
308                    else expanded_paths.push_back(e.string());
309                    di.increment(errc);
310                    if (errc) {
311                        expanded_paths.push_back(e.string()); 
312                    }
313                }
314            }
315        }
316        else expanded_paths.push_back(p.string());
317    }
318    return expanded_paths;
319}
320
321
[4325]322int main(int argc, char *argv[]) {
[5161]323    llvm::install_fatal_error_handler(&icgrep_error_handler);
[5186]324#if LLVM_VERSION_MINOR > 6
[5036]325    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&LegacyGrepOptions, &EnhancedGrepOptions, re::re_toolchain_flags(), pablo::pablo_toolchain_flags(), codegen::codegen_flags()});
[5186]326#endif
[4544]327    cl::ParseCommandLineOptions(argc, argv);
[5167]328#ifdef FUTURE
[5180]329    if (RegexpSyntax == re::RE_Syntax::FixedStrings) {
330        llvm::report_fatal_error("Sorry, FixedStrings syntax is not fully supported\n.");
[5167]331    }
332#endif
[4939]333    re::RE * re_ast = get_icgrep_RE();
[4963]334    std::string module_name = "grepcode:" + sha1sum(allREs) + ":" + std::to_string(globalFlags);
[5016]335
336    if (GrepSupport) {  // Calls icgrep again on command line and passes output to grep.
337        pipeIcGrepOutputToGrep(argc, argv);
338        return 0;   // icgrep is called again, so we need to end this process.
339    }
[4730]340   
[4979]341    GrepEngine grepEngine;
[5045]342    grepEngine.grepCodeGen(module_name, re_ast, CountOnly, UTF_16);
[5052]343    //std::cerr << "grepCodeGen complete";
[5087]344
345    releaseSlabAllocatorMemory();
[5163]346   
347    allFiles = getFullFileList(inputFiles);
348   
349    initResult(allFiles);
350    for (unsigned i=0; i < allFiles.size(); ++i){
[5025]351        total_CountOnly.push_back(0);
352    }
[4967]353
[4968]354    if (Threads <= 1) {
[5156]355
356        #ifdef PRINT_TIMING_INFORMATION
357        // PAPI_RES_STL, PAPI_STL_CCY, PAPI_FUL_CCY, PAPI_MEM_WCY
358        // PAPI_RES_STL, PAPI_BR_MSP, PAPI_LST_INS, PAPI_L1_TCM
359        papi::PapiCounter<4> papiCounters({PAPI_RES_STL, PAPI_STL_CCY, PAPI_FUL_CCY, PAPI_MEM_WCY});
360        #endif
[5163]361        for (unsigned i = 0; i != allFiles.size(); ++i) {
[5156]362            #ifdef PRINT_TIMING_INFORMATION
363            papiCounters.start();
364            const timestamp_t execution_start = read_cycle_counter();
365            #endif
[5163]366            grepEngine.doGrep(allFiles[i], i, CountOnly, total_CountOnly, UTF_16);
[5156]367            #ifdef PRINT_TIMING_INFORMATION
368            const timestamp_t execution_end = read_cycle_counter();
369            papiCounters.stop();
[5163]370            std::cerr << "EXECUTION TIME: " << allFiles[i] << ":" << "CYCLES|" << (execution_end - execution_start) << papiCounters << std::endl;
[5156]371            #endif
[4967]372        }       
[4968]373    } else if (Threads > 1) {
374        const unsigned numOfThreads = Threads; // <- convert the command line value into an integer to allow stack allocation
375        pthread_t threads[numOfThreads];
[4967]376
[4968]377        for(unsigned long i = 0; i < numOfThreads; ++i){
[4979]378            const int rc = pthread_create(&threads[i], NULL, DoGrep, (void *)&grepEngine);
[4968]379            if (rc) {
[5161]380                llvm::report_fatal_error("Failed to create thread: code " + std::to_string(rc));
[4968]381            }
[4947]382        }
[4967]383
[4968]384        for(unsigned i = 0; i < numOfThreads; ++i) {
385            void * status = nullptr;
386            const int rc = pthread_join(threads[i], &status);
[4967]387            if (rc) {
[5161]388                llvm::report_fatal_error("Failed to join thread: code " + std::to_string(rc));
[4967]389            }
390        }
[3850]391    }
[5063]392   
[5025]393    PrintResult(CountOnly, total_CountOnly);
[4327]394   
[3850]395    return 0;
396}
Note: See TracBrowser for help on using the repository browser.