source: icGREP/icgrep-devel/icgrep/icgrep.cpp @ 5156

Last change on this file since 5156 was 5156, checked in by nmedfort, 3 years ago

Work on multiplexing and distribution passes + a few AST modification bug fixes.

File size: 9.8 KB
Line 
/*
 *  Copyright (c) 2016 International Characters.
 *  This software is licensed to the public under the Open Software License 3.0.
 *  icgrep is a trademark of International Characters.
 */
6
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream> // MEEE
#include <mutex>
#include <stdexcept>
#include <string>
#include <vector>

#include <pthread.h>

#include <boost/uuid/sha1.hpp>
#include <llvm/Support/CommandLine.h>

#include <re/re_alt.h>
#include <re/re_parser.h>
#include <grep_engine.h>
#include <toolchain.h>
#include <re/re_toolchain.h>
#include <pablo/pablo_toolchain.h>

#ifdef PRINT_TIMING_INFORMATION
#include <hrtime.h>
#include <util/papi_helper.hpp>
#endif
28
29static cl::OptionCategory LegacyGrepOptions("A. Standard Grep Options",
30                                       "These are standard grep options intended for compatibility with typical grep usage.");
31static cl::opt<bool> UTF_16("UTF-16", cl::desc("Regular expressions over the UTF-16 representation of Unicode."), cl::cat(LegacyGrepOptions));
32static cl::OptionCategory EnhancedGrepOptions("B. Enhanced Grep Options",
33                                       "These are additional options for icgrep functionality and performance.");
34static cl::opt<bool> CountOnly("c", cl::desc("Count and display the matching lines per file only."), cl::cat(LegacyGrepOptions));
35static cl::alias CountOnlyLong("count", cl::desc("Alias for -c"), cl::aliasopt(CountOnly));
36
37static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<regex> <input file ...>"), cl::OneOrMore);
38
39static cl::opt<bool> CaseInsensitive("i", cl::desc("Ignore case distinctions in the pattern and the file."), cl::cat(LegacyGrepOptions));
40
41
42static cl::list<std::string> regexVector("e", cl::desc("Regular expression"), cl::ZeroOrMore, cl::cat(LegacyGrepOptions));
43static cl::opt<std::string> RegexFilename("f", cl::desc("Take regular expressions (one per line) from a file"), cl::value_desc("regex file"), cl::init(""), cl::cat(LegacyGrepOptions));
44static cl::opt<std::string> IRFileName("precompiled", cl::desc("Use precompiled regular expression"), cl::value_desc("LLVM IR file"), cl::init(""));
45
46static cl::opt<int> Threads("t", cl::desc("Total number of threads."), cl::init(1));
47
48static cl::opt<bool> GrepSupport("gs", cl::desc("Grep support. Pipe the output of icgrep into grep. \
49         Gives you colored output + back-referencing capability."), cl::cat(EnhancedGrepOptions));
50
51
52static std::string allREs;
53static re::ModeFlagSet globalFlags = 0;
54
55re::RE * get_icgrep_RE() {
56 
57    //std::vector<std::string> regexVector;
58    if (RegexFilename != "") {
59        std::ifstream regexFile(RegexFilename.c_str());
60        std::string r;
61        if (regexFile.is_open()) {
62            while (std::getline(regexFile, r)) {
63                regexVector.push_back(r);
64            }
65            regexFile.close();
66        }
67    }
68   
69    // if there are no regexes specified through -e or -f, the first positional argument
70    // must be a regex, not an input file.
71   
72    if (regexVector.size() == 0) {
73        regexVector.push_back(inputFiles[0]);
74        inputFiles.erase(inputFiles.begin());
75    }
76    if (CaseInsensitive) globalFlags |= re::CASE_INSENSITIVE_MODE_FLAG;
77
78 
79    std::vector<re::RE *> REs;
80    re::RE * re_ast = nullptr;
81    for (unsigned i = 0; i < regexVector.size(); i++) {
82        re_ast = re::RE_Parser::parse(regexVector[i], globalFlags);
83        REs.push_back(re_ast);
84        allREs += regexVector[i] + "\n";
85    }
86    if (REs.size() > 1) {
87        re_ast = re::makeAlt(REs.begin(), REs.end());
88    }
89   
90    return re_ast;
91}
92
93std::string sha1sum(const std::string & str) {
94    char buffer[41];    // 40 hex-digits and the terminating null
95    unsigned int digest[5];     // 160 bits in total
96
97    boost::uuids::detail::sha1 sha1;
98    sha1.process_bytes(str.c_str(), str.size());
99    sha1.get_digest(digest);
100    snprintf(buffer, sizeof(buffer), "%.8x%.8x%.8x%.8x%.8x",
101             digest[0], digest[1], digest[2], digest[3], digest[4]);
102    return std::string(buffer);
103}
104
105std::vector<size_t> total_CountOnly;
106std::mutex count_mutex;
107size_t fileCount;
108void *DoGrep(void *args)
109{
110    size_t fileIdx;
111    GrepEngine * grepEngine = (GrepEngine *)args;
112
113    count_mutex.lock();
114    fileIdx = fileCount;
115    fileCount++;
116    count_mutex.unlock();
117
118    while (fileIdx < inputFiles.size()){
119        grepEngine->doGrep(inputFiles[fileIdx], fileIdx, CountOnly, total_CountOnly, UTF_16);
120       
121        count_mutex.lock();
122        fileIdx = fileCount;
123        fileCount++;
124        count_mutex.unlock();
125    }
126
127    pthread_exit(NULL);
128}
129
130
131// Returns true if the command line argument shouldn't be passed to icGrep or Grep.
132bool isArgUnwantedForAll(char *argument) {
133    std::vector<std::string> unwantedFlags = {"-gs"};
134    for (unsigned i = 0; i < unwantedFlags.size(); ++i){
135        if (strcmp(argument, unwantedFlags[i].c_str()) == 0) {
136            return true;
137        }
138    }
139    return false;
140}
141// Filters out the command line strings that shouldn't be passed on to Grep
142bool isArgUnwantedForGrep(char *argument) {
143    std::vector<std::string> unwantedFlags = {"-n"};
144
145    for (unsigned i = 0; i < inputFiles.size(); ++i){
146        if (strcmp(argument, unwantedFlags[i].c_str()) == 0) {
147            return true;
148        }
149    }
150
151    for (unsigned i = 0; i < inputFiles.size(); ++i){    // filter out input content files.
152        if (strcmp(argument, inputFiles[i].c_str()) == 0) {
153            return true;
154        }
155    }
156
157    return false;
158}
159// Filters out the command line strings that shouldn't be passed on to IcGrep
160bool isArgUnwantedForIcGrep(char *argument) {
161    bool isUnwated = false;
162    std::vector<std::string> unwantedFlags = {"-c"};
163
164    for (unsigned i = 0; i < unwantedFlags.size(); ++i){
165        if (strcmp(argument, unwantedFlags[i].c_str()) == 0) {
166            isUnwated = true;
167        }
168    }
169
170    return isUnwated;
171}
172
173/*
174* Constructs a shell command that calls icgrep and then pipes the output to grep.
175* Then executs this shell command using the "system()" function.
176* This allows the output to be colored since all output is piped to grep.
177*/ 
178void pipeIcGrepOutputToGrep(int argc, char *argv[]) {
179    std::string icGrepArguments = "";
180    std::string grepArguments = "";
181
182    // Construct the shell arguments for icgrep and grep
183    // by filtering out the command line arguments passed into this process.
184    for (int i = 1; i < argc; i++) {
185        if (!isArgUnwantedForAll(argv[i])) {
186
187            if (!isArgUnwantedForIcGrep(argv[i])) {
188                // Wrap everything in quotes since the arguments passed into this program had them stripped by bash.
189                icGrepArguments.append("\"");       
190                icGrepArguments.append(argv[i]);
191                icGrepArguments.append("\" ");
192            }
193
194            if (!isArgUnwantedForGrep(argv[i])) {
195                grepArguments.append("\"");
196                grepArguments.append(argv[i]);
197                grepArguments.append("\" ");
198            }
199        }
200    }
201
202    std::string systemCall = argv[0];
203    systemCall.append(" ");
204    systemCall.append(icGrepArguments);
205    systemCall.append(" ");
206    systemCall.append(" | grep --color=always -P ");
207    systemCall.append(grepArguments);
208
209    system(systemCall.c_str());
210}
211
212
213int main(int argc, char *argv[]) {
214    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&LegacyGrepOptions, &EnhancedGrepOptions, re::re_toolchain_flags(), pablo::pablo_toolchain_flags(), codegen::codegen_flags()});
215    cl::ParseCommandLineOptions(argc, argv);
216   
217    re::RE * re_ast = get_icgrep_RE();
218    std::string module_name = "grepcode:" + sha1sum(allREs) + ":" + std::to_string(globalFlags);
219
220    if (GrepSupport) {  // Calls icgrep again on command line and passes output to grep.
221        pipeIcGrepOutputToGrep(argc, argv);
222        return 0;   // icgrep is called again, so we need to end this process.
223    }
224   
225    GrepEngine grepEngine;
226    grepEngine.grepCodeGen(module_name, re_ast, CountOnly, UTF_16);
227    //std::cerr << "grepCodeGen complete";
228
229    releaseSlabAllocatorMemory();
230    initResult(inputFiles);
231    for (unsigned i=0; i<inputFiles.size(); ++i){
232        total_CountOnly.push_back(0);
233    }
234
235    if (Threads <= 1) {
236
237        #ifdef PRINT_TIMING_INFORMATION
238        // PAPI_RES_STL, PAPI_STL_CCY, PAPI_FUL_CCY, PAPI_MEM_WCY
239        // PAPI_RES_STL, PAPI_BR_MSP, PAPI_LST_INS, PAPI_L1_TCM
240        papi::PapiCounter<4> papiCounters({PAPI_RES_STL, PAPI_STL_CCY, PAPI_FUL_CCY, PAPI_MEM_WCY});
241        #endif
242        for (unsigned i = 0; i != inputFiles.size(); ++i) {
243            #ifdef PRINT_TIMING_INFORMATION
244            papiCounters.start();
245            const timestamp_t execution_start = read_cycle_counter();
246            #endif
247            grepEngine.doGrep(inputFiles[i], i, CountOnly, total_CountOnly, UTF_16);
248            #ifdef PRINT_TIMING_INFORMATION
249            const timestamp_t execution_end = read_cycle_counter();
250            papiCounters.stop();
251            std::cerr << "EXECUTION TIME: " << inputFiles[i] << ":" << "CYCLES|" << (execution_end - execution_start) << papiCounters << std::endl;
252            #endif
253        }       
254    } else if (Threads > 1) {
255        const unsigned numOfThreads = Threads; // <- convert the command line value into an integer to allow stack allocation
256        pthread_t threads[numOfThreads];
257
258        for(unsigned long i = 0; i < numOfThreads; ++i){
259            const int rc = pthread_create(&threads[i], NULL, DoGrep, (void *)&grepEngine);
260            if (rc) {
261                throw std::runtime_error("Failed to create thread: code " + std::to_string(rc));
262            }
263        }
264
265        for(unsigned i = 0; i < numOfThreads; ++i) {
266            void * status = nullptr;
267            const int rc = pthread_join(threads[i], &status);
268            if (rc) {
269                throw std::runtime_error("Failed to join thread: code " + std::to_string(rc));
270            }
271        }
272    }
273   
274    PrintResult(CountOnly, total_CountOnly);
275   
276    return 0;
277}
Note: See TracBrowser for help on using the repository browser.