source: icGREP/icgrep-devel/icgrep/toolchain.cpp @ 4944

Last change on this file since 4944 was 4944, checked in by lindanl, 3 years ago

64 bit version.

File size: 15.4 KB
RevLine 
[4801]1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <string>
8#include <iostream>
9#include <fstream>
10
11#include "utf_encoding.h"
12#include "pablo/pablo_compiler.h"
13#include <llvm/IR/Function.h>
14#include <llvm/IR/Module.h>
15#include <llvm/ExecutionEngine/ExecutionEngine.h>
16#include <llvm/ExecutionEngine/MCJIT.h>
17#include <llvm/IRReader/IRReader.h>
18#include <llvm/Support/CommandLine.h>
[4889]19#include <llvm/CodeGen/CommandFlags.h>
[4801]20#include <llvm/Support/SourceMgr.h>
21#include <llvm/Support/TargetSelect.h>
22#include <llvm/Support/Host.h>
23
[4900]24#include <IDISA/idisa_avx_builder.h>
25#include <IDISA/idisa_sse_builder.h>
[4944]26#include <IDISA/idisa_i64_builder.h>
[4801]27#ifndef DISABLE_PREGENERATED_UCD_FUNCTIONS
28#include <UCD/precompiled_properties.h>
29#endif
30#include <re/re_cc.h>
31#include <re/re_nullable.h>
32#include <re/re_simplifier.h>
33#include <re/re_alt.h>
34#include <re/parsefailure.h>
35#include <re/re_parser.h>
36#include <re/re_compiler.h>
37#include <utf8_encoder.h>
38#include <cc/cc_compiler.h>
39#include <pablo/pablo_compiler.h>
40#include <pablo/optimizers/pablo_simplifier.hpp>
[4854]41#include <pablo/optimizers/codemotionpass.h>
[4876]42#include <pablo/passes/flattenassociativedfg.h>
[4885]43#include <pablo/passes/factorizedfg.h>
[4939]44#ifdef ENABLE_MULTIPLEXING
[4801]45#include <pablo/optimizers/pablo_automultiplexing.hpp>
46#include <pablo/optimizers/pablo_bddminimization.h>
[4887]47#include <pablo/optimizers/distributivepass.h>
[4896]48#include <pablo/optimizers/schedulingprepass.h>
[4801]49#endif
50#include <pablo/function.h>
51#include <pablo/analysis/pabloverifier.hpp>
52#include <re/printer_re.h>
53#include <pablo/printer_pablos.h>
54
[4939]55#include "do_grep.h"
[4801]56
[4870]57using namespace pablo;
58
[4939]59static cl::OptionCategory bGrepOutputOptions("Output Options",
60                                      "These options control the output.");
61
62static cl::opt<bool> CountOnly("c", cl::desc("Count and display the matching lines per file only."), cl::cat(bGrepOutputOptions));
63static cl::alias CountOnlyLong("count", cl::desc("Alias for -c"), cl::aliasopt(CountOnly));
64static cl::opt<bool> NormalizeLineBreaks("normalize-line-breaks", cl::desc("Normalize line breaks to std::endl."), cl::init(false),  cl::cat(bGrepOutputOptions));
65
66static cl::opt<bool> ShowFileNames("H", cl::desc("Show the file name with each matching line."), cl::cat(bGrepOutputOptions));
67static cl::alias ShowFileNamesLong("with-filename", cl::desc("Alias for -H"), cl::aliasopt(ShowFileNames));
68
69static cl::opt<bool> ShowLineNumbers("n", cl::desc("Show the line number with each matching line."), cl::cat(bGrepOutputOptions));
70static cl::alias ShowLineNumbersLong("line-number", cl::desc("Alias for -n"), cl::aliasopt(ShowLineNumbers));
71
72
[4801]73static cl::OptionCategory cRegexOutputOptions("Regex Dump Options",
74                                              "These options control printing of intermediate regular expression structures.");
75static cl::opt<bool> PrintAllREs("print-REs", cl::init(false), cl::desc("print regular expression passes"), cl::cat(cRegexOutputOptions));
76static cl::opt<bool> PrintParsedREs("print-parsed-REs", cl::init(false), cl::desc("print out parsed regular expressions"), cl::cat(cRegexOutputOptions));
77static cl::opt<bool> PrintStrippedREs("print-stripped-REs", cl::init(false), cl::desc("print out REs with nullable prefixes/suffixes removed"), cl::cat(cRegexOutputOptions));
78static cl::opt<bool> PrintNamedREs("print-named-REs", cl::init(false), cl::desc("print out named REs"), cl::cat(cRegexOutputOptions));
79static cl::opt<bool> PrintUTF8REs("print-utf8-REs", cl::init(false), cl::desc("print out UTF-8 REs"), cl::cat(cRegexOutputOptions));
80static cl::opt<bool> PrintSimplifiedREs("print-simplified-REs", cl::init(false), cl::desc("print out final simplified REs"), cl::cat(cRegexOutputOptions));
[4939]81static cl::OptionCategory dPabloDumpOptions("Pablo Dump Options",
82                                            "These options control printing of intermediate Pablo code.");
[4801]83
84static cl::opt<bool> PrintOptimizedREcode("print-pablo", cl::init(false), cl::desc("print final optimized Pablo code"), cl::cat(dPabloDumpOptions));
85static cl::opt<bool> PrintCompiledCCcode("print-CC-pablo", cl::init(false), cl::desc("print Pablo output from character class compiler"), cl::cat(dPabloDumpOptions));
86static cl::opt<bool> PrintCompiledREcode("print-RE-pablo", cl::init(false), cl::desc("print Pablo output from the regular expression compiler"), cl::cat(dPabloDumpOptions));
87
88static cl::OptionCategory cPabloOptimizationsOptions("Pablo Optimizations", "These options control Pablo optimization passes.");
89
[4939]90static cl::opt<bool> DisablePabloCSE("disable-CSE", cl::init(false),
91                                     cl::desc("Disable Pablo common subexpression elimination/dead code elimination"),
[4801]92                                     cl::cat(cPabloOptimizationsOptions));
93static cl::opt<bool> PabloSinkingPass("sinking", cl::init(false),
94                                      cl::desc("Moves all instructions into the innermost legal If-scope so that they are only executed when needed."),
95                                      cl::cat(cPabloOptimizationsOptions));
96
97#ifdef ENABLE_MULTIPLEXING
[4899]98static cl::opt<bool> PrintUnloweredCode("print-unlowered-pablo", cl::init(false), cl::desc("print Pablo output prior to lowering. "), cl::cat(dPabloDumpOptions));
99
[4890]100static cl::opt<bool> EnableMultiplexing("multiplexing", cl::init(false),
[4801]101                                        cl::desc("combine Advances whose inputs are mutual exclusive into the fewest number of advances possible (expensive)."),
102                                        cl::cat(cPabloOptimizationsOptions));
103
[4885]104static cl::opt<bool> EnableLowering("lowering", cl::init(false),
105                                         cl::desc("coalesce associative functions prior to optimization passes."),
106                                         cl::cat(cPabloOptimizationsOptions));
[4899]107static cl::opt<bool> EnablePreDistribution("pre-dist", cl::init(false),
[4939]108                                         cl::desc("apply distribution law optimization."),
[4801]109                                         cl::cat(cPabloOptimizationsOptions));
[4899]110static cl::opt<bool> EnablePostDistribution("post-dist", cl::init(false),
[4939]111                                         cl::desc("apply distribution law optimization."),
[4899]112                                         cl::cat(cPabloOptimizationsOptions));
[4801]113#endif
114
[4848]115static cl::opt<bool> DisableAVX2("disable-AVX2", cl::init(false), cl::desc("disable AVX2 instruction set."), cl::cat(cPabloOptimizationsOptions));
[4801]116
117re::RE * regular_expression_passes(const Encoding encoding, re::RE * re_ast)  {
118    if (PrintAllREs || PrintParsedREs) {
119        std::cerr << "Parser:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
120    }
121
122    //Optimization passes to simplify the AST.
123    re_ast = re::RE_Nullable::removeNullablePrefix(re_ast);
124    if (PrintAllREs || PrintStrippedREs) {
125        std::cerr << "RemoveNullablePrefix:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
126    }
127    re_ast = re::RE_Nullable::removeNullableSuffix(re_ast);
128    if (PrintAllREs || PrintStrippedREs) {
129        std::cerr << "RemoveNullableSuffix:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
130    }
[4814]131
[4801]132    re_ast = re::RE_Simplifier::simplify(re_ast);
133    if (PrintAllREs || PrintSimplifiedREs) {
134        //Print to the terminal the AST that was generated by the simplifier.
135        std::cerr << "Simplifier:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
136    }
137    return re_ast;
138}
139   
[4870]140PabloFunction * re2pablo_compiler(const Encoding encoding, re::RE * re_ast) {
141    PabloFunction * function = PabloFunction::Create("process_block", 8, 2);
[4801]142    cc::CC_Compiler cc_compiler(*function, encoding);
143    re::RE_Compiler re_compiler(*function, cc_compiler);
144    re_compiler.initializeRequiredStreams();
[4808]145    re_compiler.compileUnicodeNames(re_ast);
[4801]146    re_compiler.finalizeMatchResult(re_compiler.compile(re_ast));
147
148    if (PrintCompiledREcode) {
149        //Print to the terminal the AST that was generated by the pararallel bit-stream compiler.
150        llvm::raw_os_ostream cerr(std::cerr);
151        cerr << "Initial Pablo AST:\n";
[4871]152        PabloPrinter::print(*function, cerr);
[4801]153    }
[4885]154    #ifndef NDEBUG
[4870]155    PabloVerifier::verify(*function, "creation");
[4885]156    #endif
[4801]157    return function;
158}
159
[4939]160void pablo_function_passes(PabloFunction * function) {
[4801]161    // Scan through the pablo code and perform DCE and CSE
[4939]162    if (!DisablePabloCSE) {
[4870]163        Simplifier::optimize(*function);
[4801]164    }
[4885]165#ifdef ENABLE_MULTIPLEXING
[4939]166    if (EnableLowering || EnablePreDistribution || EnablePostDistribution || EnableMultiplexing) {
167        FlattenAssociativeDFG::transform(*function);
[4885]168    }
[4939]169#endif
170    if (PabloSinkingPass) {
171        CodeMotionPass::optimize(*function);
172    }
173#ifdef ENABLE_MULTIPLEXING   
[4899]174    if (EnablePreDistribution) {
[4890]175        DistributivePass::optimize(*function);
176    }
[4801]177    if (EnableMultiplexing) {
[4937]178        MultiplexingPass::optimize(*function);
[4868]179    }
[4899]180    if (EnablePostDistribution) {
181        DistributivePass::optimize(*function);
182    }
[4939]183    SchedulingPrePass::optimize(*function);
[4899]184    if (PrintUnloweredCode) {
185        //Print to the terminal the AST that was generated by the pararallel bit-stream compiler.
186        llvm::raw_os_ostream cerr(std::cerr);
187        cerr << "Unlowered Pablo AST:\n";
188        PabloPrinter::print(*function, cerr);
[4939]189    }
190    if (EnableLowering || EnablePreDistribution || EnablePostDistribution || EnableMultiplexing) {
[4885]191        FactorizeDFG::transform(*function);
192    }
[4887]193#endif
[4801]194    if (PrintOptimizedREcode) {
[4939]195        PabloVerifier::verify(*function, "post-optimization");
196        //Print to the terminal the AST that was generated by the pararallel bit-stream compiler.
197        llvm::raw_os_ostream cerr(std::cerr);
198        cerr << "Final Pablo AST:\n";
199        PabloPrinter::print(*function, cerr);
[4801]200    }
201}
202
[4889]203// Dynamic AVX2 confirmation
204#if (BLOCK_SIZE == 256)
205#define ISPC_LLVM_VERSION ISPC_LLVM_3_6
206#include "ispc.cpp"
207#endif
208
209
[4900]210IDISA::IDISA_Builder * GetNativeIDISA_Builder(Module * mod, Type * bitBlockType) {
[4801]211
[4900]212#if (BLOCK_SIZE == 256)
213    if ((strncmp(lGetSystemISA(), "avx2", 4) == 0)) {
214        return new IDISA::IDISA_AVX2_Builder(mod, bitBlockType);
215        //std::cerr << "IDISA_AVX2_Builder selected\n";
216    }
217    else{
218        return new IDISA::IDISA_SSE2_Builder(mod, bitBlockType);
219        //std::cerr << "Generic IDISA_Builder selected\n";
220    }
[4944]221#elif (BLOCK_SIZE == 64)
222    return new IDISA::IDISA_I64_Builder(mod, bitBlockType);
[4900]223#else   
224    return new IDISA::IDISA_SSE2_Builder(mod, bitBlockType);
225#endif
226}
227
228
229
230ExecutionEngine * JIT_to_ExecutionEngine (Module * m) {
231
[4801]232    InitializeNativeTarget();
233    InitializeNativeTargetAsmPrinter();
234    InitializeNativeTargetAsmParser();
235
236    std::string errMessage;
[4900]237    EngineBuilder builder(std::move(std::unique_ptr<Module>(m)));
[4801]238    builder.setErrorStr(&errMessage);
239    builder.setMCPU(sys::getHostCPUName());
[4939]240    builder.setOptLevel(CodeGenOpt::Level::None);
[4889]241
[4801]242#if (BLOCK_SIZE == 256)
[4889]243    if (!DisableAVX2 && (strncmp(lGetSystemISA(), "avx2", 4) == 0)) {
[4801]244            std::vector<std::string> attrs;
245            attrs.push_back("avx2");
246            builder.setMAttrs(attrs);
[4889]247    //std::cerr << "+avx2 set" << std::endl;
[4801]248    }
249#endif
250    //builder.setOptLevel(mMaxWhileDepth ? CodeGenOpt::Level::Less : CodeGenOpt::Level::None);
251    ExecutionEngine * engine = builder.create();
252    if (engine == nullptr) {
253        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
254    }
255
256    return engine;
257}
258
[4939]259int total_count = 0;
260
[4907]261extern "C" {
[4939]262    void wrapped_report_match(uint64_t lineNum, uint64_t line_start, uint64_t line_end, const char * buffer, int filesize, char * filename) {
263        if(CountOnly){
264            total_count++;
265            return;
266        }
267
268        llvm::raw_os_ostream out(std::cout);
269        if (ShowFileNames) {
270            out << filename << ':';
271        }
272        if (ShowLineNumbers) {
273            out << lineNum << ":";
274        }
275
276        if ((buffer[line_start] == 0xA) && (line_start != line_end)) {
277            // The line "starts" on the LF of a CRLF.  Really the end of the last line.
278            line_start++;
279        }
280        if (line_end == filesize) {
281            // The match position is at end-of-file.   We have a final unterminated line.
282            out.write(&buffer[line_start], line_end - line_start);
283            if (NormalizeLineBreaks) {
284                out << '\n';  // terminate it
285            }
286            return;
287        }
288        unsigned char end_byte = (unsigned char)buffer[line_end]; 
289        if (NormalizeLineBreaks) {
290            if (end_byte == 0x85) {
291                // Line terminated with NEL, on the second byte.  Back up 1.
292                line_end--;
293            } else if (end_byte > 0xD) {
294                // Line terminated with PS or LS, on the third byte.  Back up 2.
295                line_end -= 2;
296            }
297            out.write(&buffer[line_start], line_end - line_start);
298            out << '\n';
299        }
300        else{   
301            if (end_byte == 0x0D) {
302                // Check for line_end on first byte of CRLF;  note that we don't
303                // want to access past the end of buffer.
304                if ((line_end + 1 < filesize) && (buffer[line_end + 1] == 0x0A)) {
305                    // Found CRLF; preserve both bytes.
306                    line_end++;
307                }
308            }
309            out.write(&buffer[line_start], line_end - line_start + 1);
310        }
[4907]311    }
312}
[4801]313
[4907]314
[4939]315void PrintTotalCount(){
316    if(CountOnly){
317        std::cout << total_count << std::endl;
318    }
319}
320
321re::CC * parsedCodePointSet;
322
[4801]323extern "C" {
[4939]324    void insert_codepoints(uint64_t lineNum, uint64_t line_start, uint64_t line_end, const char * buffer) {
325       re::codepoint_t c = 0;
326        ssize_t line_pos = line_start;
327        while (isxdigit(buffer[line_pos])) {
328            if (isdigit(buffer[line_pos])) {
329                c = (c << 4) | (buffer[line_pos] - '0');
330            }
331            else {
332                c = (c << 4) | (tolower(buffer[line_pos]) - 'a' + 10);
333            }
334            line_pos++;
335        }
336        assert(((line_pos - line_start) >= 4) && ((line_pos - line_start) <= 6)); // UCD format 4 to 6 hex digits.       
337        parsedCodePointSet->insert(c);
338    }
339}
340
341void setParsedCodePointSet(){
342    parsedCodePointSet = re::makeCC();
343}
344
345re::CC * getParsedCodePointSet(){
346    return parsedCodePointSet;
347}
348
[4942]349// extern "C" {
350//   void wrapped_print_register(char * regName, BitBlock bit_block) {
351//       print_register<BitBlock>(regName, bit_block);
352//   }
353// }
[4801]354
355void icgrep_Linking(Module * m, ExecutionEngine * e) {
356    Module::FunctionListType & fns = m->getFunctionList();
357    for (Module::FunctionListType::iterator it = fns.begin(), it_end = fns.end(); it != it_end; ++it) {
358        std::string fnName = it->getName().str();
[4900]359        if (fnName == "s2p_block") continue;
[4801]360        if (fnName == "process_block") continue;
[4825]361        if (fnName == "process_block_initialize_carries") continue;
[4907]362       
[4942]363        // if (fnName == "wrapped_print_register") {
364        //     e->addGlobalMapping(cast<GlobalValue>(it), (void *)&wrapped_print_register);
365        // }
[4907]366        if (fnName == "wrapped_report_match") {
367            e->addGlobalMapping(cast<GlobalValue>(it), (void *)&wrapped_report_match);
368        }
[4939]369        if (fnName == "insert_codepoints") {
370            e->addGlobalMapping(cast<GlobalValue>(it), (void *)&insert_codepoints);
371        }
[4801]372#ifndef DISABLE_PREGENERATED_UCD_FUNCTIONS
373        else {
374            const UCD::ExternalProperty & ep = UCD::resolveExternalProperty(fnName);
375            e->addGlobalMapping(cast<GlobalValue>(it), std::get<0>(ep));
376        }
377#endif
378    }
379}
380
Note: See TracBrowser for help on using the repository browser.