source: icGREP/icgrep-devel/icgrep/toolchain.cpp @ 4944

Last change on this file since 4944 was 4944, checked in by lindanl, 3 years ago

64 bit version.

File size: 15.4 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <string>
8#include <iostream>
9#include <fstream>
10
11#include "utf_encoding.h"
12#include "pablo/pablo_compiler.h"
13#include <llvm/IR/Function.h>
14#include <llvm/IR/Module.h>
15#include <llvm/ExecutionEngine/ExecutionEngine.h>
16#include <llvm/ExecutionEngine/MCJIT.h>
17#include <llvm/IRReader/IRReader.h>
18#include <llvm/Support/CommandLine.h>
19#include <llvm/CodeGen/CommandFlags.h>
20#include <llvm/Support/SourceMgr.h>
21#include <llvm/Support/TargetSelect.h>
22#include <llvm/Support/Host.h>
23
24#include <IDISA/idisa_avx_builder.h>
25#include <IDISA/idisa_sse_builder.h>
26#include <IDISA/idisa_i64_builder.h>
27#ifndef DISABLE_PREGENERATED_UCD_FUNCTIONS
28#include <UCD/precompiled_properties.h>
29#endif
30#include <re/re_cc.h>
31#include <re/re_nullable.h>
32#include <re/re_simplifier.h>
33#include <re/re_alt.h>
34#include <re/parsefailure.h>
35#include <re/re_parser.h>
36#include <re/re_compiler.h>
37#include <utf8_encoder.h>
38#include <cc/cc_compiler.h>
39#include <pablo/pablo_compiler.h>
40#include <pablo/optimizers/pablo_simplifier.hpp>
41#include <pablo/optimizers/codemotionpass.h>
42#include <pablo/passes/flattenassociativedfg.h>
43#include <pablo/passes/factorizedfg.h>
44#ifdef ENABLE_MULTIPLEXING
45#include <pablo/optimizers/pablo_automultiplexing.hpp>
46#include <pablo/optimizers/pablo_bddminimization.h>
47#include <pablo/optimizers/distributivepass.h>
48#include <pablo/optimizers/schedulingprepass.h>
49#endif
50#include <pablo/function.h>
51#include <pablo/analysis/pabloverifier.hpp>
52#include <re/printer_re.h>
53#include <pablo/printer_pablos.h>
54
55#include "do_grep.h"
56
57using namespace pablo;
58
59static cl::OptionCategory bGrepOutputOptions("Output Options",
60                                      "These options control the output.");
61
62static cl::opt<bool> CountOnly("c", cl::desc("Count and display the matching lines per file only."), cl::cat(bGrepOutputOptions));
63static cl::alias CountOnlyLong("count", cl::desc("Alias for -c"), cl::aliasopt(CountOnly));
64static cl::opt<bool> NormalizeLineBreaks("normalize-line-breaks", cl::desc("Normalize line breaks to std::endl."), cl::init(false),  cl::cat(bGrepOutputOptions));
65
66static cl::opt<bool> ShowFileNames("H", cl::desc("Show the file name with each matching line."), cl::cat(bGrepOutputOptions));
67static cl::alias ShowFileNamesLong("with-filename", cl::desc("Alias for -H"), cl::aliasopt(ShowFileNames));
68
69static cl::opt<bool> ShowLineNumbers("n", cl::desc("Show the line number with each matching line."), cl::cat(bGrepOutputOptions));
70static cl::alias ShowLineNumbersLong("line-number", cl::desc("Alias for -n"), cl::aliasopt(ShowLineNumbers));
71
72
73static cl::OptionCategory cRegexOutputOptions("Regex Dump Options",
74                                              "These options control printing of intermediate regular expression structures.");
75static cl::opt<bool> PrintAllREs("print-REs", cl::init(false), cl::desc("print regular expression passes"), cl::cat(cRegexOutputOptions));
76static cl::opt<bool> PrintParsedREs("print-parsed-REs", cl::init(false), cl::desc("print out parsed regular expressions"), cl::cat(cRegexOutputOptions));
77static cl::opt<bool> PrintStrippedREs("print-stripped-REs", cl::init(false), cl::desc("print out REs with nullable prefixes/suffixes removed"), cl::cat(cRegexOutputOptions));
78static cl::opt<bool> PrintNamedREs("print-named-REs", cl::init(false), cl::desc("print out named REs"), cl::cat(cRegexOutputOptions));
79static cl::opt<bool> PrintUTF8REs("print-utf8-REs", cl::init(false), cl::desc("print out UTF-8 REs"), cl::cat(cRegexOutputOptions));
80static cl::opt<bool> PrintSimplifiedREs("print-simplified-REs", cl::init(false), cl::desc("print out final simplified REs"), cl::cat(cRegexOutputOptions));
81static cl::OptionCategory dPabloDumpOptions("Pablo Dump Options",
82                                            "These options control printing of intermediate Pablo code.");
83
84static cl::opt<bool> PrintOptimizedREcode("print-pablo", cl::init(false), cl::desc("print final optimized Pablo code"), cl::cat(dPabloDumpOptions));
85static cl::opt<bool> PrintCompiledCCcode("print-CC-pablo", cl::init(false), cl::desc("print Pablo output from character class compiler"), cl::cat(dPabloDumpOptions));
86static cl::opt<bool> PrintCompiledREcode("print-RE-pablo", cl::init(false), cl::desc("print Pablo output from the regular expression compiler"), cl::cat(dPabloDumpOptions));
87
88static cl::OptionCategory cPabloOptimizationsOptions("Pablo Optimizations", "These options control Pablo optimization passes.");
89
90static cl::opt<bool> DisablePabloCSE("disable-CSE", cl::init(false),
91                                     cl::desc("Disable Pablo common subexpression elimination/dead code elimination"),
92                                     cl::cat(cPabloOptimizationsOptions));
93static cl::opt<bool> PabloSinkingPass("sinking", cl::init(false),
94                                      cl::desc("Moves all instructions into the innermost legal If-scope so that they are only executed when needed."),
95                                      cl::cat(cPabloOptimizationsOptions));
96
97#ifdef ENABLE_MULTIPLEXING
98static cl::opt<bool> PrintUnloweredCode("print-unlowered-pablo", cl::init(false), cl::desc("print Pablo output prior to lowering. "), cl::cat(dPabloDumpOptions));
99
100static cl::opt<bool> EnableMultiplexing("multiplexing", cl::init(false),
101                                        cl::desc("combine Advances whose inputs are mutual exclusive into the fewest number of advances possible (expensive)."),
102                                        cl::cat(cPabloOptimizationsOptions));
103
104static cl::opt<bool> EnableLowering("lowering", cl::init(false),
105                                         cl::desc("coalesce associative functions prior to optimization passes."),
106                                         cl::cat(cPabloOptimizationsOptions));
107static cl::opt<bool> EnablePreDistribution("pre-dist", cl::init(false),
108                                         cl::desc("apply distribution law optimization."),
109                                         cl::cat(cPabloOptimizationsOptions));
110static cl::opt<bool> EnablePostDistribution("post-dist", cl::init(false),
111                                         cl::desc("apply distribution law optimization."),
112                                         cl::cat(cPabloOptimizationsOptions));
113#endif
114
115static cl::opt<bool> DisableAVX2("disable-AVX2", cl::init(false), cl::desc("disable AVX2 instruction set."), cl::cat(cPabloOptimizationsOptions));
116
117re::RE * regular_expression_passes(const Encoding encoding, re::RE * re_ast)  {
118    if (PrintAllREs || PrintParsedREs) {
119        std::cerr << "Parser:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
120    }
121
122    //Optimization passes to simplify the AST.
123    re_ast = re::RE_Nullable::removeNullablePrefix(re_ast);
124    if (PrintAllREs || PrintStrippedREs) {
125        std::cerr << "RemoveNullablePrefix:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
126    }
127    re_ast = re::RE_Nullable::removeNullableSuffix(re_ast);
128    if (PrintAllREs || PrintStrippedREs) {
129        std::cerr << "RemoveNullableSuffix:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
130    }
131
132    re_ast = re::RE_Simplifier::simplify(re_ast);
133    if (PrintAllREs || PrintSimplifiedREs) {
134        //Print to the terminal the AST that was generated by the simplifier.
135        std::cerr << "Simplifier:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
136    }
137    return re_ast;
138}
139   
140PabloFunction * re2pablo_compiler(const Encoding encoding, re::RE * re_ast) {
141    PabloFunction * function = PabloFunction::Create("process_block", 8, 2);
142    cc::CC_Compiler cc_compiler(*function, encoding);
143    re::RE_Compiler re_compiler(*function, cc_compiler);
144    re_compiler.initializeRequiredStreams();
145    re_compiler.compileUnicodeNames(re_ast);
146    re_compiler.finalizeMatchResult(re_compiler.compile(re_ast));
147
148    if (PrintCompiledREcode) {
149        //Print to the terminal the AST that was generated by the pararallel bit-stream compiler.
150        llvm::raw_os_ostream cerr(std::cerr);
151        cerr << "Initial Pablo AST:\n";
152        PabloPrinter::print(*function, cerr);
153    }
154    #ifndef NDEBUG
155    PabloVerifier::verify(*function, "creation");
156    #endif
157    return function;
158}
159
160void pablo_function_passes(PabloFunction * function) {
161    // Scan through the pablo code and perform DCE and CSE
162    if (!DisablePabloCSE) {
163        Simplifier::optimize(*function);
164    }
165#ifdef ENABLE_MULTIPLEXING
166    if (EnableLowering || EnablePreDistribution || EnablePostDistribution || EnableMultiplexing) {
167        FlattenAssociativeDFG::transform(*function);
168    }
169#endif
170    if (PabloSinkingPass) {
171        CodeMotionPass::optimize(*function);
172    }
173#ifdef ENABLE_MULTIPLEXING   
174    if (EnablePreDistribution) {
175        DistributivePass::optimize(*function);
176    }
177    if (EnableMultiplexing) {
178        MultiplexingPass::optimize(*function);
179    }
180    if (EnablePostDistribution) {
181        DistributivePass::optimize(*function);
182    }
183    SchedulingPrePass::optimize(*function);
184    if (PrintUnloweredCode) {
185        //Print to the terminal the AST that was generated by the pararallel bit-stream compiler.
186        llvm::raw_os_ostream cerr(std::cerr);
187        cerr << "Unlowered Pablo AST:\n";
188        PabloPrinter::print(*function, cerr);
189    }
190    if (EnableLowering || EnablePreDistribution || EnablePostDistribution || EnableMultiplexing) {
191        FactorizeDFG::transform(*function);
192    }
193#endif
194    if (PrintOptimizedREcode) {
195        PabloVerifier::verify(*function, "post-optimization");
196        //Print to the terminal the AST that was generated by the pararallel bit-stream compiler.
197        llvm::raw_os_ostream cerr(std::cerr);
198        cerr << "Final Pablo AST:\n";
199        PabloPrinter::print(*function, cerr);
200    }
201}
202
203// Dynamic AVX2 confirmation
204#if (BLOCK_SIZE == 256)
205#define ISPC_LLVM_VERSION ISPC_LLVM_3_6
206#include "ispc.cpp"
207#endif
208
209
210IDISA::IDISA_Builder * GetNativeIDISA_Builder(Module * mod, Type * bitBlockType) {
211
212#if (BLOCK_SIZE == 256)
213    if ((strncmp(lGetSystemISA(), "avx2", 4) == 0)) {
214        return new IDISA::IDISA_AVX2_Builder(mod, bitBlockType);
215        //std::cerr << "IDISA_AVX2_Builder selected\n";
216    }
217    else{
218        return new IDISA::IDISA_SSE2_Builder(mod, bitBlockType);
219        //std::cerr << "Generic IDISA_Builder selected\n";
220    }
221#elif (BLOCK_SIZE == 64)
222    return new IDISA::IDISA_I64_Builder(mod, bitBlockType);
223#else   
224    return new IDISA::IDISA_SSE2_Builder(mod, bitBlockType);
225#endif
226}
227
228
229
230ExecutionEngine * JIT_to_ExecutionEngine (Module * m) {
231
232    InitializeNativeTarget();
233    InitializeNativeTargetAsmPrinter();
234    InitializeNativeTargetAsmParser();
235
236    std::string errMessage;
237    EngineBuilder builder(std::move(std::unique_ptr<Module>(m)));
238    builder.setErrorStr(&errMessage);
239    builder.setMCPU(sys::getHostCPUName());
240    builder.setOptLevel(CodeGenOpt::Level::None);
241
242#if (BLOCK_SIZE == 256)
243    if (!DisableAVX2 && (strncmp(lGetSystemISA(), "avx2", 4) == 0)) {
244            std::vector<std::string> attrs;
245            attrs.push_back("avx2");
246            builder.setMAttrs(attrs);
247    //std::cerr << "+avx2 set" << std::endl;
248    }
249#endif
250    //builder.setOptLevel(mMaxWhileDepth ? CodeGenOpt::Level::Less : CodeGenOpt::Level::None);
251    ExecutionEngine * engine = builder.create();
252    if (engine == nullptr) {
253        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
254    }
255
256    return engine;
257}
258
259int total_count = 0;
260
261extern "C" {
262    void wrapped_report_match(uint64_t lineNum, uint64_t line_start, uint64_t line_end, const char * buffer, int filesize, char * filename) {
263        if(CountOnly){
264            total_count++;
265            return;
266        }
267
268        llvm::raw_os_ostream out(std::cout);
269        if (ShowFileNames) {
270            out << filename << ':';
271        }
272        if (ShowLineNumbers) {
273            out << lineNum << ":";
274        }
275
276        if ((buffer[line_start] == 0xA) && (line_start != line_end)) {
277            // The line "starts" on the LF of a CRLF.  Really the end of the last line.
278            line_start++;
279        }
280        if (line_end == filesize) {
281            // The match position is at end-of-file.   We have a final unterminated line.
282            out.write(&buffer[line_start], line_end - line_start);
283            if (NormalizeLineBreaks) {
284                out << '\n';  // terminate it
285            }
286            return;
287        }
288        unsigned char end_byte = (unsigned char)buffer[line_end]; 
289        if (NormalizeLineBreaks) {
290            if (end_byte == 0x85) {
291                // Line terminated with NEL, on the second byte.  Back up 1.
292                line_end--;
293            } else if (end_byte > 0xD) {
294                // Line terminated with PS or LS, on the third byte.  Back up 2.
295                line_end -= 2;
296            }
297            out.write(&buffer[line_start], line_end - line_start);
298            out << '\n';
299        }
300        else{   
301            if (end_byte == 0x0D) {
302                // Check for line_end on first byte of CRLF;  note that we don't
303                // want to access past the end of buffer.
304                if ((line_end + 1 < filesize) && (buffer[line_end + 1] == 0x0A)) {
305                    // Found CRLF; preserve both bytes.
306                    line_end++;
307                }
308            }
309            out.write(&buffer[line_start], line_end - line_start + 1);
310        }
311    }
312}
313
314
315void PrintTotalCount(){
316    if(CountOnly){
317        std::cout << total_count << std::endl;
318    }
319}
320
321re::CC * parsedCodePointSet;
322
323extern "C" {
324    void insert_codepoints(uint64_t lineNum, uint64_t line_start, uint64_t line_end, const char * buffer) {
325       re::codepoint_t c = 0;
326        ssize_t line_pos = line_start;
327        while (isxdigit(buffer[line_pos])) {
328            if (isdigit(buffer[line_pos])) {
329                c = (c << 4) | (buffer[line_pos] - '0');
330            }
331            else {
332                c = (c << 4) | (tolower(buffer[line_pos]) - 'a' + 10);
333            }
334            line_pos++;
335        }
336        assert(((line_pos - line_start) >= 4) && ((line_pos - line_start) <= 6)); // UCD format 4 to 6 hex digits.       
337        parsedCodePointSet->insert(c);
338    }
339}
340
341void setParsedCodePointSet(){
342    parsedCodePointSet = re::makeCC();
343}
344
345re::CC * getParsedCodePointSet(){
346    return parsedCodePointSet;
347}
348
349// extern "C" {
350//   void wrapped_print_register(char * regName, BitBlock bit_block) {
351//       print_register<BitBlock>(regName, bit_block);
352//   }
353// }
354
355void icgrep_Linking(Module * m, ExecutionEngine * e) {
356    Module::FunctionListType & fns = m->getFunctionList();
357    for (Module::FunctionListType::iterator it = fns.begin(), it_end = fns.end(); it != it_end; ++it) {
358        std::string fnName = it->getName().str();
359        if (fnName == "s2p_block") continue;
360        if (fnName == "process_block") continue;
361        if (fnName == "process_block_initialize_carries") continue;
362       
363        // if (fnName == "wrapped_print_register") {
364        //     e->addGlobalMapping(cast<GlobalValue>(it), (void *)&wrapped_print_register);
365        // }
366        if (fnName == "wrapped_report_match") {
367            e->addGlobalMapping(cast<GlobalValue>(it), (void *)&wrapped_report_match);
368        }
369        if (fnName == "insert_codepoints") {
370            e->addGlobalMapping(cast<GlobalValue>(it), (void *)&insert_codepoints);
371        }
372#ifndef DISABLE_PREGENERATED_UCD_FUNCTIONS
373        else {
374            const UCD::ExternalProperty & ep = UCD::resolveExternalProperty(fnName);
375            e->addGlobalMapping(cast<GlobalValue>(it), std::get<0>(ep));
376        }
377#endif
378    }
379}
380
Note: See TracBrowser for help on using the repository browser.