source: icGREP/icgrep-devel/icgrep/toolchain.cpp @ 4926

Last change on this file since 4926 was 4922, checked in by nmedfort, 4 years ago

Incorporated a few common case boolean optimizations in the Simplifier.

File size: 20.9 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <string>
8#include <iostream>
9#include <fstream>
10
11#include "basis_bits.h"
12#include "utf_encoding.h"
13#include "pablo/pablo_compiler.h"
14#include <llvm/IR/Function.h>
15#include <llvm/IR/Module.h>
16#include <llvm/ExecutionEngine/ExecutionEngine.h>
17#include <llvm/ExecutionEngine/MCJIT.h>
18#include <llvm/IRReader/IRReader.h>
19#include <llvm/Support/CommandLine.h>
20#include <llvm/CodeGen/CommandFlags.h>
21#include <llvm/Support/SourceMgr.h>
22#include <llvm/Support/TargetSelect.h>
23#include <llvm/Support/Host.h>
24#include <llvm/Support/FileSystem.h>
25
26
27#include <IDISA/idisa_avx_builder.h>
28#include <IDISA/idisa_sse_builder.h>
29#ifndef DISABLE_PREGENERATED_UCD_FUNCTIONS
30#include <UCD/precompiled_properties.h>
31#endif
32#include <re/re_cc.h>
33#include <re/re_nullable.h>
34#include <re/re_simplifier.h>
35#include <re/re_alt.h>
36#include <re/parsefailure.h>
37#include <re/re_parser.h>
38#include <re/re_compiler.h>
39#include <utf8_encoder.h>
40#include <cc/cc_compiler.h>
41#include <pablo/pablo_compiler.h>
42#include <pablo/optimizers/pablo_simplifier.hpp>
43#include <pablo/optimizers/codemotionpass.h>
44#ifdef ENABLE_MULTIPLEXING
45#include <pablo/passes/flattenassociativedfg.h>
46#include <pablo/passes/factorizedfg.h>
47#include <pablo/optimizers/pablo_automultiplexing.hpp>
48#include <pablo/optimizers/pablo_bddminimization.h>
49#include <pablo/optimizers/distributivepass.h>
50#include <pablo/optimizers/schedulingprepass.h>
51#endif
52#include <pablo/function.h>
53#include <pablo/analysis/pabloverifier.hpp>
54#include <re/printer_re.h>
55#include <pablo/printer_pablos.h>
56
57#include <hrtime.h>
58#include <do_grep.h>
59
60using namespace pablo;
61
62static cl::OptionCategory cRegexOutputOptions("Regex Dump Options",
63                                              "These options control printing of intermediate regular expression structures.");
64static cl::opt<bool> PrintAllREs("print-REs", cl::init(false), cl::desc("print regular expression passes"), cl::cat(cRegexOutputOptions));
65static cl::opt<bool> PrintParsedREs("print-parsed-REs", cl::init(false), cl::desc("print out parsed regular expressions"), cl::cat(cRegexOutputOptions));
66static cl::opt<bool> PrintStrippedREs("print-stripped-REs", cl::init(false), cl::desc("print out REs with nullable prefixes/suffixes removed"), cl::cat(cRegexOutputOptions));
67static cl::opt<bool> PrintNamedREs("print-named-REs", cl::init(false), cl::desc("print out named REs"), cl::cat(cRegexOutputOptions));
68static cl::opt<bool> PrintUTF8REs("print-utf8-REs", cl::init(false), cl::desc("print out UTF-8 REs"), cl::cat(cRegexOutputOptions));
69static cl::opt<bool> PrintSimplifiedREs("print-simplified-REs", cl::init(false), cl::desc("print out final simplified REs"), cl::cat(cRegexOutputOptions));
70static cl::OptionCategory dPabloDumpOptions("Pablo Dump Options", "These options control printing of intermediate Pablo code.");
71
72static cl::opt<bool> PrintOptimizedREcode("print-pablo", cl::init(false), cl::desc("print final optimized Pablo code"), cl::cat(dPabloDumpOptions));
73static cl::opt<bool> PrintCompiledCCcode("print-CC-pablo", cl::init(false), cl::desc("print Pablo output from character class compiler"), cl::cat(dPabloDumpOptions));
74static cl::opt<bool> PrintCompiledREcode("print-RE-pablo", cl::init(false), cl::desc("print Pablo output from the regular expression compiler"), cl::cat(dPabloDumpOptions));
75static cl::opt<std::string> PabloOutputFilename("print-pablo-output", cl::init(""), cl::desc("output Pablo filename"), cl::cat(dPabloDumpOptions));
76
77static cl::OptionCategory cPabloOptimizationsOptions("Pablo Optimizations", "These options control Pablo optimization passes.");
78
79static cl::opt<bool> DisableSimplification("disable-simplification", cl::init(false),
80                                     cl::desc("Disable Pablo Simplification pass (not recommended)"),
81                                     cl::cat(cPabloOptimizationsOptions));
82
83static cl::opt<bool> PabloSinkingPass("sinking", cl::init(false),
84                                      cl::desc("Moves all instructions into the innermost legal If-scope so that they are only executed when needed."),
85                                      cl::cat(cPabloOptimizationsOptions));
86
87static cl::OptionCategory cMachineCodeOptimization("Machine Code Optimizations", "These options control back-end compilier optimization levels.");
88
89
90static cl::opt<char> OptLevel("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] (default = '-O0')"),
91                              cl::cat(cMachineCodeOptimization), cl::Prefix, cl::ZeroOrMore, cl::init('0'));
92
93#ifdef ENABLE_MULTIPLEXING
94static cl::opt<bool> PrintUnloweredCode("print-unlowered-pablo", cl::init(false), cl::desc("print Pablo output prior to lowering. "), cl::cat(dPabloDumpOptions));
95
96static cl::opt<bool> EnableMultiplexing("multiplexing", cl::init(false),
97                                        cl::desc("combine Advances whose inputs are mutual exclusive into the fewest number of advances possible (expensive)."),
98                                        cl::cat(cPabloOptimizationsOptions));
99
100static cl::opt<unsigned> MultiplexingSetLimit("multiplexing-set-limit", cl::init(std::numeric_limits<unsigned>::max()),
101                                        cl::desc("maximum size of any candidate multiplexing set."),
102                                        cl::cat(cPabloOptimizationsOptions));
103
104static cl::opt<unsigned> MultiplexingSelectionLimit("multiplexing-selection-limit", cl::init(100),
105                                        cl::desc("maximum number of selections from any partial candidate multiplexing set."),
106                                        cl::cat(cPabloOptimizationsOptions));
107
108static cl::opt<unsigned> MultiplexingWindowSize("multiplexing-window-size", cl::init(1),
109                                        cl::desc("maximum depth difference for computing mutual exclusion of Advance nodes."),
110                                        cl::cat(cPabloOptimizationsOptions));
111
112static cl::opt<bool> EnableLowering("lowering", cl::init(false),
113                                         cl::desc("coalesce associative functions prior to optimization passes."),
114                                         cl::cat(cPabloOptimizationsOptions));
115
116static cl::opt<bool> EnablePreDistribution("pre-dist", cl::init(false),
117                                         cl::desc("apply distribution law optimization prior to multiplexing."),
118                                         cl::cat(cPabloOptimizationsOptions));
119
120static cl::opt<bool> EnablePostDistribution("post-dist", cl::init(false),
121                                         cl::desc("apply distribution law optimization after multiplexing."),
122                                         cl::cat(cPabloOptimizationsOptions));
123
124static cl::opt<bool> EnablePrePassScheduling("pre-pass-scheduling", cl::init(false),
125                                         cl::desc("apply pre-pass scheduling prior to LLVM IR generation."),
126                                         cl::cat(cPabloOptimizationsOptions));
127#endif
128
129static cl::opt<bool> DisableAVX2("disable-AVX2", cl::init(false), cl::desc("disable AVX2 instruction set."), cl::cat(cPabloOptimizationsOptions));
130
131re::RE * regular_expression_passes(const Encoding encoding, re::RE * re_ast)  {
132    if (PrintAllREs || PrintParsedREs) {
133        std::cerr << "Parser:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
134    }
135
136    //Optimization passes to simplify the AST.
137    re_ast = re::RE_Nullable::removeNullablePrefix(re_ast);
138    if (PrintAllREs || PrintStrippedREs) {
139        std::cerr << "RemoveNullablePrefix:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
140    }
141    re_ast = re::RE_Nullable::removeNullableSuffix(re_ast);
142    if (PrintAllREs || PrintStrippedREs) {
143        std::cerr << "RemoveNullableSuffix:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
144    }
145
146    re_ast = re::RE_Simplifier::simplify(re_ast);
147    if (PrintAllREs || PrintSimplifiedREs) {
148        //Print to the terminal the AST that was generated by the simplifier.
149        std::cerr << "Simplifier:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
150    }
151    return re_ast;
152}
153   
154PabloFunction * re2pablo_compiler(const Encoding encoding, re::RE * re_ast) {
155    PabloFunction * function = PabloFunction::Create("process_block", 8, 2);
156    cc::CC_Compiler cc_compiler(*function, encoding);
157    re::RE_Compiler re_compiler(*function, cc_compiler);
158    re_compiler.initializeRequiredStreams();
159    re_compiler.compileUnicodeNames(re_ast);
160    re_compiler.finalizeMatchResult(re_compiler.compile(re_ast));
161
162    if (PrintCompiledREcode) {
163        //Print to the terminal the AST that was generated by the pararallel bit-stream compiler.
164        llvm::raw_os_ostream cerr(std::cerr);
165        cerr << "Initial Pablo AST:\n";
166        PabloPrinter::print(*function, cerr);
167    }
168    #ifndef NDEBUG
169    PabloVerifier::verify(*function, "creation");
170    #endif
171    return function;
172}
173
174#ifdef PRINT_TIMING_INFORMATION
175#define READ_CYCLE_COUNTER(name) name = read_cycle_counter();
176#else
177#define READ_CYCLE_COUNTER(name)
178#endif
179
180#ifdef PRINT_TIMING_INFORMATION
181unsigned COUNT_STATEMENTS(const PabloFunction * const entry) {
182    std::stack<const Statement *> scope;
183    unsigned statements = 0;
184    // Scan through and collect all the advances, calls, scanthrus and matchstars ...
185    for (const Statement * stmt = entry->getEntryBlock()->front(); ; ) {
186        while ( stmt ) {
187            ++statements;
188            if (LLVM_UNLIKELY(isa<If>(stmt) || isa<While>(stmt))) {
189                // Set the next statement to be the first statement of the inner scope and push the
190                // next statement of the current statement into the scope stack.
191                const PabloBlock * const nested = isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody();
192                scope.push(stmt->getNextNode());
193                stmt = nested->front();
194                assert (stmt);
195                continue;
196            }
197            stmt = stmt->getNextNode();
198        }
199        if (scope.empty()) {
200            break;
201        }
202        stmt = scope.top();
203        scope.pop();
204    }
205    return statements;
206}
207
208unsigned COUNT_ADVANCES(const PabloFunction * const entry) {
209
210    std::stack<const Statement *> scope;
211    unsigned advances = 0;
212
213    // Scan through and collect all the advances, calls, scanthrus and matchstars ...
214    for (const Statement * stmt = entry->getEntryBlock()->front(); ; ) {
215        while ( stmt ) {
216            if (isa<Advance>(stmt)) {
217                ++advances;
218            }
219            else if (LLVM_UNLIKELY(isa<If>(stmt) || isa<While>(stmt))) {
220                // Set the next statement to be the first statement of the inner scope and push the
221                // next statement of the current statement into the scope stack.
222                const PabloBlock * const nested = isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody();
223                scope.push(stmt->getNextNode());
224                stmt = nested->front();
225                assert (stmt);
226                continue;
227            }
228            stmt = stmt->getNextNode();
229        }
230        if (scope.empty()) {
231            break;
232        }
233        stmt = scope.top();
234        scope.pop();
235    }
236    return advances;
237}
238
239using DistributionMap = boost::container::flat_map<unsigned, unsigned>;
240
241DistributionMap SUMMARIZE_VARIADIC_DISTRIBUTION(const PabloFunction * const entry) {
242    std::stack<const Statement *> scope;
243    DistributionMap distribution;
244    // Scan through and collect all the advances, calls, scanthrus and matchstars ...
245    for (const Statement * stmt = entry->getEntryBlock()->front(); ; ) {
246        while ( stmt ) {
247            if (isa<Variadic>(stmt)) {
248                auto f = distribution.find(stmt->getNumOperands());
249                if (f == distribution.end()) {
250                    distribution.emplace(stmt->getNumOperands(), 1);
251                } else {
252                    f->second += 1;
253                }
254            }
255            else if (LLVM_UNLIKELY(isa<If>(stmt) || isa<While>(stmt))) {
256                // Set the next statement to be the first statement of the inner scope and push the
257                // next statement of the current statement into the scope stack.
258                const PabloBlock * const nested = isa<If>(stmt) ? cast<If>(stmt)->getBody() : cast<While>(stmt)->getBody();
259                scope.push(stmt->getNextNode());
260                stmt = nested->front();
261                assert (stmt);
262                continue;
263            }
264            stmt = stmt->getNextNode();
265        }
266        if (scope.empty()) {
267            break;
268        }
269        stmt = scope.top();
270        scope.pop();
271    }
272    return distribution;
273}
274#endif
275
276void pablo_function_passes(PabloFunction * function) {   
277    // Scan through the pablo code and perform DCE and CSE
278
279#ifdef PRINT_TIMING_INFORMATION
280    timestamp_t simplification_start = 0, simplification_end = 0;
281    timestamp_t coalescing_start = 0, coalescing_end = 0;
282    timestamp_t sinking_start = 0, sinking_end = 0;
283    timestamp_t pre_distribution_start = 0, pre_distribution_end = 0;
284    timestamp_t multiplexing_start = 0, multiplexing_end = 0;
285    timestamp_t post_distribution_start = 0, post_distribution_end = 0;
286    timestamp_t lowering_start = 0, lowering_end = 0;
287    timestamp_t scheduling_start = 0, scheduling_end = 0;
288    DistributionMap distribution;
289    const timestamp_t optimization_start = read_cycle_counter();
290#endif
291    if (!DisableSimplification) {
292        READ_CYCLE_COUNTER(simplification_start);
293        Simplifier::optimize(*function);
294        READ_CYCLE_COUNTER(simplification_end);
295    }
296#ifdef ENABLE_MULTIPLEXING
297    if (EnableLowering || EnablePreDistribution || EnablePostDistribution) {
298        READ_CYCLE_COUNTER(coalescing_start);
299        CanonicalizeDFG::transform(*function);
300        READ_CYCLE_COUNTER(coalescing_end);
301    }
302    if (EnablePreDistribution) {
303        READ_CYCLE_COUNTER(pre_distribution_start);
304        DistributivePass::optimize(*function);
305        READ_CYCLE_COUNTER(pre_distribution_end);
306    }
307    if (EnableMultiplexing) {
308        READ_CYCLE_COUNTER(multiplexing_start);
309        MultiplexingPass::optimize(*function, MultiplexingSetLimit, MultiplexingSelectionLimit, MultiplexingWindowSize);
310        READ_CYCLE_COUNTER(multiplexing_end);
311        if (EnableLowering || EnablePreDistribution || EnablePostDistribution) {
312            CanonicalizeDFG::transform(*function);
313        }
314    }
315    if (EnablePostDistribution) {
316        READ_CYCLE_COUNTER(post_distribution_start);
317        DistributivePass::optimize(*function);
318        READ_CYCLE_COUNTER(post_distribution_end);
319    }
320#endif
321    if (PabloSinkingPass) {
322        READ_CYCLE_COUNTER(sinking_start);
323        CodeMotionPass::optimize(*function);
324        READ_CYCLE_COUNTER(sinking_end);
325    }
326#ifdef ENABLE_MULTIPLEXING
327    if (PrintUnloweredCode) {
328        //Print to the terminal the AST that was generated by the pararallel bit-stream compiler.
329        llvm::raw_os_ostream cerr(std::cerr);
330        cerr << "Unlowered Pablo AST:\n";
331        PabloPrinter::print(*function, cerr);
332    }   
333    #ifdef PRINT_TIMING_INFORMATION
334    distribution = SUMMARIZE_VARIADIC_DISTRIBUTION(function);
335    #endif
336    if (EnableLowering || EnablePreDistribution || EnablePostDistribution) {
337        READ_CYCLE_COUNTER(lowering_start);
338        FactorizeDFG::transform(*function);
339        READ_CYCLE_COUNTER(lowering_end);
340    }
341    if (EnablePrePassScheduling) {
342        READ_CYCLE_COUNTER(scheduling_start);
343        SchedulingPrePass::optimize(*function);
344        READ_CYCLE_COUNTER(scheduling_end);
345    }
346#endif
347#ifdef PRINT_TIMING_INFORMATION
348    const timestamp_t optimization_end = read_cycle_counter();
349#endif
350    if (PrintOptimizedREcode) {
351        if (PabloOutputFilename.empty()) {
352            //Print to the terminal the AST that was generated by the pararallel bit-stream compiler.
353            llvm::raw_os_ostream cerr(std::cerr);
354            cerr << "Final Pablo AST:\n";
355            PabloPrinter::print(*function, cerr);
356        } else {
357            std::error_code error;
358            llvm::raw_fd_ostream out(PabloOutputFilename, error, sys::fs::OpenFlags::F_None);
359            PabloPrinter::print(*function, out);
360        }
361    }
362#ifdef PRINT_TIMING_INFORMATION
363    std::cerr << "PABLO OPTIMIZATION TIME: " << (optimization_end - optimization_start) << std::endl;
364    std::cerr << "  SIMPLIFICATION TIME: " << (simplification_end - simplification_start) << std::endl;
365    std::cerr << "  COALESCING TIME: " << (coalescing_end - coalescing_start) << std::endl;
366    std::cerr << "  SINKING TIME: " << (sinking_end - sinking_start) << std::endl;
367    std::cerr << "  PRE-DISTRIBUTION TIME: " << (pre_distribution_end - pre_distribution_start) << std::endl;
368    std::cerr << "  MULTIPLEXING TIME: " << (multiplexing_end - multiplexing_start) << std::endl;
369    std::cerr << "  MULTIPLEXING SEED: " << MultiplexingPass::SEED << std::endl;
370    std::cerr << "  MULTIPLEXING NODES USED: " << MultiplexingPass::NODES_USED << std::endl;
371    std::cerr << "  MULTIPLEXING NODES ALLOCATED: " << MultiplexingPass::NODES_ALLOCATED << std::endl;
372    std::cerr << "  LOWERING TIME: " << (lowering_end - lowering_start) << std::endl;
373    std::cerr << "  POST-DISTRIBUTION TIME: " << (post_distribution_end - post_distribution_start) << std::endl;
374    std::cerr << "  SCHEDULING TIME: " << (scheduling_end - scheduling_start) << std::endl;
375    std::cerr << "PABLO STATEMENTS: " << COUNT_STATEMENTS(function) << std::endl;
376    std::cerr << "PABLO ADVANCES: " << COUNT_ADVANCES(function) << std::endl;
377    std::cerr << "PRE-LOWERING VARIADIC DISTRIBUTION: ";
378    bool join = false;
379    for (auto dist : distribution) {
380        if (join) {
381            std::cerr << ';';
382        }
383        std::cerr << dist.first << '|' << dist.second;
384        join = true;
385    }
386    std::cerr << std::endl;
387#endif
388}
389
390// Dynamic AVX2 confirmation
391#if (BLOCK_SIZE == 256)
392#define ISPC_LLVM_VERSION ISPC_LLVM_3_6
393#include "ispc.cpp"
394#endif
395
396
397IDISA::IDISA_Builder * GetNativeIDISA_Builder(Module * mod, Type * bitBlockType) {
398
399#if (BLOCK_SIZE == 256)
400    if ((strncmp(lGetSystemISA(), "avx2", 4) == 0)) {
401        return new IDISA::IDISA_AVX2_Builder(mod, bitBlockType);
402        //std::cerr << "IDISA_AVX2_Builder selected\n";
403    }
404    else{
405        return new IDISA::IDISA_SSE2_Builder(mod, bitBlockType);
406        //std::cerr << "Generic IDISA_Builder selected\n";
407    }
408#else   
409    return new IDISA::IDISA_SSE2_Builder(mod, bitBlockType);
410#endif
411}
412
413
414
415ExecutionEngine * JIT_to_ExecutionEngine (Module * m) {
416
417    InitializeNativeTarget();
418    InitializeNativeTargetAsmPrinter();
419    InitializeNativeTargetAsmParser();
420
421    std::string errMessage;
422    EngineBuilder builder(std::move(std::unique_ptr<Module>(m)));
423    builder.setErrorStr(&errMessage);
424    builder.setMCPU(sys::getHostCPUName());
425    CodeGenOpt::Level optLevel = CodeGenOpt::Level::None;
426    switch (OptLevel) {
427        case '0': optLevel = CodeGenOpt::None; break;
428        case '1': optLevel = CodeGenOpt::Less; break;
429        case '2': optLevel = CodeGenOpt::Default; break;
430        case '3': optLevel = CodeGenOpt::Aggressive; break;
431        default: errs() << OptLevel << " is an invalid optimization level.\n";
432    }
433    builder.setOptLevel(optLevel);
434
435#if (BLOCK_SIZE == 256)
436    if (!DisableAVX2 && (strncmp(lGetSystemISA(), "avx2", 4) == 0)) {
437            std::vector<std::string> attrs;
438            attrs.push_back("avx2");
439            builder.setMAttrs(attrs);
440    //std::cerr << "+avx2 set" << std::endl;
441    }
442#endif
443    //builder.setOptLevel(mMaxWhileDepth ? CodeGenOpt::Level::Less : CodeGenOpt::Level::None);
444    ExecutionEngine * engine = builder.create();
445    if (engine == nullptr) {
446        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
447    }
448
449    return engine;
450}
451
452extern "C" {
453    void wrapped_report_match(uint64_t recordNum, uint64_t recordStart, uint64_t recordEnd) {
454        printf("line %lu: (%lu, %lu)\n", recordNum, recordStart, recordEnd);
455    }
456}
457
458
459extern "C" {
460  void wrapped_print_register(char * regName, BitBlock bit_block) {
461      print_register<BitBlock>(regName, bit_block);
462  }
463}
464
465void icgrep_Linking(Module * m, ExecutionEngine * e) {
466    Module::FunctionListType & fns = m->getFunctionList();
467    for (Module::FunctionListType::iterator it = fns.begin(), it_end = fns.end(); it != it_end; ++it) {
468        std::string fnName = it->getName().str();
469        if (fnName == "s2p_block") continue;
470        if (fnName == "process_block") continue;
471        if (fnName == "process_block_initialize_carries") continue;
472       
473        if (fnName == "wrapped_print_register") {
474            e->addGlobalMapping(cast<GlobalValue>(it), (void *)&wrapped_print_register);
475        }
476        if (fnName == "wrapped_report_match") {
477            e->addGlobalMapping(cast<GlobalValue>(it), (void *)&wrapped_report_match);
478        }
479#ifndef DISABLE_PREGENERATED_UCD_FUNCTIONS
480        else {
481            const UCD::ExternalProperty & ep = UCD::resolveExternalProperty(fnName);
482            e->addGlobalMapping(cast<GlobalValue>(it), std::get<0>(ep));
483        }
484#endif
485    }
486}
487
Note: See TracBrowser for help on using the repository browser.