source: icGREP/icgrep-devel/icgrep/compiler.cpp @ 4722

Last change on this file since 4722 was 4722, checked in by nmedfort, 4 years ago

Misc. changes and start of dependency chain analysis in ucd generator.

File size: 7.7 KB
RevLine 
[3850]1/*
[4388]2 *  Copyright (c) 2015 International Characters.
[3850]3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
[4237]7#include <compiler.h>
[4337]8#include <re/re_cc.h>
[4237]9#include <re/re_nullable.h>
10#include <re/re_simplifier.h>
[4328]11#include <re/re_alt.h>
[4237]12#include <re/parsefailure.h>
13#include <re/re_parser.h>
14#include <re/re_compiler.h>
15#include <utf8_encoder.h>
16#include <cc/cc_compiler.h>
[4249]17#include <cc/cc_namemap.hpp>
[4237]18#include <pablo/pablo_compiler.h>
[4416]19#include <pablo/optimizers/pablo_simplifier.hpp>
[4521]20#include <pablo/optimizers/pablo_codesinking.hpp>
[4583]21#ifdef ENABLE_MULTIPLEXING
22#include <pablo/optimizers/pablo_automultiplexing.hpp>
23#endif
[4657]24#include <pablo/function.h>
[4227]25#include <re/printer_re.h>
26#include <pablo/printer_pablos.h>
[4626]27#include <iostream>
[4722]28#include <llvm/Support/CommandLine.h>
[4197]29
[4626]30static cl::OptionCategory cRegexOutputOptions("Regex Dump Options",
[4353]31                                      "These options control printing of intermediate regular expression structures.");
[4348]32
[4626]33static cl::OptionCategory dPabloDumpOptions("Pablo Dump Options",
[4353]34                                      "These options control printing of intermediate Pablo code.");
35
36static cl::opt<bool> PrintAllREs("print-REs", cl::init(false), cl::desc("print regular expression passes"), cl::cat(cRegexOutputOptions));
37static cl::opt<bool> PrintParsedREs("print-parsed-REs", cl::init(false), cl::desc("print out parsed regular expressions"), cl::cat(cRegexOutputOptions));
38static cl::opt<bool> PrintStrippedREs("print-stripped-REs", cl::init(false), cl::desc("print out REs with nullable prefixes/suffixes removed"), cl::cat(cRegexOutputOptions));
39static cl::opt<bool> PrintNamedREs("print-named-REs", cl::init(false), cl::desc("print out named REs"), cl::cat(cRegexOutputOptions));
40static cl::opt<bool> PrintUTF8REs("print-utf8-REs", cl::init(false), cl::desc("print out UTF-8 REs"), cl::cat(cRegexOutputOptions));
41static cl::opt<bool> PrintSimplifiedREs("print-simplified-REs", cl::init(false), cl::desc("print out final simplified REs"), cl::cat(cRegexOutputOptions));
42static cl::opt<bool> PrintCompiledCCcode("print-CC-pablo", cl::init(false), cl::desc("print Pablo output from character class compiler"), cl::cat(dPabloDumpOptions));
43static cl::opt<bool> PrintCompiledREcode("print-RE-pablo", cl::init(false), cl::desc("print Pablo output from the regular expression compiler"), cl::cat(dPabloDumpOptions));
[4513]44static cl::opt<bool> PrintOptimizedREcode("print-pablo", cl::init(false), cl::desc("print final optimized Pablo code"), cl::cat(dPabloDumpOptions));
[4353]45
[4722]46static cl::OptionCategory cPabloOptimizationsOptions("Pablo Optimizations", "These options control Pablo optimization passes.");
[4353]47
[4523]48static cl::opt<bool> DisablePabloCSE("disable-CSE", cl::init(false),
49                                      cl::desc("Disable Pablo common subexpression elimination/dead code elimination"),
50                                      cl::cat(cPabloOptimizationsOptions));
[4521]51static cl::opt<bool> PabloSinkingPass("sinking", cl::init(false),
52                                      cl::desc("Moves all instructions into the innermost legal If-scope so that they are only executed when needed."),
53                                      cl::cat(cPabloOptimizationsOptions));
[4722]54
[4588]55#ifdef ENABLE_MULTIPLEXING
[4722]56static cl::opt<bool> EnableMultiplexing("multiplexing", cl::init(false),
57    cl::desc("combine Advances whose inputs are mutual exclusive into the fewest number of advances possible (expensive)."),
58    cl::cat(cPabloOptimizationsOptions));
[4588]59#endif
[4521]60
[4197]61using namespace re;
[4198]62using namespace cc;
[4237]63using namespace pablo;
[4197]64
65namespace icgrep {
66
[4516]67CompiledPabloFunction compile(const Encoding encoding, const std::vector<std::string> regexps, const ModeFlagSet initialFlags) {
[4328]68    std::vector<RE *> REs;
[4197]69    RE * re_ast = nullptr;
[4328]70    for (int i = 0; i < regexps.size(); i++) {
71        try
72        {
[4412]73            re_ast = RE_Parser::parse(regexps[i], initialFlags);
[4328]74        }
75        catch (ParseFailure failure)
76        {
[4431]77            std::cerr << "Regex parsing failure: " << failure.what() << std::endl;
[4328]78            std::cerr << regexps[i] << std::endl;
79            exit(1);
80        }
81        REs.push_back(re_ast);
[4197]82    }
[4328]83    if (REs.size() > 1) {
84        re_ast = makeAlt(REs.begin(), REs.end());
[4197]85    }
[3850]86
[4348]87    if (PrintAllREs || PrintParsedREs) {
[4588]88        std::cerr << "Parser:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
[4348]89    }
[3850]90
[4246]91    //Optimization passes to simplify the AST.
92    re_ast = RE_Nullable::removeNullablePrefix(re_ast);
[4348]93    if (PrintAllREs || PrintStrippedREs) {
[4588]94        std::cerr << "RemoveNullablePrefix:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
[4348]95    }
[4246]96    re_ast = RE_Nullable::removeNullableSuffix(re_ast);
[4348]97    if (PrintAllREs || PrintStrippedREs) {
[4588]98        std::cerr << "RemoveNullableSuffix:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
[4348]99    }
[4626]100
[4249]101    CC_NameMap nameMap;
[4337]102    re_ast = nameMap.process(re_ast, UnicodeClass);
[3850]103
[4660]104    // std::cerr << "-----------------------------" << std::endl;
105
[4348]106    if (PrintAllREs || PrintNamedREs) {
[4588]107        std::cerr << "Namer:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
108        std::cerr << "NameMap:\n" << nameMap.printMap() << std::endl;
[4348]109    }
[4197]110
[4249]111    //Add the UTF encoding.
112    if (encoding.getType() == Encoding::Type::UTF_8) {
113        re_ast = UTF8_Encoder::toUTF8(nameMap, re_ast);
[4348]114        if (PrintAllREs || PrintUTF8REs) {
[4588]115            //Print to the terminal the AST that was generated by the utf8 encoder.
116            std::cerr << "UTF8-encoder:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
117            std::cerr << "NameMap:\n" << nameMap.printMap() << std::endl;
[4348]118        }
[4249]119    }
[4331]120   
[4197]121    re_ast = RE_Simplifier::simplify(re_ast);
[4348]122    if (PrintAllREs || PrintSimplifiedREs) {
123      //Print to the terminal the AST that was generated by the simplifier.
124      std::cerr << "Simplifier:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
125    }
[4197]126
[4680]127    PabloFunction function = PabloFunction::Create("process_block", 8, 2);
[4210]128
[4657]129    CC_Compiler cc_compiler(function, encoding);
[4337]130   
131    cc_compiler.compileByteClasses(re_ast);
132   
[4513]133    if (PrintCompiledCCcode) {
[4348]134      //Print to the terminal the AST that was generated by the character class compiler.
[4567]135      llvm::raw_os_ostream cerr(std::cerr);
136      cerr << "CC AST:" << "\n";
[4657]137      PabloPrinter::print(function.getEntryBlock().statements(), cerr);
[4348]138    }
[4337]139   
[4681]140    RE_Compiler re_compiler(function, cc_compiler);
[4622]141    re_compiler.initializeRequiredStreams();
[4681]142    re_compiler.finalizeMatchResult(re_compiler.compile(re_ast));
[4588]143
[4513]144    if (PrintCompiledREcode) {
[4588]145        //Print to the terminal the AST that was generated by the pararallel bit-stream compiler.
146        llvm::raw_os_ostream cerr(std::cerr);
147        cerr << "Initial Pablo AST:\n";
[4657]148        PabloPrinter::print(function.getEntryBlock().statements(), cerr);
[4348]149    }
[4197]150
[4280]151    // Scan through the pablo code and perform DCE and CSE
[4523]152    if (!DisablePabloCSE) {
[4657]153        Simplifier::optimize(function);
[4523]154    }
[4521]155    if (PabloSinkingPass) {
[4657]156        CodeSinking::optimize(function);
[4521]157    }
[4583]158    #ifdef ENABLE_MULTIPLEXING
[4638]159    if (EnableMultiplexing) {
[4657]160        AutoMultiplexing::optimize(function);
[4588]161    }
[4583]162    #endif
[4513]163    if (PrintOptimizedREcode) {
[4348]164      //Print to the terminal the AST that was generated by the pararallel bit-stream compiler.
[4567]165      llvm::raw_os_ostream cerr(std::cerr);
166      cerr << "Final Pablo AST:\n";
[4657]167      PabloPrinter::print(function.getEntryBlock().statements(), cerr);
[4348]168    }
[4280]169
[4657]170    PabloCompiler pablo_compiler;
[4522]171    try {
[4657]172        CompiledPabloFunction retVal = pablo_compiler.compile(function);
[4526]173        releaseSlabAllocatorMemory();
[4522]174        return retVal;
175    }
[4526]176    catch (std::runtime_error e) {
177        releaseSlabAllocatorMemory();
[4522]178        std::cerr << "Runtime error: " << e.what() << std::endl;
179        exit(1);
180    }
[4197]181}
182
183}
Note: See TracBrowser for help on using the repository browser.