Changeset 4734


Ignore:
Timestamp:
Aug 19, 2015, 12:15:30 PM (4 years ago)
Author:
cameron
Message:

Reorganize icgrep into RE/Pablo/IR passes, support -precompiled IR

Location:
icGREP/icgrep-devel/icgrep
Files:
2 deleted
5 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r4725 r4734  
    4646# Let's suppose we want to build a JIT compiler with support for
    4747# binary code (no interpreter):
    48 llvm_map_components_to_libnames(REQ_LLVM_LIBRARIES mcjit native)
     48llvm_map_components_to_libnames(REQ_LLVM_LIBRARIES mcjit native IRReader)
    4949
    5050message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
     
    111111ENDIF()
    112112
    113 add_executable(icgrep icgrep.cpp do_grep.cpp compiler.cpp ${PRECOMPILED_FILES})
     113add_executable(icgrep icgrep.cpp do_grep.cpp ${PRECOMPILED_FILES})
    114114IF(NOT DISABLE_PREGENERATED_UCD_FUNCTIONS)
    115115add_dependencies(icgrep run_generate_predefined_ucd_functions)
    116116ENDIF()
    117 target_link_libraries (icgrep UCDlib PabloADT RegExpCompiler CCADT ${REQ_LLVM_LIBRARIES})
    118 
    119117IF(Boost_FOUND)
    120118    include_directories("${Boost_INCLUDE_DIRS}")
     
    122120    SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_BOOST")
    123121ENDIF()
     122
     123target_link_libraries (icgrep UCDlib PabloADT RegExpCompiler CCADT ${REQ_LLVM_LIBRARIES})
     124
     125
    124126
    125127IF (ENABLE_MULTIPLEXING)
  • icGREP/icgrep-devel/icgrep/icgrep.cpp

    r4732 r4734  
    1111#include "icgrep.h"
    1212#include "utf_encoding.h"
    13 #include "compiler.h"
    1413#include "pablo/pablo_compiler.h"
    1514#include <llvm/IR/Function.h>
     
    2221#include <llvm/Support/TargetSelect.h>
    2322#include <llvm/Support/Host.h>
     23
    2424#ifndef DISABLE_PREGENERATED_UCD_FUNCTIONS
    2525#include <UCD/precompiled_properties.h>
    2626#endif
     27#include <re/re_cc.h>
     28#include <re/re_nullable.h>
     29#include <re/re_simplifier.h>
     30#include <re/re_alt.h>
     31#include <re/parsefailure.h>
     32#include <re/re_parser.h>
     33#include <re/re_compiler.h>
     34#include <utf8_encoder.h>
     35#include <cc/cc_compiler.h>
     36#include <cc/cc_namemap.hpp>
     37#include <pablo/pablo_compiler.h>
     38#include <pablo/optimizers/pablo_simplifier.hpp>
     39#include <pablo/optimizers/pablo_codesinking.hpp>
     40#ifdef ENABLE_MULTIPLEXING
     41#include <pablo/optimizers/pablo_automultiplexing.hpp>
     42#endif
     43#include <pablo/function.h>
     44#include <re/printer_re.h>
     45#include <pablo/printer_pablos.h>
    2746
    2847#include "do_grep.h"
     
    4968static cl::list<std::string> regexVector("e", cl::desc("Regular expression"), cl::ZeroOrMore, cl::cat(aRegexSourceOptions));
    5069static cl::opt<std::string> RegexFilename("f", cl::desc("Take regular expressions (one per line) from a file"), cl::value_desc("regex file"), cl::init(""), cl::cat(aRegexSourceOptions));
     70static cl::opt<std::string> IRFileName("precompiled", cl::desc("Use precompiled regular expression"), cl::value_desc("LLVM IR file"), cl::init(""), cl::cat(aRegexSourceOptions));
     71
     72static cl::OptionCategory cRegexOutputOptions("Regex Dump Options",
     73                                      "These options control printing of intermediate regular expression structures.");
     74
     75static cl::OptionCategory dPabloDumpOptions("Pablo Dump Options",
     76                                      "These options control printing of intermediate Pablo code.");
     77
     78static cl::opt<bool> PrintAllREs("print-REs", cl::init(false), cl::desc("print regular expression passes"), cl::cat(cRegexOutputOptions));
     79static cl::opt<bool> PrintParsedREs("print-parsed-REs", cl::init(false), cl::desc("print out parsed regular expressions"), cl::cat(cRegexOutputOptions));
     80static cl::opt<bool> PrintStrippedREs("print-stripped-REs", cl::init(false), cl::desc("print out REs with nullable prefixes/suffixes removed"), cl::cat(cRegexOutputOptions));
     81static cl::opt<bool> PrintNamedREs("print-named-REs", cl::init(false), cl::desc("print out named REs"), cl::cat(cRegexOutputOptions));
     82static cl::opt<bool> PrintUTF8REs("print-utf8-REs", cl::init(false), cl::desc("print out UTF-8 REs"), cl::cat(cRegexOutputOptions));
     83static cl::opt<bool> PrintSimplifiedREs("print-simplified-REs", cl::init(false), cl::desc("print out final simplified REs"), cl::cat(cRegexOutputOptions));
     84static cl::opt<bool> PrintCompiledCCcode("print-CC-pablo", cl::init(false), cl::desc("print Pablo output from character class compiler"), cl::cat(dPabloDumpOptions));
     85static cl::opt<bool> PrintCompiledREcode("print-RE-pablo", cl::init(false), cl::desc("print Pablo output from the regular expression compiler"), cl::cat(dPabloDumpOptions));
     86static cl::opt<bool> PrintOptimizedREcode("print-pablo", cl::init(false), cl::desc("print final optimized Pablo code"), cl::cat(dPabloDumpOptions));
     87
     88static cl::OptionCategory cPabloOptimizationsOptions("Pablo Optimizations", "These options control Pablo optimization passes.");
     89
     90static cl::opt<bool> DisablePabloCSE("disable-CSE", cl::init(false),
     91                                      cl::desc("Disable Pablo common subexpression elimination/dead code elimination"),
     92                                      cl::cat(cPabloOptimizationsOptions));
     93static cl::opt<bool> PabloSinkingPass("sinking", cl::init(false),
     94                                      cl::desc("Moves all instructions into the innermost legal If-scope so that they are only executed when needed."),
     95                                      cl::cat(cPabloOptimizationsOptions));
     96
     97#ifdef ENABLE_MULTIPLEXING
     98static cl::opt<bool> EnableMultiplexing("multiplexing", cl::init(false),
     99    cl::desc("combine Advances whose inputs are mutual exclusive into the fewest number of advances possible (expensive)."),
     100    cl::cat(cPabloOptimizationsOptions));
     101#endif
     102
     103static int firstInputFile = 1;  // Normal case when first positional arg is a regex.
     104
     105re::RE * get_icgrep_RE() {
     106 
     107    //std::vector<std::string> regexVector;
     108    if (RegexFilename != "") {
     109        std::ifstream regexFile(RegexFilename.c_str());
     110        std::string r;
     111        if (regexFile.is_open()) {
     112            while (std::getline(regexFile, r)) {
     113                regexVector.push_back(r);
     114            }
     115            regexFile.close();
     116        }
     117    }
     118   
     119    // if there are no regexes specified through -e or -f, the first positional argument
     120    // must be a regex, not an input file.
     121   
     122    if (regexVector.size() == 0) {
     123        regexVector.push_back(inputFiles[0]);
     124        firstInputFile = 1;
     125    }
     126    else {
     127        firstInputFile = 0;
     128    }
     129   
     130    re::ModeFlagSet globalFlags = 0;
     131    if (CaseInsensitive) globalFlags |= re::CASE_INSENSITIVE_MODE_FLAG;
     132
     133 
     134    std::vector<re::RE *> REs;
     135    re::RE * re_ast = nullptr;
     136    for (int i = 0; i < regexVector.size(); i++) {
     137        try
     138        {
     139            re_ast = re::RE_Parser::parse(regexVector[i], globalFlags);
     140        }
     141        catch (ParseFailure failure)
     142        {
     143            std::cerr << "Regex parsing failure: " << failure.what() << std::endl;
     144            std::cerr << regexVector[i] << std::endl;
     145            exit(1);
     146        }
     147        REs.push_back(re_ast);
     148    }
     149    if (REs.size() > 1) {
     150        re_ast = re::makeAlt(REs.begin(), REs.end());
     151    }
     152   
     153    if (PrintAllREs || PrintParsedREs) {
     154        std::cerr << "Parser:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
     155    }
     156    return re_ast;
     157}
     158
     159re::RE * regular_expression_passes(const Encoding encoding, re::RE * re_ast)  {
     160    //Optimization passes to simplify the AST.
     161    re_ast = re::RE_Nullable::removeNullablePrefix(re_ast);
     162    if (PrintAllREs || PrintStrippedREs) {
     163        std::cerr << "RemoveNullablePrefix:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
     164    }
     165    re_ast = re::RE_Nullable::removeNullableSuffix(re_ast);
     166    if (PrintAllREs || PrintStrippedREs) {
     167        std::cerr << "RemoveNullableSuffix:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
     168    }
     169   
     170    cc::CC_NameMap nameMap;
     171    re_ast = nameMap.process(re_ast, re::UnicodeClass);
     172   
     173    // std::cerr << "-----------------------------" << std::endl;
     174   
     175    if (PrintAllREs || PrintNamedREs) {
     176        std::cerr << "Namer:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
     177        std::cerr << "NameMap:\n" << nameMap.printMap() << std::endl;
     178    }
     179   
     180    //Add the UTF encoding.
     181    if (encoding.getType() == Encoding::Type::UTF_8) {
     182        re_ast = cc::UTF8_Encoder::toUTF8(nameMap, re_ast);
     183        if (PrintAllREs || PrintUTF8REs) {
     184            //Print to the terminal the AST that was generated by the utf8 encoder.
     185            std::cerr << "UTF8-encoder:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
     186            std::cerr << "NameMap:\n" << nameMap.printMap() << std::endl;
     187        }
     188    }
     189   
     190    re_ast = re::RE_Simplifier::simplify(re_ast);
     191    if (PrintAllREs || PrintSimplifiedREs) {
     192        //Print to the terminal the AST that was generated by the simplifier.
     193        std::cerr << "Simplifier:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
     194    }
     195    return re_ast;
     196}
     197   
     198pablo::PabloFunction * re2pablo_compiler(const Encoding encoding, re::RE * re_ast) {
     199   
     200    pablo::PabloFunction * function = pablo::PabloFunction::Create("process_block", 8, 2);
     201   
     202    cc::CC_Compiler cc_compiler(*function, encoding);
     203   
     204    cc_compiler.compileByteClasses(re_ast);
     205   
     206    if (PrintCompiledCCcode) {
     207        //Print to the terminal the AST that was generated by the character class compiler.
     208        llvm::raw_os_ostream cerr(std::cerr);
     209        cerr << "CC AST:" << "\n";
     210        PabloPrinter::print(function->getEntryBlock().statements(), cerr);
     211    }
     212   
     213    re::RE_Compiler re_compiler(*function, cc_compiler);
     214    re_compiler.initializeRequiredStreams();
     215    re_compiler.finalizeMatchResult(re_compiler.compile(re_ast));
     216   
     217    if (PrintCompiledREcode) {
     218        //Print to the terminal the AST that was generated by the parallel bit-stream compiler.
     219        llvm::raw_os_ostream cerr(std::cerr);
     220        cerr << "Initial Pablo AST:\n";
     221        PabloPrinter::print(function->getEntryBlock().statements(), cerr);
     222    }
     223    return function;
     224}
     225
     226void pablo_function_passes(pablo::PabloFunction * function) {
     227    // Scan through the pablo code and perform DCE and CSE
     228    if (!DisablePabloCSE) {
     229        pablo::Simplifier::optimize(*function);
     230    }
     231    if (PabloSinkingPass) {
     232        pablo::CodeSinking::optimize(*function);
     233    }
     234#ifdef ENABLE_MULTIPLEXING
     235    if (EnableMultiplexing) {
     236        pablo::AutoMultiplexing::optimize(*function);
     237    }
     238#endif
     239    if (PrintOptimizedREcode) {
     240        //Print to the terminal the AST that was generated by the parallel bit-stream compiler.
     241        llvm::raw_os_ostream cerr(std::cerr);
     242        cerr << "Final Pablo AST:\n";
     243        PabloPrinter::print(function->getEntryBlock().statements(), cerr);
     244    }
     245}
    51246
    52247
     
    123318    Map["verify-regalloc"]->setHiddenFlag(cl::Hidden);
    124319    Map["verify-scev"]->setHiddenFlag(cl::Hidden);
    125 #ifdef USE_LLVM_3_5
    126     Map["spiller"]->setHiddenFlag(cl::Hidden);
    127     Map["fatal-assembler-warnings"]->setHiddenFlag(cl::Hidden);
    128 #else
    129320    Map["x86-recip-refinement-steps"]->setHiddenFlag(cl::Hidden);
    130321    Map["rewrite-map-file"]->setHiddenFlag(cl::Hidden);
    131322
    132 #endif
    133323    cl::ParseCommandLineOptions(argc, argv);
    134324   
    135    
    136     int firstInputFile = 1;  // Normal case when first positional arg is a regex.
    137 
    138325    Encoding encoding(Encoding::Type::UTF_8, 8);
    139326
    140    
    141    
    142    
    143     //std::vector<std::string> regexVector;
    144     if (RegexFilename != "") {
    145         std::ifstream regexFile(RegexFilename.c_str());
    146         std::string r;
    147         if (regexFile.is_open()) {
    148             while (std::getline(regexFile, r)) {
    149                 regexVector.push_back(r);
    150             }
    151             regexFile.close();
    152         }
    153     }
    154    
    155     // if there are no regexes specified through -e or -f, the first positional argument
    156     // must be a regex, not an input file.
    157    
    158     if (regexVector.size() == 0) {
    159         regexVector.push_back(inputFiles[0]);
    160         firstInputFile = 1;
     327    llvm::Function * icgrep_IR = nullptr;
     328   
     329    if (IRFileName == "") {       
     330        re::RE * re_ast = get_icgrep_RE();
     331        re_ast = regular_expression_passes(encoding, re_ast);
     332       
     333        pablo::PabloFunction * function = re2pablo_compiler(encoding, re_ast);
     334
     335        pablo_function_passes(function);
     336        pablo::PabloCompiler pablo_compiler;
     337        try {
     338            icgrep_IR = pablo_compiler.compile(function);
     339            releaseSlabAllocatorMemory();
     340        }
     341        catch (std::runtime_error e) {
     342            releaseSlabAllocatorMemory();
     343            std::cerr << "Runtime error: " << e.what() << std::endl;
     344            exit(1);
     345        }
    161346    }
    162347    else {
    163         firstInputFile = 0;
    164     }
    165    
    166     re::ModeFlagSet globalFlags = 0;
    167     if (CaseInsensitive) globalFlags |= re::CASE_INSENSITIVE_MODE_FLAG;
    168    
    169     llvm::Function * icgrep_IR = icgrep::compile(encoding, regexVector, globalFlags);
     348        firstInputFile = 0;  // No regexp arguments; first positional argument is a file to process.
     349        SMDiagnostic ParseErr;
     350        Module * M = parseIRFile(IRFileName, ParseErr, getGlobalContext()).release();
     351        if (!M) {
     352            throw std::runtime_error("Error in Parsing IR File " + IRFileName);
     353        }
     354        icgrep_IR = M->getFunction("process_block");
     355    }
    170356   
    171357    llvm::ExecutionEngine * engine = JIT_to_ExecutionEngine(icgrep_IR);
  • icGREP/icgrep-devel/icgrep/pablo/function.h

    r4726 r4734  
    7777    }
    7878
    79     static PabloFunction Create(std::string name, const unsigned numOfParameters, const unsigned numOfResults);
    80 
     79    static PabloFunction * Create(std::string name, const unsigned numOfParameters, const unsigned numOfResults);
     80   
    8181    virtual bool operator==(const PabloAST & other) const {
    8282        return &other == this;
     
    158158};
    159159
    160 inline PabloFunction PabloFunction::Create(std::string name, const unsigned numOfParameters, const unsigned numOfResults) {
    161     return PabloFunction(std::move(name), numOfParameters, numOfResults);
     160inline PabloFunction * PabloFunction::Create(std::string name, const unsigned numOfParameters, const unsigned numOfResults) {
     161    return new PabloFunction(std::move(name), numOfParameters, numOfResults);
    162162}
    163 
     163   
    164164}
    165165
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp

    r4731 r4734  
    9292}
    9393
    94 llvm::Function * PabloCompiler::compile(PabloFunction & function) {
     94llvm::Function * PabloCompiler::compile(PabloFunction * function) {
    9595    Module * module = new Module("", getGlobalContext());
    9696   
     
    107107}
    108108
    109 llvm::Function * PabloCompiler::compile(PabloFunction & function, Module * module) {
     109llvm::Function * PabloCompiler::compile(PabloFunction * function, Module * module) {
    110110
    111111 
    112     PabloBlock & mainScope = function.getEntryBlock();
     112    PabloBlock & mainScope = function->getEntryBlock();
    113113
    114114    mainScope.enumerateScopes(0);
    115115   
    116     Examine(function);
     116    Examine(*function);
    117117
    118118    mMod = module;
     
    126126    if (DumpTrace) DeclareDebugFunctions();
    127127       
    128     GenerateFunction(function);
     128    GenerateFunction(*function);
    129129   
    130130    mBuilder->SetInsertPoint(BasicBlock::Create(mMod->getContext(), "entry", mFunction,0));
    131131
    132132    //The basis bits structure
    133     for (unsigned i = 0; i != function.getNumOfParameters(); ++i) {
     133    for (unsigned i = 0; i != function->getNumOfParameters(); ++i) {
    134134        Value* indices[] = {mBuilder->getInt64(0), mBuilder->getInt32(i)};
    135135        Value * gep = mBuilder->CreateGEP(mInputAddressPtr, indices);
    136         LoadInst * basisBit = mBuilder->CreateAlignedLoad(gep, BLOCK_SIZE/8, false, function.getParameter(i)->getName()->to_string());
    137         mMarkerMap[function.getParameter(i)] = basisBit;
     136        LoadInst * basisBit = mBuilder->CreateAlignedLoad(gep, BLOCK_SIZE/8, false, function->getParameter(i)->getName()->to_string());
     137        mMarkerMap[function->getParameter(i)] = basisBit;
    138138        if (DumpTrace) {
    139             genPrintRegister(function.getParameter(i)->getName()->to_string(), basisBit);
     139            genPrintRegister(function->getParameter(i)->getName()->to_string(), basisBit);
    140140        }
    141141    }
     
    158158   
    159159    // Write the output values out
    160     for (unsigned i = 0; i != function.getNumOfResults(); ++i) {
     160    for (unsigned i = 0; i != function->getNumOfResults(); ++i) {
    161161        assert (function.getResult(i));
    162         SetOutputValue(mMarkerMap[function.getResult(i)], i);
     162        SetOutputValue(mMarkerMap[function->getResult(i)], i);
    163163    }
    164164
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.h

    r4730 r4734  
    7676    PabloCompiler();
    7777    ~PabloCompiler();
    78     Function * compile(pablo::PabloFunction & function);
    79     Function * compile(pablo::PabloFunction & function, Module *module);
     78    Function * compile(pablo::PabloFunction * function);
     79    Function * compile(pablo::PabloFunction * function, Module *module);
    8080    Module *getModule();
    8181private:
Note: See TracChangeset for help on using the changeset viewer.