Ignore:
Timestamp:
Aug 19, 2015, 12:15:30 PM (4 years ago)
Author:
cameron
Message:

Reorganize icgrep into RE/Pablo/IR passes, support -precompiled IR

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/icgrep.cpp

    r4732 r4734  
    1111#include "icgrep.h"
    1212#include "utf_encoding.h"
    13 #include "compiler.h"
    1413#include "pablo/pablo_compiler.h"
    1514#include <llvm/IR/Function.h>
     
    2221#include <llvm/Support/TargetSelect.h>
    2322#include <llvm/Support/Host.h>
     23
    2424#ifndef DISABLE_PREGENERATED_UCD_FUNCTIONS
    2525#include <UCD/precompiled_properties.h>
    2626#endif
     27#include <re/re_cc.h>
     28#include <re/re_nullable.h>
     29#include <re/re_simplifier.h>
     30#include <re/re_alt.h>
     31#include <re/parsefailure.h>
     32#include <re/re_parser.h>
     33#include <re/re_compiler.h>
     34#include <utf8_encoder.h>
     35#include <cc/cc_compiler.h>
     36#include <cc/cc_namemap.hpp>
     37#include <pablo/pablo_compiler.h>
     38#include <pablo/optimizers/pablo_simplifier.hpp>
     39#include <pablo/optimizers/pablo_codesinking.hpp>
     40#ifdef ENABLE_MULTIPLEXING
     41#include <pablo/optimizers/pablo_automultiplexing.hpp>
     42#endif
     43#include <pablo/function.h>
     44#include <re/printer_re.h>
     45#include <pablo/printer_pablos.h>
    2746
    2847#include "do_grep.h"
     
    4968static cl::list<std::string> regexVector("e", cl::desc("Regular expression"), cl::ZeroOrMore, cl::cat(aRegexSourceOptions));
    5069static cl::opt<std::string> RegexFilename("f", cl::desc("Take regular expressions (one per line) from a file"), cl::value_desc("regex file"), cl::init(""), cl::cat(aRegexSourceOptions));
     70static cl::opt<std::string> IRFileName("precompiled", cl::desc("Use precompiled regular expression"), cl::value_desc("LLVM IR file"), cl::init(""), cl::cat(aRegexSourceOptions));
     71
     72static cl::OptionCategory cRegexOutputOptions("Regex Dump Options",
     73                                      "These options control printing of intermediate regular expression structures.");
     74
     75static cl::OptionCategory dPabloDumpOptions("Pablo Dump Options",
     76                                      "These options control printing of intermediate Pablo code.");
     77
     78static cl::opt<bool> PrintAllREs("print-REs", cl::init(false), cl::desc("print regular expression passes"), cl::cat(cRegexOutputOptions));
     79static cl::opt<bool> PrintParsedREs("print-parsed-REs", cl::init(false), cl::desc("print out parsed regular expressions"), cl::cat(cRegexOutputOptions));
     80static cl::opt<bool> PrintStrippedREs("print-stripped-REs", cl::init(false), cl::desc("print out REs with nullable prefixes/suffixes removed"), cl::cat(cRegexOutputOptions));
     81static cl::opt<bool> PrintNamedREs("print-named-REs", cl::init(false), cl::desc("print out named REs"), cl::cat(cRegexOutputOptions));
     82static cl::opt<bool> PrintUTF8REs("print-utf8-REs", cl::init(false), cl::desc("print out UTF-8 REs"), cl::cat(cRegexOutputOptions));
     83static cl::opt<bool> PrintSimplifiedREs("print-simplified-REs", cl::init(false), cl::desc("print out final simplified REs"), cl::cat(cRegexOutputOptions));
     84static cl::opt<bool> PrintCompiledCCcode("print-CC-pablo", cl::init(false), cl::desc("print Pablo output from character class compiler"), cl::cat(dPabloDumpOptions));
     85static cl::opt<bool> PrintCompiledREcode("print-RE-pablo", cl::init(false), cl::desc("print Pablo output from the regular expression compiler"), cl::cat(dPabloDumpOptions));
     86static cl::opt<bool> PrintOptimizedREcode("print-pablo", cl::init(false), cl::desc("print final optimized Pablo code"), cl::cat(dPabloDumpOptions));
     87
     88static cl::OptionCategory cPabloOptimizationsOptions("Pablo Optimizations", "These options control Pablo optimization passes.");
     89
     90static cl::opt<bool> DisablePabloCSE("disable-CSE", cl::init(false),
     91                                      cl::desc("Disable Pablo common subexpression elimination/dead code elimination"),
     92                                      cl::cat(cPabloOptimizationsOptions));
     93static cl::opt<bool> PabloSinkingPass("sinking", cl::init(false),
     94                                      cl::desc("Moves all instructions into the innermost legal If-scope so that they are only executed when needed."),
     95                                      cl::cat(cPabloOptimizationsOptions));
     96
     97#ifdef ENABLE_MULTIPLEXING
     98static cl::opt<bool> EnableMultiplexing("multiplexing", cl::init(false),
     99    cl::desc("combine Advances whose inputs are mutual exclusive into the fewest number of advances possible (expensive)."),
     100    cl::cat(cPabloOptimizationsOptions));
     101#endif
     102
     103static int firstInputFile = 1;  // Normal case when first positional arg is a regex.
     104
     105re::RE * get_icgrep_RE() {
     106 
     107    //std::vector<std::string> regexVector;
     108    if (RegexFilename != "") {
     109        std::ifstream regexFile(RegexFilename.c_str());
     110        std::string r;
     111        if (regexFile.is_open()) {
     112            while (std::getline(regexFile, r)) {
     113                regexVector.push_back(r);
     114            }
     115            regexFile.close();
     116        }
     117    }
     118   
     119    // if there are no regexes specified through -e or -f, the first positional argument
     120    // must be a regex, not an input file.
     121   
     122    if (regexVector.size() == 0) {
     123        regexVector.push_back(inputFiles[0]);
     124        firstInputFile = 1;
     125    }
     126    else {
     127        firstInputFile = 0;
     128    }
     129   
     130    re::ModeFlagSet globalFlags = 0;
     131    if (CaseInsensitive) globalFlags |= re::CASE_INSENSITIVE_MODE_FLAG;
     132
     133 
     134    std::vector<re::RE *> REs;
     135    re::RE * re_ast = nullptr;
     136    for (int i = 0; i < regexVector.size(); i++) {
     137        try
     138        {
     139            re_ast = re::RE_Parser::parse(regexVector[i], globalFlags);
     140        }
     141        catch (ParseFailure failure)
     142        {
     143            std::cerr << "Regex parsing failure: " << failure.what() << std::endl;
     144            std::cerr << regexVector[i] << std::endl;
     145            exit(1);
     146        }
     147        REs.push_back(re_ast);
     148    }
     149    if (REs.size() > 1) {
     150        re_ast = re::makeAlt(REs.begin(), REs.end());
     151    }
     152   
     153    if (PrintAllREs || PrintParsedREs) {
     154        std::cerr << "Parser:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
     155    }
     156    return re_ast;
     157}
     158
     159re::RE * regular_expression_passes(const Encoding encoding, re::RE * re_ast)  {
     160    //Optimization passes to simplify the AST.
     161    re_ast = re::RE_Nullable::removeNullablePrefix(re_ast);
     162    if (PrintAllREs || PrintStrippedREs) {
     163        std::cerr << "RemoveNullablePrefix:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
     164    }
     165    re_ast = re::RE_Nullable::removeNullableSuffix(re_ast);
     166    if (PrintAllREs || PrintStrippedREs) {
     167        std::cerr << "RemoveNullableSuffix:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
     168    }
     169   
     170    cc::CC_NameMap nameMap;
     171    re_ast = nameMap.process(re_ast, re::UnicodeClass);
     172   
     173    // std::cerr << "-----------------------------" << std::endl;
     174   
     175    if (PrintAllREs || PrintNamedREs) {
     176        std::cerr << "Namer:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
     177        std::cerr << "NameMap:\n" << nameMap.printMap() << std::endl;
     178    }
     179   
     180    //Add the UTF encoding.
     181    if (encoding.getType() == Encoding::Type::UTF_8) {
     182        re_ast = cc::UTF8_Encoder::toUTF8(nameMap, re_ast);
     183        if (PrintAllREs || PrintUTF8REs) {
     184            //Print to the terminal the AST that was generated by the utf8 encoder.
     185            std::cerr << "UTF8-encoder:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
     186            std::cerr << "NameMap:\n" << nameMap.printMap() << std::endl;
     187        }
     188    }
     189   
     190    re_ast = re::RE_Simplifier::simplify(re_ast);
     191    if (PrintAllREs || PrintSimplifiedREs) {
     192        //Print to the terminal the AST that was generated by the simplifier.
     193        std::cerr << "Simplifier:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
     194    }
     195    return re_ast;
     196}
     197   
     198pablo::PabloFunction * re2pablo_compiler(const Encoding encoding, re::RE * re_ast) {
     199   
     200    pablo::PabloFunction * function = pablo::PabloFunction::Create("process_block", 8, 2);
     201   
     202    cc::CC_Compiler cc_compiler(*function, encoding);
     203   
     204    cc_compiler.compileByteClasses(re_ast);
     205   
     206    if (PrintCompiledCCcode) {
     207        //Print to the terminal the AST that was generated by the character class compiler.
     208        llvm::raw_os_ostream cerr(std::cerr);
     209        cerr << "CC AST:" << "\n";
     210        PabloPrinter::print(function->getEntryBlock().statements(), cerr);
     211    }
     212   
     213    re::RE_Compiler re_compiler(*function, cc_compiler);
     214    re_compiler.initializeRequiredStreams();
     215    re_compiler.finalizeMatchResult(re_compiler.compile(re_ast));
     216   
     217    if (PrintCompiledREcode) {
     218        //Print to the terminal the AST that was generated by the pararallel bit-stream compiler.
     219        llvm::raw_os_ostream cerr(std::cerr);
     220        cerr << "Initial Pablo AST:\n";
     221        PabloPrinter::print(function->getEntryBlock().statements(), cerr);
     222    }
     223    return function;
     224}
     225
     226void pablo_function_passes(pablo::PabloFunction * function) {
     227    // Scan through the pablo code and perform DCE and CSE
     228    if (!DisablePabloCSE) {
     229        pablo::Simplifier::optimize(*function);
     230    }
     231    if (PabloSinkingPass) {
     232        pablo::CodeSinking::optimize(*function);
     233    }
     234#ifdef ENABLE_MULTIPLEXING
     235    if (EnableMultiplexing) {
     236        pablo::AutoMultiplexing::optimize(*function);
     237    }
     238#endif
     239    if (PrintOptimizedREcode) {
     240        //Print to the terminal the AST that was generated by the pararallel bit-stream compiler.
     241        llvm::raw_os_ostream cerr(std::cerr);
     242        cerr << "Final Pablo AST:\n";
     243        PabloPrinter::print(function->getEntryBlock().statements(), cerr);
     244    }
     245}
    51246
    52247
     
    123318    Map["verify-regalloc"]->setHiddenFlag(cl::Hidden);
    124319    Map["verify-scev"]->setHiddenFlag(cl::Hidden);
    125 #ifdef USE_LLVM_3_5
    126     Map["spiller"]->setHiddenFlag(cl::Hidden);
    127     Map["fatal-assembler-warnings"]->setHiddenFlag(cl::Hidden);
    128 #else
    129320    Map["x86-recip-refinement-steps"]->setHiddenFlag(cl::Hidden);
    130321    Map["rewrite-map-file"]->setHiddenFlag(cl::Hidden);
    131322
    132 #endif
    133323    cl::ParseCommandLineOptions(argc, argv);
    134324   
    135    
    136     int firstInputFile = 1;  // Normal case when first positional arg is a regex.
    137 
    138325    Encoding encoding(Encoding::Type::UTF_8, 8);
    139326
    140    
    141    
    142    
    143     //std::vector<std::string> regexVector;
    144     if (RegexFilename != "") {
    145         std::ifstream regexFile(RegexFilename.c_str());
    146         std::string r;
    147         if (regexFile.is_open()) {
    148             while (std::getline(regexFile, r)) {
    149                 regexVector.push_back(r);
    150             }
    151             regexFile.close();
    152         }
    153     }
    154    
    155     // if there are no regexes specified through -e or -f, the first positional argument
    156     // must be a regex, not an input file.
    157    
    158     if (regexVector.size() == 0) {
    159         regexVector.push_back(inputFiles[0]);
    160         firstInputFile = 1;
     327    llvm::Function * icgrep_IR = nullptr;
     328   
     329    if (IRFileName == "") {       
     330        re::RE * re_ast = get_icgrep_RE();
     331        re_ast = regular_expression_passes(encoding, re_ast);
     332       
     333        pablo::PabloFunction * function = re2pablo_compiler(encoding, re_ast);
     334
     335        pablo_function_passes(function);
     336        pablo::PabloCompiler pablo_compiler;
     337        try {
     338            icgrep_IR = pablo_compiler.compile(function);
     339            releaseSlabAllocatorMemory();
     340        }
     341        catch (std::runtime_error e) {
     342            releaseSlabAllocatorMemory();
     343            std::cerr << "Runtime error: " << e.what() << std::endl;
     344            exit(1);
     345        }
    161346    }
    162347    else {
    163         firstInputFile = 0;
    164     }
    165    
    166     re::ModeFlagSet globalFlags = 0;
    167     if (CaseInsensitive) globalFlags |= re::CASE_INSENSITIVE_MODE_FLAG;
    168    
    169     llvm::Function * icgrep_IR = icgrep::compile(encoding, regexVector, globalFlags);
     348        firstInputFile = 0;  // No regexp arguments; first positional argument is a file to process.
     349        SMDiagnostic ParseErr;
     350        Module * M = parseIRFile(IRFileName, ParseErr, getGlobalContext()).release();
     351        if (!M) {
     352            throw std::runtime_error("Error in Parsing IR File " + IRFileName);
     353        }
     354        icgrep_IR = M->getFunction("process_block");
     355    }
    170356   
    171357    llvm::ExecutionEngine * engine = JIT_to_ExecutionEngine(icgrep_IR);
Note: See TracChangeset for help on using the changeset viewer.