Changeset 4946


Ignore:
Timestamp:
Feb 29, 2016, 3:46:35 PM (3 years ago)
Author:
cameron
Message:

Refactor and encapsulate grep codegen/execution in GrepEngine.

Location:
icGREP/icgrep-devel/icgrep
Files:
5 edited
2 moved

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r4944 r4946  
    120120ENDIF()
    121121
    122 add_executable(icgrep icgrep.cpp toolchain.cpp do_grep.cpp ${PRECOMPILED_FILES})
     122add_executable(icgrep icgrep.cpp toolchain.cpp grep_engine.cpp ${PRECOMPILED_FILES})
    123123IF(ENABLE_PREGENERATED_UCD_FUNCTIONS)
    124124add_dependencies(icgrep run_generate_predefined_ucd_functions)
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r4945 r4946  
    55 */
    66
    7 #include "toolchain.h"
    8 #include "do_grep.h"
     7#include <grep_engine.h>
     8#include <toolchain.h>
     9#include <utf_encoding.h>
     10#include <pablo/pablo_compiler.h>
     11#include <kernels/pipeline.h>
     12#include <llvm/IR/Function.h>
     13#include <llvm/IR/Type.h>
     14#include <llvm/IR/Module.h>
     15#include <llvm/ExecutionEngine/MCJIT.h>
     16#include <llvm/IRReader/IRReader.h>
     17#include <llvm/Support/Debug.h>
     18#include <llvm/IR/Verifier.h>
    919
    1020#include <fstream>
     
    3747#include <fcntl.h>
    3848
    39 
    40 #define BUFFER_SEGMENTS 15
    41 #define BUFFER_SIZE (BUFFER_SEGMENTS * SEGMENT_SIZE)
    42 
    43 //
    44 // Write matched lines from a buffer to an output file, given segment
    45 // scanners for line ends and matches (where matches are a subset of line ends).
    46 // The buffer pointer must point to the first byte of the segment
    47 // corresponding to the scanner indexes.   The first_line_start is the
    48 // start position of the first line relative to the buffer start position.
    49 // It must be zero or negative;  if negative, the buffer must permit negative
    50 // indexing so that the lineup to the buffer start position can also be printed.
    51 // The start position of the final line in the processed segment is returned.
    52 //
     49#include <kernels/kernel.h>
    5350
    5451
    55 bool GrepExecutor::finalLineIsUnterminated() const {
     52
     53bool GrepEngine::finalLineIsUnterminated() const {
    5654    if (mFileSize == 0) return false;
    5755    unsigned char end_byte = static_cast<unsigned char>(mFileBuffer[mFileSize-1]);
     
    6967}
    7068
    71 void GrepExecutor::doGrep(const std::string & fileName) {
     69void GrepEngine::doGrep(const std::string & fileName) {
    7270
    7371    mFileName = fileName;
     
    151149#endif   
    152150}
     151
     152void GrepEngine::grepCodeGen(std::string moduleName, re::RE * re_ast, bool isNameExpression) {
     153                           
     154    Module * M = new Module("moduleName", getGlobalContext());
     155   
     156    IDISA::IDISA_Builder * idb = GetNativeIDISA_Builder(M, VectorType::get(IntegerType::get(getGlobalContext(), 64), BLOCK_SIZE/64));
     157
     158    PipelineBuilder pipelineBuilder(M, idb);
     159
     160    Encoding encoding(Encoding::Type::UTF_8, 8);
     161    re_ast = regular_expression_passes(encoding, re_ast);   
     162    pablo::PabloFunction * function = re2pablo_compiler(encoding, re_ast);
     163
     164    pipelineBuilder.CreateKernels(function, isNameExpression);
     165
     166    pipelineBuilder.ExecuteKernels();
     167
     168    llvm::Function * main_IR = M->getFunction("Main");
     169    mEngine = JIT_to_ExecutionEngine(M);
     170   
     171    icgrep_Linking(M, mEngine);
     172    verifyModule(*M, &dbgs());
     173    mEngine->finalizeObject();
     174    delete idb;
     175
     176    mMainFcn = (main_fcn_T) mEngine->getPointerToFunction(main_IR);
     177}
     178
     179
     180re::CC *  GrepEngine::grepCodepoints(const std::string & UNameFile) {
     181    setParsedCodePointSet();
     182    doGrep(UNameFile);
     183    return getParsedCodePointSet();
     184}
  • icGREP/icgrep-devel/icgrep/grep_engine.h

    r4945 r4946  
    99#include <string>
    1010#include <stdint.h>
    11 #include <re/re_cc.h>
     11#include <re/re_re.h>
     12#include <llvm/ExecutionEngine/ExecutionEngine.h>
    1213
    1314
     
    1617namespace llvm { class raw_ostream; }
    1718
    18 class GrepExecutor {
     19class GrepEngine {
    1920public:
    2021
    21     GrepExecutor(void * main_fnptr)
    22     : mMainFcn(reinterpret_cast<main_fcn_T>(main_fnptr)) {
    23        
     22    GrepEngine() {};
     23 
     24    void grepCodeGen(std::string moduleName, re::RE * re_ast, bool isNameExpression = false);
     25   
     26    void doGrep(const std::string & fileName);
     27   
     28    re::CC *  grepCodepoints(const std::string & UNameFile);
     29
     30    ~GrepEngine() {
     31      delete mEngine;
    2432    }
    25  
    26     void doGrep(const std::string & fileName);
     33   
    2734private:
    2835   
     
    3441    size_t mFileSize;
    3542    char * mFileBuffer;
     43    llvm::ExecutionEngine * mEngine;
    3644};
    3745
  • icGREP/icgrep-devel/icgrep/icgrep.cpp

    r4944 r4946  
    99#include <fstream>
    1010
    11 #include "toolchain.h"
    12 #include "utf_encoding.h"
    13 #include "pablo/pablo_compiler.h"
    14 #include <llvm/IR/Function.h>
    15 #include <llvm/IR/Type.h>
    16 #include <llvm/IR/Module.h>
    17 #include <llvm/ExecutionEngine/ExecutionEngine.h>
    18 #include <llvm/ExecutionEngine/MCJIT.h>
    19 #include <llvm/IRReader/IRReader.h>
     11
    2012#include <llvm/Support/SourceMgr.h>
    2113#include <llvm/Support/CommandLine.h>
     
    2315#include <llvm/Support/TargetSelect.h>
    2416#include <llvm/Support/Host.h>
    25 #include <llvm/IR/Verifier.h>
    2617
    2718#include <re/re_re.h>
     
    3021#include <re/re_any.h>
    3122#include <re/re_alt.h>
    32 #include <pablo/function.h>
    3323
    34 #include "do_grep.h"
    35 #include <kernels/pipeline.h>
     24#include <grep_engine.h>
    3625
    3726static cl::OptionCategory aRegexSourceOptions("Regular Expression Options",
     
    134123    cl::ParseCommandLineOptions(argc, argv);
    135124   
    136     Module * M = new Module("grepcode", getGlobalContext());
     125    re::RE * re_ast = get_icgrep_RE();
    137126   
    138     IDISA::IDISA_Builder * idb = GetNativeIDISA_Builder(M, VectorType::get(IntegerType::get(getGlobalContext(), 64), BLOCK_SIZE/64));
    139 
    140     PipelineBuilder pipelineBuilder(M, idb);
    141 
    142     re::RE * re_ast = get_icgrep_RE();
    143     Encoding encoding(Encoding::Type::UTF_8, 8);
    144     re_ast = regular_expression_passes(encoding, re_ast);   
    145     pablo::PabloFunction * function = re2pablo_compiler(encoding, re_ast);
    146 
    147     pipelineBuilder.CreateKernels(function, false);
    148 
    149     pipelineBuilder.ExecuteKernels();
    150 
    151     llvm::Function * main_IR = M->getFunction("Main");
    152     llvm::ExecutionEngine * engine = JIT_to_ExecutionEngine(M);
     127    GrepEngine grepEngine;
     128    grepEngine.grepCodeGen("grepcode", re_ast);
    153129   
    154     icgrep_Linking(M, engine);
    155     verifyModule(*M, &dbgs());
    156     engine->finalizeObject();
    157 
    158     void * main_MCptr = engine->getPointerToFunction(main_IR);
    159 
    160     if(main_MCptr){
    161         GrepExecutor grepEngine(main_MCptr);
    162         for (unsigned i = firstInputFile; i != inputFiles.size(); ++i) {
     130    for (unsigned i = firstInputFile; i != inputFiles.size(); ++i) {
    163131            grepEngine.doGrep(inputFiles[i]);
    164         }
    165132    }
    166133   
    167     delete engine;
    168     delete idb;
    169 
    170134    return 0;
    171135}
  • icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.cpp

    r4945 r4946  
    5555    sFunction->setCallingConv(CallingConv::C);
    5656    sFunction->setAttributes(AttrSet);
     57    sFunction->addFnAttr(llvm::Attribute::AlwaysInline);
    5758       
    5859    Function::arg_iterator args = sFunction->arg_begin();
  • icGREP/icgrep-devel/icgrep/re/re_parser.cpp

    r4939 r4946  
    1919#include <UCD/resolve_properties.h>
    2020#include <UCD/CaseFolding_txt.h>
    21 #include <toolchain.h>
    22 #include "utf_encoding.h"
    23 #include <llvm/IR/Type.h>
    24 #include <pablo/pablo_compiler.h>
    25 #include <do_grep.h>
     21#include <grep_engine.h>
    2622#include <sstream>
    2723#include <algorithm>
    28 #include "../kernels/pipeline.h"
    29 #include "../toolchain.h"
    30 
    3124
    3225// It would probably be best to enforce that {}, [], () must always
     
    513506    return createName(std::move(canonicalize(start, mCursor.pos())));
    514507}
    515 /*
     508
    516509Name * RE_Parser::parseNamePatternExpression(){
    517510
     
    530523    // Embed the nameRE in ";.*$nameRE" to skip the codepoint field of Uname.txt
    531524    RE * embedded = makeSeq({mMemoizer.memoize(makeCC(0x3B)), makeRep(makeAny(), 0, Rep::UNBOUNDED_REP), nameRE});
    532     Encoding encoding(Encoding::Type::UTF_8, 8);
    533     embedded = regular_expression_passes(encoding, embedded);
    534 
    535     pablo::PabloFunction * const nameSearchFunction = re2pablo_compiler(encoding, embedded);
    536     pablo_function_passes(nameSearchFunction);
    537525   
    538     Module * M = new Module("NamePattern", getGlobalContext());
    539     IDISA::IDISA_Builder * idb = GetNativeIDISA_Builder(M, VectorType::get(IntegerType::get(getGlobalContext(), 64), BLOCK_SIZE/64));
    540     gen_s2p_function(M, idb);
    541 
    542     pablo::PabloCompiler pablo_compiler(M, idb);
    543 
    544     llvm::Function * const nameSearchIR = pablo_compiler.compile(nameSearchFunction); // <- may throw error if parsing exception occurs.
    545     llvm::Function * s2p_IR = M->getFunction("s2p_block");
    546 
    547     llvm::ExecutionEngine * engine = JIT_to_ExecutionEngine(M);   
    548     icgrep_Linking(M, engine);
     526    GrepEngine engine;
     527    engine.grepCodeGen("NamePattern", embedded, true);
    549528   
    550     // Ensure everything is ready to go.
    551     engine->finalizeObject();
    552        
    553     void * icgrep_MCptr = engine->getPointerToFunction(nameSearchIR);
    554     void * s2p_MCptr = engine->getPointerToFunction(s2p_IR);
    555 
    556     CC * codepoints = nullptr;
    557     if (icgrep_MCptr) {
    558         void * icgrep_init_carry_ptr = engine->getPointerToFunction(nameSearchIR->getParent()->getFunction("process_block_initialize_carries"));
    559         GrepExecutor grepEngine(s2p_MCptr, icgrep_init_carry_ptr, icgrep_MCptr);
    560         grepEngine.setParseCodepointsOption();
    561         grepEngine.doGrep("../Uname.txt");
    562         codepoints = grepEngine.getParsedCodepoints();
    563         assert (codepoints);
    564     }
    565     delete engine;
    566     if (codepoints) {
    567         Name * const result = mMemoizer.memoize(codepoints);
    568         assert (*cast<CC>(result->getDefinition()) == *codepoints);
    569         return result;
    570     }
    571     return nullptr;
    572 }
    573 */
    574 Name * RE_Parser::parseNamePatternExpression(){
    575 
    576     ModeFlagSet outerFlags = fModeFlagSet;
    577     fModeFlagSet = 1;
    578 
    579     bool outerNested = fNested;
    580     fNested = true;
    581 
    582     RE * nameRE = parse_RE();
    583 
    584     // Reset outer parsing state.
    585     fModeFlagSet = outerFlags;
    586     fNested = outerNested;
    587 
    588     // Embed the nameRE in ";.*$nameRE" to skip the codepoint field of Uname.txt
    589     RE * embedded = makeSeq({mMemoizer.memoize(makeCC(0x3B)), makeRep(makeAny(), 0, Rep::UNBOUNDED_REP), nameRE});
    590     Encoding encoding(Encoding::Type::UTF_8, 8);
    591     embedded = regular_expression_passes(encoding, embedded);
    592 
    593     pablo::PabloFunction * const nameSearchFunction = re2pablo_compiler(encoding, embedded);
    594     pablo_function_passes(nameSearchFunction);
     529    CC * codepoints = engine.grepCodepoints("../Uname.txt");
    595530   
    596     Module * M = new Module("NamePattern", getGlobalContext());
    597     IDISA::IDISA_Builder * idb = GetNativeIDISA_Builder(M, VectorType::get(IntegerType::get(getGlobalContext(), 64), BLOCK_SIZE/64));
    598    
    599     PipelineBuilder pipelineBuilder(M, idb);
    600     pipelineBuilder.CreateKernels(nameSearchFunction, true);
    601     pipelineBuilder.ExecuteKernels();
    602 
    603     llvm::Function * main_IR = M->getFunction("Main");
    604     llvm::ExecutionEngine * engine = JIT_to_ExecutionEngine(M);
    605    
    606     icgrep_Linking(M, engine);
    607 
    608     engine->finalizeObject();
    609 
    610     void * main_MCptr = engine->getPointerToFunction(main_IR);
    611 
    612     CC * codepoints = nullptr;
    613     if(main_MCptr){
    614         GrepExecutor grepEngine(main_MCptr);
    615         setParsedCodePointSet();
    616         grepEngine.doGrep("../Uname.txt");
    617         codepoints = getParsedCodePointSet();
    618         assert (codepoints);
    619     }
    620        
    621     delete engine;
    622531    if (codepoints) {
    623532        Name * const result = mMemoizer.memoize(codepoints);
  • icGREP/icgrep-devel/icgrep/toolchain.cpp

    r4944 r4946  
    99#include <fstream>
    1010
    11 #include "utf_encoding.h"
    12 #include "pablo/pablo_compiler.h"
    1311#include <llvm/IR/Function.h>
    1412#include <llvm/IR/Module.h>
     
    5351#include <pablo/printer_pablos.h>
    5452
    55 #include "do_grep.h"
    56 
    5753using namespace pablo;
    5854
     
    113109#endif
    114110
    115 static cl::opt<bool> DisableAVX2("disable-AVX2", cl::init(false), cl::desc("disable AVX2 instruction set."), cl::cat(cPabloOptimizationsOptions));
     111static cl::OptionCategory dCodeGenOptions("Code Generation Options", "These options control code generation.");
     112
     113static cl::opt<bool> DisableAVX2("disable-AVX2", cl::init(false), cl::desc("disable AVX2 instruction set."), cl::cat(dCodeGenOptions));
     114
     115static cl::opt<int> BlockSize("BlockSize", cl::init(0), cl::desc("specify a block size (defaults to widest SIMD register width in bits)."), cl::cat(dCodeGenOptions));
     116
    116117
    117118re::RE * regular_expression_passes(const Encoding encoding, re::RE * re_ast)  {
     
    378379    }
    379380}
    380 
Note: See TracChangeset for help on using the changeset viewer.