Changeset 5401 for icGREP


Ignore:
Timestamp:
Apr 9, 2017, 3:59:17 PM (2 years ago)
Author:
nmedfort
Message:

Updated all projects to use ParabixDriver. Deprecated original pipeline generation methods. Enabled LLVM optimizations, IR and ASM printing for Kernel modules. Enabled object cache by default. Began work on moving consumed position information back to producing kernels.

Location:
icGREP/icgrep-devel/icgrep
Files:
18 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/array-test.cpp

    r5377 r5401  
    124124}
    125125
    126 Function * pipeline(IDISA::IDISA_Builder * iBuilder, const unsigned count) {
     126void pipeline(ParabixDriver & pxDriver, const unsigned count) {
     127
     128    IDISA::IDISA_Builder * const iBuilder = pxDriver.getIDISA_Builder();
     129    Module * const mod = iBuilder->getModule();
    127130
    128131    Type * byteStreamTy = iBuilder->getStreamSetTy(1, 8);
    129 
    130     Module * const mod = iBuilder->getModule();
    131132
    132133    Function * const main = cast<Function>(mod->getOrInsertFunction("Main", iBuilder->getVoidTy(), byteStreamTy->getPointerTo(), iBuilder->getSizeTy(), nullptr));
     
    145146
    146147    MMapSourceKernel mmapK(iBuilder, segmentSize);
    147     mmapK.generateKernel({}, {&ByteStream});
    148148    mmapK.setInitialArguments({fileSize});
    149149
     150    pxDriver.addKernelCall(mmapK, {}, {&ByteStream});
     151
    150152    CircularBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8), segmentSize * bufferSegments);
    151153
    152     S2PKernel  s2pk(iBuilder);
    153     s2pk.generateKernel({&ByteStream}, {&BasisBits});
     154    S2PKernel s2pk(iBuilder);
     155    pxDriver.addKernelCall(s2pk, {&ByteStream}, {&BasisBits});
    154156
    155157    PabloKernel bm(iBuilder, "MatchParens",
     
    158160
    159161    generate(&bm);
    160 //    SSAPass::transform(&bm);
    161 
    162 //    pablo_function_passes(&bm);
    163 
    164     bm.getEntryBlock()->print(errs());
    165162
    166163    ExpandableBuffer matches(iBuilder, iBuilder->getStreamSetTy(count), segmentSize * bufferSegments);
    167164    SingleBlockBuffer errors(iBuilder, iBuilder->getStreamTy());
    168165
    169     bm.generateKernel({&BasisBits}, {&matches, &errors});
     166    pxDriver.addKernelCall(bm, {&BasisBits}, {&matches, &errors});
    170167
    171168    PrintStreamSet printer(iBuilder, {"matches", "errors"});
    172     printer.generateKernel({&matches, &errors}, {});
     169    pxDriver.addKernelCall(printer, {&matches, &errors}, {});
    173170
    174171    iBuilder->SetInsertPoint(BasicBlock::Create(mod->getContext(), "entry", main, 0));
     
    179176    errors.allocateBuffer();
    180177
    181 //    generatePipeline(iBuilder, {&mmapK, &s2pk, &bm});
    182 
    183     generatePipeline(iBuilder, {&mmapK, &s2pk, &bm, &printer});
     178    pxDriver.generatePipelineIR();
    184179    iBuilder->CreateRetVoid();
    185180
    186     return main;
     181    pxDriver.linkAndFinalize();
    187182}
    188183
     
    193188    Module * M = new Module("mp", ctx);
    194189    IDISA::IDISA_Builder * idb = IDISA::GetIDISA_Builder(M);
    195 
    196     llvm::Function * f = pipeline(idb, 3);
    197 
    198     verifyModule(*M, &dbgs());
    199 
    200     ExecutionEngine * wcEngine = JIT_to_ExecutionEngine(M);
    201 
    202     wcEngine->finalizeObject();
    203 
     190    ParabixDriver pxDriver(idb);
     191    pipeline(pxDriver, 3);
    204192    delete idb;
    205 
    206     return reinterpret_cast<MatchParens>(wcEngine->getPointerToFunction(f));
     193    return reinterpret_cast<MatchParens>(pxDriver.getPointerToMain());
    207194}
    208195
  • icGREP/icgrep-devel/icgrep/base64.cpp

    r5396 r5401  
    9797
    9898    iBuilder->CreateRetVoid();
    99     pxDriver.JITcompileMain();
     99
    100100    pxDriver.linkAndFinalize();
    101101}
  • icGREP/icgrep-devel/icgrep/editd/editd.cpp

    r5399 r5401  
    248248    iBuilder->CreateRetVoid();
    249249
    250     pxDriver.JITcompileMain();
    251250    pxDriver.linkAndFinalize();
    252251}
     
    323322    iBuilder->CreateRetVoid();
    324323
    325     pxDriver.JITcompileMain();
    326324    pxDriver.linkAndFinalize();
    327325}
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5400 r5401  
    6262static cl::alias ShowLineNumbersLong("line-number", cl::desc("Alias for -n"), cl::aliasopt(ShowLineNumbers));
    6363
    64 /// iNVESTIGATE: icgrep is reporting stdin is not empty even when nothing is being piped into it?
    65 static cl::opt<bool> UseStdIn("stdin", cl::desc("Read from standard input."), cl::cat(bGrepOutputOptions));
    66 
    67 bool isUTF_16 = false;
    68 std::string IRFilename = "icgrep.ll";
    69 std::string PTXFilename = "icgrep.ptx";
     64#ifdef CUDA_ENABLED
     65const auto IRFilename = "icgrep.ll";
     66const auto PTXFilename = "icgrep.ptx";
     67#endif
    7068
    7169static re::CC * parsedCodePointSet = nullptr;
     70
    7271static std::vector<std::string> parsedPropertyValues;
    7372
     
    288287template<typename CodeUnit>
    289288void wrapped_report_match(const size_t lineNum, size_t line_start, size_t line_end, const CodeUnit * const buffer, const size_t filesize, const size_t fileIdx) {
    290 
    291 //    errs() << lineNum << " : (" << line_start << ", " << line_end << ", " << filesize << ")\n";
    292 
    293289    assert (buffer);
    294290    assert (line_start <= line_end);
     
    390386}
    391387
    392 void GrepEngine::grepCodeGen(std::string moduleName, re::RE * re_ast, bool CountOnly, bool UTF_16, GrepType grepType, const bool usingStdIn) {
    393     isUTF_16 = UTF_16;
     388inline void linkGrepFunction(ParabixDriver & pxDriver, const GrepType grepType, const bool UTF_16, kernel::KernelBuilder & kernel) {
     389    switch (grepType) {
     390        case GrepType::Normal:
     391            if (UTF_16) {
     392                pxDriver.addExternalLink(kernel, "matcher", &wrapped_report_match<uint16_t>);
     393            } else {
     394                pxDriver.addExternalLink(kernel, "matcher", &wrapped_report_match<uint8_t>);
     395            }
     396            break;
     397        case GrepType::NameExpression:
     398            pxDriver.addExternalLink(kernel, "matcher", &insert_codepoints);
     399            break;
     400        case GrepType::PropertyValue:
     401            pxDriver.addExternalLink(kernel, "matcher", &insert_property_values);
     402            break;
     403    }
     404}
     405
     406void GrepEngine::grepCodeGen(std::string moduleName, re::RE * re_ast, const bool CountOnly, const bool UTF_16, const GrepType grepType, const bool usingStdIn) {
    394407    int addrSpace = 0;
    395408    bool CPU_Only = true;
     
    499512    pxDriver.addKernelCall(s2pk, {byteStream}, {&BasisBits});
    500513
    501     kernel::LineBreakKernelBuilder linebreakK(iBuilder, "lb", encodingBits);
     514    kernel::LineBreakKernelBuilder linebreakK(iBuilder, encodingBits);
    502515    CircularBuffer LineBreakStream(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments);
    503516    LineBreakStream.allocateBuffer();
     
    517530        iBuilder->CreateRet(icgrepK.createGetAccumulatorCall(icgrepK.getInstance(), "matchedLineCount"));
    518531
    519         pxDriver.JITcompileMain();
    520532        pxDriver.linkAndFinalize();
    521533
     
    550562            pxDriver.addKernelCall(scanMatchK, {&MatchResults, &LineBreakStream, byteStream}, {});
    551563
    552             switch (grepType) {
    553                 case GrepType::Normal:
    554                     if (UTF_16) {
    555                         pxDriver.addExternalLink(scanMatchK, "matcher", &wrapped_report_match<uint16_t>);
    556                     } else {
    557                         pxDriver.addExternalLink(scanMatchK, "matcher", &wrapped_report_match<uint8_t>);
    558                     }
    559                     break;
    560                 case GrepType::NameExpression:
    561                     pxDriver.addExternalLink(scanMatchK, "matcher", &insert_codepoints);
    562                     break;
    563                 case GrepType::PropertyValue:
    564                     pxDriver.addExternalLink(scanMatchK, "matcher", &insert_property_values);
    565                     break;
    566             }
     564            linkGrepFunction(pxDriver, grepType, UTF_16, scanMatchK);
    567565
    568566            pxDriver.generatePipelineIR();
     
    570568            iBuilder->CreateRetVoid();
    571569
    572             pxDriver.JITcompileMain();
    573570            pxDriver.linkAndFinalize();
    574571        }
     
    610607
    611608
    612 void GrepEngine::multiGrepCodeGen(std::string moduleName, std::vector<re::RE *> REs, bool CountOnly, bool UTF_16, GrepType grepType, const bool usingStdIn) {
    613 
    614     isUTF_16 = UTF_16;
     609
     610void GrepEngine::grepCodeGen(std::string moduleName, std::vector<re::RE *> REs, const bool CountOnly, const bool UTF_16, const GrepType grepType, const bool usingStdIn) {
     611
    615612    Module * M = new Module(moduleName + ":icgrep", getGlobalContext());;
    616613    IDISA::IDISA_Builder * iBuilder = IDISA::GetIDISA_Builder(M);;
     614    ParabixDriver pxDriver(iBuilder);
    617615
    618616    const unsigned segmentSize = codegen::SegmentSize;
     
    639637    kernel::KernelBuilder * sourceK = nullptr;
    640638    if (usingStdIn) {
     639        // TODO: use fstat(STDIN_FILENO) to see if we can mmap the stdin safely and avoid the calls to read
    641640        byteStream = new ExtensibleBuffer(iBuilder, iBuilder->getStreamSetTy(1, 8), segmentSize);
    642         cast<ExtensibleBuffer>(byteStream)->allocateBuffer();
    643641        sourceK = new kernel::StdInKernel(iBuilder, segmentSize);
    644642    } else {
    645         byteStream = new ExternalFileBuffer(iBuilder, iBuilder->getStreamSetTy(1, 8));
    646         cast<ExternalFileBuffer>(byteStream)->setStreamSetBuffer(inputStream);
    647         sourceK = new kernel::MMapSourceKernel(iBuilder, segmentSize);
    648         sourceK->setInitialArguments({fileSize});
    649     }
    650     sourceK->generateKernel({}, {byteStream});
     643        byteStream = new SourceFileBuffer(iBuilder, iBuilder->getStreamSetTy(1, 8));
     644        sourceK = new kernel::FileSourceKernel(iBuilder, inputStream->getType(), segmentSize);
     645        sourceK->setInitialArguments({inputStream, fileSize});
     646    }
     647    byteStream->allocateBuffer();
     648    pxDriver.addKernelCall(*sourceK, {}, {byteStream});
    651649
    652650    CircularBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8), segmentSize * bufferSegments);
    653651    BasisBits.allocateBuffer();
    654652
    655     kernel::S2PKernel  s2pk(iBuilder);
    656     s2pk.generateKernel({byteStream}, {&BasisBits});
     653    kernel::S2PKernel s2pk(iBuilder);
     654    pxDriver.addKernelCall(s2pk, {byteStream}, {&BasisBits});
     655
     656    kernel::LineBreakKernelBuilder linebreakK(iBuilder, encodingBits);
     657    CircularBuffer LineBreakStream(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments);
     658    LineBreakStream.allocateBuffer();
     659    pxDriver.addKernelCall(linebreakK, {&BasisBits}, {&LineBreakStream});
    657660
    658661    std::vector<pablo::PabloKernel *> icgrepKs;
    659662    std::vector<StreamSetBuffer *> MatchResultsBufs;
    660663
    661     for(unsigned i=0; i<REs.size(); i++){
    662         pablo::PabloKernel * icgrepK = new pablo::PabloKernel(iBuilder, "icgrep"+std::to_string(i), {Binding{iBuilder->getStreamSetTy(8), "basis"}, Binding{iBuilder->getStreamSetTy(1, 1), "linebreak"}});
     664    for(unsigned i = 0; i < REs.size(); ++i){
     665        pablo::PabloKernel * const icgrepK = new pablo::PabloKernel(iBuilder, "icgrep" + std::to_string(i), {Binding{iBuilder->getStreamSetTy(8), "basis"}, Binding{iBuilder->getStreamSetTy(1, 1), "linebreak"}});
    663666        re::re2pablo_compiler(icgrepK, re::regular_expression_passes(REs[i]), false);
    664667        pablo_function_passes(icgrepK);
     668        CircularBuffer * const matchResults = new CircularBuffer(iBuilder, iBuilder->getStreamSetTy(2, 1), segmentSize * bufferSegments);
     669        matchResults->allocateBuffer();
     670
     671        pxDriver.addKernelCall(*icgrepK, {&BasisBits, &LineBreakStream}, {matchResults});
    665672        icgrepKs.push_back(icgrepK);
    666         CircularBuffer * MatchResults = new CircularBuffer(iBuilder, iBuilder->getStreamSetTy(2, 1), segmentSize * bufferSegments);
    667         MatchResults->allocateBuffer();
    668         MatchResultsBufs.push_back(MatchResults);
    669     }
    670 
    671     std::vector<kernel::KernelBuilder *> KernelList;
    672     KernelList.push_back(sourceK);
    673     KernelList.push_back(&s2pk);
     673        MatchResultsBufs.push_back(matchResults);
     674    }
    674675
    675676    CircularBuffer mergedResults(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments);
     
    677678
    678679    kernel::StreamsMerge streamsMergeK(iBuilder, 1, REs.size());
    679     streamsMergeK.generateKernel(MatchResultsBufs, {&mergedResults});
    680 
    681     kernel::LineBreakKernelBuilder linebreakK(iBuilder, "lb", encodingBits);
    682     CircularBuffer LineBreakStream(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments);
    683     LineBreakStream.allocateBuffer();
    684     linebreakK.generateKernel({&BasisBits}, {&LineBreakStream});
    685 
    686     KernelList.push_back(&linebreakK);
    687     for(unsigned i=0; i<REs.size(); i++){
    688         icgrepKs[i]->generateKernel({&BasisBits, &LineBreakStream}, {MatchResultsBufs[i]});
    689         KernelList.push_back(icgrepKs[i]);
    690     }
    691     KernelList.push_back(&streamsMergeK);
     680    pxDriver.addKernelCall(streamsMergeK, MatchResultsBufs, {&mergedResults});
    692681
    693682    if (CountOnly) {
    694683        kernel::MatchCount matchCountK(iBuilder);
    695         matchCountK.generateKernel({&mergedResults}, {});
    696 
    697         KernelList.push_back(&matchCountK);
    698 
    699         generatePipeline(iBuilder, KernelList);
     684        pxDriver.addKernelCall(matchCountK, {&mergedResults}, {});
     685        pxDriver.generatePipelineIR();
    700686        iBuilder->CreateRet(matchCountK.getScalarField(matchCountK.getInstance(), "matchedLineCount"));
    701 
     687        pxDriver.linkAndFinalize();
    702688    } else {
    703689        kernel::ScanMatchKernel scanMatchK(iBuilder, grepType, encodingBits);
    704         scanMatchK.generateKernel({byteStream, &mergedResults, &LineBreakStream}, {});
    705690        scanMatchK.setInitialArguments({fileIdx});
    706 
    707         KernelList.push_back(&scanMatchK);
    708 
    709         generatePipeline(iBuilder, KernelList);
    710 
     691        pxDriver.addKernelCall(scanMatchK, {&mergedResults, &LineBreakStream, byteStream}, {});
     692        linkGrepFunction(pxDriver, grepType, UTF_16, scanMatchK);
     693        pxDriver.generatePipelineIR();
    711694        iBuilder->CreateRetVoid();
    712     }
    713 
    714     mEngine = JIT_to_ExecutionEngine(M);
    715     ApplyObjectCache(mEngine);
    716     icgrep_Linking(M, mEngine);
    717 
    718     mEngine->finalizeObject();
     695        pxDriver.linkAndFinalize();
     696    }
     697
    719698    delete iBuilder;
    720699    delete sourceK;
    721700    delete byteStream;
     701    for (StreamSetBuffer * buf : MatchResultsBufs) {
     702        delete buf;
     703    }
    722704
    723705    if (CountOnly) {
    724         mGrepFunction_CountOnly = reinterpret_cast<GrepFunctionType_CountOnly>(mEngine->getPointerToFunction(mainFn));
    725     } else {
    726         mGrepFunction = reinterpret_cast<GrepFunctionType>(mEngine->getPointerToFunction(mainFn));
    727     }
    728 
     706        mGrepFunction_CountOnly = reinterpret_cast<GrepFunctionType_CountOnly>(pxDriver.getPointerToMain());
     707    } else {
     708        mGrepFunction = reinterpret_cast<GrepFunctionType>(pxDriver.getPointerToMain());
     709    }
    729710}
    730711
     
    753734}
    754735
    755 void icgrep_Linking(Module * m, ExecutionEngine * e) {
    756     Module::FunctionListType & fns = m->getFunctionList();
    757     for (auto it = fns.begin(), it_end = fns.end(); it != it_end; ++it) {
    758         std::string fnName = it->getName().str();
    759         if (fnName == "s2p_block") continue;
    760         if (fnName == "process_block") continue;
    761         if (fnName == "process_block_initialize_carries") continue;
    762        
    763         if (fnName == "wrapped_report_match8") {
    764             e->addGlobalMapping(cast<GlobalValue>(it), (void *)&wrapped_report_match<uint8_t>);
    765         }
    766         if (fnName == "wrapped_report_match16") {
    767             e->addGlobalMapping(cast<GlobalValue>(it), (void *)&wrapped_report_match<uint16_t>);
    768         }
    769         if (fnName == "insert_codepoints") {
    770             e->addGlobalMapping(cast<GlobalValue>(it), (void *)&insert_codepoints);
    771         }
    772         if (fnName == "insert_property_values") {
    773             e->addGlobalMapping(cast<GlobalValue>(it), (void *)&insert_property_values);
    774         }
    775     }
    776 }
    777 
    778736GrepEngine::GrepEngine()
    779737: mGrepFunction(nullptr)
     
    782740, mGrepFunction_CPU(nullptr)
    783741#endif
    784 , mEngine(nullptr) {
    785 
    786 }
    787 
    788 GrepEngine::~GrepEngine() {
    789     delete mEngine;
    790 }
     742{
     743
     744}
  • icGREP/icgrep-devel/icgrep/grep_engine.h

    r5400 r5401  
    2121
    2222    GrepEngine();
    23     ~GrepEngine();
    24  
     23
    2524    void grepCodeGen(std::string moduleName, re::RE * re_ast, bool CountOnly, bool UTF_16 = false, GrepType grepType = GrepType::Normal, const bool usingStdIn = false);
    2625
    27     void multiGrepCodeGen(std::string moduleName, std::vector<re::RE *> REs, bool CountOnly, bool UTF_16 = false, GrepType grepType = GrepType::Normal, const bool usingStdIn = false);
     26    void grepCodeGen(std::string moduleName, std::vector<re::RE *> REs, bool CountOnly, bool UTF_16 = false, GrepType grepType = GrepType::Normal, const bool usingStdIn = false);
    2827     
    2928    void doGrep(const std::string & fileName, const int fileIdx, bool CountOnly, std::vector<size_t> & total_CountOnly);
     
    4241    GrepFunctionType_CPU mGrepFunction_CPU;
    4342#endif
    44     llvm::ExecutionEngine * mEngine;
    4543};
    4644
  • icGREP/icgrep-devel/icgrep/icgrep.cpp

    r5379 r5401  
    408408
    409409        if (MultiGrepKernels) {
    410             grepEngine.multiGrepCodeGen(module_name, RELists, CountOnly, UTF_16);
     410            grepEngine.grepCodeGen(module_name, RELists, CountOnly, UTF_16);
    411411        } else {
    412412            grepEngine.grepCodeGen(module_name, re_ast, CountOnly, UTF_16, GrepType::Normal, false);
  • icGREP/icgrep-devel/icgrep/kernels/interface.cpp

    r5398 r5401  
    2222using namespace llvm;
    2323
    24 ProcessingRate FixedRatio(unsigned strmItemsPer, unsigned perPrincipalInputItems, std::string referenceStreamSet) {
    25     return ProcessingRate(ProcessingRate::ProcessingRateKind::Fixed, strmItemsPer, perPrincipalInputItems, referenceStreamSet);
     24ProcessingRate FixedRatio(unsigned strmItemsPer, unsigned perPrincipalInputItems, std::string && referenceStreamSet) {
     25    return ProcessingRate(ProcessingRate::ProcessingRateKind::Fixed, strmItemsPer, perPrincipalInputItems, std::move(referenceStreamSet));
    2626}
    2727
    28 ProcessingRate MaxRatio(unsigned strmItemsPer, unsigned perPrincipalInputItems, std::string referenceStreamSet) {
    29     return ProcessingRate(ProcessingRate::ProcessingRateKind::Max, strmItemsPer, perPrincipalInputItems, referenceStreamSet);
     28ProcessingRate MaxRatio(unsigned strmItemsPer, unsigned perPrincipalInputItems, std::string && referenceStreamSet) {
     29    return ProcessingRate(ProcessingRate::ProcessingRateKind::Max, strmItemsPer, perPrincipalInputItems, std::move(referenceStreamSet));
    3030}
    3131
    32 ProcessingRate RoundUpToMultiple(unsigned itemMultiple, std::string referenceStreamSet) {
    33     return ProcessingRate(ProcessingRate::ProcessingRateKind::RoundUp, itemMultiple, itemMultiple, referenceStreamSet);
     32ProcessingRate RoundUpToMultiple(unsigned itemMultiple, std::string && referenceStreamSet) {
     33    return ProcessingRate(ProcessingRate::ProcessingRateKind::RoundUp, itemMultiple, itemMultiple, std::move(referenceStreamSet));
    3434}
    3535
    36 ProcessingRate Add1(std::string referenceStreamSet) {
    37     return ProcessingRate(ProcessingRate::ProcessingRateKind::Add1, 0, 0, referenceStreamSet);
     36ProcessingRate Add1(std::string && referenceStreamSet) {
     37    return ProcessingRate(ProcessingRate::ProcessingRateKind::Add1, 0, 0, std::move(referenceStreamSet));
    3838}
    3939
     
    9494    }
    9595
    96     // Create the doSegment function prototype.
     96    /// INVESTIGATE: should we explicitly mark whether to track a kernel output's consumed amount? It would have
     97    /// to be done at the binding level using the current architecture. It would reduce the number of arguments
     98    /// passed between kernels.
     99
     100    // Create the doSegment function prototype.   
     101    IntegerType * const sizeTy = iBuilder->getSizeTy();
     102
    97103    std::vector<Type *> params = {selfType, iBuilder->getInt1Ty()};
    98     // const auto count = mStreamSetInputs.size() + mStreamSetOutputs.size();
    99     for (unsigned i = 0; i < mStreamSetInputs.size(); ++i) {
    100         params.push_back(iBuilder->getSizeTy());
     104    params.insert(params.end(), mStreamSetInputs.size() + mStreamSetOutputs.size(), sizeTy);
     105
     106    Type * retType = nullptr;
     107    if (mStreamSetInputs.empty()) {
     108        retType = iBuilder->getVoidTy();
     109    } else {
     110        retType = ArrayType::get(sizeTy, mStreamSetInputs.size());
    101111    }
    102112
    103     FunctionType * doSegmentType = FunctionType::get(iBuilder->getVoidTy(), params, false);
     113    FunctionType * const doSegmentType = FunctionType::get(retType, params, false);
    104114    Function * doSegment = Function::Create(doSegmentType, GlobalValue::ExternalLinkage, getName() + DO_SEGMENT_SUFFIX, client);
    105115    doSegment->setCallingConv(CallingConv::C);
     
    112122        (++args)->setName(input.name + "_availableItems");
    113123    }
    114 //    for (const Binding & output : mStreamSetOutputs) {
    115 //        (++args)->setName(output.name + "_consumedItems");
    116 //    }
     124    for (const Binding & output : mStreamSetOutputs) {
     125        (++args)->setName(output.name + "_consumedItems");
     126    }
     127
     128    /// INVESTIGATE: replace the accumulator methods with a single Exit method that handles any clean up and returns
     129    /// a struct containing all scalar outputs?
    117130
    118131    // Create the accumulator get function prototypes
  • icGREP/icgrep-devel/icgrep/kernels/interface.h

    r5398 r5401  
    3939
    4040struct ProcessingRate  {
    41     enum ProcessingRateKind : uint8_t {Fixed, RoundUp, Max, Add1, Unknown};
    42     ProcessingRate() {}
     41    enum class ProcessingRateKind : uint8_t { Fixed, RoundUp, Add1, Max, Unknown };
    4342    ProcessingRateKind getKind() const {return mKind;}
    44     bool isExact() const {return (mKind == Fixed)||(mKind == RoundUp)||(mKind == Add1) ;}
     43    bool isExact() const {return (mKind == ProcessingRateKind::Fixed)||(mKind == ProcessingRateKind::RoundUp)||(mKind == ProcessingRateKind::Add1) ;}
    4544    bool isUnknown() const { return !isExact(); }
    4645    llvm::Value * CreateRatioCalculation(IDISA::IDISA_Builder * b, llvm::Value * principalInputItems, llvm::Value * doFinal = nullptr) const;
    47     friend ProcessingRate FixedRatio(unsigned strmItemsPer, unsigned perPrincipalInputItems, std::string referenceStreamSet);
    48     friend ProcessingRate MaxRatio(unsigned strmItemsPer, unsigned perPrincipalInputItems, std::string referenceStreamSet);
    49     friend ProcessingRate RoundUpToMultiple(unsigned itemMultiple, std::string referenceStreamSet);   
    50     friend ProcessingRate Add1(std::string referenceStreamSet);
     46    friend ProcessingRate FixedRatio(unsigned strmItemsPer, unsigned perPrincipalInputItems, std::string && referenceStreamSet);
     47    friend ProcessingRate MaxRatio(unsigned strmItemsPer, unsigned perPrincipalInputItems, std::string && referenceStreamSet);
     48    friend ProcessingRate RoundUpToMultiple(unsigned itemMultiple, std::string && referenceStreamSet);
     49    friend ProcessingRate Add1(std::string && referenceStreamSet);
    5150    friend ProcessingRate UnknownRate();
    5251    std::string referenceStreamSet() const { return mReferenceStreamSet;}
    5352protected:
    54     ProcessingRate(ProcessingRateKind k, unsigned numerator, unsigned denominator, std::string referenceStreamSet)
     53    ProcessingRate(ProcessingRateKind k, unsigned numerator, unsigned denominator, std::string && referenceStreamSet)
    5554    : mKind(k), mRatioNumerator(numerator), mRatioDenominator(denominator), mReferenceStreamSet(referenceStreamSet) {}
    5655private:
    57     ProcessingRateKind mKind;
    58     uint16_t mRatioNumerator;
    59     uint16_t mRatioDenominator;
    60     std::string mReferenceStreamSet;
     56    const ProcessingRateKind mKind;
     57    const uint16_t mRatioNumerator;
     58    const uint16_t mRatioDenominator;
     59    const std::string mReferenceStreamSet;
    6160};
    6261
    63 ProcessingRate FixedRatio(unsigned strmItemsPer, unsigned perPrincipalInputItems = 1, std::string referenceStreamSet = "");
    64 ProcessingRate MaxRatio(unsigned strmItemsPer, unsigned perPrincipalInputItems = 1, std::string referenceStreamSet = "");
    65 ProcessingRate RoundUpToMultiple(unsigned itemMultiple, std::string referenceStreamSet = "");
    66 ProcessingRate Add1(std::string referenceStreamSet = "");
     62ProcessingRate FixedRatio(unsigned strmItemsPer, unsigned perPrincipalInputItems = 1, std::string && referenceStreamSet = "");
     63ProcessingRate MaxRatio(unsigned strmItemsPer, unsigned perPrincipalInputItems = 1, std::string && referenceStreamSet = "");
     64ProcessingRate RoundUpToMultiple(unsigned itemMultiple, std::string &&referenceStreamSet = "");
     65ProcessingRate Add1(std::string && referenceStreamSet = "");
    6766ProcessingRate UnknownRate();
    6867
     
    7069    Binding(llvm::Type * type, const std::string & name, ProcessingRate r = FixedRatio(1))
    7170    : type(type), name(name), rate(r) { }
    72     llvm::Type *        type;
    73     std::string         name;
    74     ProcessingRate      rate;
     71    llvm::Type * const        type;
     72    const std::string         name;
     73    const ProcessingRate      rate;
    7574};
    7675
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5399 r5401  
    120120}
    121121
    122 std::unique_ptr<Module> KernelBuilder::createKernelStub(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs) {
     122Module * KernelBuilder::createKernelStub(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs) {
    123123    setCallParameters(inputs, outputs);
    124124    std::string cacheName = getName() + "_" + iBuilder->getBuilderUniqueName();
     
    130130    }
    131131    prepareKernel();
    132     return make_unique<Module>(cacheName, iBuilder->getContext());
     132    return new Module(cacheName, iBuilder->getContext());
    133133}
    134134
     
    162162}   
    163163
    164 
    165164// Default kernel signature: generate the IR and emit as byte code.
    166 void KernelBuilder::generateKernelSignature(std::string &signature) {
    167     generateKernel();
    168     raw_string_ostream OS(signature);
    169     WriteBitcodeToFile(iBuilder->getModule(), OS);
    170 }
    171 
    172 
    173 std::unique_ptr<Module> KernelBuilder::createKernelModule(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs) {
    174     auto saveModule = iBuilder->getModule();
    175     auto savePoint = iBuilder->saveIP();
    176     auto module = createKernelStub(inputs, outputs);
    177     iBuilder->setModule(module.get());
    178     generateKernel(inputs, outputs);
    179     iBuilder->setModule(saveModule);
    180     iBuilder->restoreIP(savePoint);
    181     return module;
    182 }
    183 
    184 void KernelBuilder::generateKernel(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs) {
    185     setCallParameters(inputs, outputs);
    186     prepareKernel(); // possibly overridden by the KernelBuilder subtype
    187     generateKernel();
     165std::string KernelBuilder::generateKernelSignature(std::string moduleId) {
     166    if (moduleIDisSignature()) {
     167        return moduleId;
     168    } else {
     169        generateKernel();
     170        std::string signature;
     171        raw_string_ostream OS(signature);
     172        WriteBitcodeToFile(iBuilder->getModule(), OS);
     173        return signature;
     174    }
    188175}
    189176
    190177void KernelBuilder::generateKernel() {
    191     if (mIsGenerated) return;
    192     auto savePoint = iBuilder->saveIP();
    193     addKernelDeclarations(iBuilder->getModule());
    194     callGenerateInitMethod();
    195     callGenerateDoSegmentMethod();
    196     // Implement the accumulator get functions
    197     for (auto binding : mScalarOutputs) {
    198         Function * f = getAccumulatorFunction(binding.name);
    199         iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "get_" + binding.name, f));
    200         Value * self = &*(f->arg_begin());
    201         Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
    202         Value * retVal = iBuilder->CreateLoad(ptr);
    203         iBuilder->CreateRet(retVal);
    204     }
    205     iBuilder->restoreIP(savePoint);
    206     mIsGenerated = true;
     178    // If the module id is cannot uniquely identify this kernel, "generateKernelSignature()" will have already
     179    // generated the unoptimized IR.
     180    if (!mIsGenerated) {
     181        auto savePoint = iBuilder->saveIP();
     182        addKernelDeclarations(iBuilder->getModule());
     183        callGenerateInitMethod();
     184        callGenerateDoSegmentMethod();
     185        // Implement the accumulator get functions
     186        for (auto binding : mScalarOutputs) {
     187            Function * f = getAccumulatorFunction(binding.name);
     188            iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "get_" + binding.name, f));
     189            Value * self = &*(f->arg_begin());
     190            Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
     191            Value * retVal = iBuilder->CreateLoad(ptr);
     192            iBuilder->CreateRet(retVal);
     193        }
     194        iBuilder->restoreIP(savePoint);
     195        mIsGenerated = true;
     196    }
    207197}
    208198
     
    218208    }
    219209    generateDoSegmentMethod(doFinal, producerPos); // must be overridden by the KernelBuilder subtype
    220     iBuilder->CreateRetVoid();
     210    if (LLVM_UNLIKELY(mStreamSetInputs.empty())) {
     211        iBuilder->CreateRetVoid();
     212    } else {
     213        const unsigned n = mStreamSetInputs.size();
     214        Value * values[n];
     215        for (unsigned i = 0; i < n; ++i) {
     216            values[i] = getProcessedItemCount(mStreamSetInputs[i].name);
     217        }
     218        iBuilder->CreateAggregateRet(values, n);
     219    }
    221220}
    222221
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5399 r5401  
    6060   
    6161    // Can the module ID itself serve as the unique signature?
    62     virtual bool moduleIDisSignature() {/* default */  return false;}
    63    
    64     virtual void generateKernelSignature(std::string & signature);
     62    virtual bool moduleIDisSignature() { return false; }
     63   
     64    virtual std::string generateKernelSignature(std::string moduleId);
    6565   
    6666    // Create a module stub for the kernel, populated only with its Module ID.     
    6767    //
    68     std::unique_ptr<llvm::Module> createKernelStub(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs);
    69    
    70     // Create a module for the kernel, including the kernel state type declaration and
    71     // the full implementation of all required methods.     
    72     //
    73     std::unique_ptr<llvm::Module> createKernelModule(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs);
    74    
     68    llvm::Module * createKernelStub(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs);
     69     
    7570    void setCallParameters(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs);
    7671
    7772    // Generate the Kernel to the current module (iBuilder->getModule()).
    7873    void generateKernel();
    79     void generateKernel(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs);
    8074   
    8175    void createInstance() override;
  • icGREP/icgrep-devel/icgrep/kernels/linebreak_kernel.cpp

    r5399 r5401  
    2626LineBreakKernelBuilder::LineBreakKernelBuilder (
    2727IDISA::IDISA_Builder * iBuilder
    28 , std::string linebreak
    2928, unsigned basisBitsCount)
    30 : PabloKernel(iBuilder, "Parabix:" + linebreak + "_kernel", {Binding{iBuilder->getStreamSetTy(basisBitsCount), "basis"}}, {Binding{iBuilder->getStreamSetTy(1), "linebreak", Add1()}}) {
     29: PabloKernel(iBuilder, "Parabix:lb_kernel", {Binding{iBuilder->getStreamSetTy(basisBitsCount), "basis"}}, {Binding{iBuilder->getStreamSetTy(1), "linebreak", Add1()}}) {
    3130
    3231    CC_Compiler ccc(this, getInput(0));
  • icGREP/icgrep-devel/icgrep/kernels/linebreak_kernel.h

    r5398 r5401  
    1515class LineBreakKernelBuilder: public pablo::PabloKernel {
    1616public:
    17     LineBreakKernelBuilder(IDISA::IDISA_Builder * iBuilder, std::string linebreak, unsigned basisBitsCount);
     17    LineBreakKernelBuilder(IDISA::IDISA_Builder * iBuilder, unsigned basisBitsCount);
    1818    bool moduleIDisSignature() override {return true;}
    1919};
  • icGREP/icgrep-devel/icgrep/kernels/pipeline.cpp

    r5398 r5401  
    487487        Value * const instance = kernel->getInstance();
    488488        std::vector<Value *> args = {instance, terminated};
    489         for (unsigned j = 0; j < kernel->getStreamInputs().size(); j++) {
     489        for (unsigned i = 0; i < kernel->getStreamInputs().size(); ++i) {
    490490            unsigned producerKernel, outputIndex;
    491             std::tie(producerKernel, outputIndex) = producer[k][j];
     491            std::tie(producerKernel, outputIndex) = producer[k][i];
    492492            args.push_back(ProducerPos[producerKernel][outputIndex]);
     493        }
     494        for (unsigned i = 0; i < kernel->getStreamOutputs().size(); ++i) {
     495            args.push_back(iBuilder->getSize(0));
    493496        }
    494497        kernel->createDoSegmentCall(args);
     
    502505        }
    503506        ProducerPos.push_back(produced);
    504         Value * segNo = kernel->acquireLogicalSegmentNo(instance);
     507        Value * const segNo = kernel->acquireLogicalSegmentNo(instance);
    505508        kernel->releaseLogicalSegmentNo(instance, iBuilder->CreateAdd(segNo, iBuilder->getSize(1)));
    506509    }
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r5398 r5401  
    197197    re = resolveUnicodeProperties(re);
    198198}
    199 
    200 //void RE_Compiler::finalizeMatchResult(MarkerType match_result, bool InvertMatches) {
    201 //    PabloAST * match_follow = mPB.createMatchStar(markerVar(match_result), mAny);
    202 //    if (InvertMatches) {
    203 //        match_follow = mPB.createNot(match_follow);
    204 //    }
    205 //    PabloAST * matches = mPB.createAnd(match_follow, mLineBreak, "matches");
    206 //    if (mCountOnly) {
    207 //        Var * const output = mKernel->getOutputScalarVar("matchedLineCount");
    208 //        PabloBuilder nestedCount = PabloBuilder::Create(mPB);
    209 //        mPB.createIf(matches, nestedCount);
    210 //        nestedCount.createAssign(output, nestedCount.createCount(matches));
    211 //    } else {
    212 //        Var * const output = mKernel->getOutputStreamVar("output");
    213 //        mPB.createAssign(mPB.createExtract(output, mPB.getInteger(0)), matches);
    214 //    }
    215 //}
    216199
    217200void RE_Compiler::finalizeMatchResult(MarkerType match_result, bool InvertMatches) {
  • icGREP/icgrep-devel/icgrep/toolchain.cpp

    r5399 r5401  
    1111#include <llvm/Support/TargetSelect.h>             // for InitializeNativeTa...
    1212#include <llvm/Support/raw_ostream.h>              // for errs, raw_ostream
     13#include <llvm/Support/FormattedStream.h>
    1314#include <llvm/ADT/SmallString.h>                  // for SmallString
    1415#include <llvm/IR/LegacyPassManager.h>             // for PassManager
     16#include <llvm/IR/IRPrintingPasses.h>
    1517#include <llvm/IR/Verifier.h>
    1618#include <llvm/InitializePasses.h>                 // for initializeCodeGen
     
    4143static cl::bits<DebugFlags>
    4244DebugOptions(cl::values(clEnumVal(ShowIR, "Print generated LLVM IR."),
    43 #if LLVM_VERSION_MINOR > 6
     45#ifndef USE_LLVM_3_6
    4446                        clEnumVal(ShowASM, "Print assembly code."),
    4547#endif
     
    4850
    4951static cl::opt<std::string> IROutputFilename("dump-generated-IR-output", cl::init(""), cl::desc("output IR filename"), cl::cat(CodeGenOptions));
    50 #if LLVM_VERSION_MINOR > 6
     52#ifndef USE_LLVM_3_6
    5153static cl::opt<std::string> ASMOutputFilename("asm-output", cl::init(""), cl::desc("output ASM filename"), cl::cat(CodeGenOptions));
    5254static cl::opt<bool> AsmVerbose("asm-verbose",
     
    6062
    6163
    62 static cl::opt<bool> EnableObjectCache("enable-object-cache", cl::init(false), cl::desc("Enable object caching"), cl::cat(CodeGenOptions));
     64static cl::opt<bool> EnableObjectCache("enable-object-cache", cl::init(true), cl::desc("Enable object caching"), cl::cat(CodeGenOptions));
    6365
    6466static cl::opt<std::string> ObjectCacheDir("object-cache-dir", cl::init(""), cl::desc("Path to the object cache diretory"), cl::cat(CodeGenOptions));
     
    8991   
    9092static cl::opt<bool> segmentPipelineParallel("enable-segment-pipeline-parallel", cl::desc("Enable multithreading with segment pipeline parallelism."), cl::cat(CodeGenOptions));
    91 
    92 
    9393   
    9494#ifdef CUDA_ENABLED
     
    101101}
    102102
    103 
    104103#ifdef CUDA_ENABLED
    105104void setNVPTXOption(){
     
    139138    cl::AddExtraVersionPrinter(&printParabixVersion);
    140139}
    141 
    142 
    143140
    144141void setAllFeatures(EngineBuilder &builder) {
     
    163160}
    164161
    165 #ifndef USE_LLVM_3_6
    166 void WriteAssembly (TargetMachine *TM, Module * m) {
    167     legacy::PassManager PM;
    168 
    169     SmallString<128> Str;
    170     raw_svector_ostream dest(Str);
    171 
    172     if (TM->addPassesToEmitFile(PM, dest, TargetMachine::CGFT_AssemblyFile ) ) {
    173         throw std::runtime_error("LLVM error: addPassesToEmitFile failed.");
    174     }
    175     PM.run(*m);
    176 
    177     if (codegen::ASMOutputFilename.empty()) {
    178         errs() << Str;
    179     } else {
    180         std::error_code error;
    181         raw_fd_ostream out(codegen::ASMOutputFilename, error, sys::fs::OpenFlags::F_None);
    182         out << Str;
    183     }
    184 }
    185 #endif
    186 
    187 ExecutionEngine * JIT_to_ExecutionEngine (Module * m) {
    188 
    189     // Use the pass manager to optimize the function.
    190     #ifndef NDEBUG
    191     try {
    192     #endif
    193     legacy::PassManager PM;
    194     #ifndef NDEBUG
    195     PM.add(createVerifierPass());
    196     #endif
    197     PM.add(createReassociatePass());             //Reassociate expressions.
    198     PM.add(createGVNPass());                     //Eliminate common subexpressions.
    199     PM.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
    200     PM.add(createCFGSimplificationPass());   
    201     PM.run(*m);
    202     #ifndef NDEBUG
    203     } catch (...) { m->dump(); throw; }
    204     #endif
     162ParabixDriver::ParabixDriver(IDISA::IDISA_Builder * iBuilder)
     163: iBuilder(iBuilder)
     164, mMainModule(iBuilder->getModule())
     165, mTarget(nullptr)
     166, mEngine(nullptr)
     167{
    205168    InitializeNativeTarget();
    206169    InitializeNativeTargetAsmPrinter();
     
    213176
    214177    std::string errMessage;
    215     EngineBuilder builder{std::unique_ptr<Module>(m)};
     178    EngineBuilder builder{std::unique_ptr<Module>(mMainModule)};
    216179    builder.setErrorStr(&errMessage);
    217180    TargetOptions opts = InitTargetOptionsFromCodeGenFlags();
     
    232195    setAllFeatures(builder);
    233196
    234     if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
    235         if (codegen::IROutputFilename.empty()) {
    236             m->dump();
    237         } else {
    238             std::error_code error;
    239             raw_fd_ostream out(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None);
    240             m->print(out, nullptr);
    241         }
    242     }
    243 #if LLVM_VERSION_MINOR > 6
    244     if (codegen::DebugOptionIsSet(codegen::ShowASM)) {
    245         WriteAssembly(builder.selectTarget(), m);
    246     }
    247 #endif
    248     ExecutionEngine * engine = builder.create();
    249     if (engine == nullptr) {
    250         throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
    251     }
    252     return engine;
    253 }
    254 
    255 void ApplyObjectCache(ExecutionEngine * e) {
    256     ParabixObjectCache * cache = nullptr;
    257     if (codegen::EnableObjectCache) {
    258         if (codegen::ObjectCacheDir.empty())
    259             // Default is $HOME/.cache/icgrep
    260             cache = new ParabixObjectCache();
    261         else
    262             cache = new ParabixObjectCache(codegen::ObjectCacheDir);
    263         e->setObjectCache(cache);
    264     }
    265 }
    266 
    267 void generatePipeline(IDISA::IDISA_Builder * iBuilder, const std::vector<kernel::KernelBuilder *> & kernels) {
    268     if (codegen::pipelineParallel) {
    269         generateParallelPipeline(iBuilder, kernels);
    270     } else if (codegen::segmentPipelineParallel) {
    271         generateSegmentParallelPipeline(iBuilder, kernels);
    272     } else {
    273         codegen::ThreadNum = 1;
    274         generatePipelineLoop(iBuilder, kernels);
    275     }
    276 }
    277 
    278 ParabixDriver::ParabixDriver(IDISA::IDISA_Builder * iBuilder)
    279 : iBuilder(iBuilder)
    280 , mMainModule(iBuilder->getModule())
    281 , mTarget(nullptr)
    282 , mEngine(nullptr)
    283 {
    284     InitializeNativeTarget();
    285     InitializeNativeTargetAsmPrinter();
    286     InitializeNativeTargetAsmParser();
    287 
    288     PassRegistry * Registry = PassRegistry::getPassRegistry();
    289     initializeCore(*Registry);
    290     initializeCodeGen(*Registry);
    291     initializeLowerIntrinsicsPass(*Registry);
    292 
    293     std::string errMessage;
    294     EngineBuilder builder{std::unique_ptr<Module>(mMainModule)};
    295     builder.setErrorStr(&errMessage);
    296     TargetOptions opts = InitTargetOptionsFromCodeGenFlags();
    297     opts.MCOptions.AsmVerbose = codegen::AsmVerbose;
    298 
    299     builder.setTargetOptions(opts);
    300     builder.setVerifyModules(true);
    301     CodeGenOpt::Level optLevel = CodeGenOpt::Level::None;
    302     switch (codegen::OptLevel) {
    303         case '0': optLevel = CodeGenOpt::None; break;
    304         case '1': optLevel = CodeGenOpt::Less; break;
    305         case '2': optLevel = CodeGenOpt::Default; break;
    306         case '3': optLevel = CodeGenOpt::Aggressive; break;
    307         default: errs() << codegen::OptLevel << " is an invalid optimization level.\n";
    308     }
    309     builder.setOptLevel(optLevel);
    310 
    311     setAllFeatures(builder);
    312 
    313197    mEngine = builder.create();
    314198    if (mEngine == nullptr) {
     
    316200    }
    317201    mTarget = builder.selectTarget();
    318     if (codegen::EnableObjectCache) {
     202    if (LLVM_LIKELY(codegen::EnableObjectCache && codegen::DebugOptions.getBits() == 0)) {
    319203        if (codegen::ObjectCacheDir.empty()) {
    320204            mCache = llvm::make_unique<ParabixObjectCache>();
     
    327211}
    328212
    329 void ParabixDriver::JITcompileMain () {
    330     // Use the pass manager to optimize the function.
    331     #ifndef NDEBUG
    332     try {
    333     #endif
    334     legacy::PassManager PM;
    335     #ifndef NDEBUG
    336     PM.add(createVerifierPass());
    337     #endif
    338     PM.add(createReassociatePass());             //Reassociate expressions.
    339     PM.add(createGVNPass());                     //Eliminate common subexpressions.
    340     PM.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
    341     PM.add(createCFGSimplificationPass());   
    342     PM.run(*mMainModule);
    343     #ifndef NDEBUG
    344     } catch (...) { mMainModule->dump(); throw; }
    345     #endif
    346 
    347     if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
    348         if (codegen::IROutputFilename.empty()) {
    349             mMainModule->dump();
    350         } else {
    351             std::error_code error;
    352             raw_fd_ostream out(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None);
    353             mMainModule->print(out, nullptr);
    354         }
    355     }
    356     #if LLVM_VERSION_MINOR > 6
    357     if (codegen::DebugOptionIsSet(codegen::ShowASM)) {
    358         WriteAssembly(mTarget, mMainModule);
    359     }
    360     #endif
    361 }
    362 
    363213void ParabixDriver::addKernelCall(kernel::KernelBuilder & kb, const std::vector<parabix::StreamSetBuffer *> & inputs, const std::vector<parabix::StreamSetBuffer *> & outputs) {
    364214    assert (mModuleMap.count(&kb) == 0);
    365215    mKernelList.push_back(&kb);
    366     mModuleMap.emplace(&kb, std::move(kb.createKernelStub(inputs, outputs)));
     216    mModuleMap.emplace(&kb, kb.createKernelStub(inputs, outputs));
    367217}
    368218
     
    384234    const auto f = mModuleMap.find(&kb);
    385235    assert ("addKernelCall(kb, ...) must be called before addExternalLink(kb, ...)" && f != mModuleMap.end());
    386     llvm::Module * const m = f->second.get();
    387     mEngine->addGlobalMapping(cast<Function>(m->getOrInsertFunction(name, type)), functionPtr);
     236    mEngine->addGlobalMapping(cast<Function>(f->second->getOrInsertFunction(name, type)), functionPtr);
    388237}
    389238
    390239void ParabixDriver::linkAndFinalize() {
    391     for (kernel::KernelBuilder * kb : mKernelList) {
    392         const auto f = mModuleMap.find(kb);
    393         if (LLVM_UNLIKELY(f == mModuleMap.end())) {
    394             report_fatal_error("linkAndFinalize was called twice!");
    395         }
    396         std::unique_ptr<Module> km(std::move(f->second));
     240    Module * m = mMainModule;
     241    #ifndef NDEBUG
     242    try {
     243    #endif
     244    legacy::PassManager PM;
     245    #ifndef NDEBUG
     246    PM.add(createVerifierPass());
     247    #endif
     248    PM.add(createReassociatePass());             //Reassociate expressions.
     249    PM.add(createGVNPass());                     //Eliminate common subexpressions.
     250    PM.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
     251    PM.add(createCFGSimplificationPass());
     252
     253    raw_fd_ostream * IROutputStream = nullptr;
     254    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
     255        if (codegen::IROutputFilename.empty()) {
     256            IROutputStream = new raw_fd_ostream(STDERR_FILENO, false, false);
     257        } else {
     258            std::error_code error;
     259            IROutputStream = new raw_fd_ostream(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None);
     260        }
     261        PM.add(createPrintModulePass(*IROutputStream));
     262    }
     263
     264    #ifndef USE_LLVM_3_6
     265    raw_fd_ostream * ASMOutputStream = nullptr;
     266    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowASM))) {
     267        if (codegen::ASMOutputFilename.empty()) {
     268            ASMOutputStream = new raw_fd_ostream(STDERR_FILENO, false, false);
     269        } else {
     270            std::error_code error;
     271            ASMOutputStream = new raw_fd_ostream(codegen::ASMOutputFilename, error, sys::fs::OpenFlags::F_None);
     272        }
     273        if (LLVM_UNLIKELY(mTarget->addPassesToEmitFile(PM, *ASMOutputStream, TargetMachine::CGFT_AssemblyFile))) {
     274            report_fatal_error("LLVM error: could not add emit assembly pass");
     275        }
     276    }
     277    #endif
     278
     279    PM.run(*m);
     280    for (auto pair : mModuleMap) {
     281        kernel::KernelBuilder * const kb = std::get<0>(pair);
     282        m = std::get<1>(pair);
    397283        bool uncachedObject = true;
    398284        if (mCache) {
    399             std::string moduleID = km->getModuleIdentifier();
    400             std::string signature;
    401             if (kb->moduleIDisSignature()) {
    402                 signature = moduleID;
    403             } else {
    404                 kb->generateKernelSignature(signature);
    405             }
     285            const std::string moduleID = m->getModuleIdentifier();
     286            const std::string signature = kb->generateKernelSignature(moduleID);
    406287            if (mCache->loadCachedObjectFile(moduleID, signature)) {
    407288                uncachedObject = false;
     
    409290        }
    410291        if (uncachedObject) {
    411             Module * const saveM = iBuilder->getModule();
    412             iBuilder->setModule(km.get());
     292            Module * const cm = iBuilder->getModule();
     293            iBuilder->setModule(m);
    413294            kb->generateKernel();
    414             iBuilder->setModule(saveM);
     295            PM.run(*m);
     296            iBuilder->setModule(cm);
    415297        }       
    416         mEngine->addModule(std::move(km));
    417     }
     298        mEngine->addModule(std::unique_ptr<Module>(m));
     299    }   
    418300    mEngine->finalizeObject();
     301
     302    delete IROutputStream;
     303    #ifndef USE_LLVM_3_6
     304    delete ASMOutputStream;
     305    #endif
     306    #ifndef NDEBUG
     307    } catch (...) { m->dump(); throw; }
     308    #endif
    419309    mModuleMap.clear();
    420310}
  • icGREP/icgrep-devel/icgrep/toolchain.h

    r5400 r5401  
    1616namespace llvm { class Module; }
    1717namespace llvm { class TargetMachine; }
     18namespace llvm { class formatted_raw_ostream; }
    1819namespace llvm { namespace cl { class OptionCategory; } }
    1920namespace IDISA { class IDISA_Builder; }
     
    2728enum DebugFlags {
    2829    ShowIR,
    29 #if LLVM_VERSION_MINOR > 6
     30#ifndef USE_LLVM_3_6
    3031    ShowASM,
    3132#endif
     
    5758bool AVX2_available();
    5859
    59 llvm::ExecutionEngine * JIT_to_ExecutionEngine (llvm::Module * m);
    60 
    61 void ApplyObjectCache(llvm::ExecutionEngine * e);
    62 
    63 void generatePipeline(IDISA::IDISA_Builder * iBuilder, const std::vector<kernel::KernelBuilder *> & kernels);
    64 
    65 
    6660class ParabixDriver {
    67 
    68     using ModuleMap = boost::container::flat_map<kernel::KernelBuilder *, std::unique_ptr<llvm::Module>>;
    69 
     61    using ModuleMap = boost::container::flat_map<kernel::KernelBuilder *, llvm::Module *>;
    7062public:
    7163    ParabixDriver(IDISA::IDISA_Builder * iBuilder);
     
    7365    IDISA::IDISA_Builder * getIDISA_Builder() {return iBuilder;}
    7466   
    75     void JITcompileMain ();
    76 
    7767    void addKernelCall(kernel::KernelBuilder & kb, const std::vector<parabix::StreamSetBuffer *> & inputs, const std::vector<parabix::StreamSetBuffer *> & outputs);
    7868   
     
    10090namespace {
    10191
    102 // NOTE: Currently, LLVM TypeBuilder can deduce FuntionTypes for only up to 5 arguments. The following
     92// NOTE: Currently, LLVM TypeBuilder can deduce FunctionTypes for up to 5 arguments. The following
    10393// templates have no limit but should be deprecated if the TypeBuilder ever supports n-ary functions.
    10494
  • icGREP/icgrep-devel/icgrep/u8u16.cpp

    r5395 r5401  
    380380
    381381    iBuilder->CreateRetVoid();
    382     pxDriver.JITcompileMain();
     382
    383383    pxDriver.linkAndFinalize();
    384384}
     
    477477   
    478478    iBuilder->CreateRetVoid();
    479     pxDriver.JITcompileMain();
     479
    480480    pxDriver.linkAndFinalize();
    481481}
  • icGREP/icgrep-devel/icgrep/wc.cpp

    r5394 r5401  
    185185   
    186186    iBuilder->CreateRetVoid();
    187    
    188     pxDriver.JITcompileMain();
     187
    189188    pxDriver.linkAndFinalize();
    190189}
Note: See TracChangeset for help on using the changeset viewer.