Changeset 5630


Ignore:
Timestamp:
Sep 7, 2017, 4:56:56 PM (2 weeks ago)
Author:
nmedfort
Message:

Partial check-in towards avoiding compilation of Pablo/LLVM code merely to determine the Kernel struct type when a cached object is used. Also checks in the (currently inactive) RE alternation minimization.

Location:
icGREP/icgrep-devel/icgrep
Files:
3 added
29 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r5599 r5630  
    8787add_library(PabloADT ${PABLO_SRC})
    8888add_library(RegExpADT re/re_re.cpp re/re_cc.cpp re/re_rep.cpp re/re_diff.cpp re/re_intersect.cpp re/printer_re.cpp)
    89 add_library(RegExpCompiler re/re_parser.cpp re/re_nullable.cpp re/re_simplifier.cpp re/re_star_normal.cpp re/re_local.cpp re/re_compiler.cpp re/re_analysis.cpp re/re_toolchain.cpp re/re_name_resolve.cpp re/re_name_gather.cpp re/re_collect_unicodesets.cpp re/re_multiplex.cpp re/re_parser_pcre.cpp re/re_parser_ere.cpp re/re_parser_bre.cpp re/re_parser_prosite.cpp re/re_utility.cpp)
     89add_library(RegExpCompiler re/re_parser.cpp re/re_memoizer.cpp re/re_nullable.cpp re/re_simplifier.cpp re/re_star_normal.cpp re/re_minimizer.cpp re/re_local.cpp re/re_compiler.cpp re/re_analysis.cpp re/re_toolchain.cpp re/re_name_resolve.cpp re/re_name_gather.cpp re/re_collect_unicodesets.cpp re/re_multiplex.cpp re/re_parser_pcre.cpp re/re_parser_ere.cpp re/re_parser_bre.cpp re/re_parser_prosite.cpp re/re_utility.cpp)
    9090add_library(CCADT cc/cc_compiler.cpp utf8_encoder.cpp utf16_encoder.cpp UCD/CaseFolding_txt.cpp cc/alphabet.cpp cc/multiplex_CCs.cpp)
    9191add_library(UCDlib UCD/unicode_set.cpp UCD/ucd_compiler.cpp UCD/PropertyObjects.cpp UCD/resolve_properties.cpp UCD/UnicodeNameData.cpp)
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.cpp

    r5624 r5630  
    201201    if (LLVM_UNLIKELY(printRegister == nullptr)) {
    202202        FunctionType *FT = FunctionType::get(getVoidTy(), { getInt8PtrTy(), int64Ty }, false);
    203         Function * function = Function::Create(FT, Function::InternalLinkage, "PrintInt", m);
     203        Function * function = Function::Create(FT, Function::ExternalLinkage, "PrintInt", m);
    204204        auto arg = function->arg_begin();
    205205        std::string out = "%-40s = %" PRIx64 "\n";
     
    561561
    562562PointerType * CBuilder::getVoidPtrTy() const {
    563     return TypeBuilder<void *, true>::get(getContext());
     563    return TypeBuilder<void *, false>::get(getContext());
    564564}
    565565
     
    10501050}
    10511051
    1052 #define CONCAT(a__, b__) a__##b__
    1053 #define STRINGIFY(a__) #a__
    1054 
    10551052#ifdef HAS_ADDRESS_SANITIZER
    10561053#define CHECK_ADDRESS_SANITIZER(Ptr, Name) \
     
    10701067    Value * check = CreateCall(isPoisoned, { addr, size }); \
    10711068    check = CreateICmpEQ(check, ConstantPointerNull::get(cast<PointerType>(isPoisoned->getReturnType()))); \
    1072     CreateAssert(check, STRINGIFY(CONCAT(Name, ": invalid memory address"))); \
     1069    CreateAssert(check, Name ": invalid memory address"); \
    10731070}
    10741071#else
     
    10781075#define CHECK_ADDRESS(Ptr, Name) \
    10791076    if (codegen::EnableAsserts) { \
    1080         CreateAssert(Ptr, STRINGIFY(CONCAT(Name, ": null pointer address"))); \
     1077        CreateAssert(Ptr, Name ": null pointer address"); \
    10811078        CHECK_ADDRESS_SANITIZER(Ptr, Name) \
    10821079    }
     
    11611158}
    11621159
    1163 CallInst * CBuilder::CreateMemMove(Value * Dst, Value * Src, Value *Size, unsigned Align, bool isVolatile, MDNode *TBAATag, MDNode *ScopeTag, MDNode *NoAliasTag) {
     1160CallInst * CBuilder::CreateMemMove(Value * Dst, Value * Src, Value *Size, unsigned Align, bool isVolatile,
     1161                                   MDNode *TBAATag, MDNode *ScopeTag, MDNode *NoAliasTag) {
    11641162    if (codegen::EnableAsserts) {
    11651163        DataLayout DL(getModule());
     
    11671165        Value * intDst = CreatePtrToInt(Dst, intPtrTy);
    11681166        Value * intSrc = CreatePtrToInt(Src, intPtrTy);
     1167        // If the call to this intrinsic has an alignment value that is not 0 or 1, then the caller
     1168        // guarantees that both the source and destination pointers are aligned to that boundary.
    11691169        if (Align > 1) {
    11701170            ConstantInt * align = ConstantInt::get(intPtrTy, Align);
     
    11741174    }
    11751175    return IRBuilder<>::CreateMemMove(Dst, Src, Size, Align, isVolatile, TBAATag, ScopeTag, NoAliasTag);
     1176}
     1177
     1178llvm::CallInst * CBuilder::CreateMemCpy(llvm::Value *Dst, llvm::Value *Src, llvm::Value *Size, unsigned Align, bool isVolatile,
     1179                                        llvm::MDNode *TBAATag, llvm::MDNode *TBAAStructTag, llvm::MDNode *ScopeTag, llvm::MDNode *NoAliasTag) {
     1180    if (codegen::EnableAsserts) {
     1181        DataLayout DL(getModule());
     1182        IntegerType * const intPtrTy = DL.getIntPtrType(getContext());
     1183        Value * intDst = CreatePtrToInt(Dst, intPtrTy);
     1184        Value * intSrc = CreatePtrToInt(Src, intPtrTy);
     1185        // If the call to this intrinsic has an alignment value that is not 0 or 1, then the caller
     1186        // guarantees that both the source and destination pointers are aligned to that boundary.
     1187        if (Align > 1) {
     1188            ConstantInt * align = ConstantInt::get(intPtrTy, Align);
     1189            CreateAssertZero(CreateURem(intDst, align), "CreateMemCpy: Dst pointer is misaligned");
     1190            CreateAssertZero(CreateURem(intSrc, align), "CreateMemCpy: Src pointer is misaligned");
     1191        }
     1192        Value * intSize = CreateZExtOrTrunc(Size, intSrc->getType());
     1193        Value * nonOverlapping = CreateOr(CreateICmpULT(CreateAdd(intSrc, intSize), intDst),
     1194                                          CreateICmpULT(CreateAdd(intDst, intSize), intSrc));
     1195        CreateAssert(nonOverlapping, "CreateMemCpy: overlapping ranges is undefined");
     1196    }
     1197    return IRBuilder<>::CreateMemCpy(Dst, Src, Size, Align, isVolatile, TBAATag, TBAAStructTag, ScopeTag, NoAliasTag);
    11761198}
    11771199
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.h

    r5624 r5630  
    258258    llvm::StoreInst * CreateAlignedStore(llvm::Value * Val, llvm::Value * Ptr, unsigned Align, bool isVolatile = false);
    259259
     260    llvm::CallInst * CreateMemMove(llvm::Value *Dst, llvm::Value *Src, uint64_t Size, unsigned Align,
     261                            bool isVolatile = false, llvm::MDNode *TBAATag = nullptr,
     262                            llvm::MDNode *ScopeTag = nullptr,
     263                            llvm::MDNode *NoAliasTag = nullptr) {
     264        return CreateMemMove(Dst, Src, getInt64(Size), Align, isVolatile, TBAATag, ScopeTag, NoAliasTag);
     265    }
     266
    260267    llvm::CallInst * CreateMemMove(llvm::Value *Dst, llvm::Value *Src, llvm::Value *Size, unsigned Align,
    261268                            bool isVolatile = false, llvm::MDNode *TBAATag = nullptr,
    262269                            llvm::MDNode *ScopeTag = nullptr,
    263270                            llvm::MDNode *NoAliasTag = nullptr);
     271
     272    llvm::CallInst * CreateMemCpy(llvm::Value *Dst, llvm::Value *Src, uint64_t Size, unsigned Align,
     273                           bool isVolatile = false, llvm::MDNode *TBAATag = nullptr,
     274                           llvm::MDNode *TBAAStructTag = nullptr,
     275                           llvm::MDNode *ScopeTag = nullptr,
     276                           llvm::MDNode *NoAliasTag = nullptr) {
     277        return CreateMemCpy(Dst, Src, getInt64(Size), Align, isVolatile, TBAATag, TBAAStructTag, ScopeTag, NoAliasTag);
     278    }
     279
     280    llvm::CallInst * CreateMemCpy(llvm::Value *Dst, llvm::Value *Src, llvm::Value *Size, unsigned Align,
     281                           bool isVolatile = false, llvm::MDNode *TBAATag = nullptr,
     282                           llvm::MDNode *TBAAStructTag = nullptr,
     283                           llvm::MDNode *ScopeTag = nullptr,
     284                           llvm::MDNode *NoAliasTag = nullptr);
     285
    264286
    265287    void setDriver(Driver * const driver) {
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_nvptx_builder.cpp

    r5486 r5630  
    210210  CreateCall(barrierFunc);
    211211
    212   Value * carryOffsetPtr = nullptr;
    213212  Value * carryVal = carryInitVal;
    214   Value * bubbleOffsetPtr = nullptr;
    215213  Value * bubbleVal = bubbleInitVal;
    216214
    217215  for (unsigned offset = groupThreads/2; offset>0; offset=offset>>1){
    218     carryOffsetPtr = CreateGEP(carry, {getInt32(0), CreateXor(id, getInt32(offset))});
     216    Value * carryOffsetPtr = CreateGEP(carry, {getInt32(0), CreateXor(id, getInt32(offset))});
    219217    carryVal = CreateOr(carryVal, CreateLoad(carryOffsetPtr));
    220218    CreateStore(carryVal, carryPtr);
    221     bubbleOffsetPtr = CreateGEP(bubble, {getInt32(0), CreateXor(id, getInt32(offset))});
     219    Value * bubbleOffsetPtr = CreateGEP(bubble, {getInt32(0), CreateXor(id, getInt32(offset))});
    222220    bubbleVal = CreateOr(bubbleVal, CreateLoad(bubbleOffsetPtr));
    223221    CreateStore(bubbleVal, bubblePtr);
  • icGREP/icgrep-devel/icgrep/cc/multiplex_CCs.cpp

    r5490 r5630  
    4444
    4545
    46 void doMultiplexCCs(std::vector<UCD::UnicodeSet> CCs,
     46void doMultiplexCCs(const std::vector<UCD::UnicodeSet> & CCs,
    4747                    std::vector<std::vector<unsigned>> & exclusiveSetIDs,
    4848                    std::vector<UCD::UnicodeSet> & multiplexedCCs) {
  • icGREP/icgrep-devel/icgrep/cc/multiplex_CCs.h

    r5369 r5630  
    1010
    1111
    12 void doMultiplexCCs(std::vector<UCD::UnicodeSet> CCs,
     12void doMultiplexCCs(const std::vector<UCD::UnicodeSet> & CCs,
    1313                    std::vector<std::vector<unsigned>> & exclusiveSetIDs,
    1414                    std::vector<UCD::UnicodeSet> & multiplexedCCs);
  • icGREP/icgrep-devel/icgrep/editd/editd.cpp

    r5603 r5630  
    685685    auto editdScanK = pxDriver.addKernelInstance(make_unique<editdScanKernel>(iBuilder, editDistance));
    686686    pxDriver.makeKernelCall(editdScanK, {MatchResults}, {});
    687        
     687    pxDriver.LinkFunction(*editdScanK, "wrapped_report_pos", &wrapped_report_pos);
    688688    pxDriver.generatePipelineIR();
    689689    pxDriver.deallocateBuffers();
    690690    iBuilder->CreateRetVoid();
    691691
    692     pxDriver.LinkFunction(*editdScanK, "wrapped_report_pos", &wrapped_report_pos);
    693692    pxDriver.finalizeObject();
    694693
  • icGREP/icgrep-devel/icgrep/icgrep-devel.files

    r5620 r5630  
    291291wc.cpp
    292292CMakeLists.txt
     293segtok.cpp
     294re/re_minimizer.h
     295re/re_minimizer.cpp
     296re/re_memoizer.cpp
  • icGREP/icgrep-devel/icgrep/icgrep-devel.includes

    r5620 r5630  
    22../boost/include/
    33../libllvm/include/
     4re
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5623 r5630  
    111111}
    112112
    113 Module * Kernel::makeModule(const std::unique_ptr<KernelBuilder> & idb) {
    114     assert (mModule == nullptr);
    115     std::stringstream cacheName;   
     113std::string Kernel::getCacheName(const std::unique_ptr<KernelBuilder> & idb) const {
     114    std::stringstream cacheName;
    116115    cacheName << getName() << '_' << idb->getBuilderUniqueName();
    117116    for (const StreamSetBuffer * b: mStreamSetInputBuffers) {
     
    121120        cacheName <<  ':' <<  b->getUniqueID();
    122121    }
    123     mModule = new Module(cacheName.str(), idb->getContext());
    124     prepareKernel(idb);
     122    return cacheName.str();
     123}
     124
     125Module * Kernel::setModule(Module * const module) {
     126    assert (mModule == nullptr || mModule == module);
     127    assert (module != nullptr);
     128    mModule = module;
    125129    return mModule;
    126130}
    127131
    128 Module * Kernel::setModule(const std::unique_ptr<KernelBuilder> & idb, llvm::Module * const module) {
    129     assert (mModule == nullptr);
    130     mModule = module;
    131     prepareKernel(idb);
    132     return mModule;
     132Module * Kernel::makeModule(const std::unique_ptr<kernel::KernelBuilder> & idb) {
     133    return setModule(new Module(getCacheName(idb), idb->getContext()));
    133134}
    134135
     
    145146    const auto requiredBlocks = codegen::SegmentSize + ((blockSize + mLookAheadPositions - 1) / blockSize);
    146147
     148    IntegerType * const sizeTy = idb->getSizeTy();
     149
    147150    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    148151        if ((mStreamSetInputBuffers[i]->getBufferBlocks() != 0) && (mStreamSetInputBuffers[i]->getBufferBlocks() < requiredBlocks)) {
     
    151154        mScalarInputs.emplace_back(mStreamSetInputBuffers[i]->getStreamSetHandle()->getType(), mStreamSetInputs[i].name + BUFFER_PTR_SUFFIX);
    152155        if ((i == 0) || !mStreamSetInputs[i].rate.isExact()) {
    153             addScalar(idb->getSizeTy(), mStreamSetInputs[i].name + PROCESSED_ITEM_COUNT_SUFFIX);
    154         }
    155     }
    156 
    157     IntegerType * const sizeTy = idb->getSizeTy();
     156            addScalar(sizeTy, mStreamSetInputs[i].name + PROCESSED_ITEM_COUNT_SUFFIX);
     157        }
     158    }
     159
    158160    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    159161        mScalarInputs.emplace_back(mStreamSetOutputBuffers[i]->getStreamSetHandle()->getType(), mStreamSetOutputs[i].name + BUFFER_PTR_SUFFIX);
     
    191193    // will be able to add instrumentation to cached modules without recompilation.
    192194    addScalar(idb->getInt64Ty(), CYCLECOUNT_SCALAR);
     195    addInternalKernelProperties(idb);
    193196    // NOTE: StructType::create always creates a new type even if an identical one exists.
    194     mKernelStateType = getModule()->getTypeByName(getName());
     197    if (LLVM_UNLIKELY(mModule == nullptr)) {
     198        setModule(new Module(getCacheName(idb), idb->getContext()));
     199    }
     200    mKernelStateType = mModule->getTypeByName(getName());
    195201    if (LLVM_LIKELY(mKernelStateType == nullptr)) {
    196202        mKernelStateType = StructType::create(idb->getContext(), mKernelFields, getName());
     
    198204    processingRateAnalysis();
    199205}
    200    
     206
     207void Kernel::prepareCachedKernel(const std::unique_ptr<KernelBuilder> & idb) {
     208
     209    assert ("KernelBuilder does not have a valid IDISA Builder" && idb);
     210    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
     211        report_fatal_error("Cannot prepare kernel after kernel state finalized");
     212    }
     213    assert (getModule());
     214    const auto blockSize = idb->getBitBlockWidth();
     215    if (mStride == 0) {
     216        // Set the default kernel stride.
     217        mStride = blockSize;
     218    }
     219    const auto requiredBlocks = codegen::SegmentSize + ((blockSize + mLookAheadPositions - 1) / blockSize);
     220
     221    IntegerType * const sizeTy = idb->getSizeTy();
     222    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
     223        if ((mStreamSetInputBuffers[i]->getBufferBlocks() != 0) && (mStreamSetInputBuffers[i]->getBufferBlocks() < requiredBlocks)) {
     224            //report_fatal_error(getName() + ": " + mStreamSetInputs[i].name + " requires buffer size " + std::to_string(requiredBlocks));
     225        }
     226        mScalarInputs.emplace_back(mStreamSetInputBuffers[i]->getStreamSetHandle()->getType(), mStreamSetInputs[i].name + BUFFER_PTR_SUFFIX);
     227        if ((i == 0) || !mStreamSetInputs[i].rate.isExact()) {
     228            addScalar(sizeTy, mStreamSetInputs[i].name + PROCESSED_ITEM_COUNT_SUFFIX);
     229        }
     230    }
     231
     232    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
     233        mScalarInputs.emplace_back(mStreamSetOutputBuffers[i]->getStreamSetHandle()->getType(), mStreamSetOutputs[i].name + BUFFER_PTR_SUFFIX);
     234        if ((mStreamSetInputs.empty() && (i == 0)) || !mStreamSetOutputs[i].rate.isExact()) {
     235            addScalar(sizeTy, mStreamSetOutputs[i].name + PRODUCED_ITEM_COUNT_SUFFIX);
     236        }
     237    }
     238    for (const auto & binding : mScalarInputs) {
     239        addScalar(binding.type, binding.name);
     240    }
     241    for (const auto & binding : mScalarOutputs) {
     242        addScalar(binding.type, binding.name);
     243    }
     244    if (mStreamMap.empty()) {
     245        prepareStreamSetNameMap();
     246    }
     247    for (const auto & binding : mInternalScalars) {
     248        addScalar(binding.type, binding.name);
     249    }
     250
     251    Type * const consumerSetTy = StructType::get(sizeTy, sizeTy->getPointerTo()->getPointerTo(), nullptr)->getPointerTo();
     252    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
     253        addScalar(consumerSetTy, mStreamSetOutputs[i].name + CONSUMER_SUFFIX);
     254    }
     255
     256    addScalar(sizeTy, LOGICAL_SEGMENT_NO_SCALAR);
     257    addScalar(idb->getInt1Ty(), TERMINATION_SIGNAL);
     258
     259    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
     260        addScalar(sizeTy, mStreamSetOutputs[i].name + CONSUMED_ITEM_COUNT_SUFFIX);
     261    }
     262
     263    // We compile a 64-bit CPU cycle counter into every kernel.   It will remain unused
     264    // in normal execution, but when codegen::EnableCycleCounter is specified, pipelines
     265    // will be able to add instrumentation to cached modules without recompilation.
     266    addScalar(idb->getInt64Ty(), CYCLECOUNT_SCALAR);
     267    mKernelStateType = getModule()->getTypeByName(getName());
     268    assert (mKernelStateType);
     269    processingRateAnalysis();
     270}
    201271   
    202272void Kernel::processingRateAnalysis() {
     
    290360        const auto m = idb->getModule();
    291361        const auto ip = idb->saveIP();
    292         const auto saveInstance = getInstance();
     362        // const auto saveInstance = getInstance();
    293363        idb->setModule(mModule);
    294364        addKernelDeclarations(idb);
     
    296366        callGenerateDoSegmentMethod(idb);
    297367        callGenerateFinalizeMethod(idb);
    298         setInstance(saveInstance);
     368        // setInstance(saveInstance);
    299369        idb->setModule(m);
    300370        idb->restoreIP(ip);
     
    812882
    813883    Value * blockBaseMask = kb->CreateNot(kb->getSize(kb->getBitBlockWidth() - 1));
    814     //
    815     // Define and allocate the temporary buffer area.
    816     //
    817     Type * tempBuffers[totalSetCount];
    818     for (unsigned i = 0; i < totalSetCount; i++) {
    819         unsigned blocks = maxBlocksToCopy[i];
    820         Type * bufType = i < inputSetCount ? mStreamSetInputBuffers[i]->getStreamSetBlockType() : mStreamSetOutputBuffers[i -inputSetCount]->getStreamSetBlockType();
    821         if (blocks > 1) {
    822             tempBuffers[i] = ArrayType::get(bufType, blocks);
    823         }
    824         else {
    825             tempBuffers[i] = bufType;
    826         }
    827     }
    828     Type * tempParameterStructType = StructType::create(kb->getContext(), ArrayRef<Type *>(tempBuffers, totalSetCount), "tempBuf");
    829     Value * tempParameterArea = kb->CreateCacheAlignedAlloca(tempParameterStructType);
    830884    ConstantInt * blockSize = kb->getSize(kb->getBitBlockWidth());
    831885    ConstantInt * strideSize = kb->getSize(mStride);
     
    866920    // buffer block containing the next item, and the number of linearly available items.
    867921
    868     std::vector<Value *> processedItemCount;
    869     std::vector<Value *> inputBlockPtr;
     922    Value * processedItemCount[inputSetCount];
     923    Value * inputBlockPtr[inputSetCount];
    870924    std::vector<Value *> producedItemCount;
    871925    std::vector<Value *> outputBlockPtr;
     
    873927    //  Now determine the linearly available blocks, based on blocks remaining reduced
    874928    //  by limitations of linearly available input buffer space.
    875 
    876929    Value * linearlyAvailStrides = stridesRemaining;
    877930    for (unsigned i = 0; i < inputSetCount; i++) {
     
    879932        Value * blkNo = kb->CreateUDiv(p, blockSize);
    880933        Value * b = kb->getInputStreamBlockPtr(mStreamSetInputs[i].name, kb->getInt32(0));
    881         processedItemCount.push_back(p);
    882         inputBlockPtr.push_back(b);
     934        // processedItemCount.push_back(p);
     935        processedItemCount[i] = p;
     936        // inputBlockPtr.push_back(b);
     937        inputBlockPtr[i] = b;
    883938        auto & rate = mStreamSetInputs[i].rate;
    884939        if (rate.isUnknownRate()) continue;  // No calculation possible for unknown rates.
     
    894949        linearlyAvailStrides = kb->CreateSelect(kb->CreateICmpULT(maxStrides, linearlyAvailStrides), maxStrides, linearlyAvailStrides);
    895950    }
     951
    896952    //  Now determine the linearly writeable blocks, based on available blocks reduced
    897953    //  by limitations of output buffer space.
     
    915971        linearlyWritableStrides = kb->CreateSelect(kb->CreateICmpULT(maxStrides, linearlyWritableStrides), maxStrides, linearlyWritableStrides);
    916972    }
    917     Value * haveStrides = kb->CreateICmpUGT(linearlyWritableStrides, kb->getSize(0));
    918     kb->CreateCondBr(haveStrides, doMultiBlockCall, tempBlockCheck);
     973    Value * const haveFullStrides = kb->CreateICmpUGT(linearlyWritableStrides, kb->getSize(0));
     974    kb->CreateCondBr(haveFullStrides, doMultiBlockCall, tempBlockCheck);
    919975
    920976    //  At this point we have verified the availability of one or more blocks of input data and output buffer space for all stream sets.
     
    9441000
    9451001    kb->CreateCall(multiBlockFunction, doMultiBlockArgs);
     1002
    9461003    // Do copybacks if necessary.
    9471004    unsigned priorIdx = 0;
    948     for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    949         Value * log2BlockSize = kb->getSize(std::log2(kb->getBitBlockWidth()));
     1005    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {       
    9501006        if (auto cb = dyn_cast<SwizzledCopybackBuffer>(mStreamSetOutputBuffers[i]))  {
     1007            Value * log2BlockSize = kb->getSize(std::log2(kb->getBitBlockWidth()));
    9511008            BasicBlock * copyBack = kb->CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBack");
    9521009            BasicBlock * done = kb->CreateBasicBlock(mStreamSetOutputs[i].name + "_copyBackDone");
     
    9861043    kb->setProcessedItemCount(mStreamSetInputs[0].name, nowProcessed);
    9871044    Value * reducedStridesToDo = kb->CreateSub(stridesRemaining, linearlyWritableStrides);
    988     BasicBlock * multiBlockFinal = kb->GetInsertBlock();
    989     stridesRemaining->addIncoming(reducedStridesToDo, multiBlockFinal);
     1045    stridesRemaining->addIncoming(reducedStridesToDo, kb->GetInsertBlock());
    9901046    kb->CreateBr(doSegmentOuterLoop);
     1047
     1048
    9911049    //
    9921050    // We use temporary buffers in 3 different cases that preclude full block processing.
     
    9991057
    10001058    kb->SetInsertPoint(tempBlockCheck);
    1001     haveStrides = kb->CreateICmpUGT(stridesRemaining, kb->getSize(0));
     1059    Value * const haveStrides = kb->CreateICmpUGT(stridesRemaining, kb->getSize(0));
    10021060    kb->CreateCondBr(kb->CreateOr(mIsFinal, haveStrides), doTempBufferBlock, segmentDone);
    10031061
     
    10171075        }
    10181076    }
     1077    //
     1078    // Define and allocate the temporary buffer area.
     1079    //
     1080    Type * tempBuffers[totalSetCount];
     1081    for (unsigned i = 0; i < inputSetCount; ++i) {
     1082        Type * bufType = mStreamSetInputBuffers[i]->getStreamSetBlockType();
     1083        tempBuffers[i] = ArrayType::get(bufType, maxBlocksToCopy[i]);
     1084    }
     1085    for (unsigned i = 0; i < outputSetCount; i++) {
     1086        Type * bufType = mStreamSetOutputBuffers[i]->getStreamSetBlockType();
     1087        tempBuffers[i + inputSetCount] = ArrayType::get(bufType, maxBlocksToCopy[i + inputSetCount]);
     1088    }
     1089    Type * tempParameterStructType = StructType::create(kb->getContext(), ArrayRef<Type *>(tempBuffers, totalSetCount), "tempBuf");
    10191090    // Prepare the temporary buffer area.
    1020     //
    1021     // First zero it out.
    1022     Constant * const tempAreaSize = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(tempParameterStructType), kb->getSizeTy(), false);
    1023     kb->CreateMemZero(tempParameterArea, tempAreaSize);
    1024     // For each input and output buffer, copy over necessary data starting from the last
    1025     // block boundary.
     1091    Value * tempParameterArea = kb->CreateCacheAlignedAlloca(tempParameterStructType);
     1092    kb->CreateMemZero(tempParameterArea, ConstantExpr::getSizeOf(tempParameterStructType));
     1093    // For each input and output buffer, copy over necessary data starting from the last block boundary.
    10261094    Value * itemCountNeeded[inputSetCount];
    10271095    itemCountNeeded[0] = tempBlockItems;
    10281096    Value * finalItemCountNeeded[inputSetCount];
    10291097
    1030     for (unsigned i = 0; i < mStreamSetInputBuffers.size(); i++) {
     1098    for (unsigned i = 0; i < inputSetCount; i++) {
    10311099        Type * bufPtrType = mStreamSetInputBuffers[i]->getPointerType();
    10321100        if (mItemsPerStride[i] != 0) {
     
    10351103            ConstantInt * strideItems = kb->getSize(mItemsPerStride[i]);
    10361104            Value * strideBasePos = kb->CreateSub(processedItemCount[i], kb->CreateURem(processedItemCount[i], strideItems));
    1037             Value * blockBasePos = (mItemsPerStride[i] % bitBlockWidth == 0) ? strideBasePos : kb->CreateAnd(strideBasePos, blockBaseMask);
     1105            Value * blockBasePos = strideBasePos;
     1106            if (mItemsPerStride[i] & (bitBlockWidth - 1)) {
     1107                blockBasePos = kb->CreateAnd(strideBasePos, blockBaseMask);
     1108            }
    10381109
    10391110            // The number of items to copy is determined by the processing rate requirements.
     
    10771148            }
    10781149            tempArgs.push_back(tempBufPtr);
    1079         }
    1080         else {
     1150        } else {
    10811151            Value * bufPtr = kb->getInputStreamBlockPtr(mStreamSetInputs[i].name, kb->getInt32(0));
    10821152            bufPtr = kb->CreatePointerCast(bufPtr, mStreamSetInputBuffers[i]->getPointerType());
     
    10851155    }
    10861156    Value * outputBasePos[outputSetCount];
    1087     for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); i++) {
    1088         Value * tempBufPtr = kb->CreateGEP(tempParameterArea,  {kb->getInt32(0), kb->getInt32(mStreamSetInputs.size() + i)});
     1157    for (unsigned i = 0; i < outputSetCount; i++) {
     1158        Value * tempBufPtr = kb->CreateGEP(tempParameterArea,  {kb->getInt32(0), kb->getInt32(inputSetCount + i)});
    10891159        Type * bufPtrType = mStreamSetOutputBuffers[i]->getPointerType();
    10901160        tempBufPtr = kb->CreatePointerCast(tempBufPtr, bufPtrType);
     
    11341204    }
    11351205
    1136 
    11371206    //  We've dealt with the partial block processing and copied information back into the
    11381207    //  actual buffers.  If this isn't the final block, loop back for more multiblock processing.
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5615 r5630  
    8888
    8989    StreamPort getStreamPort(const std::string & name) const;
    90    
    91     llvm::Module * makeModule(const std::unique_ptr<KernelBuilder> & idb);
    92 
    93     llvm::Module * setModule(const std::unique_ptr<KernelBuilder> & idb, llvm::Module * const module);
     90
     91    llvm::Module * setModule(llvm::Module * const module);
     92
     93    llvm::Module * makeModule(const std::unique_ptr<kernel::KernelBuilder> & idb);
    9494
    9595    llvm::Module * getModule() const {
     
    137137    virtual ~Kernel() = 0;
    138138
    139 protected:
     139    void prepareKernel(const std::unique_ptr<KernelBuilder> & idb);
     140
     141    void prepareCachedKernel(const std::unique_ptr<KernelBuilder> & idb);
     142
     143    std::string getCacheName(const std::unique_ptr<KernelBuilder> & idb) const;
     144
     145protected:
     146
     147    virtual void addInternalKernelProperties(const std::unique_ptr<KernelBuilder> & idb) { }
    140148
    141149    // Constructor
     
    168176
    169177    void linkExternalMethods(const std::unique_ptr<kernel::KernelBuilder> &) override { }
    170 
    171     virtual void prepareKernel(const std::unique_ptr<KernelBuilder> & idb);
    172178
    173179    virtual void generateInitializeMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) { }
  • icGREP/icgrep-devel/icgrep/kernels/lz4_bytestream_decoder.cpp

    r5440 r5630  
    7878            outputBufferBasePtr,
    7979            iBuilder->CreateGEP(inputBufferBasePtr, iBuilder->CreateAdd(literalStart, copyLength1)),
    80             iBuilder->CreateSub(literalLength, copyLength1), 8);        // Buffer start is aligned.
     80            iBuilder->CreateSub(literalLength, copyLength1), 1); // Buffer start is aligned.
     81    // NOTE: Test case reported non-8-byte alignment
    8182    outputItems = iBuilder->CreateAdd(outputItems, literalLength);
    8283
  • icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.cpp

    r5526 r5630  
    5555    /* self = */ args++;
    5656    Value * itemsToDo = &*(args++);
    57     Value * inputStreamAvail = &*(args++);
     57    /* inputStreamAvail = */ args++;
    5858    Value * match_result = &*(args++);
    5959    Value * line_break = &*(args++);
    60     Value * input_stream = &*(args);
     60    /* input_stream = */ args++;
    6161
    6262    Value * blocksToDo = iBuilder->CreateUDiv(iBuilder->CreateAdd(itemsToDo, blockSizeLess1), blockSize);
  • icGREP/icgrep-devel/icgrep/pablo/carry_data.h

    r5366 r5630  
    1111
    1212class CarryData {
    13     friend class CarryManager;
    1413public:
    1514
    16     enum SummaryType : int {
    17         NoSummary
    18         , ImplicitSummary
    19         , BorrowedSummary
    20         , ExplicitSummary
     15    enum SummaryType : unsigned {
     16        NoSummary = 0
     17        , ImplicitSummary = 1
     18        , BorrowedSummary = 2
     19        , ExplicitSummary = 3
     20        , NonCarryCollapsingMode = 4
    2121    };
    2222
    2323    CarryData()
    24     : mSummaryType(NoSummary)
    25     , mInNonCollapsingCarryMode(false) {
     24    : mSummaryType(NoSummary) {
    2625
    2726    }
    2827             
    2928    bool hasSummary() const {
    30         return (mSummaryType != NoSummary);
     29        return (mSummaryType & (ImplicitSummary | BorrowedSummary | ExplicitSummary)) != NoSummary;
    3130    }
    3231   
    3332    bool hasImplicitSummary() const {
    34         return (mSummaryType == ImplicitSummary);
     33        return (mSummaryType & (ImplicitSummary | BorrowedSummary | ExplicitSummary)) == ImplicitSummary;
    3534    }
    3635
    3736    bool hasBorrowedSummary() const {
    38         return (mSummaryType == BorrowedSummary);
     37        return (mSummaryType & (ImplicitSummary | BorrowedSummary | ExplicitSummary)) == BorrowedSummary;
    3938    }
    4039
    4140    bool hasExplicitSummary() const {
    42         return (mSummaryType == ExplicitSummary);
     41        return (mSummaryType & (ImplicitSummary | BorrowedSummary | ExplicitSummary)) == ExplicitSummary;
    4342    }
    4443
    4544    bool nonCarryCollapsingMode() const {
    46         return mInNonCollapsingCarryMode;
     45        return (mSummaryType & (NonCarryCollapsingMode)) != 0;
    4746    }
    4847
     
    5251
    5352    void setNonCollapsingCarryMode(const bool value = true) {
    54         mInNonCollapsingCarryMode = value;
     53        if (value) {
     54            mSummaryType = (SummaryType)(mSummaryType | NonCarryCollapsingMode);
     55        } else {
     56            mSummaryType = (SummaryType)(mSummaryType & ~NonCarryCollapsingMode);
     57        }
    5558    }
    5659   
     
    5861
    5962    SummaryType     mSummaryType;
    60     bool            mInNonCollapsingCarryMode;
    6163
    6264};
  • icGREP/icgrep-devel/icgrep/pablo/carrypack_manager.cpp

    r5510 r5630  
    127127    mCarryGroup.resize(assignDefaultCarryGroups(kernel->getEntryBlock()));
    128128
    129     Type * const carryStateTy = analyse(iBuilder, kernel->getEntryBlock());
    130 
    131     kernel->addScalar(carryStateTy, "carries");
    132 
    133 //    iBuilder->CallPrintInt("carry state size:", ConstantExpr::getSizeOf(carryStateTy));
     129    kernel->setCarryDataTy(analyse(iBuilder, mCurrentScope));
     130
     131    kernel->addScalar(kernel->getCarryDataTy(), "carries");
    134132
    135133    if (mHasLoop) {
  • icGREP/icgrep-devel/icgrep/pablo/pablo_kernel.cpp

    r5620 r5630  
    125125}
    126126
    127 void PabloKernel::prepareKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
     127void PabloKernel::addInternalKernelProperties(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    128128    mSizeTy = iBuilder->getSizeTy();
    129129    mStreamTy = iBuilder->getStreamTy();
     
    132132    mPabloCompiler->initializeKernelData(iBuilder);
    133133    mSizeTy = nullptr;
    134     mStreamTy = nullptr;
    135     BlockOrientedKernel::prepareKernel(iBuilder);
     134    mStreamTy = nullptr;   
    136135}
    137136
  • icGREP/icgrep-devel/icgrep/pablo/pablo_kernel.h

    r5510 r5630  
    3131    friend class CarryManager;
    3232    friend class CarryPackManager;
     33    friend class ParabixObjectCache;
    3334
    3435public:
     
    124125    Integer * getInteger(const int64_t value) const;
    125126
     127    llvm::StructType * getCarryDataTy() const {
     128        return mCarryDataTy;
     129    }
     130
    126131protected:
    127132
     
    145150    llvm::IntegerType * getInt1Ty() const;
    146151
    147 private:
     152    void setCarryDataTy(llvm::StructType * const carryDataTy) {
     153        mCarryDataTy = carryDataTy;
     154    }
    148155
    149156    // A custom method for preparing kernel declarations is needed,
    150157    // so that the carry data requirements may be accommodated before
    151158    // finalizing the KernelStateType.
    152     void prepareKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) final;
     159    void addInternalKernelProperties(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) final;
     160
     161private:
    153162
    154163    void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) final;
     
    169178    llvm::IntegerType *             mSizeTy;
    170179    llvm::VectorType *              mStreamTy;
     180    llvm::StructType *              mCarryDataTy;
    171181    std::vector<Var *>              mInputs;
    172182    std::vector<Var *>              mOutputs;
  • icGREP/icgrep-devel/icgrep/re/re_memoizer.hpp

    r5267 r5630  
    1 #ifndef RE_NAMEDICTIONARY_H
    2 #define RE_NAMEDICTIONARY_H
     1#ifndef RE_MEMOIZER_H
     2#define RE_MEMOIZER_H
    33
    44#include <re/re_name.h>
     
    88
    99struct MemoizerComparator {
    10     inline bool operator() (const RE * lh, const RE * rh) const{
    11         if (LLVM_LIKELY(llvm::isa<Name>(lh) && llvm::isa<Name>(rh))) {
    12             return *llvm::cast<Name>(lh) < *llvm::cast<Name>(rh);
    13         } else if (llvm::isa<Name>(lh)) {
    14             return *llvm::cast<Name>(lh) < *llvm::cast<CC>(rh);
    15         }
    16         return *llvm::cast<Name>(rh) > *llvm::cast<CC>(lh);
    17     }
     10    bool operator() (const RE * lh, const RE * rh) const;
    1811};
    1912
    2013struct Memoizer : public std::set<RE *, MemoizerComparator> {
    2114
    22     inline Name * memoize(CC * cc) {
     15    RE * memoize(RE * const re) {
     16        return *(insert(re).first);
     17    }
     18
     19    Name * memoize(Name * const name) {
     20        return llvm::cast<Name>(memoize(llvm::cast<RE>(name)));
     21    }
     22
     23    Name * memoize(CC * const cc) {
    2324        auto f = find(cc);
    2425        if (f != end()) {
    2526            return llvm::cast<Name>(*f);
    2627        } else {
    27             Name * name = makeName(cc);
    28             insert(name);
    29             return name;
     28            return memoize(makeName(cc));
    3029        }
    31     }
    32 
    33     inline Name * memoize(Name * name) {
    34         return llvm::cast<Name>(*insert(name).first);
    3530    }
    3631};
     
    3833}
    3934
    40 #endif // RE_NAMEDICTIONARY_H
     35#endif // RE_MEMOIZER_H
  • icGREP/icgrep-devel/icgrep/re/re_name.h

    r5558 r5630  
    127127        return *llvm::cast<CC>(mDefinition) < other;
    128128    }
    129     return false;
     129    return RE::ClassTypeId::Name < RE::ClassTypeId::CC;
    130130}
    131131
     
    134134        return other < *llvm::cast<CC>(mDefinition);
    135135    }
    136     return true;
     136    return RE::ClassTypeId::CC < RE::ClassTypeId::Name;
    137137}
    138138
  • icGREP/icgrep-devel/icgrep/re/re_parser.cpp

    r5620 r5630  
    149149    RE * re = nullptr;
    150150    if (fModeFlagSet & IGNORE_SPACE_MODE_FLAG) {
    151         while (*mCursor == ' ') mCursor++;
     151        while (mCursor.more() && *mCursor == ' ') {
     152            ++mCursor;
     153        }
    152154    }
    153155    if (mCursor.more()) {
     
    190192                    re = makeSeq({re, makeZeroWidth("GCB")});
    191193                }
    192                 return re;
     194                break;
    193195            case '.': // the 'any' metacharacter
    194196                mCursor++;
     
    202204                    fGraphemeBoundaryPending = true;
    203205                }
    204                 return re;
    205         }
    206     }
    207     return nullptr;
     206        }
     207    }
     208    return re;
    208209}
    209210
  • icGREP/icgrep-devel/icgrep/re/re_re.h

    r5267 r5630  
    2626        , Intersect
    2727        , Name
    28         , Permute
     28        // , Permute
    2929        , Rep
    3030        , Seq
    3131        , Start
    32         , SymDiff
    33         , Union
     32        // , SymDiff
     33        // , Union
    3434    };
    3535    inline ClassTypeId getClassTypeId() const {
     
    5252class Vector : public RE, public std::vector<RE*, RE::VectorAllocator> {
    5353public:
     54    static inline bool classof(const RE * re) {
     55        const auto typeId = re->getClassTypeId();
     56        return typeId == ClassTypeId::Alt || typeId == ClassTypeId::Seq;
     57    }
     58    static inline bool classof(const void *) {
     59        return false;
     60    }
    5461    virtual ~Vector() {}
    5562protected:
  • icGREP/icgrep-devel/icgrep/re/re_reverse.cpp

    r5493 r5630  
    5656    } else if (Name * n = dyn_cast<Name>(re)) {
    5757        switch (n->getType()) {
    58             case Name::Type::Byte:  return makeName(cast<CC>(n->getDefinition()));
    59             case Name::Type::Unicode:  return makeName(cast<CC>(n->getDefinition()));
     58            case Name::Type::Byte:
     59            case Name::Type::Unicode:
     60                return makeName(cast<CC>(n->getDefinition()));
    6061            case Name::Type::UnicodeProperty:
    6162                return makeName(n->getNamespace(), n->getName(), Name::Type::UnicodeProperty);
  • icGREP/icgrep-devel/icgrep/re/re_simplifier.cpp

    r5493 r5630  
    11#include "re_simplifier.h"
    22#include <re/re_name.h>
    3 #include <re/re_any.h>
    4 #include <re/re_start.h>
    5 #include <re/re_end.h>
    63#include <re/re_alt.h>
    7 #include <re/re_cc.h>
    84#include <re/re_seq.h>
    95#include <re/re_rep.h>
     
    117#include <re/re_intersect.h>
    128#include <re/re_assertion.h>
    13 #include <re/re_analysis.h>
    14 #include <algorithm>
    15 #include <memory>
    16 #include <queue>
     9#include <re/re_memoizer.hpp>
     10#include <boost/container/flat_set.hpp>
    1711
    1812using namespace llvm;
     
    2014namespace re {
    2115
     16struct PassContainer {
     17    RE * simplify(RE * re) {
     18        if (Alt * alt = dyn_cast<Alt>(re)) {
     19            boost::container::flat_set<RE *> list;
     20            list.reserve(alt->size());
     21            for (RE * item : *alt) {
     22                item = simplify(item);
     23                if (LLVM_UNLIKELY(isa<Vector>(item) && cast<Vector>(item)->empty())) {
     24                    continue;
     25                }
     26                list.insert(item);
     27            }
     28            re = makeAlt(list.begin(), list.end());
     29        } else if (Seq * seq = dyn_cast<Seq>(re)) {
     30            std::vector<RE *> list;
     31            list.reserve(seq->size());
     32            for (RE * item : *seq) {
     33                item = simplify(item);
     34                if (LLVM_UNLIKELY(isa<Vector>(item) && cast<Vector>(item)->empty())) {
     35                    continue;
     36                }
     37                list.push_back(item);
     38            }
     39            re = makeSeq(list.begin(), list.end());
     40        } else if (Assertion * a = dyn_cast<Assertion>(re)) {
     41            re = makeAssertion(simplify(a->getAsserted()), a->getKind(), a->getSense());
     42        } else if (Rep * rep = dyn_cast<Rep>(re)) {
     43            RE * expr = simplify(rep->getRE());
     44            re = makeRep(expr, rep->getLB(), rep->getUB());
     45        } else if (Diff * diff = dyn_cast<Diff>(re)) {
     46            re = makeDiff(simplify(diff->getLH()), simplify(diff->getRH()));
     47        } else if (Intersect * e = dyn_cast<Intersect>(re)) {
     48            re = makeIntersect(simplify(e->getLH()), simplify(e->getRH()));
     49        }
     50        return mMemoizer.memoize(re);
     51    }
     52private:
     53    Memoizer mMemoizer;
     54};
     55
    2256RE * RE_Simplifier::simplify(RE * re) {
    23     if (Alt * alt = dyn_cast<Alt>(re)) {
    24         std::vector<RE *> list;
    25         list.reserve(alt->size());
    26         for (RE * re : *alt) {
    27             list.push_back(simplify(re));
    28         }
    29         re = makeAlt(list.begin(), list.end());
    30     } else if (Seq * seq = dyn_cast<Seq>(re)) {
    31         std::vector<RE *> list;
    32         list.reserve(seq->size());
    33         for (RE * re : *seq) {
    34             list.push_back(simplify(re));
    35         }
    36         re = makeSeq(list.begin(), list.end());
    37     } else if (Assertion * a = dyn_cast<Assertion>(re)) {
    38         re = makeAssertion(simplify(a->getAsserted()), a->getKind(), a->getSense());
    39     } else if (Rep * rep = dyn_cast<Rep>(re)) {
    40         RE * expr = simplify(rep->getRE());
    41         re = makeRep(expr, rep->getLB(), rep->getUB());
    42     } else if (Diff * diff = dyn_cast<Diff>(re)) {
    43         re = makeDiff(simplify(diff->getLH()), simplify(diff->getRH()));
    44     } else if (Intersect * e = dyn_cast<Intersect>(re)) {
    45         re = makeIntersect(simplify(e->getLH()), simplify(e->getRH()));
    46     }
    47     return re;
     57    PassContainer pc;
     58    return pc.simplify(re);
    4859}
    4960
  • icGREP/icgrep-devel/icgrep/re/re_toolchain.cpp

    r5620 r5630  
    1212#include <re/re_star_normal.h>         // for RE_Star_Normal
    1313#include <re/re_simplifier.h>          // for RE_Simplifier
     14#include <re/re_minimizer.h>
    1415#include <re/re_local.h>
    1516#include <re/printer_re.h>
     
    6364
    6465    //Optimization passes to simplify the AST.
    65     re_ast = re::RE_Nullable::removeNullablePrefix(re_ast);
     66    re_ast = RE_Nullable::removeNullablePrefix(re_ast);
    6667    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
    6768        std::cerr << "RemoveNullablePrefix:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
    6869    }
    69     re_ast = re::RE_Nullable::removeNullableSuffix(re_ast);
     70    re_ast = RE_Nullable::removeNullableSuffix(re_ast);
    7071    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
    7172        std::cerr << "RemoveNullableSuffix:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
    7273    }
    73     re_ast = re::RE_Nullable::removeNullableAssertion(re_ast);
     74    re_ast = RE_Nullable::removeNullableAssertion(re_ast);
    7475    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
    7576        std::cerr << "RemoveNullableAssertion:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
    7677    }
    77     //re_ast = re::RE_Nullable::removeNullableAfterAssertion(re_ast);
     78    //re_ast = RE_Nullable::removeNullableAfterAssertion(re_ast);
    7879    //if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
    7980    //    std::cerr << "RemoveNullableAfterAssertion" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
    8081    //}
    8182   
    82     re_ast = re::RE_Simplifier::simplify(re_ast);
     83    // re_ast = RE_Minimizer::minimize(re_ast);
     84
     85    re_ast = RE_Simplifier::simplify(re_ast);
     86
    8387    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowSimplifiedREs)) {
    8488        //Print to the terminal the AST that was generated by the simplifier.
     
    8690    }
    8791
    88     re_ast = re::RE_Star_Normal::star_normal(re_ast);
     92    re_ast = RE_Star_Normal::star_normal(re_ast);
     93
    8994    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowSimplifiedREs)) {
    9095        //Print to the terminal the AST that was transformed to the star normal form.
    9196        std::cerr << "Star_Normal_Form:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
    9297    }   
     98
    9399    return re_ast;
    94100}
     
    98104    bool local = RE_Local::isLocalLanguage(re_ast) && isTypeForLocal(re_ast);
    99105    cc::CC_Compiler cc_compiler(kernel, basis);
    100     re::RE_Compiler re_compiler(kernel, cc_compiler, local);
     106    RE_Compiler re_compiler(kernel, cc_compiler, local);
    101107    re_compiler.compileUnicodeNames(re_ast);
    102108    re_compiler.compile(re_ast);
  • icGREP/icgrep-devel/icgrep/toolchain/NVPTXDriver.cpp

    r5474 r5630  
    5454    mPipeline.emplace_back(kb);
    5555    kb->bindPorts(inputs, outputs);
    56     kb->setModule(iBuilder, mMainModule);
     56    kb->setModule(mMainModule);
    5757}
    5858
  • icGREP/icgrep-devel/icgrep/toolchain/cpudriver.cpp

    r5616 r5630  
    7070        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
    7171    }
    72     mTarget = builder.selectTarget();
     72    mTarget = builder.selectTarget();   
    7373    if (LLVM_LIKELY(codegen::EnableObjectCache)) {
    7474        if (codegen::ObjectCacheDir) {
     
    8080    }
    8181    mMainModule->setTargetTriple(mTarget->getTargetTriple().getTriple());
    82 
    8382    iBuilder.reset(IDISA::GetIDISA_Builder(*mContext));
    8483    iBuilder->setDriver(this);
     
    8685}
    8786
    88 void ParabixDriver::makeKernelCall(Kernel * kb, const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
    89     assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kb->getModule() == nullptr));
    90     mPipeline.emplace_back(kb);
    91     kb->bindPorts(inputs, outputs);
    92     kb->makeModule(iBuilder);
     87void ParabixDriver::makeKernelCall(Kernel * kernel, const std::vector<StreamSetBuffer *> & inputs, const std::vector<StreamSetBuffer *> & outputs) {
     88    assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kernel->getModule() == nullptr));
     89    mPipeline.emplace_back(kernel);
     90    kernel->bindPorts(inputs, outputs);
     91    if (!mCache || !mCache->loadCachedObjectFile(iBuilder, kernel)) {
     92        mUncachedKernel.push_back(kernel);
     93    }
     94    if (kernel->getModule() == nullptr) {
     95        kernel->makeModule(iBuilder);
     96    }
     97    assert (kernel->getModule());
    9398}
    9499
    95100void ParabixDriver::generatePipelineIR() {
    96     #ifndef NDEBUG
     101
    97102    if (LLVM_UNLIKELY(mPipeline.empty())) {
    98103        report_fatal_error("Pipeline cannot be empty");
     
    101106            for (auto j = i; ++j != mPipeline.end(); ) {
    102107                if (LLVM_UNLIKELY(*i == *j)) {
    103                     report_fatal_error("Kernel instances cannot occur twice in the pipeline");
     108                    report_fatal_error("Kernel " + (*i)->getName() + " occurs twice in the pipeline");
    104109                }
    105110            }
    106111        }
    107112    }
    108     #endif
     113
     114    for (Kernel * const kernel : mUncachedKernel) {
     115        kernel->prepareKernel(iBuilder);
     116    }
     117
    109118    // note: instantiation of all kernels must occur prior to initialization
    110119    for (const auto & k : mPipeline) {
     
    130139
    131140Function * ParabixDriver::addLinkFunction(Module * mod, llvm::StringRef name, FunctionType * type, void * functionPtr) const {
    132     assert ("addKernelCall or makeKernelCall must be called before LinkFunction" && (mod != nullptr));
     141    if (LLVM_UNLIKELY(mod == nullptr)) {
     142        report_fatal_error("addLinkFunction(" + name + ") cannot be called until after addKernelCall or makeKernelCall");
     143    }
    133144    Function * f = mod->getFunction(name);
    134145    if (LLVM_UNLIKELY(f == nullptr)) {
    135146        f = Function::Create(type, Function::ExternalLinkage, name, mod);
    136         mEngine->addGlobalMapping(f, functionPtr);
     147        mEngine->updateGlobalMapping(f, functionPtr);
    137148    } else if (LLVM_UNLIKELY(f->getType() != type->getPointerTo())) {
    138149        report_fatal_error("Cannot link " + name + ": a function with a different signature already exists with that name in " + mod->getName());
     
    193204
    194205    Module * module = nullptr;
    195 
    196206    try {
    197 
     207        for (Kernel * const kernel : mUncachedKernel) {
     208            iBuilder->setKernel(kernel);
     209            kernel->generateKernel(iBuilder);
     210            module = kernel->getModule(); assert (module);
     211            module->setTargetTriple(mMainModule->getTargetTriple());
     212            PM.run(*module);
     213        }
     214        module = mMainModule;
     215        iBuilder->setKernel(nullptr);
     216        PM.run(*mMainModule);
    198217        for (Kernel * const kernel : mPipeline) {
    199             iBuilder->setKernel(kernel);
    200             module = kernel->getModule();
    201             assert (module != mMainModule);
    202             bool uncachedObject = true;
    203             if (mCache && mCache->loadCachedObjectFile(iBuilder, kernel)) {
    204                 uncachedObject = false;
    205             }
    206             if (uncachedObject) {
    207                 module->setTargetTriple(mMainModule->getTargetTriple());
    208                 kernel->generateKernel(iBuilder);
    209                 PM.run(*module);
    210             }
    211             mEngine->addModule(std::unique_ptr<Module>(module));
    212         }
    213 
    214         iBuilder->setKernel(nullptr);
    215         module = mMainModule;
    216         PM.run(*mMainModule);
     218            if (LLVM_UNLIKELY(kernel->getModule() == nullptr)) {
     219                report_fatal_error(kernel->getName() + " was neither loaded from cache nor generated prior to finalizeObject");
     220            }
     221            mEngine->addModule(std::unique_ptr<Module>(kernel->getModule()));
     222        }
    217223        mEngine->finalizeObject();
    218 
    219224        if (mCache) mCache->cleanUpObjectCacheFiles();
    220 
    221225    } catch (const std::exception & e) {
    222         report_fatal_error(e.what());
     226        report_fatal_error(module->getName() + ": " + e.what());
    223227    }
    224228
  • icGREP/icgrep-devel/icgrep/toolchain/cpudriver.h

    r5616 r5630  
    3434    llvm::ExecutionEngine *                                 mEngine;
    3535    ParabixObjectCache *                                    mCache;
     36    std::vector<kernel::Kernel *>                           mUncachedKernel;
    3637    // NOTE: when printing the IR/ASM, we cannot assume they're completely finished after finalizeObject is executed. Instead we store a
    3738    // pointer and delete them once the driver (and any processing) is complete. This prevents us from reclaiming the memory early but
  • icGREP/icgrep-devel/icgrep/toolchain/object_cache.cpp

    r5493 r5630  
    1313#include <boost/filesystem.hpp>
    1414#include <boost/range/iterator_range.hpp>
     15#include <boost/container/flat_set.hpp>
     16#include <llvm/Bitcode/ReaderWriter.h>
     17#include <llvm/IR/Verifier.h>
    1518#include <ctime>
    1619
     
    5962const static auto SIGNATURE = "signature";
    6063
    61 const static boost::uintmax_t CACHE_SIZE_LIMIT = 5 * 1024 * 1024;
     64const static boost::uintmax_t CACHE_SIZE_LIMIT = 50 * 1024 * 1024;
    6265
    6366const MDString * getSignature(const llvm::Module * const M) {
     
    7376bool ParabixObjectCache::loadCachedObjectFile(const std::unique_ptr<kernel::KernelBuilder> & idb, kernel::Kernel * const kernel) {
    7477    if (LLVM_LIKELY(kernel->isCachable())) {
    75         Module * const module = kernel->getModule();
    76         assert ("kernel module cannot be null!" && module);
    77         const auto moduleId = module->getModuleIdentifier();
     78        assert (kernel->getModule() == nullptr);
     79        const auto moduleId = kernel->getCacheName(idb);
     80
    7881        // Have we already seen this module before?
    79         if (LLVM_UNLIKELY(mCachedObject.count(moduleId) != 0)) {
     82        const auto f = mCachedObject.find(moduleId);
     83        if (LLVM_UNLIKELY(f != mCachedObject.end())) {
     84            Module * const m = f->second.first; assert (m);
     85            kernel->setModule(m);
    8086            return true;
    8187        }
     
    94100                if (signatureBuffer) {
    95101                    const StringRef loadedSig = signatureBuffer.get()->getBuffer();
    96                     if (!loadedSig.equals(kernel->makeSignature(idb))) {
    97                         return false;
     102                    if (LLVM_UNLIKELY(!loadedSig.equals(kernel->makeSignature(idb)))) {
     103                        goto invalid;
    98104                    }
    99105                } else {
    100106                    report_fatal_error("signature file expected but not found: " + moduleId);
    101                     return false;
     107                }               
     108            }
     109            sys::path::replace_extension(objectName, ".kernel");
     110            auto kernelBuffer = MemoryBuffer::getFile(objectName.c_str(), -1, false);
     111            if (*kernelBuffer) {
     112                //MemoryBuffer * kb = kernelBuffer.get().release();
     113                //auto loadedFile = parseBitcodeFile(kb->getMemBufferRef(), mContext);
     114                auto loadedFile = getLazyBitcodeModule(std::move(kernelBuffer.get()), idb->getContext());
     115                if (*loadedFile) {
     116                    Module * const m = loadedFile.get().release(); assert (m);
     117                    // defaults to <path>/<moduleId>.kernel
     118                    m->setModuleIdentifier(moduleId);
     119                    kernel->setModule(m);
     120                    kernel->prepareCachedKernel(idb);                   
     121                    mCachedObject.emplace(moduleId, std::make_pair(m, std::move(objectBuffer.get())));
     122                    // update the modified time of the object file
     123                    sys::path::replace_extension(objectName, ".o");
     124                    boost::filesystem::last_write_time(objectName.c_str(), time(0));
     125                    return true;
    102126                }
    103127            }
    104             // update the modified time of the file then add it to our cache
    105             boost::filesystem::last_write_time(objectName.c_str(), time(0));
    106             mCachedObject.emplace(moduleId, std::move(objectBuffer.get()));
    107             return true;
    108         } else {
    109             // mark this module as cachable
    110             module->getOrInsertNamedMetadata(CACHEABLE);
    111             // if this module has a signature, add it to the metadata
    112             if (kernel->hasSignature()) {
    113                 NamedMDNode * const md = module->getOrInsertNamedMetadata(SIGNATURE);
    114                 assert (md->getNumOperands() == 0);
    115                 MDString * const sig = MDString::get(module->getContext(), kernel->makeSignature(idb));               
    116                 md->addOperand(MDNode::get(module->getContext(), {sig}));
    117             }
     128        }
     129
     130invalid:
     131
     132        Module * const module = kernel->setModule(new Module(moduleId, idb->getContext()));
     133        // mark this module as cachable
     134        module->getOrInsertNamedMetadata(CACHEABLE);
     135        // if this module has a signature, add it to the metadata
     136        if (kernel->hasSignature()) {
     137            NamedMDNode * const md = module->getOrInsertNamedMetadata(SIGNATURE);
     138            assert (md->getNumOperands() == 0);
     139            MDString * const sig = MDString::get(module->getContext(), kernel->makeSignature(idb));
     140            md->addOperand(MDNode::get(module->getContext(), {sig}));
    118141        }
    119142    }
     
    124147// exists, write it out.
    125148void ParabixObjectCache::notifyObjectCompiled(const Module * M, MemoryBufferRef Obj) {
    126     if (M->getNamedMetadata(CACHEABLE)) {
     149    if (LLVM_LIKELY(M->getNamedMetadata(CACHEABLE))) {
    127150        const auto moduleId = M->getModuleIdentifier();
    128151        Path objectName(mCachePath);
     
    135158        }
    136159
     160        // Write the object code
    137161        std::error_code EC;
    138         raw_fd_ostream outfile(objectName, EC, sys::fs::F_None);
    139         outfile.write(Obj.getBufferStart(), Obj.getBufferSize());
    140         outfile.close();
    141 
    142         // If this module has a signature, write it.
     162        raw_fd_ostream objFile(objectName, EC, sys::fs::F_None);
     163        objFile.write(Obj.getBufferStart(), Obj.getBufferSize());
     164        objFile.close();
     165
     166        // then the signature (if one exists)
    143167        const MDString * const sig = getSignature(M);
    144168        if (sig) {
     
    148172            sigfile.close();
    149173        }
     174
     175        // and finally kernel prototype header.
     176        std::unique_ptr<Module> header(new Module(M->getModuleIdentifier(), M->getContext()));
     177        for (const Function & f : M->getFunctionList()) {
     178            if (f.hasExternalLinkage() && !f.empty()) {
     179                Function::Create(f.getFunctionType(), Function::ExternalLinkage, f.getName(), header.get());
     180            }
     181        }
     182
     183        sys::path::replace_extension(objectName, ".kernel");
     184        raw_fd_ostream kernelFile(objectName.str(), EC, sys::fs::F_None);
     185        WriteBitcodeToFile(header.get(), kernelFile, false, false);
     186        kernelFile.close();
    150187    }
    151188}
     
    178215                    objectPath.replace_extension("sig");
    179216                    remove(objectPath);
     217                    objectPath.replace_extension("kernel");
     218                    remove(objectPath);
    180219                }
    181220            }
     
    185224
    186225std::unique_ptr<MemoryBuffer> ParabixObjectCache::getObject(const Module * module) {
    187     const auto moduleId = module->getModuleIdentifier();
    188     const auto f = mCachedObject.find(moduleId);
     226    const auto f = mCachedObject.find(module->getModuleIdentifier());
    189227    if (f == mCachedObject.end()) {
    190228        return nullptr;
    191229    }
    192230    // Return a copy of the buffer, for MCJIT to modify, if necessary.
    193     return MemoryBuffer::getMemBufferCopy(f->second.get()->getBuffer());
     231    return MemoryBuffer::getMemBufferCopy(f->second.second.get()->getBuffer());
    194232}
    195233
     
    211249}
    212250
    213 ParabixObjectCache::ParabixObjectCache(const std::string & dir)
     251ParabixObjectCache::ParabixObjectCache(const std::string dir)
    214252: mCachePath(dir) {
    215253
  • icGREP/icgrep-devel/icgrep/toolchain/object_cache.h

    r5464 r5630  
    1818namespace llvm { class MemoryBuffer; }
    1919namespace llvm { class MemoryBufferRef; }
     20namespace llvm { class LLVMContext; }
    2021namespace kernel { class Kernel; }
    2122namespace kernel { class KernelBuilder; }
     
    3738    template <typename K, typename V>
    3839    using Map = boost::container::flat_map<K, V>;
    39     using ModuleCache = Map<std::string, std::unique_ptr<llvm::MemoryBuffer>>;
     40    using ModuleCache = Map<std::string, std::pair<llvm::Module *, std::unique_ptr<llvm::MemoryBuffer>>>;
    4041public:
    4142    ParabixObjectCache();
    42     ParabixObjectCache(const std::string & dir);
     43    ParabixObjectCache(const std::string dir);
    4344    bool loadCachedObjectFile(const std::unique_ptr<kernel::KernelBuilder> & idb, kernel::Kernel * const kernel);
    44     void notifyObjectCompiled(const llvm::Module *M, llvm::MemoryBufferRef Obj) override;
     45    void notifyObjectCompiled(const llvm::Module * M, llvm::MemoryBufferRef Obj) override;
    4546    void cleanUpObjectCacheFiles();
    4647    std::unique_ptr<llvm::MemoryBuffer> getObject(const llvm::Module * M) override;
     
    4849    static Path getDefaultPath();
    4950private:
    50     ModuleCache     mCachedObject;
    51     const Path      mCachePath;
     51    ModuleCache         mCachedObject;
     52    const Path          mCachePath;
    5253};
    5354
Note: See TracChangeset for help on using the changeset viewer.