Changeset 6005


Ignore:
Timestamp: Apr 30, 2018, 2:34:46 PM (4 months ago)
Author: nmedfort
Message: Fix for overly aggressive temporary buffer allocation.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/toolchain/pipeline.cpp

    r5998 r6005  
    4545    , terminated(nullptr)
    4646    , noMore(nullptr)
     47    , requiresTemporaryBuffers(nullptr)
    4748    , deadLockCounter(nullptr)
    4849    , anyProgress(nullptr)
     
    102103    void runKernel(const std::unique_ptr<KernelBuilder> & b, const Kernel * const kernel);
    103104
    104     void allocateTemporaryBuffers(const std::unique_ptr<KernelBuilder> & b, const Kernel * kernel, Value * const requiresTemporaryBuffers);
    105 
    106     void freeTemporaryBuffers(const std::unique_ptr<KernelBuilder> & b, const Kernel * kernel, Value * const requiresTemporaryBuffers);
     105    void allocateTemporaryBuffers(const std::unique_ptr<KernelBuilder> & b, const Kernel * kernel);
     106
     107    void freeTemporaryBuffers(const std::unique_ptr<KernelBuilder> & b, const Kernel * kernel);
    107108
    108109    Value * getFullyProcessedItemCount(const std::unique_ptr<KernelBuilder> & b, const Binding & binding, Value * const final) const;
     
    118119
    119120    Value *                             noMore;
     121    Value *                             requiresTemporaryBuffers;
    120122
    121123    DependencyGraph                     dependencyGraph;
     
    818820void PipelineGenerator::checkAvailableInputData(const std::unique_ptr<KernelBuilder> & b, const unsigned index) {
    819821    const Kernel * const kernel = kernels[index];
    820     b->setKernel(kernel);   
     822    b->setKernel(kernel);
     823    requiresTemporaryBuffers = nullptr;
    821824    for (auto e : make_iterator_range(in_edges(index, inputGraph))) {
    822825        const Channel & c = inputGraph[e];
     
    843846        b->CreateLikelyCondBr(check, hasSufficientInput, kernelFinished);
    844847        b->SetInsertPoint(hasSufficientInput);
     848        if (isPotentiallyUnsafeInputBuffer(c.buffer)) {
     849            Value * const notEnough = b->CreateNot(hasEnough);
     850            if (requiresTemporaryBuffers) {
     851                requiresTemporaryBuffers = b->CreateOr(requiresTemporaryBuffers, notEnough);
     852            } else {
     853                requiresTemporaryBuffers = notEnough;
     854            }
     855        }
    845856    }
    846857}
     
    893904    } else if (binding.hasAttribute(kernel::Attribute::KindId::AlwaysConsume)) {
    894905        const auto lb = kernel->getLowerBound(rate);
    895         strideLength = b->getSize(ceiling(lb * kernel->getStride()));
     906        strideLength = b->getSize(std::max(ceiling(lb * kernel->getStride()), 1U));
    896907    } else {
    897908        const auto ub = kernel->getUpperBound(rate); assert (ub > 0);
     
    976987    Value * isFinal = noMore;
    977988
    978     Value * requiresTemporaryBuffers = nullptr;
    979 
    980989    for (unsigned i = 0; i < n; ++i) {
    981990        const Binding & input = inputs[i];
     
    9961005            arguments[i + 2] = b->CreateSelect(hasPartial, produced, limit);
    9971006            isFinal = b->CreateAnd(isFinal, hasPartial);
    998             if (!temporaryBufferPtrs.empty() && temporaryBufferPtrs[i]) {
    999                 if (requiresTemporaryBuffers) {
    1000                     requiresTemporaryBuffers = b->CreateOr(requiresTemporaryBuffers, hasPartial);
    1001                 } else {
    1002                     requiresTemporaryBuffers = hasPartial;
    1003                 }
    1004             }
    10051007        }
    10061008    }
     
    10131015
    10141016    if (requiresTemporaryBuffers) {
    1015         allocateTemporaryBuffers(b, kernel, requiresTemporaryBuffers);
     1017        allocateTemporaryBuffers(b, kernel);
    10161018    }
    10171019
     
    10191021
    10201022    if (requiresTemporaryBuffers) {
    1021         freeTemporaryBuffers(b, kernel, requiresTemporaryBuffers);
     1023        freeTemporaryBuffers(b, kernel);
    10221024    }
    10231025}
     
    10311033    for (unsigned i = 0; i < outputs.size(); i++) {
    10321034        if (isa<DynamicBuffer>(outputs[i])) {
    1033 
    10341035
    10351036            const auto baseSize = ceiling(kernel->getUpperBound(kernel->getStreamOutput(i).getRate()) * kernel->getStride() * codegen::SegmentSize);
     
    10701071    // temporary copies of it. This could be optimized to have it done at production and deleted after the last
    10711072    // consuming kernel utilizes it.
    1072     temporaryBufferPtrs.clear();
    1073 
    1074     const auto & inputs = kernel->getStreamInputs();
    1075     for (unsigned i = 0; i < inputs.size(); ++i) {
    1076         const StreamSetBuffer * const buffer = kernel->getStreamSetInputBuffer(i);
    1077         if (LLVM_UNLIKELY(isPotentiallyUnsafeInputBuffer(buffer))) {
    1078             if (temporaryBufferPtrs.empty()) {
    1079                 temporaryBufferPtrs.resize(inputs.size(), nullptr);
    1080             }
    1081             assert (temporaryBufferPtrs[i] == nullptr);
    1082             PointerType * const ptrTy = buffer->getStreamSetPointerType();
    1083             StructType * const structTy = StructType::create(b->getContext(), {ptrTy, ptrTy});
    1084             AllocaInst * const tempBuffer = b->CreateAlloca(structTy);
    1085             b->CreateStore(Constant::getNullValue(structTy), tempBuffer);
    1086             temporaryBufferPtrs[i] = tempBuffer;
    1087         }
    1088     }
    1089 
     1073    if (requiresTemporaryBuffers) {
     1074        const auto & inputs = kernel->getStreamInputs();
     1075        temporaryBufferPtrs.resize(inputs.size());
     1076        std::fill(temporaryBufferPtrs.begin(), temporaryBufferPtrs.end(), nullptr);
     1077        for (unsigned i = 0; i < inputs.size(); ++i) {
     1078            const StreamSetBuffer * const buffer = kernel->getStreamSetInputBuffer(i);
     1079            if (LLVM_UNLIKELY(isPotentiallyUnsafeInputBuffer(buffer))) {
     1080                assert (temporaryBufferPtrs[i] == nullptr);
     1081                PointerType * const ptrTy = buffer->getStreamSetPointerType();
     1082                StructType * const structTy = StructType::create(b->getContext(), {ptrTy, ptrTy});
     1083                AllocaInst * const tempBuffer = b->CreateAlloca(structTy);
     1084                b->CreateStore(Constant::getNullValue(structTy), tempBuffer);
     1085                temporaryBufferPtrs[i] = tempBuffer;
     1086            }
     1087        }
     1088    }
    10901089}
    10911090
     
    11041103 * @brief allocateTemporaryBuffers
    11051104 ** ------------------------------------------------------------------------------------------------------------- */
    1106 void PipelineGenerator::allocateTemporaryBuffers(const std::unique_ptr<KernelBuilder> & b, const Kernel * kernel, Value * const requiresTemporaryBuffers) {
     1105void PipelineGenerator::allocateTemporaryBuffers(const std::unique_ptr<KernelBuilder> & b, const Kernel * kernel) {
    11071106    ConstantInt * const ZERO = b->getInt32(0);
    11081107    ConstantInt * const ONE = b->getInt32(1);
     
    11331132 * @brief freeTemporaryBuffers
    11341133 ** ------------------------------------------------------------------------------------------------------------- */
    1135 void PipelineGenerator::freeTemporaryBuffers(const std::unique_ptr<KernelBuilder> & b, const Kernel * kernel, Value * const requiresTemporaryBuffers) {
     1134void PipelineGenerator::freeTemporaryBuffers(const std::unique_ptr<KernelBuilder> & b, const Kernel * kernel) {
    11361135    ConstantInt * const ZERO = b->getInt32(0);
    11371136    ConstantInt * const ONE = b->getInt32(1);
Note: See TracChangeset for help on using the changeset viewer.