Changeset 6272


Ignore:
Timestamp:
Jan 4, 2019, 7:46:10 PM (5 months ago)
Author:
nmedfort
Message:

simplification of pop count logic

Location:
icGREP/icgrep-devel/icgrep/kernels
Files:
13 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/attributes.h

    r6184 r6272  
    1111
    1212struct Attribute {
     13
     14    friend struct AttributeSet;
    1315
    1416    enum class KindId {
     
    299301    }
    300302
    301 protected:
    302 
    303     friend struct AttributeSet;
    304     friend struct Binding;
    305     friend Attribute Add1();
    306     friend Attribute BlockSize(const unsigned k);
    307     friend Attribute Principal();
    308     friend Attribute RoundUpTo(const unsigned);
    309     friend Attribute ManagedBuffer();
    310     friend Attribute LookAhead(const unsigned);
    311     friend Attribute LookBehind(const unsigned);
    312     friend Attribute Deferred();
    313     friend Attribute Misaligned();
    314     friend Attribute ConditionalRegionBegin();
    315     friend Attribute ConditionalRegionEnd();
    316     friend Attribute CanTerminateEarly();
    317     friend Attribute MustExplicitlyTerminate();
    318     friend Attribute RequiresPopCountArray();
    319     friend Attribute RequiresNegatedPopCountArray();
    320     friend Attribute SideEffecting();
    321     friend Attribute Family();
    322 
    323303    void print(llvm::raw_ostream & out) const noexcept;
    324304
  • icGREP/icgrep-devel/icgrep/kernels/block_kernel.cpp

    r6261 r6272  
    345345
    346346/** ------------------------------------------------------------------------------------------------------------- *
    347  * @brief makePopCountRateGraph
    348  *
    349  * Returns a graph with a vertex for each input port and a directed edge annotated with the appropriate RateId
    350  ** ------------------------------------------------------------------------------------------------------------- */
    351 
    352 using PopCountRateGraph = adjacency_list<hash_setS, vecS, directedS>;
    353 using PopCountRateMap = llvm::StringMap<PopCountRateGraph::vertex_descriptor>;
    354 
    355 #define POP_COUNT 0
    356 #define NEGATED_POP_COUNT 1
    357 
    358 void checkPopCount(const Binding & binding, PopCountRateGraph & G, const PopCountRateMap & M) {
    359     const ProcessingRate & rate = binding.getRate();
    360     if (LLVM_UNLIKELY(rate.isPopCount() || rate.isNegatedPopCount())) {
    361         const auto f = M.find(rate.getReference());
    362         assert ("pop count rate cannot refer to an output stream" && f != M.end());
    363         const auto targetType = (rate.getKind() == RateId::PopCount) ? POP_COUNT : NEGATED_POP_COUNT;
    364         add_edge(f->second, targetType, G);
    365     }
    366 }
    367 
    368 inline PopCountRateGraph makePopCountRateGraph(const Bindings & inputStreamSets, const Bindings & outputStreamSets) {
    369     const auto n = inputStreamSets.size();
    370     PopCountRateGraph G(n);
    371     PopCountRateMap M;
     347 * @brief annotateInputBindingsWithPopCountArrayAttributes
     348 ** ------------------------------------------------------------------------------------------------------------- */
     349void BlockOrientedKernel::annotateInputBindingsWithPopCountArrayAttributes() {
     350    const unsigned n = mInputStreamSets.size();
     351    const unsigned m = mOutputStreamSets.size();
     352
     353    if (LLVM_UNLIKELY(n == 0 || (n + m) < 2)) {
     354        return;
     355    }
     356
     357    enum : int {
     358        POSITIVE = 0
     359        , NEGATIVE = 1
     360    };
     361
     362    using Graph = adjacency_list<vecS, vecS, directedS>;
     363
     364    Graph G(std::max(n, 2U));
     365
     366    auto checkPopCount = [&](const Binding & binding) {
     367        const ProcessingRate & rate = binding.getRate();
     368        if (LLVM_UNLIKELY(rate.isPopCount() || rate.isNegatedPopCount())) {
     369            Kernel::Port type;
     370            unsigned refPort;
     371            std::tie(type, refPort) = getStreamPort(rate.getReference());
     372            assert ("pop count rate cannot refer to an output stream" && (type == Kernel::Port::Input));
     373            const auto targetType = rate.isPopCount() ? POSITIVE : NEGATIVE;
     374            add_edge(refPort, targetType, G);
     375        }
     376    };
     377
    372378    for (unsigned i = 0; i < n; ++i) {
    373         M.insert(std::make_pair(inputStreamSets[i].getName(), i));
    374         checkPopCount(inputStreamSets[i], G, M);
    375     }
    376     const auto m = outputStreamSets.size();
     379        checkPopCount(mInputStreamSets[i]);
     380    }
    377381    for (unsigned i = 0; i < m; ++i) {
    378         checkPopCount(outputStreamSets[i], G, M);
    379     }
    380     return G;
    381 }
    382 
    383 /** ------------------------------------------------------------------------------------------------------------- *
    384  * @brief annotateInputBindingsWithPopCountArrayAttributes
    385  ** ------------------------------------------------------------------------------------------------------------- */
    386 void annotateInputBindingsWithPopCountArrayAttributes(Bindings & inputStreamSets, const Bindings & outputStreamSets) {
    387     if (LLVM_LIKELY(inputStreamSets.size() > 1)) {
    388         const auto G = makePopCountRateGraph(inputStreamSets, outputStreamSets);
    389         for (const auto e : make_iterator_range(edges(G))) {
    390             const auto i = source(e, G);
    391             assert (i < inputStreamSets.size());
    392             Binding & input = inputStreamSets[i];
    393             switch (target(e, G)) {
    394                 case POP_COUNT:
    395                     input.addAttribute(RequiresPopCountArray());
    396                     break;
    397                 case NEGATED_POP_COUNT:
    398                     input.addAttribute(RequiresNegatedPopCountArray());
    399                     break;
    400                 default: llvm_unreachable("unhandled pop count type!");
    401             }
     382        checkPopCount(mOutputStreamSets[i]);
     383    }
     384
     385    for (const auto e : make_iterator_range(edges(G))) {
     386        const auto i = source(e, G);
     387        assert (i < mInputStreamSets.size());
     388        Binding & input = mInputStreamSets[i];
     389        switch (target(e, G)) {
     390            case POSITIVE:
     391                input.addAttribute(RequiresPopCountArray());
     392                break;
     393            case NEGATIVE:
     394                input.addAttribute(RequiresNegatedPopCountArray());
     395                break;
     396            default: llvm_unreachable("unhandled pop count type!");
    402397        }
    403398    }
     
    427422, mStrideLoopTarget(nullptr)
    428423, mStrideBlockIndex(nullptr) {
    429     annotateInputBindingsWithPopCountArrayAttributes(mInputStreamSets, mOutputStreamSets);
    430 }
    431 
    432 
    433 }
     424    annotateInputBindingsWithPopCountArrayAttributes();
     425}
     426
     427
     428}
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r6261 r6272  
    830830
    831831/** ------------------------------------------------------------------------------------------------------------- *
    832  * @brief addAttributesFrom
    833  *
    834  * Add any attributes from a set of kernels
    835  ** ------------------------------------------------------------------------------------------------------------- */
    836 void Kernel::addAttributesFrom(const std::vector<Kernel *> & kernels) {
    837     unsigned mustTerminate = 0;
    838     bool canTerminate = false;
    839     bool sideEffecting = false;
    840     for (const Kernel * kernel : kernels) {
    841         if (kernel->hasAttribute(AttrId::MustExplicitlyTerminate)) {
    842             mustTerminate++;
    843         } else if (kernel->hasAttribute(AttrId::CanTerminateEarly)) {
    844             canTerminate = true;
    845         }
    846         if (kernel->hasAttribute(AttrId::SideEffecting)) {
    847             sideEffecting = true;
    848         }
    849     }
    850     if (LLVM_UNLIKELY(mustTerminate == kernels.size())) {
    851         addAttribute(MustExplicitlyTerminate());
    852     } else if (canTerminate || mustTerminate) {
    853         addAttribute(CanTerminateEarly());
    854     }
    855     if (sideEffecting) {
    856         addAttribute(SideEffecting());
    857     }
    858 }
    859 
    860 /** ------------------------------------------------------------------------------------------------------------- *
    861832 * @brief createInstance
    862833 ** ------------------------------------------------------------------------------------------------------------- */
     
    11351106    }
    11361107    assert (array && "missing pop count array attribute");
    1137     return b->CreateLoad(b->CreateGEP(array, strideIndex));
     1108    Value * const currentSum = b->CreateLoad(b->CreateGEP(array, strideIndex));
     1109    Value * const priorIndex = b->CreateSub(strideIndex, b->getSize(1));
     1110    Value * const priorSum = b->CreateLoad(b->CreateGEP(array, priorIndex));
     1111    return b->CreateSub(currentSum, priorSum);
    11381112}
    11391113
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r6261 r6272  
    488488private:
    489489
    490     void addAttributesFrom(const std::vector<Kernel *> & kernels);
    491 
    492490    void callGenerateInitializeMethod(const std::unique_ptr<KernelBuilder> & b);
    493491
     
    657655private:
    658656
     657    void annotateInputBindingsWithPopCountArrayAttributes();
     658
    659659    void incrementCountableItemCounts(const std::unique_ptr<KernelBuilder> & b);
    660660
  • icGREP/icgrep-devel/icgrep/kernels/optimizationbranch.cpp

    r6266 r6272  
    330330, mTrueKernel(nonZeroKernel.get())
    331331, mFalseKernel(allZeroKernel.get()) {
    332     addAttributesFrom({mTrueKernel, mFalseKernel});
     332
    333333}
    334334
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/core_logic.hpp

    r6266 r6272  
    11#include "pipeline_compiler.hpp"
     2
     3// TODO: if we have multiple copies of the same type of kernel executing sequentially, we could avoid
     4// generating an "execution call" for each and instead pass in different handles/item counts. This
     5// could improve I-Cache utilization.
    26
    37namespace kernel {
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/kernel_logic.hpp

    r6266 r6272  
    472472
    473473        args.push_back(inputItems);
     474
    474475        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresPopCountArray))) {
    475             args.push_back(getPopCountArray(b, i));
     476            args.push_back(getPositivePopCountArray(b, i));
    476477        }
    477478        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresNegatedPopCountArray))) {
    478             args.push_back(getNegatedPopCountArray(b, i));
     479            args.push_back(getNegativePopCountArray(b, i));
    479480        }
    480481    }
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_analysis.hpp

    r6266 r6272  
    424424PopCountGraph PipelineCompiler::makePopCountGraph() const {
    425425
    426     using PopCountVertex = PopCountGraph::vertex_descriptor;
    427     using PortMapping = flat_map<unsigned, PopCountVertex>;
     426    using Vertex = PopCountGraph::vertex_descriptor;
     427    using Map = flat_map<unsigned, Vertex>;
    428428
    429429    PopCountGraph G(num_vertices(mBufferGraph));
    430     PortMapping M;
    431 
    432     auto addPopCountDependency = [&](
    433         const Kernel * const kernel,
    434         const PopCountVertex kernelVertex,
    435         const unsigned portIndex,
    436         const Binding & binding) {
    437 
    438         const ProcessingRate & rate = binding.getRate();
    439         if (LLVM_UNLIKELY(rate.isPopCount() || rate.isNegatedPopCount())) {
    440             // determine which port this I/O port refers to
    441             Port portType; unsigned refPort;
    442             std::tie(portType, refPort) = kernel->getStreamPort(rate.getReference());
    443 
    444             // verify the reference stream is an input port
    445             if (LLVM_UNLIKELY(portType != Port::Input)) {
    446                 std::string tmp;
    447                 raw_string_ostream msg(tmp);
    448                 msg << kernel->getName();
    449                 msg << ": pop count rate for ";
    450                 msg << binding.getName();
    451                 msg << " cannot refer to an output stream";
    452                 report_fatal_error(msg.str());
    453             }
    454 
    455             const auto type = rate.isPopCount() ? CountingType::Positive : CountingType::Negative;
    456 
     430    Map M;
     431
     432    for (unsigned kernelVertex = mFirstKernel; kernelVertex < mLastKernel; ++kernelVertex) {
     433        const Kernel * const kernel = mPipeline[kernelVertex];
     434        const auto numOfInputs = kernel->getNumOfStreamInputs();
     435        const auto numOfOutputs = kernel->getNumOfStreamOutputs();
     436
     437        auto insertPopCountDependency = [&](
     438                const CountingType type,
     439                const unsigned refPort,
     440                const unsigned srcPort) {
    457441            // check if we've already created a vertex for the ref port ...
    458442            const auto f = M.find(refPort);
    459443            if (LLVM_UNLIKELY(f != M.end())) {
    460444                const auto refPortVertex = f->second;
    461                 add_edge(kernelVertex, refPortVertex, PopCountEdge{type, portIndex}, G);
     445                add_edge(kernelVertex, refPortVertex, PopCountEdge{type, srcPort}, G);
    462446                // update the ref -> buffer edge with the counting type
    463                 for (const auto e : make_iterator_range(out_edges(refPortVertex, G))) {
    464                     G[e].Type |= type;
    465                 }
     447                G[out_edge(refPortVertex, G)].Type |= type;
    466448            } else { // ... otherwise map a new vertex to it.
    467449
    468450                // verify the reference stream is a Fixed rate stream
    469                 const Binding & refBinding = kernel->getInputStreamSetBinding(refPort);
    470                 if (LLVM_UNLIKELY(refBinding.isDeferred() || !refBinding.getRate().isFixed())) {
     451                if (LLVM_LIKELY(srcPort != refPort)) {
     452                    const Binding & refBinding = kernel->getInputStreamSetBinding(refPort);
     453                    if (LLVM_UNLIKELY(refBinding.isDeferred() || !refBinding.getRate().isFixed())) {
     454                        std::string tmp;
     455                        raw_string_ostream msg(tmp);
     456                        msg << kernel->getName();
     457                        msg << ": pop count reference ";
     458                        msg << refBinding.getName();
     459                        msg << " must be a non-deferred Fixed rate stream";
     460                        report_fatal_error(msg.str());
     461                    }
     462                }
     463
     464                const auto refPortVertex = add_vertex(G);
     465                M.emplace(refPort, refPortVertex);
     466                add_edge(kernelVertex, refPortVertex, PopCountEdge{type, srcPort}, G);
     467
     468                // determine which buffer this port refers to by inspecting the buffer graph
     469                const auto bufferVertex = getInputBufferVertex(kernelVertex, refPort);
     470                add_edge(refPortVertex, bufferVertex, PopCountEdge{type, refPort}, G);
     471            }
     472        };
     473
     474        auto addPopCountDependency = [&](
     475            const unsigned portIndex,
     476            const Binding & binding) {
     477            const ProcessingRate & rate = binding.getRate();
     478            if (LLVM_UNLIKELY(rate.isPopCount() || rate.isNegatedPopCount())) {
     479                // determine which port this I/O port refers to
     480                Port portType; unsigned refPort;
     481                std::tie(portType, refPort) = kernel->getStreamPort(rate.getReference());
     482                // verify the reference stream is an input port
     483                if (LLVM_UNLIKELY(portType != Port::Input)) {
    471484                    std::string tmp;
    472485                    raw_string_ostream msg(tmp);
    473486                    msg << kernel->getName();
    474                     msg << ": pop count reference ";
    475                     msg << refBinding.getName();
    476                     msg << " must be a non-deferred Fixed rate stream";
     487                    msg << ": pop count rate for ";
     488                    msg << binding.getName();
     489                    msg << " cannot refer to an output stream";
    477490                    report_fatal_error(msg.str());
    478491                }
    479 
    480                 const auto refPortVertex = add_vertex(G);
    481                 M.emplace(refPort, refPortVertex);
    482                 add_edge(kernelVertex, refPortVertex, PopCountEdge{type, portIndex}, G);
    483 
    484                 // determine which buffer this port refers to by inspecting the buffer graph
    485                 const auto bufferVertex = getInputBufferVertex(kernelVertex, refPort);
    486                 add_edge(refPortVertex, bufferVertex, PopCountEdge{type, refPort}, G);
    487             }
    488         }
    489     };
    490 
    491     for (unsigned i = mFirstKernel; i < mLastKernel; ++i) {
    492         const Kernel * const kernel = mPipeline[i];
    493         const auto numOfInputs = kernel->getNumOfStreamInputs();
    494         const auto numOfOutputs = kernel->getNumOfStreamOutputs();
     492                const auto type = rate.isPopCount() ? CountingType::Positive : CountingType::Negative;
     493                insertPopCountDependency(type, refPort, portIndex);
     494            }
     495        };
     496
     497        auto checkRequiredArray = [&](
     498            const unsigned portIndex,
     499            const Binding & binding) {
     500
     501            CountingType type = CountingType::Unknown;
     502            if (LLVM_UNLIKELY(binding.hasAttribute(AttrId::RequiresPopCountArray))) {
     503                type = CountingType::Positive;
     504            }
     505            if (LLVM_UNLIKELY(binding.hasAttribute(AttrId::RequiresNegatedPopCountArray))) {
     506                type |= CountingType::Negative;
     507            }
     508            if (LLVM_UNLIKELY(type != CountingType::Unknown)) {
     509                insertPopCountDependency(type, portIndex, portIndex);
     510            }
     511        };
     512
    495513        for (unsigned j = 0; j < numOfInputs; ++j) {
    496514            const auto & input = kernel->getInputStreamSetBinding(j);
    497             addPopCountDependency(kernel, i, j, input);
     515            addPopCountDependency(j, input);
     516            checkRequiredArray(j, input);
    498517        }
    499518        const auto firstOutput = numOfInputs;
    500519        for (unsigned j = 0; j < numOfOutputs; ++j) {
    501520            const auto & output = kernel->getOutputStreamSetBinding(j);
    502             addPopCountDependency(kernel, i, firstOutput + j, output);
     521            addPopCountDependency(firstOutput + j, output);
    503522        }
    504523        M.clear();
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_builder.cpp

    r6261 r6272  
    185185}
    186186
    187 #warning pipeline builder needs to be able to return the name of a kernel without generating it
     187/** ------------------------------------------------------------------------------------------------------------- *
     188 * @brief addAttributesFrom
     189 *
     190 * Add any attributes from a set of kernels
     191 ** ------------------------------------------------------------------------------------------------------------- */
     192unsigned addKernelProperties(const std::vector<Kernel *> & kernels, Kernel * const output) {
     193    unsigned mustTerminate = 0;
     194    bool canTerminate = false;
     195    bool sideEffecting = false;
     196    unsigned stride = 0;
     197    for (const Kernel * kernel : kernels) {
     198        if (kernel->hasAttribute(AttrId::MustExplicitlyTerminate)) {
     199            mustTerminate++;
     200        } else if (kernel->hasAttribute(AttrId::CanTerminateEarly)) {
     201            canTerminate = true;
     202        }
     203        if (kernel->hasAttribute(AttrId::SideEffecting)) {
     204            sideEffecting = true;
     205        }
     206        assert (kernel->getStride());
     207        if (stride) {
     208            stride = boost::lcm(stride, kernel->getStride());
     209        } else {
     210            stride = kernel->getStride();
     211        }
     212    }
     213    if (LLVM_UNLIKELY(mustTerminate == kernels.size())) {
     214        output->addAttribute(MustExplicitlyTerminate());
     215    } else if (canTerminate || mustTerminate) {
     216        output->addAttribute(CanTerminateEarly());
     217    }
     218    if (sideEffecting) {
     219        output->addAttribute(SideEffecting());
     220    }
     221    return stride;
     222}
    188223
    189224/** ------------------------------------------------------------------------------------------------------------- *
     
    284319
    285320    const std::unique_ptr<kernel::KernelBuilder> & b = mDriver.getBuilder();
    286     unsigned stride = 0;
    287321    Type * const addrPtrTy = b->getVoidPtrTy();
    288322    for (auto i : ordering) {
     
    297331            }
    298332            pipeline.emplace_back(k);
    299             assert (k->getStride());
    300             if (stride) {
    301                 stride = boost::lcm(stride, k->getStride());
    302             } else {
    303                 stride = k->getStride();
    304             }
    305333        }
    306334    }
     
    312340                           std::move(mInputScalars), std::move(mOutputScalars));
    313341
    314     pk->setStride(stride);
     342    pk->setStride(addKernelProperties(pk->getKernels(), pk));
    315343
    316344    return pk;
     345}
     346
     347using AttributeCombineSet = flat_map<AttrId, unsigned>;
     348
     349/** ------------------------------------------------------------------------------------------------------------- *
     350 * @brief combineAttributes
     351 ** ------------------------------------------------------------------------------------------------------------- */
     352void combineAttributes(const Binding & S, AttributeCombineSet & C) {
     353    for (const Attribute & s : S) {
     354        auto f = C.find(s.getKind());
     355        if (LLVM_LIKELY(f == C.end())) {
     356            C.emplace(s.getKind(), s.amount());
     357        } else {
     358            // TODO: we'll need some form of attribute combination function
     359            f->second = std::max(f->second, s.amount());
     360        }
     361    }
     362}
     363
     364/** ------------------------------------------------------------------------------------------------------------- *
     365 * @brief writeAttributes
     366 ** ------------------------------------------------------------------------------------------------------------- */
     367void writeAttributes(const AttributeCombineSet & C, Binding & S) {
     368    S.clear();
     369    for (auto i = C.begin(); i != C.end(); ++i) {
     370        Attribute::KindId k;
     371        unsigned m;
     372        std::tie(k, m) = *i;
     373        S.addAttribute(Attribute(k, m));
     374    }
     375}
     376
     377/** ------------------------------------------------------------------------------------------------------------- *
     378 * @brief makeKernel
     379 ** ------------------------------------------------------------------------------------------------------------- */
     380bool combineBindingAttributes(const Bindings & A, const Bindings & B, Bindings & R) {
     381    const auto n = A.size();
     382    if (LLVM_UNLIKELY(n != B.size() || n != R.size())) {
     383        return false;
     384    }
     385    AttributeCombineSet C;
     386    for (unsigned i = 0; i < n; ++i) {
     387        combineAttributes(A[i], C);
     388        combineAttributes(B[i], C);
     389        combineAttributes(R[i], C);
     390        writeAttributes(C, R[i]);
     391        C.clear();
     392    }
     393    return true;
    317394}
    318395
     
    333410    mCondition->getType()->print(out);
    334411
    335     out << ";True=\"";
     412    out << ";Z=\"";
    336413
    337414    if (trueBranch->hasFamilyName()) {
     
    341418    }
    342419
    343     out << "\";False=\"";
     420    out << "\";N=\"";
    344421
    345422    if (falseBranch->hasFamilyName()) {
     
    354431    // TODO: if the condition is also one of the normal inputs and the rate is compatible,
    355432    // we could avoid sending it through.
     433
     434    combineBindingAttributes(trueBranch->getInputStreamSetBindings(),
     435                             falseBranch->getInputStreamSetBindings(),
     436                             mInputStreamSets);
     437
     438    combineBindingAttributes(trueBranch->getOutputStreamSetBindings(),
     439                             falseBranch->getOutputStreamSetBindings(),
     440                             mOutputStreamSets);
    356441
    357442    OptimizationBranch * const br =
     
    361446                                   std::move(mInputScalars), std::move(mOutputScalars));
    362447
    363     br->setStride(boost::lcm(trueBranch->getStride(), falseBranch->getStride()));
     448    br->setStride(addKernelProperties({trueBranch, falseBranch}, br));
    364449
    365450    return br;
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_compiler.hpp

    r6266 r6272  
    129129    // analysis state
    130130    RateValue    FieldWidth;
    131     bool         HasArray;
    132     bool         HasNegatedArray;
    133131    bool         UsesConsumedCount;
    134     bool         AlwaysNegated;
    135132
    136133    PopCountData() = default;
     
    292289    void computeMinimumPopCountReferenceCounts(BuilderRef b);
    293290    void updatePopCountReferenceCounts(BuilderRef b);
    294     LLVM_READNONE Value * getPopCountReferenceConsumedCount(BuilderRef b, const unsigned bufferVertex);
    295     LLVM_READNONE Value * getPopCountInitialOffset(BuilderRef b, const Binding & binding, const unsigned bufferVertex, PopCountData & pc);
     291    LLVM_READNONE Value * getPopCountNextBaseOffset(BuilderRef b, const unsigned bufferVertex) const;
     292    LLVM_READNONE Value * getPopCountBaseOffset(BuilderRef b, const Binding & binding, const unsigned bufferVertex);
     293
     294    LLVM_READNONE bool hasPositivePopCountArray(const unsigned bufferVertex) const;
     295    LLVM_READNONE bool hasNegativePopCountArray(const unsigned bufferVertex) const;
    296296
    297297    Value * getMinimumNumOfLinearPopCountItems(BuilderRef b, const Binding & binding);
     
    299299    Value * getNumOfLinearPopCountItems(BuilderRef b, const Binding & binding);
    300300
    301     Value * getReferenceStreamOffset(BuilderRef b, const Binding & binding);
    302 
    303     Value * getPopCountArray(BuilderRef b, const unsigned inputPort);
    304     Value * getNegatedPopCountArray(BuilderRef b, const unsigned inputPort);
     301    Value * getPositivePopCountArray(BuilderRef b, const unsigned inputPort);
     302    Value * getNegativePopCountArray(BuilderRef b, const unsigned inputPort);
    305303    Value * getIndividualPopCountArray(BuilderRef b, const unsigned inputPort, const unsigned index);
    306304
     
    316314    PopCountGraph makePopCountGraph() const;
    317315
    318     LLVM_READNONE PopCountData & getPopCountReference(const unsigned bufferVertex) ;
     316    LLVM_READNONE PopCountData & getPopCountData(const unsigned bufferVertex) const;
    319317    LLVM_READNONE PopCountData analyzePopCountReference(const unsigned bufferVertex) const;
    320318    LLVM_READNONE RateValue popCountReferenceFieldWidth(const unsigned bufferVertex) const;
    321319    LLVM_READNONE bool popCountReferenceCanUseConsumedItemCount(const unsigned bufferVertex) const;
    322     LLVM_READNONE bool popCountReferenceRequiresBaseOffset(const unsigned bufferVertex) const;
    323     LLVM_READNONE bool popCountBufferIsUsedBySingleKernel(const unsigned bufferVertex) const;
    324     LLVM_READNONE std::pair<bool, bool> popCountReferenceRequiresPopCountArray(const unsigned bufferVertex) const;
    325     LLVM_READNONE bool popCountReferenceIsAlwaysNegated(const unsigned bufferVertex) const;
    326320
    327321    template <typename LambdaFunction>
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_kernel.cpp

    r6266 r6272  
    327327, mCallBindings(std::move(callBindings))
    328328, mSignature(std::move(signature)) {
    329     addAttributesFrom(mKernels);
     329
    330330}
    331331
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/popcount_logic.hpp

    r6266 r6272  
    88    BASE_OFFSET_INDEX       // the block # of the zeroth
    99    , CAPACITY_INDEX        // capacity of the popcount array
    10     , PARTIAL_SUM_INDEX     // pointer to the popcount partial sum array
    11     , POSITIVE_ARRAY_INDEX
    12     , NEGATED_ARRAY_INDEX
     10    , POSITIVE_ARRAY_INDEX  // pointer to the popcount partial sum array
     11    , NEGATED_ARRAY_INDEX   // pointer to the negated popcount partial sum array
    1312// -------------------------
    1413    , POP_COUNT_MAX_FIELDS
     
    3130        const auto bufferPort = mBufferGraph[in_edge(bufferVertex, mBufferGraph)].Port;
    3231        const Binding & output = mKernel->getOutputStreamSetBinding(bufferPort);
    33 
    34         PopCountData & pc = getPopCountReference(bufferVertex);
    3532
    3633        const auto prefix = makeBufferName(mKernelIndex, output) + "_genPopCount";
     
    5148
    5249        Constant * const LOG2_BLOCK_WIDTH = getLog2BlockWidth(b);
    53         Value * const consumed = getPopCountReferenceConsumedCount(b, bufferVertex);
    54         Value * const startIndex = b->CreateLShr(consumed, LOG2_BLOCK_WIDTH);
     50        Value * const start = getPopCountNextBaseOffset(b, bufferVertex);
     51        Value * const startIndex = b->CreateLShr(start, LOG2_BLOCK_WIDTH);
    5552        Value * const produced = mUpdatedProducedPhi[bufferPort];
    5653        // If this is the producer's final stride, round the index position up
     
    6966        indices[0] = b->getInt32(0);
    7067        indices[1] = b->getInt32(bufferVertex);
    71         indices[2] = b->getInt32(PARTIAL_SUM_INDEX);
    72         Value * const partialSumPtr = b->CreateGEP(mPopCountState, indices);
    73         Value * const basePartialSumArray = b->CreateLoad(partialSumPtr);
    74         assert (basePartialSumArray->getType()->isPointerTy());
    7568
    7669        indices[2] = b->getInt32(BASE_OFFSET_INDEX);
     
    8881        Value * positiveArrayPtr = nullptr;
    8982        Value * basePositiveArray = nullptr;
    90         if (pc.HasArray) {
     83        if (hasPositivePopCountArray(bufferVertex)) {
    9184            indices[2] = b->getInt32(POSITIVE_ARRAY_INDEX);
    9285            positiveArrayPtr = b->CreateGEP(mPopCountState, indices);
     
    9689        Value * negativeArrayPtr = nullptr;
    9790        Value * baseNegativeArray = nullptr;
    98         if (pc.HasNegatedArray) {
     91        if (hasNegativePopCountArray(bufferVertex)) {
    9992            indices[2] = b->getInt32(NEGATED_ARRAY_INDEX);
    10093            negativeArrayPtr = b->CreateGEP(mPopCountState, indices);
     
    111104        Value * const newCapacity = b->CreateRoundUp(required, capacity);
    112105        b->CreateStore(newCapacity, capacityPtr);
    113         Value * const newPartialSum = b->CreateRealloc(sizeTy, basePartialSumArray, newCapacity);
    114         b->CreateStore(newPartialSum, partialSumPtr);
    115         b->CreateStore(ZERO, newPartialSum);
    116106        Value * newPositiveArray = nullptr;
    117107        if (basePositiveArray) {
    118108            newPositiveArray = b->CreateRealloc(sizeTy, basePositiveArray, newCapacity);
    119109            b->CreateStore(newPositiveArray, positiveArrayPtr);
     110            b->CreateStore(ZERO, newPositiveArray);
    120111        }
    121112        Value * newNegativeArray = nullptr;
     
    123114            newNegativeArray = b->CreateRealloc(sizeTy, baseNegativeArray, newCapacity);
    124115            b->CreateStore(newNegativeArray, negativeArrayPtr);
     116            b->CreateStore(ZERO, newNegativeArray);
    125117        }
    126118        BasicBlock * const popCountExpandExit = b->GetInsertBlock();
     
    129121        // count up the actual entries
    130122        b->SetInsertPoint(popCountLoop);
    131         PHINode * const partialSumArray = b->CreatePHI(basePartialSumArray->getType(), 3);
    132         partialSumArray->addIncoming(basePartialSumArray, popCountCheckEnd);
    133         partialSumArray->addIncoming(newPartialSum, popCountExpandExit);
    134123        PHINode * positiveArray = nullptr;
    135124        if (basePositiveArray) {
     
    150139        arrayIndex->addIncoming(ZERO, popCountCheckEnd);
    151140        arrayIndex->addIncoming(ZERO, popCountExpandExit);
    152         PHINode * const totalSum = b->CreatePHI(sizeTy, 3);
    153         totalSum->addIncoming(ZERO, popCountCheckEnd);
    154         totalSum->addIncoming(ZERO, popCountExpandExit);
     141        PHINode * const positiveSum = b->CreatePHI(sizeTy, 3);
     142        positiveSum->addIncoming(ZERO, popCountCheckEnd);
     143        positiveSum->addIncoming(ZERO, popCountExpandExit);
     144        PHINode * const negativeSum = b->CreatePHI(sizeTy, 3);
     145        negativeSum->addIncoming(ZERO, popCountCheckEnd);
     146        negativeSum->addIncoming(ZERO, popCountExpandExit);
    155147
    156148        const StreamSetBuffer * const buffer = getOutputBuffer(bufferPort);
     
    159151
    160152        // If this popcount field is only used for negated rates, just invert the markers.
    161         if (LLVM_UNLIKELY(pc.AlwaysNegated)) {
     153        if (LLVM_UNLIKELY(positiveArray == nullptr)) {
    162154            markers = b->CreateNot(markers);
    163155        }
     
    168160
    169161        Value * const count = b->CreateZExtOrTrunc(b->bitblock_popcount(markers), sizeTy);
    170 
    171         if (positiveArray) { assert (!pc.AlwaysNegated);
    172             b->CreateStore(count, b->CreateGEP(positiveArray, arrayIndex));
    173         }
    174 
     162        Value * const nextArrayIndex = b->CreateAdd(arrayIndex, ONE);
     163
     164        Value * positivePartialSum = positiveSum;
     165        if (positiveArray) {
     166            positivePartialSum = b->CreateAdd(positiveSum, count);
     167            b->CreateStore(positivePartialSum, b->CreateGEP(positiveArray, nextArrayIndex));
     168        }
     169
     170        Value * negativePartialSum = negativeSum;
    175171        if (negativeArray) {
    176172            Value * negCount = count;
    177             if (!pc.AlwaysNegated) {
     173            if (positiveArray) {
    178174                Constant * blockWidth = b->getSize(b->getBitBlockWidth());
    179175                negCount = b->CreateSub(blockWidth, count);
    180176            }
    181             b->CreateStore(negCount, b->CreateGEP(negativeArray, arrayIndex));
    182         }
    183 
    184         Value * const partialSum = b->CreateAdd(totalSum, count);
    185         Value * const nextArrayIndex = b->CreateAdd(arrayIndex, ONE);
    186         b->CreateStore(partialSum, b->CreateGEP(partialSumArray, nextArrayIndex));
     177            negativePartialSum = b->CreateAdd(negativeSum, negCount);
     178            b->CreateStore(negativePartialSum, b->CreateGEP(negativeArray, nextArrayIndex));
     179        }
     180
     181        Value * const nextSourceIndex = b->CreateAdd(sourceIndex, ONE);
    187182
    188183        BasicBlock * const popCountLoopExit = b->GetInsertBlock();
    189         partialSumArray->addIncoming(partialSumArray, popCountLoopExit);
    190         Value * const nextSourceIndex = b->CreateAdd(sourceIndex, ONE);
    191184        sourceIndex->addIncoming(nextSourceIndex, popCountLoopExit);
    192185        arrayIndex->addIncoming(nextArrayIndex, popCountLoopExit);
    193         totalSum->addIncoming(partialSum, popCountLoopExit);
     186        positiveSum->addIncoming(positivePartialSum, popCountLoopExit);
     187        negativeSum->addIncoming(negativePartialSum, popCountLoopExit);
    194188        if (positiveArray) {
    195189            positiveArray->addIncoming(positiveArray, popCountLoopExit);
     
    212206    const ProcessingRate & rate = binding.getRate();
    213207    const auto bufferVertex = getPopCountReferenceBuffer(mKernel, rate);
    214     PopCountData & pc = getPopCountReference(bufferVertex);
    215208
    216209    std::vector<Value *> indices(3);
    217210    indices[0] = b->getInt32(0);
    218211    indices[1] = b->getInt32(bufferVertex);
    219 
    220     Value * const offset = getPopCountInitialOffset(b, binding, bufferVertex, pc);
    221 
    222     Value * minCount = nullptr;
    223     bool invertCount = false;
    224 
    225     // If we have the independent count, we can read from it to get a count
    226     // w/o subtracting the i-1 th entry of the partial sum.
    227     if (pc.HasArray || pc.HasNegatedArray) {
    228         const bool isNegPopCount = rate.isNegatedPopCount() && pc.HasNegatedArray;
    229         const bool useNegArray = (isNegPopCount || !pc.HasArray);
    230         indices[2] = b->getInt32(useNegArray ? NEGATED_ARRAY_INDEX : POSITIVE_ARRAY_INDEX);
    231         Value * const array = b->CreateLoad(b->CreateGEP(mPopCountState, indices));
    232         minCount = b->CreateLoad(b->CreateGEP(array, offset));
    233         invertCount = useNegArray ^ rate.isNegatedPopCount();
    234     } else { // use the partial sum to calculate the min item count
    235         indices[2] = b->getInt32(PARTIAL_SUM_INDEX);
    236         Value * const partialSum = b->CreateLoad(b->CreateGEP(mPopCountState, indices));
    237         Value * const prior = b->CreateLoad(b->CreateGEP(partialSum, offset));
    238         Constant * const ONE = b->getSize(1);
    239         Value * const current = b->CreateAdd(offset, ONE);
    240         if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    241             const auto refPortNum = getPopCountReferencePort(mKernel, rate);
    242             Value * const total = getTotalItemCount(b, refPortNum);
    243             Value * const strideLength = getInputStrideLength(b, refPortNum);
    244             Value * const term = producerTerminated(b, refPortNum);
    245             Value * const strideLengthMinus1 = b->CreateSub(strideLength, ONE);
    246             Value * const padding = b->CreateSelect(term, strideLengthMinus1, b->getSize(0));
    247             Value * const paddedTotal =  b->CreateAdd(total, padding);
    248             Value * const countLength = b->CreateUDiv(paddedTotal, strideLength);
    249             Value * const sanityCheck = b->CreateICmpULE(current, countLength);
    250             b->CreateAssert(sanityCheck,
    251                             mKernel->getName() + "_" + binding.getName() + ":"
    252                             " pop count minimum linear item count lookup"
    253                             " exceeds reference stream length");
    254         }
    255         Value * const itemCount = b->CreateLoad(b->CreateGEP(partialSum, current));
    256         minCount = b->CreateSub(itemCount, prior);
    257         invertCount = rate.isNegatedPopCount() ^ pc.AlwaysNegated;
    258     }
    259     if (invertCount) {
    260         Constant * const BlockWidth = b->getSize(b->getBitBlockWidth());
    261         minCount = b->CreateSub(BlockWidth, minCount);
    262     }
    263     return minCount;
     212    indices[2] = b->getInt32(rate.isPopCount() ? POSITIVE_ARRAY_INDEX : NEGATED_ARRAY_INDEX);
     213
     214    // Calculate the min item count by subtracting the (i-1) from the i-th entry of the partial sum.
     215    Value * const offset = getPopCountBaseOffset(b, binding, bufferVertex);
     216    Value * const sumArray = b->CreateLoad(b->CreateGEP(mPopCountState, indices));
     217    Value * const prior = b->CreateLoad(b->CreateGEP(sumArray, offset));
     218    Value * const current = b->CreateAdd(offset, b->getSize(1));
     219    Value * const itemCount = b->CreateLoad(b->CreateGEP(sumArray, current));
     220    return b->CreateSub(itemCount, prior);
    264221}
    265222
     
    274231    const ProcessingRate & rate = binding.getRate();
    275232    const auto bufferVertex = getPopCountReferenceBuffer(mKernel, rate);
    276     PopCountData & pc = getPopCountReference(bufferVertex);
    277233
    278234    IntegerType * const sizeTy = b->getSizeTy();
     
    281237    indices[0] = b->getInt32(0);
    282238    indices[1] = b->getInt32(bufferVertex);
    283     indices[2] = b->getInt32(PARTIAL_SUM_INDEX);
    284 
    285     Value * const array = b->CreateLoad(b->CreateGEP(mPopCountState, indices));
     239    indices[2] = b->getInt32(rate.isPopCount() ? POSITIVE_ARRAY_INDEX : NEGATED_ARRAY_INDEX);
     240
     241    Value * const sumArray = b->CreateLoad(b->CreateGEP(mPopCountState, indices));
    286242    const auto prefix = makeBufferName(mKernelIndex, binding) + "_readPopCount";
    287243
     
    296252    Constant * const MAX_INT = ConstantInt::getAllOnesValue(sizeTy);
    297253    // It's possible that our partial sum started "before" the processed position of this kernel...
    298     Value * const offset = getPopCountInitialOffset(b, binding, bufferVertex, pc); assert (offset);
     254    Value * const offset = getPopCountBaseOffset(b, binding, bufferVertex);
    299255    Value * const initialIndex = b->CreateAdd(mNumOfLinearStrides, offset);
    300256    // Add the "skipped items" to the source and peekable item counts
    301     Value * const skippedItems = b->CreateLoad(b->CreateGEP(array, offset));
     257    Value * const skippedItems = b->CreateLoad(b->CreateGEP(sumArray, offset));
    302258    Value * const availableItems = b->CreateAdd(sourceItemCount, skippedItems);
    303259    Value * const peekableItems = b->CreateAdd(peekableItemCount, skippedItems);
     
    311267    nextRequiredItems->addIncoming(MAX_INT, popCountEntry);
    312268
    313     Value * requiredItems = b->CreateLoad(b->CreateGEP(array, index));
    314     if (LLVM_UNLIKELY(pc.AlwaysNegated ^ rate.isNegatedPopCount())) {
    315         Constant * const Log2BlockWidth = b->getSize(std::log2(b->getBitBlockWidth()));
    316         Value * const total = b->CreateShl(index, Log2BlockWidth);
    317         requiredItems = b->CreateSub(total, requiredItems);
    318     }
    319 
     269    Value * requiredItems = b->CreateLoad(b->CreateGEP(sumArray, index));
    320270    if (lookAhead) {
    321271        requiredItems = b->CreateAdd(requiredItems, lookAhead);
     
    345295    const ProcessingRate & rate = binding.getRate();
    346296    const auto bufferVertex = getPopCountReferenceBuffer(mKernel, rate);
    347     PopCountData & pc = getPopCountReference(bufferVertex);
     297
    348298    std::vector<Value *> indices(3);
    349299    indices[0] = b->getInt32(0);
    350300    indices[1] = b->getInt32(bufferVertex);
    351     indices[2] = b->getInt32(PARTIAL_SUM_INDEX);
     301    indices[2] = b->getInt32(rate.isPopCount() ? POSITIVE_ARRAY_INDEX : NEGATED_ARRAY_INDEX);
    352302
    353303    Value * const partialSum = b->CreateLoad(b->CreateGEP(mPopCountState, indices));
    354     Value * const initialOffset = getPopCountInitialOffset(b, binding, bufferVertex, pc);
     304    Value * const initialOffset = getPopCountBaseOffset(b, binding, bufferVertex);
    355305    Value * const baseItemCount = b->CreateLoad(b->CreateGEP(partialSum, initialOffset));
    356306    Value * const strideOffset = b->CreateAdd(mNumOfLinearStrides, initialOffset);
    357     Value * itemCount = b->CreateLoad(b->CreateGEP(partialSum, strideOffset));
    358 
    359     if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    360         Value * const sanityCheck = b->CreateICmpULE(baseItemCount, itemCount);
    361         b->CreateAssert(sanityCheck,
    362                         mKernel->getName() + "_" + binding.getName() + ":"
    363                         " pop count base items exceeds total items");
    364     }
    365 
    366     itemCount = b->CreateSub(itemCount, baseItemCount);
    367 
    368     const auto invertCount = rate.isNegatedPopCount() ^ pc.AlwaysNegated;
    369     if (invertCount) {
    370         Constant * const BlockWidth = b->getSize(b->getBitBlockWidth());
    371         Value * const totalItems = b->CreateMul(mNumOfLinearStrides, BlockWidth);
    372         itemCount = b->CreateSub(totalItems, itemCount);
    373     }
    374 
    375     #ifdef PRINT_DEBUG_MESSAGES
    376     const auto prefix = makeBufferName(mKernelIndex, binding);
    377     b->CallPrintInt(prefix + "_popCount_itemCount", itemCount);
    378     #endif
    379 
    380     return itemCount;
     307    Value * const itemCount = b->CreateLoad(b->CreateGEP(partialSum, strideOffset));
     308
     309    return b->CreateSub(itemCount, baseItemCount);
    381310}
    382311
     
    400329
    401330/** ------------------------------------------------------------------------------------------------------------- *
    402  * @brief getPopCountReferenceProcessedCount
    403  ** ------------------------------------------------------------------------------------------------------------- */
    404 inline Value * PipelineCompiler::getPopCountReferenceConsumedCount(BuilderRef b, const unsigned bufferVertex) {
     331 * @brief getPopCountBaseOffset
     332 ** ------------------------------------------------------------------------------------------------------------- */
     333Value * PipelineCompiler::getPopCountBaseOffset(BuilderRef b, const Binding & binding, const unsigned bufferVertex) {
     334    PopCountData & pc = getPopCountData(bufferVertex);
     335    if (pc.InitialOffset == nullptr) {
     336        std::vector<Value *> indices(3);
     337        indices[0] = b->getInt32(0);
     338        indices[1] = b->getInt32(bufferVertex);
     339        indices[2] = b->getInt32(BASE_OFFSET_INDEX);
     340        Value * const baseOffset = b->CreateLoad(b->CreateGEP(mPopCountState, indices));
     341        const auto refPortNum = getPopCountReferencePort(mKernel, binding.getRate());
     342        Value * const itemCount = mAlreadyProcessedPhi[refPortNum];
     343        Value * const strideLength = getInputStrideLength(b, refPortNum);
     344        Value * const strideOffset = b->CreateUDiv(itemCount, strideLength);
     345        pc.InitialOffset = b->CreateSub(strideOffset, baseOffset);
     346    }
     347    return pc.InitialOffset;
     348}
     349
     350/** ------------------------------------------------------------------------------------------------------------- *
     351 * @brief getPopCountNextBaseOffset
     352 ** ------------------------------------------------------------------------------------------------------------- */
     353inline Value * PipelineCompiler::getPopCountNextBaseOffset(BuilderRef b, const unsigned bufferVertex) const {
    405354    const auto e = in_edge(bufferVertex, mBufferGraph);
    406355    const auto outputPort = mBufferGraph[e].Port;
    407     PopCountData & pc = getPopCountReference(bufferVertex);
     356    const PopCountData & pc = getPopCountData(bufferVertex);
    408357    if (pc.UsesConsumedCount) {
    409358        return mConsumedItemCount[outputPort];
     
    420369
    421370/** ------------------------------------------------------------------------------------------------------------- *
    422  * @brief getPopCountBaseOffsetIndex
    423  ** ------------------------------------------------------------------------------------------------------------- */
    424 inline Value * PipelineCompiler::getPopCountInitialOffset(BuilderRef b, const Binding & binding, const unsigned bufferVertex, PopCountData & pc) {
    425     if (pc.InitialOffset == nullptr) {
    426         std::vector<Value *> indices(3);
    427         indices[0] = b->getInt32(0);
    428         indices[1] = b->getInt32(bufferVertex);
    429         indices[2] = b->getInt32(BASE_OFFSET_INDEX);
    430         Value * const baseOffset = b->CreateLoad(b->CreateGEP(mPopCountState, indices));
    431         Value * const strideOffset = getReferenceStreamOffset(b, binding);
    432         if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    433             Value * const sanityCheck = b->CreateICmpUGE(strideOffset, baseOffset);
    434             b->CreateAssert(sanityCheck,
    435                             makeBufferName(mKernelIndex, binding) +
    436                             ": pop count base offset exceeds stride offset");
    437         }
    438         pc.InitialOffset = b->CreateSub(strideOffset, baseOffset);
    439     }
    440     return pc.InitialOffset;
    441 }
    442 
    443 /** ------------------------------------------------------------------------------------------------------------- *
    444  * @brief getReferenceStreamOffset
    445  ** ------------------------------------------------------------------------------------------------------------- */
    446 inline Value * PipelineCompiler::getReferenceStreamOffset(BuilderRef b, const Binding & binding) {
    447     const auto refPortNum = getPopCountReferencePort(mKernel, binding.getRate());
    448     Value * const itemCount = mAlreadyProcessedPhi[refPortNum];
    449     Value * const strideLength = getInputStrideLength(b, refPortNum);
    450     return b->CreateUDiv(itemCount, strideLength);
    451 }
    452 
    453 /** ------------------------------------------------------------------------------------------------------------- *
    454371 * @brief initializePopCountReferenceCounts
    455372 ** ------------------------------------------------------------------------------------------------------------- */
     
    459376    }
    460377    assert (out_degree(bufferVertex, mConsumerGraph) != 0);
    461     PopCountData & pc = getPopCountReference(bufferVertex);
     378    PopCountData & pc = getPopCountData(bufferVertex);
    462379    pc.Processed = pc.UsesConsumedCount ? nullptr : produced.get();
    463380    pc.PhiNode = nullptr;
     
    471388void PipelineCompiler::createPopCountReferenceCounts(BuilderRef b) {
    472389    forEachOutputBufferThatIsAPopCountReference(mKernelIndex, [&](const unsigned bufferVertex) {
    473         PopCountData & pc = getPopCountReference(bufferVertex);
     390        PopCountData & pc = getPopCountData(bufferVertex);
    474391        if (pc.Processed) {
    475392            assert (mKernelEntry);
     
    487404inline void PipelineCompiler::computeMinimumPopCountReferenceCounts(BuilderRef b) {
    488405    forEachPopCountReferenceInputPort(mKernelIndex, [&](const unsigned bufferVertex, const unsigned inputPort) {
    489         PopCountData & pc = getPopCountReference(bufferVertex);
     406        PopCountData & pc = getPopCountData(bufferVertex);
    490407        if (pc.Processed) { assert (pc.PhiNode);
    491408            Value * const processed = mFullyProcessedItemCount[inputPort];
     
    500417inline void PipelineCompiler::updatePopCountReferenceCounts(BuilderRef b) {
    501418    forEachPopCountReferenceInputPort(mKernelIndex, [&](const unsigned bufferVertex, const unsigned inputPort) {
    502         PopCountData & pc = getPopCountReference(bufferVertex);
     419        PopCountData & pc = getPopCountData(bufferVertex);
    503420        if (pc.Processed) { assert (pc.PhiNode);
    504421            pc.PhiNode->addIncoming(pc.Processed, mKernelLoopExitPhiCatch);
     
    523440void PipelineCompiler::addPopCountScalarsToPipelineKernel(BuilderRef b, const unsigned index) {
    524441    forEachOutputBufferThatIsAPopCountReference(index, [&](const unsigned bufferVertex) {
    525         const PopCountData & pc = getPopCountReference(bufferVertex);
     442        const PopCountData & pc = getPopCountData(bufferVertex);
    526443        if (LLVM_UNLIKELY(!pc.UsesConsumedCount)) {
    527444            const auto e = in_edge(bufferVertex, mBufferGraph);
     
    537454 * @brief getNegatedPopCountArray
    538455 ** ------------------------------------------------------------------------------------------------------------- */
    539 Value * PipelineCompiler::getIndividualPopCountArray(BuilderRef b, const unsigned inputPort, const unsigned popCountArrayIndex) {
     456Value * PipelineCompiler::getIndividualPopCountArray(BuilderRef b, const unsigned inputPort, const unsigned arrayTypeIndex) {
    540457    const auto bufferVertex = getInputBufferVertex(inputPort);
    541458    std::vector<Value *> indices(3);
    542459    indices[0] = b->getInt32(0);
    543460    indices[1] = b->getInt32(bufferVertex);
    544     indices[2] = b->getInt32(popCountArrayIndex);
     461    indices[2] = b->getInt32(arrayTypeIndex);
    545462    Value * array = b->CreateLoad(b->CreateGEP(mPopCountState, indices));
    546463    indices[2] = b->getInt32(BASE_OFFSET_INDEX);
     
    556473 * @brief getPopCountArray
    557474 ** ------------------------------------------------------------------------------------------------------------- */
    558 inline Value * PipelineCompiler::getPopCountArray(BuilderRef b, const unsigned inputPort) {
     475inline Value * PipelineCompiler::getPositivePopCountArray(BuilderRef b, const unsigned inputPort) {
    559476    return getIndividualPopCountArray(b, inputPort, POSITIVE_ARRAY_INDEX);
    560477}
     
    563480 * @brief getNegatedPopCountArray
    564481 ** ------------------------------------------------------------------------------------------------------------- */
    565 inline Value * PipelineCompiler::getNegatedPopCountArray(BuilderRef b, const unsigned inputPort) {
     482inline Value * PipelineCompiler::getNegativePopCountArray(BuilderRef b, const unsigned inputPort) {
    566483    return getIndividualPopCountArray(b, inputPort, NEGATED_ARRAY_INDEX);
    567484}
     
    580497
    581498    Constant * const CAPACITY = b->getInt32(CAPACITY_INDEX);
    582     Constant * const PARTIAL_SUM = b->getInt32(PARTIAL_SUM_INDEX);
    583499    Constant * const POSITIVE_ARRAY = b->getInt32(POSITIVE_ARRAY_INDEX);
    584500    Constant * const NEGATIVE_ARRAY = b->getInt32(NEGATED_ARRAY_INDEX);
     
    594510            const auto e = in_edge(i, mBufferGraph);
    595511            const BufferRateData & rd = mBufferGraph[e];
    596             // const BufferNode & bn = mBufferGraph[source(e, mBufferGraph)];
    597             const PopCountData & pc = getPopCountReference(i);
    598             // const auto required = (bn.upper * rd.maximum) / pc.FieldWidth;
     512            const PopCountData & pc = getPopCountData(i);
    599513            const auto required = rd.Maximum / pc.FieldWidth;
    600514            Constant * const initialSize = b->getSize(ceiling(required));
     
    603517            indices[2] = CAPACITY;
    604518            b->CreateStore(initialSize, b->CreateGEP(popCountState, indices));
    605             indices[2] = PARTIAL_SUM;
    606 
    607 
    608             Value * const partialSum = b->CreateCacheAlignedMalloc(sizeTy, initialSize);
    609             b->CreateStore(partialSum, b->CreateGEP(popCountState, indices));
    610             b->CreateStore(ZERO, partialSum);
    611             if (pc.HasArray) {
     519
     520            if (hasPositivePopCountArray(i)) {
    612521                indices[2] = POSITIVE_ARRAY;
    613522                Value * const array = b->CreateCacheAlignedMalloc(sizeTy, initialSize);
    614523                b->CreateStore(array, b->CreateGEP(popCountState, indices));
     524                b->CreateStore(ZERO, array);
    615525            }
    616             if (pc.HasNegatedArray) {
     526            if (hasNegativePopCountArray(i)) {
    617527                indices[2] = NEGATIVE_ARRAY;
    618528                Value * const array = b->CreateCacheAlignedMalloc(sizeTy, initialSize);
    619529                b->CreateStore(array, b->CreateGEP(popCountState, indices));
     530                b->CreateStore(ZERO, array);
    620531            }
    621532        }
     
    633544    const auto lastBuffer = num_vertices(mBufferGraph);
    634545    assert (firstBuffer <= lastBuffer);
    635 
    636     Constant * const PARTIAL_SUM = b->getInt32(PARTIAL_SUM_INDEX);
    637546    Constant * const POSITIVE_ARRAY = b->getInt32(POSITIVE_ARRAY_INDEX);
    638547    Constant * const NEGATIVE_ARRAY = b->getInt32(NEGATED_ARRAY_INDEX);
     
    644553        if (LLVM_UNLIKELY(in_degree(i, mPopCountGraph) != 0)) {
    645554            indices[1] = b->getInt32(i);
    646             indices[2] = PARTIAL_SUM;
    647             b->CreateFree(b->CreateLoad(b->CreateGEP(popCountState, indices)));
    648             const PopCountData & pc = getPopCountReference(i);
    649             if (pc.HasArray) {
     555            if (hasPositivePopCountArray(i)) {
    650556                indices[2] = POSITIVE_ARRAY;
    651557                b->CreateFree(b->CreateLoad(b->CreateGEP(popCountState, indices)));
    652558            }
    653             if (pc.HasNegatedArray) {
     559            if (hasNegativePopCountArray(i)) {
    654560                indices[2] = NEGATIVE_ARRAY;
    655561                b->CreateFree(b->CreateLoad(b->CreateGEP(popCountState, indices)));
     
    694600            std::vector<Type *> state(POP_COUNT_MAX_FIELDS);
    695601
    696             const auto & pc = getPopCountReference(i);
    697602            state[BASE_OFFSET_INDEX] = sizeTy;
    698603            state[CAPACITY_INDEX] = sizeTy;
    699             state[PARTIAL_SUM_INDEX] = sizePtrTy;
    700             state[POSITIVE_ARRAY_INDEX] = pc.HasArray ? sizePtrTy : emptyTy;
    701             state[NEGATED_ARRAY_INDEX] = pc.HasNegatedArray ? sizePtrTy : emptyTy;
     604            state[POSITIVE_ARRAY_INDEX] = hasPositivePopCountArray(i) ? sizePtrTy : emptyTy;
     605            state[NEGATED_ARRAY_INDEX] = hasNegativePopCountArray(i) ? sizePtrTy : emptyTy;
    702606            types[i] = StructType::get(b->getContext(), state);
    703607        }
     
    709613 * @brief getPopCountReference
    710614 ** ------------------------------------------------------------------------------------------------------------- */
    711 LLVM_READNONE PopCountData & PipelineCompiler::getPopCountReference(const unsigned bufferVertex) {
     615LLVM_READNONE PopCountData & PipelineCompiler::getPopCountData(const unsigned bufferVertex) const {
    712616    const auto f = mPopCountData.find(bufferVertex);
    713617    assert (f != mPopCountData.end());
    714     return f->second;
     618    return const_cast<PopCountData &>(f->second);
    715619}
    716620
     
    722626    pc.FieldWidth =
    723627        popCountReferenceFieldWidth(bufferVertex);
    724     std::tie(pc.HasArray, pc.HasNegatedArray) =
    725         popCountReferenceRequiresPopCountArray(bufferVertex);
    726     pc.AlwaysNegated =
    727         pc.HasArray ? false : popCountReferenceIsAlwaysNegated(bufferVertex);
    728628    pc.UsesConsumedCount =
    729629        popCountReferenceCanUseConsumedItemCount(bufferVertex);
     
    756656}
    757657
    758 #if 0
    759 
    760 /** ------------------------------------------------------------------------------------------------------------- *
    761  * @brief popCountReferenceRequiresBaseOffset
    762  *
    763  * Determine that all kernels that use this buffer progress at the same rate. This requires not only that the
    764  * inputs are processed at the same rate but also that they are produced at it. Similarly, any data the kernel
    765  * produces that is produced into a non-DynamicBuffer must also guaranteed to be consumed at the same rate.
    766  ** ------------------------------------------------------------------------------------------------------------- */
    767 bool PipelineCompiler::popCountReferenceRequiresBaseOffset(const unsigned bufferVertex) const {
    768 // TODO: do we need to prove that the single kernel only ever requires a single iteration to fully
    769 // consume it's data?
    770 
    771 
    772 //    // if this reference is used by one kernel, we're trivially guaranteed this holds
    773 //    if (popCountBufferIsUsedBySingleKernel(bufferVertex)) {
    774 //        return false;
    775 //    }
    776 
    777 
    778 //    // TODO: take the transitive closure of the buffer graph, annotated with the processing rates.
    779 //    // we can handle fixed rates by simply checking that the upper/lower bound of every reachable
    780 //    // kernel is equivalent but the same notion doesn't work for popcounts. If a kernel is not
    781 //    // dependent on two different popcount rates (w.r.t. the reference buffer), can we use the
    782 //    // partial sum?
    783     return true;
    784 }
    785 
    786 /** ------------------------------------------------------------------------------------------------------------- *
    787  * @brief popCountBufferIsUsedBySingleKernel
    788  ** ------------------------------------------------------------------------------------------------------------- */
    789 bool PipelineCompiler::popCountBufferIsUsedBySingleKernel(const unsigned bufferVertex) const {
    790     // if we have only one incoming port, we're trivially guaranteed this holds
    791     if (in_degree(bufferVertex, mPopCountGraph) == 1) {
    792         return true;
    793     }
    794     flat_set<unsigned> K;
    795     // buffer <- port <- kernel
    796     for (const auto e : make_iterator_range(in_edges(bufferVertex, mPopCountGraph))) {
    797         const auto p = source(e, mPopCountGraph);
    798         const auto f = first_in_edge(p, mPopCountGraph);
    799         const auto k = source(f, mPopCountGraph);
    800         K.insert(k);
    801     }
    802     return K.size() == 1;
    803 }
    804 
    805 #endif
    806 
    807 /** ------------------------------------------------------------------------------------------------------------- *
    808  * @brief popCountReferenceRequiresPopCountArray
    809  *
    810  * Check if the kernel itself requires a popcount array to perform iteration.
    811  ** ------------------------------------------------------------------------------------------------------------- */
    812 inline std::pair<bool, bool> PipelineCompiler::popCountReferenceRequiresPopCountArray(const unsigned bufferVertex) const {
    813     bool hasArray = false, hasNegatedArray = false;
    814     for (const auto e : make_iterator_range(out_edges(bufferVertex, mBufferGraph))) {
    815         const auto port = mBufferGraph[e].Port;
    816         const auto kernelVertex = target(e, mBufferGraph);
    817         const Kernel * const consumer = mPipeline[kernelVertex];
    818         const Binding & b = consumer->getInputStreamSetBinding(port);
    819         if (LLVM_UNLIKELY(b.hasAttribute(AttrId::RequiresPopCountArray))) {
    820             hasArray = true;
    821         }
    822         if (LLVM_UNLIKELY(b.hasAttribute(AttrId::RequiresNegatedPopCountArray))) {
    823             hasNegatedArray = true;
    824         }
    825     }
    826     return std::make_pair(hasArray, hasNegatedArray);
    827 }
    828 
    829658/** ------------------------------------------------------------------------------------------------------------- *
    830659 * @brief popCountReferenceCanUseConsumedItemCount
     
    855684
    856685/** ------------------------------------------------------------------------------------------------------------- *
    857  * @brief popCountReferenceIsAlwaysNegated
    858  ** ------------------------------------------------------------------------------------------------------------- */
    859 inline bool PipelineCompiler::popCountReferenceIsAlwaysNegated(const unsigned bufferVertex) const {
    860     for (const auto e : make_iterator_range(in_edges(bufferVertex, mPopCountGraph))) {
    861         if (LLVM_LIKELY(mPopCountGraph[e].Type != CountingType::Negative)) {
    862             return false;
    863         }
    864     }
    865     return true;
     686 * @brief hasPositivePopCountArray
     687 ** ------------------------------------------------------------------------------------------------------------- */
     688bool PipelineCompiler::hasPositivePopCountArray(const unsigned bufferVertex) const {
     689    for (const auto & e : make_iterator_range(in_edges(bufferVertex, mPopCountGraph))) {
     690        if (mPopCountGraph[e].Type & CountingType::Positive) {
     691            return true;
     692        }
     693    }
     694    return false;
     695}
     696
     697/** ------------------------------------------------------------------------------------------------------------- *
     698 * @brief hasNegativePopCountArray
     699 ** ------------------------------------------------------------------------------------------------------------- */
     700bool PipelineCompiler::hasNegativePopCountArray(const unsigned bufferVertex) const {
     701    for (const auto & e : make_iterator_range(in_edges(bufferVertex, mPopCountGraph))) {
     702        if (mPopCountGraph[e].Type & CountingType::Negative) {
     703            return true;
     704        }
     705    }
     706    return false;
    866707}
    867708
     
    902743}
    903744
    904 
    905 }
     745}
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/termination_logic.hpp

    r6267 r6272  
    1818TerminationGraph PipelineCompiler::makeTerminationGraph() {
    1919
    20     using Graph = TerminationGraph;
    21 
    22     using Vertex = Graph::vertex_descriptor;
    23     using Edge = Graph::edge_descriptor;
     20    using Vertex = TerminationGraph::vertex_descriptor;
     21    using Edge = TerminationGraph::edge_descriptor;
    2422    using VertexVector = std::vector<Vertex>;
    2523
     
    3432    // how would this affect termination?
    3533
    36     Graph G(n);
     34    TerminationGraph G(n);
    3735
    3836    // 1) copy and summarize producer -> consumer relations from the buffer graph
     
    281279 ** ------------------------------------------------------------------------------------------------------------- */
    282280inline void PipelineCompiler::updateTerminationSignal(Value * const signal) {
    283     const auto pathId = mTerminationGraph[mKernelIndex];
    284     mTerminationSignals[pathId] = signal;
     281    mTerminationSignals[mTerminationGraph[mKernelIndex]] = signal;
    285282}
    286283
     
    311308    Value * const ptr = b->getScalarFieldPtr(TERMINATION_PREFIX + std::to_string(pathId));
    312309    b->setKernel(mKernel);
    313     Constant * const signal = getTerminationSignal(b, mKernelIndex);
    314     b->CreateStore(signal, ptr, true);
     310    b->CreateStore(getTerminationSignal(b, mKernelIndex), ptr, true);
    315311}
    316312
Note: See TracChangeset for help on using the changeset viewer.