Changeset 6275


Ignore:
Timestamp:
Jan 6, 2019, 4:33:48 PM (3 months ago)
Author:
nmedfort
Message:

More work on optimizing for stateless kernels

Location:
icGREP/icgrep-devel/icgrep
Files:
13 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r6258 r6275  
    366366add_custom_target (check
    367367  COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure
    368   DEPENDS icgrep u8u16 u32u8 base64 editd abc_gen)
     368  DEPENDS icgrep u8u16 u32u8 base64 editd abc_gen idisa_test)
    369369
    370370add_custom_target (perf_icgrep
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r6274 r6275  
    147147 ** ------------------------------------------------------------------------------------------------------------- */
    148148void Kernel::addKernelDeclarations(const std::unique_ptr<KernelBuilder> & b) {
    149     if (mKernelStateType == nullptr) {
    150         throw std::runtime_error("Kernel state definition " + getName() + " has not been finalized.");
     149    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
     150        llvm_unreachable("Kernel state must be constructed prior to calling addKernelDeclarations");
    151151    }
    152152    addInitializeDeclaration(b);
     
    162162
    163163    std::vector<Type *> params;
    164     params.push_back(mKernelStateType->getPointerTo());
     164    if (LLVM_LIKELY(isStateful())) {
     165        params.push_back(mKernelStateType->getPointerTo());
     166    }
    165167    for (const Binding & binding : mInputScalars) {
    166168        params.push_back(binding.getType());
     
    172174    initFunc->setDoesNotThrow();
    173175    auto args = initFunc->arg_begin();
    174     args->setName("self");
     176    if (LLVM_LIKELY(isStateful())) {
     177        (args++)->setName("handle");
     178    }
    175179    for (const Binding & binding : mInputScalars) {
    176         (++args)->setName(binding.getName());
    177     }
    178 
    179     assert (std::next(args) == initFunc->arg_end());
     180        (args++)->setName(binding.getName());
     181    }
     182
     183    assert (args == initFunc->arg_end());
    180184}
    181185
     
    190194    b->SetInsertPoint(BasicBlock::Create(b->getContext(), "entry", mCurrentMethod));
    191195    auto args = mCurrentMethod->arg_begin();
    192     setHandle(b, &*args);
    193     if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
    194         b->CreateMProtect(mHandle, CBuilder::Protect::WRITE);
    195     }
    196     b->CreateStore(ConstantAggregateZero::get(mKernelStateType), getHandle());
     196    if (LLVM_LIKELY(isStateful())) {
     197        setHandle(b, &*(args++));
     198    }
     199    if (LLVM_LIKELY(isStateful())) {
     200        if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
     201            b->CreateMProtect(mHandle, CBuilder::Protect::WRITE);
     202        }
     203        b->CreateStore(ConstantAggregateZero::get(mKernelStateType), mHandle);
     204    }
    197205    for (const auto & binding : mInputScalars) {
    198         b->setScalarField(binding.getName(), &*(++args));
     206        b->setScalarField(binding.getName(), &*(args++));
    199207    }
    200208    const auto numOfOutputs = mOutputStreamSets.size();
     
    211219    initializeLocalScalarValues(b);
    212220    generateInitializeMethod(b);
    213     if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
     221    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect) && isStateful())) {
    214222        b->CreateMProtect(mHandle, CBuilder::Protect::READ);
    215223    }
     
    259267    std::vector<Type *> params;
    260268    params.reserve(2 + mInputStreamSets.size() + mOutputStreamSets.size());
    261     params.push_back(mKernelStateType->getPointerTo());  // handle
     269    if (LLVM_LIKELY(isStateful())) {
     270        params.push_back(mKernelStateType->getPointerTo());  // handle
     271    }
    262272    params.push_back(sizeTy); // numOfStrides
    263273    for (unsigned i = 0; i < mInputStreamSets.size(); ++i) {
     
    309319    doSegment->setDoesNotThrow();
    310320    auto args = doSegment->arg_begin();
    311     args->setName("self");
    312     (++args)->setName("numOfStrides");
     321    if (LLVM_LIKELY(isStateful())) {
     322        (args++)->setName("handle");
     323    }
     324    (args++)->setName("numOfStrides");
    313325    for (unsigned i = 0; i < mInputStreamSets.size(); ++i) {
    314326        const Binding & input = mInputStreamSets[i];
    315         (++args)->setName(input.getName());
     327        (args++)->setName(input.getName());
    316328        if (LLVM_LIKELY(hasParam(input))) {
    317             (++args)->setName(input.getName() + "_processed");
    318         }
    319         (++args)->setName(input.getName() + "_accessible");
     329            (args++)->setName(input.getName() + "_processed");
     330        }
     331        (args++)->setName(input.getName() + "_accessible");
    320332        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresPopCountArray))) {
    321             (++args)->setName(input.getName() + "_popCountArray");
     333            (args++)->setName(input.getName() + "_popCountArray");
    322334        }
    323335        if (LLVM_UNLIKELY(input.hasAttribute(AttrId::RequiresNegatedPopCountArray))) {
    324             (++args)->setName(input.getName() + "_negatedPopCountArray");
     336            (args++)->setName(input.getName() + "_negatedPopCountArray");
    325337        }
    326338    }
     
    328340        const Binding & output = mOutputStreamSets[i];
    329341        if (LLVM_LIKELY(!isLocalBuffer(output))) {
    330             (++args)->setName(output.getName());
     342            (args++)->setName(output.getName());
    331343        }
    332344        if (LLVM_LIKELY(hasParam(output))) {
    333             (++args)->setName(output.getName() + "_produced");
     345            (args++)->setName(output.getName() + "_produced");
    334346        }
    335347        if (LLVM_LIKELY(isLocalBuffer(output))) {
    336             (++args)->setName(output.getName() + "_consumed");
     348            (args++)->setName(output.getName() + "_consumed");
    337349        } else {
    338             (++args)->setName(output.getName() + "_writable");
    339         }
    340     }
    341     assert (std::next(args) == doSegment->arg_end());
     350            (args++)->setName(output.getName() + "_writable");
     351        }
     352    }
     353    assert (args == doSegment->arg_end());
    342354}
    343355
     
    358370    b->SetInsertPoint(BasicBlock::Create(b->getContext(), "entry", mCurrentMethod));
    359371    auto args = mCurrentMethod->arg_begin();
    360     setHandle(b, &*(args++));
     372    if (LLVM_LIKELY(isStateful())) {
     373        setHandle(b, &*(args++));
     374    }
    361375    mNumOfStrides = &*(args++);
    362376    mIsFinal = b->CreateIsNull(mNumOfStrides);
     
    578592        }
    579593    }
    580     PointerType * const selfType = mKernelStateType->getPointerTo();
    581     FunctionType * const terminateType = FunctionType::get(resultType, {selfType}, false);
     594    std::vector<Type *> params;
     595    if (LLVM_LIKELY(isStateful())) {
     596        params.push_back(mKernelStateType->getPointerTo());
     597    }
     598    FunctionType * const terminateType = FunctionType::get(resultType, params, false);
    582599    Function * const terminateFunc = Function::Create(terminateType, GlobalValue::ExternalLinkage, getName() + TERMINATE_SUFFIX, b->getModule());
    583600    terminateFunc->setCallingConv(CallingConv::C);
    584601    terminateFunc->setDoesNotThrow();
    585602    auto args = terminateFunc->arg_begin();
    586     args->setName("self");
    587     assert (std::next(args) == terminateFunc->arg_end());
     603    if (LLVM_LIKELY(isStateful())) {
     604        (args++)->setName("handle");
     605    }
     606    assert (args == terminateFunc->arg_end());
    588607}
    589608
     
    597616    mCurrentMethod = getTerminateFunction(b->getModule());
    598617    b->SetInsertPoint(BasicBlock::Create(b->getContext(), "entry", mCurrentMethod));
    599     auto args = mCurrentMethod->arg_begin();
    600     setHandle(b, &*(args++));
     618    if (LLVM_LIKELY(isStateful())) {
     619        auto args = mCurrentMethod->arg_begin();
     620        setHandle(b, &*(args++));
     621        assert (args == mCurrentMethod->arg_end());
     622    }
    601623    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
    602624        b->CreateMProtect(mHandle,CBuilder::Protect::WRITE);
     
    613635    generateFinalizeMethod(b); // may be overridden by the Kernel subtype
    614636    const auto outputs = getFinalOutputScalars(b);
    615     b->CreateFree(mHandle);
     637    if (LLVM_LIKELY(isStateful())) {
     638        b->CreateFree(mHandle);
     639    }
    616640    mHandle = nullptr;
    617641    if (outputs.empty()) {
     
    713737void Kernel::prepareKernel(const std::unique_ptr<KernelBuilder> & b) {
    714738    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
    715         report_fatal_error(getName() + ": cannot prepare kernel after kernel state finalized");
     739        llvm_unreachable("Cannot call prepareKernel after constructing kernel state type");
    716740    }
    717741    if (LLVM_UNLIKELY(mStride == 0)) {
     
    727751    if (LLVM_LIKELY(mKernelStateType == nullptr)) {
    728752        std::vector<Type *> fields;
    729         fields.reserve(mInputScalars.size() + mOutputScalars.size() + mInternalScalars.size() + 1);
     753        fields.reserve(mInputScalars.size() + mOutputScalars.size() + mInternalScalars.size());
    730754        for (const Binding & scalar : mInputScalars) {
    731755            assert (scalar.getType());
     
    768792void Kernel::prepareCachedKernel(const std::unique_ptr<KernelBuilder> & b) {
    769793    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
    770         report_fatal_error(getName() + ": cannot prepare kernel after kernel state finalized");
     794        llvm_unreachable("Cannot call prepareCachedKernel after constructing kernel state type");
    771795    }
    772796    addBaseKernelProperties(b);
    773797    mKernelStateType = getModule()->getTypeByName(getName());
     798    // If we have a stateless object, the type would be optimized out of the
     799    // cached IR. Consequently, we create a dummy "empty struct" to simplify
     800    // the assumptions of the other Kernel functions.
    774801    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
    775         report_fatal_error("Kernel definition for " + getName() + " was not found in the cache!");
     802        mKernelStateType = StructType::get(b->getContext());
    776803    }
    777804    assert (isa<StructType>(mKernelStateType));
     
    822849 ** ------------------------------------------------------------------------------------------------------------- */
    823850Value * Kernel::createInstance(const std::unique_ptr<KernelBuilder> & b) {
    824     assert (mKernelStateType && "cannot create instance before calling prepareKernel() or prepareCachedKernel()");
    825     Constant * const size = ConstantExpr::getSizeOf(mKernelStateType);
    826     Value * handle = nullptr;
    827     if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
    828         handle = b->CreateAlignedMalloc(size, b->getPageSize());
    829         b->CreateMProtect(handle, size, CBuilder::Protect::READ);
     851    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
     852        llvm_unreachable("Kernel state must be constructed prior to calling createInstance");
     853    }
     854    if (LLVM_LIKELY(isStateful())) {
     855        Constant * const size = ConstantExpr::getSizeOf(mKernelStateType);
     856        Value * handle = nullptr;
     857        if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableMProtect))) {
     858            handle = b->CreateAlignedMalloc(size, b->getPageSize());
     859            b->CreateMProtect(handle, size, CBuilder::Protect::READ);
     860        } else {
     861            handle = b->CreateAlignedMalloc(size, b->getCacheAlignment());
     862        }
     863        return b->CreatePointerCast(handle, mKernelStateType->getPointerTo());
    830864    } else {
    831         handle = b->CreateAlignedMalloc(size, b->getCacheAlignment());
    832     }
    833     return b->CreatePointerCast(handle, mKernelStateType->getPointerTo());
     865        llvm_unreachable("createInstance should not be called on stateless kernels");
     866        return nullptr;
     867    }
    834868}
    835869
     
    894928Value * Kernel::getScalarFieldPtr(KernelBuilder & b, const StringRef name) const {
    895929    const auto & field = getScalarField(name);
    896     assert (mKernelStateType);
     930    if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
     931        llvm_unreachable("Kernel state must be constructed prior to calling getScalarFieldPtr");
     932    }
    897933    unsigned index = field.Index;
    898934    switch (field.Type) {
     
    932968
    933969/** ------------------------------------------------------------------------------------------------------------- *
    934  * @brief isStateless
    935  ** ------------------------------------------------------------------------------------------------------------- */
    936 bool Kernel::isStateless() const {
    937     #warning return whether the kernel struct is zero-length; move cycle count to pipeline first.
    938     return false;
    939 }
    940 
    941 /** ------------------------------------------------------------------------------------------------------------- *
    942970 * @brief getInputScalarBinding
    943971 ** ------------------------------------------------------------------------------------------------------------- */
     
    11561184    std::string tmp;
    11571185    raw_string_ostream out(tmp);
    1158     out << "F";
     1186    if (LLVM_LIKELY(isStateful())) {
     1187        out << "F";
     1188    } else {
     1189        out << "L";
     1190    }
    11591191    out << getStride();
    11601192    AttributeSet::print(out);
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r6274 r6275  
    148148    virtual bool isCachable() const { return false; }
    149149
    150     bool isStateless() const;
     150    LLVM_READNONE bool isStateful() const {
     151        if (LLVM_UNLIKELY(mKernelStateType == nullptr)) {
     152            llvm_unreachable("kernel state must be constructed prior to calling isStateful");
     153        }
     154        return !mKernelStateType->isEmptyTy();
     155    }
    151156
    152157    unsigned getStride() const { return mStride; }
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/consumer_logic.hpp

    r6261 r6275  
    132132    const auto pe = in_edge(bufferVertex, mConsumerGraph);
    133133    const auto producerVertex = source(pe, mConsumerGraph);
    134     const Kernel * const producer = mPipeline[producerVertex]; assert (producer->getHandle());
     134    const Kernel * const producer = mPipeline[producerVertex];
    135135    const auto outputPort = mConsumerGraph[pe];
    136136    const Binding & output = producer->getOutputStreamSetBinding(outputPort);
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/cycle_counter_logic.hpp

    r6274 r6275  
    88 * @brief addInternalKernelCycleCountProperties
    99 ** ------------------------------------------------------------------------------------------------------------- */
    10 inline void PipelineCompiler::addInternalKernelCycleCountProperties(BuilderRef b, const unsigned kernel) {
     10inline void PipelineCompiler::addCycleCounterProperties(BuilderRef b, const unsigned kernel) {
    1111    if (LLVM_UNLIKELY(DebugOptionIsSet(codegen::EnableCycleCounter))) {
    1212        const auto prefix = makeKernelName(kernel);
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/kernel_logic.hpp

    r6272 r6275  
    1919    mKernelIndex = index;
    2020    mKernel = mPipeline[index];
    21     assert (mKernel);
    2221    b->setKernel(mPipelineKernel);
    23     Value * handle = b->getScalarField(makeKernelName(index));
    24     if (mKernel->hasFamilyName()) {
    25         handle = b->CreateBitCast(handle, mKernel->getKernelType()->getPointerTo());
    26     }
    27     mPipeline[index]->setHandle(b, handle);
     22    if (LLVM_LIKELY(mKernel->isStateful())) {
     23        Value * handle = b->getScalarField(makeKernelName(index));
     24        if (mKernel->hasFamilyName()) {
     25            handle = b->CreateBitCast(handle, mKernel->getKernelType()->getPointerTo());
     26        }
     27        mKernel->setHandle(b, handle);
     28    }
    2829    b->setKernel(mKernel);
    2930}
     
    438439    std::vector<Value *> args;
    439440    args.reserve((numOfInputs + numOfOutputs) * 4 + 2);
    440     args.push_back(mKernel->getHandle());
     441    if (LLVM_LIKELY(mKernel->isStateful())) {
     442        args.push_back(mKernel->getHandle());
     443    }
    441444    args.push_back(mNumOfLinearStrides);
    442445    for (unsigned i = 0; i < numOfInputs; ++i) {
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_builder.cpp

    r6274 r6275  
    1616#warning the pipeline ordering should be canonicalized to ensure that when multiple kernels could be scheduled the same one will always be chosen.
    1717
    18 
    1918using namespace llvm;
    2019using namespace boost;
     
    2928 ** ------------------------------------------------------------------------------------------------------------- */
    3029void * ProgramBuilder::compile() {
    31     PipelineKernel * const pk =
    32         cast<PipelineKernel>(makeKernel());
     30    // generate any nested kernels
     31    mDriver.generateUncachedKernels();
     32    // generate the actual pipeline (unless we can extract it from the cache)
     33    PipelineKernel * const pk = cast<PipelineKernel>(makeKernel());
    3334    pk->initializeBindings(mDriver);
    3435    mDriver.addKernel(pk);
    3536    mDriver.generateUncachedKernels();
    36     Function * const main =
    37         addOrDeclareMainFunction(pk);
     37    Function * const main = addOrDeclareMainFunction(pk);
    3838    return mDriver.finalizeObject(main);
    3939}
     
    324324            if (k->hasFamilyName()) {
    325325                const auto kn = PipelineKernel::makeKernelName(k, index[i] + 1);
    326                 addInputScalar(addrPtrTy, kn);
     326                if (LLVM_LIKELY(k->isStateful())) {
     327                    addInputScalar(addrPtrTy, kn);
     328                }
    327329                addInputScalar(addrPtrTy, kn + INITIALIZE_FUNCTION_POINTER_SUFFIX);
    328330                addInputScalar(addrPtrTy, kn + DO_SEGMENT_FUNCTION_POINTER_SUFFIX);
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_compiler.hpp

    r6274 r6275  
    5757
    5858struct BufferNode {
    59     Value *             TotalItems;
    60 
    61     StreamSetBuffer *   Buffer;
    62 
    63     RateValue           Lower;
    64     RateValue           Upper;
    65 
    66     unsigned            Overflow;
    67     unsigned            Fasimile;
    68 
    69     BufferType          Type;
    70 
    71     BufferNode() : TotalItems(nullptr), Buffer(nullptr), Lower(), Upper(), Overflow(0), Fasimile(0), Type(BufferType::Internal) {}
     59    Value *             TotalItems = nullptr;
     60    StreamSetBuffer *   Buffer = nullptr;
     61    RateValue           Lower{};
     62    RateValue           Upper{};
     63    unsigned            Overflow = 0;
     64    unsigned            Fasimile = 0;
     65    BufferType          Type = BufferType::Internal;
    7266};
    7367
     
    178172    void acquireCurrentSegment(BuilderRef b);
    179173    void releaseCurrentSegment(BuilderRef b);
     174    LLVM_READNONE bool requiresSynchronization(const unsigned kernelIndex) const;
    180175
    181176// main pipeline functions
     
    356351// cycle counter functions
    357352
    358     void addInternalKernelCycleCountProperties(BuilderRef b, const unsigned kernel);
     353    void addCycleCounterProperties(BuilderRef b, const unsigned kernel);
    359354    void startOptionalCycleCounter(BuilderRef b);
    360355    void updateOptionalCycleCounter(BuilderRef b);
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_kernel.cpp

    r6272 r6275  
    4141            if (kernel->hasFamilyName()) {
    4242                kernel->addKernelDeclarations(b);
    43                 Value * const handle = kernel->createInstance(b);
    4443                PointerType * const voidPtrTy = b->getVoidPtrTy();
    45                 args.push_back(b->CreatePointerCast(handle, voidPtrTy));
     44                if (LLVM_UNLIKELY(kernel->isStateful())) {
     45                    Value * const handle = kernel->createInstance(b);
     46                    args.push_back(b->CreatePointerCast(handle, voidPtrTy));
     47                }
    4648                args.push_back(b->CreatePointerCast(kernel->getInitFunction(m), voidPtrTy));
    4749                args.push_back(b->CreatePointerCast(kernel->getDoSegmentFunction(m), voidPtrTy));
     
    9597        Value * result = b->CreateCall(getTerminateFunction(b->getModule()), { mHandle });
    9698        mHandle = nullptr;
    97         if (mOutputScalars.empty()) {
    98             assert (!result || result->getType()->isVoidTy());
     99        if (LLVM_LIKELY(mOutputScalars.empty())) {
     100            assert ("pipeline termination must have output scalars or a void return type!" && result->getType()->isVoidTy());
    99101            result = nullptr;
    100102        }
  • icGREP/icgrep-devel/icgrep/kernels/pipeline/pipeline_logic.hpp

    r6274 r6275  
    4545        addConsumerKernelProperties(b, i);
    4646        addPopCountScalarsToPipelineKernel(b, i);
     47        addCycleCounterProperties(b, i);
    4748    }
    4849    b->setKernel(mPipelineKernel);
     
    5354 ** ------------------------------------------------------------------------------------------------------------- */
    5455inline void PipelineCompiler::addInternalKernelProperties(BuilderRef b, const unsigned kernelIndex) {
    55 
    56     IntegerType * const sizeTy = b->getSizeTy();
    57 
    58     const auto name = makeKernelName(kernelIndex);
    59 
    60     mPipelineKernel->addInternalScalar(sizeTy, name + LOGICAL_SEGMENT_SUFFIX);
    61 
    62     // TODO: non deferred item count for fixed rates could be calculated from total # of segments.
    6356    const Kernel * const kernel = mPipeline[kernelIndex];
    64     const auto numOfInputs = kernel->getNumOfStreamInputs();
    65     for (unsigned i = 0; i < numOfInputs; i++) {
    66         const Binding & input = kernel->getInputStreamSetBinding(i);
    67         const auto prefix = makeBufferName(kernelIndex, input);
    68         if (input.isDeferred()) {
    69             mPipelineKernel->addInternalScalar(sizeTy, prefix + DEFERRED_ITEM_COUNT_SUFFIX);
    70         }
    71         mPipelineKernel->addInternalScalar(sizeTy, prefix + ITEM_COUNT_SUFFIX);
    72     }
    73 
    74     const auto numOfOutputs = kernel->getNumOfStreamOutputs();
    75     for (unsigned i = 0; i < numOfOutputs; i++) {
    76         const Binding & output = kernel->getOutputStreamSetBinding(i);
    77         const auto prefix = makeBufferName(kernelIndex, output);
    78         mPipelineKernel->addInternalScalar(sizeTy, prefix + ITEM_COUNT_SUFFIX);
    79     }
    80 
    81     addInternalKernelCycleCountProperties(b, kernelIndex);
     57    if (requiresSynchronization(kernelIndex)) {
     58
     59        IntegerType * const sizeTy = b->getSizeTy();
     60        const auto name = makeKernelName(kernelIndex);
     61        mPipelineKernel->addInternalScalar(sizeTy, name + LOGICAL_SEGMENT_SUFFIX);
     62
     63        // TODO: non deferred item count for fixed rates could be calculated from total # of segments.
     64        const auto numOfInputs = kernel->getNumOfStreamInputs();
     65        for (unsigned i = 0; i < numOfInputs; i++) {
     66            const Binding & input = kernel->getInputStreamSetBinding(i);
     67            const auto prefix = makeBufferName(kernelIndex, input);
     68            if (input.isDeferred()) {
     69                mPipelineKernel->addInternalScalar(sizeTy, prefix + DEFERRED_ITEM_COUNT_SUFFIX);
     70            }
     71            // If we've proven we do not need synchronization then we've already proven that
     72            // we can calculate the item count and num of strides from the input item counts
     73            mPipelineKernel->addInternalScalar(sizeTy, prefix + ITEM_COUNT_SUFFIX);
     74        }
     75
     76        const auto numOfOutputs = kernel->getNumOfStreamOutputs();
     77        for (unsigned i = 0; i < numOfOutputs; i++) {
     78            const Binding & output = kernel->getOutputStreamSetBinding(i);
     79            const auto prefix = makeBufferName(kernelIndex, output);
     80                mPipelineKernel->addInternalScalar(sizeTy, prefix + ITEM_COUNT_SUFFIX);
     81        }
     82    }
    8283}
    8384
     
    9192    for (unsigned i = mFirstKernel; i < mLastKernel; ++i) {
    9293        Kernel * const kernel = mPipeline[i];
    93         if (!kernel->hasFamilyName()) {
     94        if (kernel->isStateful() && !kernel->hasFamilyName()) {
    9495            Value * const handle = kernel->createInstance(b);
    9596            b->setScalarField(makeKernelName(i), handle);
     
    100101    for (unsigned i = mFirstKernel; i < mLastKernel; ++i) {
    101102        setActiveKernel(b, i);
    102         args.resize(in_degree(i, mScalarDependencyGraph) + 1);
    103         args[0] = mKernel->getHandle();
     103        const auto hasHandle = mKernel->isStateful() ? 1U : 0U;
     104        args.resize(hasHandle + in_degree(i, mScalarDependencyGraph));
     105        if (LLVM_LIKELY(hasHandle != 0U)) {
     106            args[0] = mKernel->getHandle();
     107        }
    104108        b->setKernel(mPipelineKernel);
    105109        for (const auto ce : make_iterator_range(in_edges(i, mScalarDependencyGraph))) {
    106             const auto j = mScalarDependencyGraph[ce] + 1;
     110            const auto j = hasHandle + mScalarDependencyGraph[ce];
    107111            const auto pe = in_edge(source(ce, mScalarDependencyGraph), mScalarDependencyGraph);
    108112            const auto k = mScalarDependencyGraph[pe];
     
    242246void PipelineCompiler::generateFinalizeMethod(BuilderRef b) {
    243247    printOptionalCycleCounter(b);
     248    std::vector<Value *> params;
    244249    for (unsigned i = mFirstKernel; i < mLastKernel; ++i) {
    245250        setActiveKernel(b, i);
    246251        loadBufferHandles(b);
    247         mScalarDependencyGraph[i] = b->CreateCall(getFinalizeFunction(b), mKernel->getHandle());
     252        params.clear();
     253        if (LLVM_LIKELY(mKernel->isStateful())) {
     254            params.push_back(mKernel->getHandle());
     255        }
     256        mScalarDependencyGraph[i] = b->CreateCall(getFinalizeFunction(b), params);
    248257    }
    249258    releaseBuffers(b);
     
    321330 ** ------------------------------------------------------------------------------------------------------------- */
    322331void PipelineCompiler::acquireCurrentSegment(BuilderRef b) {
    323 
    324     b->setKernel(mPipelineKernel);
    325     const auto prefix = makeKernelName(mKernelIndex);
    326     const auto serialize = codegen::DebugOptionIsSet(codegen::SerializeThreads);
    327     const unsigned waitingOnIdx = serialize ? (mLastKernel - 1) : mKernelIndex;
    328     const auto waitingOn = makeKernelName(waitingOnIdx);
    329     Value * const waitingOnPtr = b->getScalarFieldPtr(waitingOn + LOGICAL_SEGMENT_SUFFIX);
    330     BasicBlock * const kernelWait = b->CreateBasicBlock(prefix + "Wait", mPipelineEnd);
    331     b->CreateBr(kernelWait);
    332 
    333     b->SetInsertPoint(kernelWait);
    334     Value * const processedSegmentCount = b->CreateAtomicLoadAcquire(waitingOnPtr);
    335     assert (processedSegmentCount->getType() == mSegNo->getType());
    336     Value * const ready = b->CreateICmpEQ(mSegNo, processedSegmentCount);
    337     BasicBlock * const kernelStart = b->CreateBasicBlock(prefix + "Start", mPipelineEnd);
    338     b->CreateCondBr(ready, kernelStart, kernelWait);
    339 
    340     b->SetInsertPoint(kernelStart);
    341     b->setKernel(mKernel);
     332    if (LLVM_LIKELY(requiresSynchronization(mKernelIndex))) {
     333
     334        b->setKernel(mPipelineKernel);
     335        const auto prefix = makeKernelName(mKernelIndex);
     336        const auto serialize = codegen::DebugOptionIsSet(codegen::SerializeThreads);
     337        const unsigned waitingOnIdx = serialize ? (mLastKernel - 1) : mKernelIndex;
     338        const auto waitingOn = makeKernelName(waitingOnIdx);
     339        Value * const waitingOnPtr = b->getScalarFieldPtr(waitingOn + LOGICAL_SEGMENT_SUFFIX);
     340        BasicBlock * const kernelWait = b->CreateBasicBlock(prefix + "Wait", mPipelineEnd);
     341        b->CreateBr(kernelWait);
     342
     343        b->SetInsertPoint(kernelWait);
     344        Value * const processedSegmentCount = b->CreateAtomicLoadAcquire(waitingOnPtr);
     345        assert (processedSegmentCount->getType() == mSegNo->getType());
     346        Value * const ready = b->CreateICmpEQ(mSegNo, processedSegmentCount);
     347        BasicBlock * const kernelStart = b->CreateBasicBlock(prefix + "Start", mPipelineEnd);
     348        b->CreateCondBr(ready, kernelStart, kernelWait);
     349
     350        b->SetInsertPoint(kernelStart);
     351        b->setKernel(mKernel);
     352    }
    342353}
    343354
     
    348359 ** ------------------------------------------------------------------------------------------------------------- */
    349360inline void PipelineCompiler::releaseCurrentSegment(BuilderRef b) {
    350     b->setKernel(mPipelineKernel);
    351     Value * const nextSegNo = b->CreateAdd(mSegNo, b->getSize(1));
    352     const auto prefix = makeKernelName(mKernelIndex);
    353     Value * const waitingOnPtr = b->getScalarFieldPtr(prefix + LOGICAL_SEGMENT_SUFFIX);
    354     b->CreateAtomicStoreRelease(nextSegNo, waitingOnPtr);
    355 }
     361    if (LLVM_LIKELY(requiresSynchronization(mKernelIndex))) {
     362        b->setKernel(mPipelineKernel);
     363        Value * const nextSegNo = b->CreateAdd(mSegNo, b->getSize(1));
     364        const auto prefix = makeKernelName(mKernelIndex);
     365        Value * const waitingOnPtr = b->getScalarFieldPtr(prefix + LOGICAL_SEGMENT_SUFFIX);
     366        b->CreateAtomicStoreRelease(nextSegNo, waitingOnPtr);
     367        b->setKernel(mKernel);
     368    }
     369}
     370
     371/** ------------------------------------------------------------------------------------------------------------- *
     372 * @brief requiresSynchronization
     373 ** ------------------------------------------------------------------------------------------------------------- */
     374bool PipelineCompiler::requiresSynchronization(const unsigned /* kernelIndex */) const {
     375    // TODO: Not quite ready yet: we need a function to calculate how many items
     376    // will be processed/produced by the i-th execution of this kernel based
     377    // strictly on the number of items produced by the (i-1)-th and the i-th
     378    // segment for each input to this kernel. Moreover, if we have static buffers,
     379    // we must statically know how many items will be consumed by any segment
     380    // based only on the value of i and/or the above-mentioned information.
     381    return true;
     382#if 0
     383    const Kernel * const kernel = mPipeline[kernelIndex];
     384    if (LLVM_LIKELY(kernel->isStateful())) {
     385        return true;
     386    }
     387    const auto numOfInputs = kernel->getNumOfStreamInputs();
     388    for (unsigned i = 0; i < numOfInputs; i++) {
     389        const Binding & input = kernel->getInputStreamSetBinding(i);
     390        if (!input.getRate().isFixed()) {
     391            return true;
     392        }
     393    }
     394    const auto numOfOutputs = kernel->getNumOfStreamOutputs();
     395    for (unsigned i = 0; i < numOfOutputs; i++) {
     396        const Binding & output = kernel->getOutputStreamSetBinding(i);
     397        if (!output.getRate().isFixed()) {
     398            return true;
     399        }
     400    }
     401    return false;
     402#endif
     403}
     404
    356405
    357406enum : unsigned {
  • icGREP/icgrep-devel/icgrep/pablo/carry_manager.cpp

    r6184 r6275  
    197197    mCarryInfo = &mCarryMetadata[0];
    198198    assert (!mCarryInfo->hasSummary());
    199 
    200     mCurrentFrame = b->getScalarFieldPtr("carries");
     199    if (LLVM_LIKELY(mKernel->isStateful())) {
     200        mCurrentFrame = b->getScalarFieldPtr("carries");
     201    } else {
     202        mCurrentFrame = nullptr;
     203    }
    201204    mCurrentFrameIndex = 0;
    202205    mCarryScopes = 0;
     
    211214    mCarrySummaryStack.push_back(Constant::getNullValue(carryTy));
    212215
    213     if (mHasLoop) {       
     216    if (mHasLoop) {
    214217        mLoopSelector = b->getScalarField("selector");
    215218        mNextLoopSelector = b->CreateXor(mLoopSelector, ConstantInt::get(mLoopSelector->getType(), 1));
     
    230233        b->setScalarField("CarryBlockIndex", idx);
    231234    }
    232     assert (mCarryFrameStack.empty());   
     235    assert (mCarryFrameStack.empty());
    233236    assert ("base summary value was deleted!" && mCarrySummaryStack.size() == 1);
    234237    assert ("base summary value was overwritten with non-zero value!" && isa<Constant>(mCarrySummaryStack[0]) && cast<Constant>(mCarrySummaryStack[0])->isNullValue());
     
    748751                if (LLVM_LIKELY(i == summarySize)) {
    749752                    const auto n = bitBlockTy->getVectorNumElements();
    750                     Constant * mask[n];                                       
     753                    Constant * mask[n];
    751754                    const auto m = udiv(summaryBlocks, laneWidth);
    752755                    if (m) {
     
    921924 ** ------------------------------------------------------------------------------------------------------------- */
    922925inline void CarryManager::addToCarryOutSummary(const std::unique_ptr<kernel::KernelBuilder> & b, Value * const value) {
    923     assert ("cannot add null summary value!" && value);   
     926    assert ("cannot add null summary value!" && value);
    924927    assert ("summary stack is empty!" && !mCarrySummaryStack.empty());
    925928    assert (mCarryInfo->hasSummary());
  • icGREP/icgrep-devel/icgrep/toolchain/cpudriver.cpp

    r6243 r6275  
    6767, mEngine(nullptr)
    6868#endif
     69, mPassManager(nullptr)
    6970, mUnoptimizedIROutputStream(nullptr)
    7071, mIROutputStream(nullptr)
     
    163164}
    164165
    165 inline legacy::PassManager CPUDriver::preparePassManager() {
    166 
    167     legacy::PassManager PM;
     166inline void CPUDriver::preparePassManager() {
     167
     168    if (mPassManager) return;
     169
     170    mPassManager = make_unique<legacy::PassManager>();
    168171
    169172    PassRegistry * Registry = PassRegistry::getPassRegistry();
     
    181184            }
    182185        }
    183         PM.add(createPrintModulePass(*mUnoptimizedIROutputStream));
     186        mPassManager->add(createPrintModulePass(*mUnoptimizedIROutputStream));
    184187    }
    185188    if (IN_DEBUG_MODE || LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::VerifyIR))) {
    186         PM.add(createVerifierPass());
    187     }
    188     PM.add(createDeadCodeEliminationPass());        // Eliminate any trivially dead code
    189     PM.add(createPromoteMemoryToRegisterPass());    // Promote stack variables to constants or PHI nodes
    190     PM.add(createCFGSimplificationPass());          // Remove dead basic blocks and unnecessary branch statements / phi nodes
    191     PM.add(createEarlyCSEPass());                   // Simple common subexpression elimination pass
    192     PM.add(createInstructionCombiningPass());       // Simple peephole optimizations and bit-twiddling.
    193     PM.add(createReassociatePass());                // Canonicalizes commutative expressions
    194     PM.add(createGVNPass());                        // Global value numbering redundant expression elimination pass
    195     PM.add(createCFGSimplificationPass());          // Repeat CFG Simplification to "clean up" any newly found redundant phi nodes
     189        mPassManager->add(createVerifierPass());
     190    }
     191    mPassManager->add(createDeadCodeEliminationPass());        // Eliminate any trivially dead code
     192    mPassManager->add(createPromoteMemoryToRegisterPass());    // Promote stack variables to constants or PHI nodes
     193    mPassManager->add(createCFGSimplificationPass());          // Remove dead basic blocks and unnecessary branch statements / phi nodes
     194    mPassManager->add(createEarlyCSEPass());                   // Simple common subexpression elimination pass
     195    mPassManager->add(createInstructionCombiningPass());       // Simple peephole optimizations and bit-twiddling.
     196    mPassManager->add(createReassociatePass());                // Canonicalizes commutative expressions
     197    mPassManager->add(createGVNPass());                        // Global value numbering redundant expression elimination pass
     198    mPassManager->add(createCFGSimplificationPass());          // Repeat CFG Simplification to "clean up" any newly found redundant phi nodes
    196199    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    197         PM.add(createRemoveRedundantAssertionsPass());
    198         PM.add(createDeadCodeEliminationPass());
    199         PM.add(createCFGSimplificationPass());
     200        mPassManager->add(createRemoveRedundantAssertionsPass());
     201        mPassManager->add(createDeadCodeEliminationPass());
     202        mPassManager->add(createCFGSimplificationPass());
    200203    }
    201204    if (LLVM_UNLIKELY(!codegen::TraceOption.empty())) {
    202         PM.add(createTracePass(iBuilder.get(), codegen::TraceOption));
     205        mPassManager->add(createTracePass(iBuilder.get(), codegen::TraceOption));
    203206    }
    204207    if (LLVM_UNLIKELY(codegen::ShowIROption != codegen::OmittedOption)) {
     
    211214            }
    212215        }
    213         PM.add(createPrintModulePass(*mIROutputStream));
     216        mPassManager->add(createPrintModulePass(*mIROutputStream));
    214217    }
    215218    #if LLVM_VERSION_INTEGER >= LLVM_VERSION_CODE(3, 7, 0)
     
    221224            mASMOutputStream = make_unique<raw_fd_ostream>(STDERR_FILENO, false, true);
    222225        }
    223         if (LLVM_UNLIKELY(mTarget->addPassesToEmitFile(PM, *mASMOutputStream, TargetMachine::CGFT_AssemblyFile))) {
     226        if (LLVM_UNLIKELY(mTarget->addPassesToEmitFile(*mPassManager, *mASMOutputStream, TargetMachine::CGFT_AssemblyFile))) {
    224227            report_fatal_error("LLVM error: could not add emit assembly pass");
    225228        }
    226229    }
    227230    #endif
    228 
    229     return PM;
    230231}
    231232
    232233void CPUDriver::generateUncachedKernels() {
    233234    if (mUncachedKernel.empty()) return;
    234     auto PM = preparePassManager();
     235    preparePassManager();
    235236    for (auto & kernel : mUncachedKernel) {
    236237        kernel->prepareKernel(iBuilder);
     
    241242        Module * const module = kernel->getModule(); assert (module);
    242243        module->setTargetTriple(mMainModule->getTargetTriple());
    243         PM.run(*module);
     244        mPassManager->run(*module);
    244245        mCachedKernel.emplace_back(kernel.release());
    245246    }
     
    325326    static char ID;
    326327    TracePass(kernel::KernelBuilder * kb, StringRef to_trace) : ModulePass(ID), iBuilder(kb), mToTrace(to_trace) { }
    327    
     328
    328329    bool addTraceStmt(BasicBlock * BB, BasicBlock::iterator to_trace, BasicBlock::iterator insert_pt) {
    329330        bool modified = false;
     
    342343        return modified;
    343344    }
    344    
     345
    345346    virtual bool runOnModule(Module &M) override;
    346347private:
     
    385386    return new TracePass(iBuilder.get(), to_trace);
    386387}
    387                    
     388
  • icGREP/icgrep-devel/icgrep/toolchain/cpudriver.h

    r6220 r6275  
    4747
    4848    bool hasExternalFunction(const llvm::StringRef functionName) const override;
    49    
     49
    5050    llvm::ModulePass * createTracePass(kernel::KernelBuilder * kb, llvm::StringRef to_trace);
    5151
     
    5555    std::string getMangledName(std::string s);
    5656
    57     llvm::legacy::PassManager preparePassManager();
     57    void preparePassManager();
    5858
    5959    llvm::Function * addLinkFunction(llvm::Module * mod, llvm::StringRef name, llvm::FunctionType * type, void * functionPtr) const override;
     
    6767    llvm::ExecutionEngine *                                 mEngine;
    6868    #endif
     69    std::unique_ptr<llvm::legacy::PassManager>              mPassManager;
    6970    std::unique_ptr<llvm::raw_fd_ostream>                   mUnoptimizedIROutputStream;
    7071    std::unique_ptr<llvm::raw_fd_ostream>                   mIROutputStream;
Note: See TracChangeset for help on using the changeset viewer.