Changeset 5639


Ignore:
Timestamp:
Sep 15, 2017, 8:08:01 AM (21 months ago)
Author:
cameron
Message:

Fixes for multiblock kernel builder

Location:
icGREP/icgrep-devel/icgrep/kernels
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5638 r5639  
    921921    Value * processedItemCount[inputSetCount];
    922922    Value * inputBlockPtr[inputSetCount];
    923     std::vector<Value *> producedItemCount;
    924     std::vector<Value *> outputBlockPtr;
    925 
    926     //  Now determine the linearly available blocks, based on blocks remaining reduced
    927     //  by limitations of linearly available input buffer space.
     923    Value * linearlyAvailItems[inputSetCount];
     924
    928925    Value * linearlyAvailStrides = stridesRemaining;
    929926    for (unsigned i = 0; i < inputSetCount; i++) {
    930         Value * p = kb->getProcessedItemCount(mStreamSetInputs[i].name);
    931         Value * blkNo = kb->CreateUDiv(p, blockSize);
    932         Value * b = kb->getInputStreamBlockPtr(mStreamSetInputs[i].name, kb->getInt32(0));
    933         // processedItemCount.push_back(p);
    934         processedItemCount[i] = p;
    935         // inputBlockPtr.push_back(b);
    936         inputBlockPtr[i] = b;
     927        processedItemCount[i] = kb->getProcessedItemCount(mStreamSetInputs[i].name);
     928        inputBlockPtr[i] = kb->getInputStreamBlockPtr(mStreamSetInputs[i].name, kb->getInt32(0));
     929        Value * avail = kb->CreateSub(mAvailableItemCount[i], processedItemCount[i]);
     930        Value * linearlyAvail = kb->getLinearlyAccessibleItems(mStreamSetInputs[i].name, processedItemCount[i]);
     931        linearlyAvailItems[i] = kb->CreateSelect(kb->CreateICmpULT(avail, linearlyAvail), avail, linearlyAvail);
    937932        auto & rate = mStreamSetInputs[i].rate;
    938933        if (rate.isUnknownRate()) continue;  // No calculation possible for unknown rates.
    939         Value * linearlyAvailItems = kb->CreateMul(kb->getLinearlyAccessibleBlocks(mStreamSetInputs[i].name, blkNo), blockSize);
    940         Value * maxReferenceItems = rate.CreateMaxReferenceItemsCalculation(kb.get(), linearlyAvailItems);
     934        linearlyAvailItems[i] = kb->getLinearlyAccessibleItems(mStreamSetInputs[i].name, processedItemCount[i]);
     935        Value * maxReferenceItems = rate.CreateMaxReferenceItemsCalculation(kb.get(), linearlyAvailItems[i]);
    941936        Value * maxStrides = kb->CreateUDiv(maxReferenceItems, strideSize);
    942937        linearlyAvailStrides = kb->CreateSelect(kb->CreateICmpULT(maxStrides, linearlyAvailStrides), maxStrides, linearlyAvailStrides);
    943938    }
    944939
     940    Value * producedItemCount[outputSetCount];
     941    Value * outputBlockPtr[outputSetCount];
    945942    //  Now determine the linearly writeable blocks, based on available blocks reduced
    946943    //  by limitations of output buffer space.
    947944    Value * linearlyWritableStrides = linearlyAvailStrides;
    948945    for (unsigned i = 0; i < outputSetCount; i++) {
    949         Value * p = kb->getProducedItemCount(mStreamSetOutputs[i].name);
    950         Value * blkNo = kb->CreateUDiv(p, blockSize);
    951         Value * b = kb->getOutputStreamBlockPtr(mStreamSetOutputs[i].name, kb->getInt32(0));
    952         producedItemCount.push_back(p);
    953         outputBlockPtr.push_back(b);
     946        producedItemCount[i] = kb->getProducedItemCount(mStreamSetOutputs[i].name);
     947        outputBlockPtr[i] = kb->getOutputStreamBlockPtr(mStreamSetOutputs[i].name, kb->getInt32(0));
     948       
    954949        auto & rate = mStreamSetOutputs[i].rate;
    955950        if (rate.isUnknownRate()) continue;  // No calculation possible for unknown rates.
    956         Value * writableItems = kb->CreateMul(kb->getLinearlyWritableBlocks(mStreamSetOutputs[i].name, blkNo), blockSize);
     951        Value * writableItems = kb->getLinearlyWritableItems(mStreamSetOutputs[i].name, producedItemCount[i]);
    957952        Value * maxReferenceItems = rate.CreateMaxReferenceItemsCalculation(kb.get(), writableItems);
    958953        Value * maxStrides = kb->CreateUDiv(maxReferenceItems, strideSize);
     
    966961    kb->SetInsertPoint(doMultiBlockCall);
    967962
    968     Value * linearlyAvailItems = kb->CreateMul(linearlyWritableStrides, strideSize);
     963    Value * principalItemsToDo = kb->CreateMul(linearlyWritableStrides, strideSize);
    969964
    970965    std::vector<Value *> doMultiBlockArgs;
    971966    doMultiBlockArgs.push_back(getInstance());
    972     doMultiBlockArgs.push_back(linearlyAvailItems);
     967    doMultiBlockArgs.push_back(principalItemsToDo);
    973968    for (unsigned i = 1; i < mStreamSetInputs.size(); i++) {
    974969        if (!mIsDerived[i]) {
    975             Value * avail = kb->CreateSub(mAvailableItemCount[i], processedItemCount[i]);
    976             Value * linearlyAvail = kb->getLinearlyAccessibleItems(mStreamSetInputs[i].name, processedItemCount[i]);
    977             doMultiBlockArgs.push_back(kb->CreateSelect(kb->CreateICmpULT(avail, linearlyAvail), avail, linearlyAvail));
     970            doMultiBlockArgs.push_back(linearlyAvailItems[i]);
    978971        }
    979972    }
     
    10311024        Value * reducedStridesToDo = kb->CreateSub(stridesRemaining, linearlyWritableStrides);
    10321025        stridesRemaining->addIncoming(reducedStridesToDo, kb->GetInsertBlock());
    1033         Value * nowProcessed = kb->CreateAdd(processedItemCount[0], linearlyAvailItems);
     1026        Value * nowProcessed = kb->CreateAdd(processedItemCount[0], principalItemsToDo);
    10341027        kb->setProcessedItemCount(mStreamSetInputs[0].name, nowProcessed);
    10351028        kb->CreateBr(doSegmentOuterLoop);
  • icGREP/icgrep-devel/icgrep/kernels/pdep_kernel.cpp

    r5638 r5639  
    4040    // Get pointer to start of the output StreamSetBlock we're currently writing to
    4141    Value * outputStreamPtr = &*(args);
    42    
     42
    4343    Constant * blockWidth = kb->getSize(kb->getBitBlockWidth());
    4444    Value * blocksToDo = kb->CreateUDivCeil(itemsToDo, blockWidth); // 1 if this is the final block
     
    134134    Value * itemsDone = kb->CreateMul(blockOffsetPhi, blockWidth);
    135135    itemsDone = kb->CreateSelect(kb->CreateICmpULT(itemsToDo, itemsDone), itemsToDo, itemsDone);
    136     kb->setProcessedItemCount("PDEPmarkerStream", kb->CreateAdd(itemsDone, kb->getProcessedItemCount("PDEPmarkerStream")));
     136    kb->setProcessedItemCount("PDEPmarkerStream", kb->CreateAdd(itemsDone, kb->getProcessedItemCount("PDEPmarkerStream")));   
    137137    kb->setProcessedItemCount("sourceStreamSet", updatedProcessedBitsPhi);   
    138138}
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r5620 r5639  
    141141
    142142Value * StreamSetBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition, bool reverse) const {
    143     if (isa<ArrayType>(mType) && dyn_cast<ArrayType>(mType)->getNumElements() > 1) {
    144         Constant * stride = iBuilder->getSize(iBuilder->getStride());
    145         Value * strideRem = iBuilder->CreateURem(fromPosition, stride);
    146         if (reverse) {
    147             return iBuilder->CreateSelect(iBuilder->CreateICmpEQ(strideRem, iBuilder->getSize(0)), stride, strideRem);
    148         }
    149         else return iBuilder->CreateSub(stride, strideRem);
    150     } else {
    151         Constant * bufSize = iBuilder->getSize(mBufferBlocks * iBuilder->getStride());
    152         Value * bufRem = iBuilder->CreateURem(fromPosition, bufSize);
    153         if (reverse) {
    154             return iBuilder->CreateSelect(iBuilder->CreateICmpEQ(bufRem, iBuilder->getSize(0)), bufSize, bufRem);
    155         }
    156         else return iBuilder->CreateSub(bufSize, bufRem, "linearItems");
    157     }
     143    Constant * bufSize = iBuilder->getSize(mBufferBlocks * iBuilder->getStride());
     144    Value * bufRem = iBuilder->CreateURem(fromPosition, bufSize);
     145    if (reverse) {
     146        return iBuilder->CreateSelect(iBuilder->CreateICmpEQ(bufRem, iBuilder->getSize(0)), bufSize, bufRem);
     147    }
     148    else return iBuilder->CreateSub(bufSize, bufRem, "linearItems");
    158149}
    159150
     
    663654Value * DynamicBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * handle, Value * fromPosition, bool reverse) const {
    664655    Constant * blockSize = b->getSize(b->getBitBlockWidth());
    665     if (isa<ArrayType>(mType) && dyn_cast<ArrayType>(mType)->getNumElements() > 1) {
    666         Value * blockRem = b->CreateURem(fromPosition, blockSize);
    667         if (reverse) {
    668             return b->CreateSelect(b->CreateICmpEQ(blockRem, b->getSize(0)), blockSize, blockRem);
    669         }
    670         else return b->CreateSub(blockSize, blockRem);
    671     } else {
    672         Value * const bufBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))}));
    673         Value * bufSize = b->CreateMul(bufBlocks, blockSize);
    674         Value * bufRem = b->CreateURem(fromPosition, bufSize);
    675         if (reverse) {
    676             return b->CreateSelect(b->CreateICmpEQ(bufRem, b->getSize(0)), bufSize, bufRem);
    677         }
    678         else return b->CreateSub(bufSize, bufRem, "linearItems");
    679     }
     656    Value * const bufBlocks = b->CreateLoad(b->CreateGEP(handle, {b->getInt32(0), b->getInt32(int(Field::WorkingBlocks))}));
     657    Value * bufSize = b->CreateMul(bufBlocks, blockSize);
     658    Value * bufRem = b->CreateURem(fromPosition, bufSize);
     659    if (reverse) {
     660        return b->CreateSelect(b->CreateICmpEQ(bufRem, b->getSize(0)), bufSize, bufRem);
     661    }
     662    else return b->CreateSub(bufSize, bufRem, "linearItems");
     663}
     664
     665Value * DynamicBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * handle, Value * fromPosition, bool reverse) const {
     666    Value * accessibleItems = getLinearlyAccessibleItems(b, handle, fromPosition, reverse);
     667    if (reverse || (mOverflowBlocks == 0))  return accessibleItems;
     668    return b->CreateAdd(accessibleItems, b->getSize(mOverflowBlocks * b->getBitBlockWidth()));
    680669}
    681670
  • icGREP/icgrep-devel/icgrep/kernels/streamset.h

    r5618 r5639  
    314314    llvm::Value * getLinearlyAccessibleBlocks(IDISA::IDISA_Builder * const iBuilder, llvm::Value * self, llvm::Value * fromBlock, bool reverse = false) const override;
    315315
     316    virtual llvm::Value * getLinearlyWritableItems(IDISA::IDISA_Builder * const iBuilder, llvm::Value * self, llvm::Value * fromPosition, bool reverse = false) const;
     317   
    316318    void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) override;
    317319
Note: See TracChangeset for help on using the changeset viewer.