Ignore:
Timestamp:
Jun 21, 2017, 1:23:59 PM (2 years ago)
Author:
cameron
Message:

Fixes for processing rates; multiblock kernel builder

Location:
icGREP/icgrep-devel/icgrep/kernels
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/interface.h

    r5454 r5522  
    1212
    1313namespace IDISA { class IDISA_Builder; }
     14namespace kernel { class Kernel; }
    1415namespace kernel { class KernelBuilder; }
    1516
     
    3334
    3435struct ProcessingRate  {
     36    friend class kernel::Kernel;
    3537    enum class ProcessingRateKind : uint8_t { FixedRatio, RoundUp, Add1, MaxRatio, Unknown };
    3638    ProcessingRateKind getKind() const {return mKind;}
     
    5456    ProcessingRate(ProcessingRateKind k, unsigned numerator, unsigned denominator, std::string && referenceStreamSet)
    5557    : mKind(k), mRatioNumerator(numerator), mRatioDenominator(denominator), mReferenceStreamSet(referenceStreamSet) {}
     58    void setReferenceStreamSet(const std::string & s) {mReferenceStreamSet = s;}
    5659private:
    5760    const ProcessingRateKind mKind;
    5861    const uint16_t mRatioNumerator;
    5962    const uint16_t mRatioDenominator;
    60     const std::string mReferenceStreamSet;
     63    std::string mReferenceStreamSet;
    6164};
    6265
     
    7275    llvm::Type * const        type;
    7376    const std::string         name;
    74     const ProcessingRate      rate;
     77    ProcessingRate      rate;
    7578};
    7679
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5507 r5522  
    1818#include <sstream>
    1919#include <kernels/kernel_builder.h>
     20#include <llvm/Support/Debug.h>
    2021
    2122using namespace llvm;
     
    141142
    142143    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
     144        // Default reference stream set is the principal input stream set.
     145        if (mStreamSetInputs[i].rate.referenceStreamSet() == "") {
     146            mStreamSetInputs[i].rate.setReferenceStreamSet(mStreamSetInputs[0].name);
     147        }
    143148        if ((mStreamSetInputBuffers[i]->getBufferBlocks() != 0) && (mStreamSetInputBuffers[i]->getBufferBlocks() < requiredBlocks)) {
    144149            report_fatal_error(getName() + ": " + mStreamSetInputs[i].name + " requires buffer size " + std::to_string(requiredBlocks));
     
    152157    IntegerType * const sizeTy = idb->getSizeTy();
    153158    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
     159        // Default reference stream set is the principal input stream set for the principal output stream set.
     160        // Default reference stream set is the principal output stream set for other output stream sets.
     161        if (mStreamSetOutputs[i].rate.referenceStreamSet() == "") {
     162            if ((mStreamSetInputs.size() > 0) && (i == 0)) {
     163                mStreamSetOutputs[i].rate.setReferenceStreamSet(mStreamSetInputs[0].name);
     164            }
     165            else {
     166                mStreamSetOutputs[i].rate.setReferenceStreamSet(mStreamSetOutputs[0].name);
     167            }
     168
     169        }
    154170        mScalarInputs.emplace_back(mStreamSetOutputBuffers[i]->getPointerType(), mStreamSetOutputs[i].name + BUFFER_PTR_SUFFIX);
    155171        if ((mStreamSetInputs.empty() && (i == 0)) || !mStreamSetOutputs[i].rate.isExact()) {
     
    663679    itemsPerStride[0] = mStride;
    664680    isDerived[0] = true;
    665    
    666681    for (unsigned i = 1; i < inputSetCount; i++) {
    667682        auto & rate = mStreamSetInputs[i].rate;
    668683        std::string refSet = mStreamSetInputs[i].rate.referenceStreamSet();
    669684        if (rate.isExact()) {
    670             if (refSet.empty()) {
    671                 itemsPerStride[i] = rate.calculateRatio(itemsPerStride[0]);
     685            Port port; unsigned ssIdx;
     686            std::tie(port, ssIdx) = getStreamPort(refSet);
     687            assert (port == Port::Input && ssIdx < i);
     688            if ((ssIdx == 0) || isDerived[ssIdx]) {
     689                itemsPerStride[i] = rate.calculateRatio(itemsPerStride[ssIdx]);
    672690                isDerived[i] = true;
    673691                continue;
    674692            }
    675             else {
    676                 Port port; unsigned ssIdx;
    677                 std::tie(port, ssIdx) = getStreamPort(mStreamSetInputs[i].name);
    678                 assert (port == Port::Input && ssIdx < i);
    679                 if (isDerived[ssIdx]) {
    680                     itemsPerStride[i] = rate.calculateRatio(itemsPerStride[ssIdx]);
    681                     isDerived[i] = true;
    682                     continue;
    683                 }
    684             }
    685693        }
    686694        isDerived[i] = false;
    687695    }
    688    
    689696    for (auto & ss : mStreamSetOutputs) {
    690697        unsigned i = inputSetCount;
     
    692699        std::string refSet = rate.referenceStreamSet();
    693700        if (rate.isExact() || rate.isMaxRatio()) {
    694             if (refSet.empty()) {
    695                 itemsPerStride[i] = rate.calculateRatio(bitBlockWidth);
     701            Port port; unsigned ssIdx;
     702            std::tie(port, ssIdx) = getStreamPort(refSet);
     703            if (port == Port::Output) ssIdx += inputSetCount;
     704            if ((ssIdx == 0) || isDerived[ssIdx]) {
     705                itemsPerStride[i] = rate.calculateRatio(itemsPerStride[ssIdx]);
    696706                isDerived[i] = rate.isExact();
    697707                continue;
    698708            }
    699             else {
    700                 Port port; unsigned ssIdx;
    701                 std::tie(port, ssIdx) = getStreamPort(mStreamSetOutputs[i].name);
    702                 if (port == Port::Output) ssIdx += inputSetCount;
    703                 if (isDerived[ssIdx]) {
    704                     itemsPerStride[i] = rate.calculateRatio(itemsPerStride[ssIdx]);
    705                     isDerived[i] = rate.isExact();
    706                     continue;
    707                 }
    708             }
    709709        }
    710710        isDerived[i] = false;
     
    712712    }
    713713    int maxBlocksToCopy[totalSetCount];
    714    
    715714    for (unsigned i = 0; i < totalSetCount; i++) {
    716715        if (isDerived[i]) {
     
    730729    }
    731730    auto ip = kb->saveIP();
    732    
    733731    Function * const cp = mCurrentMethod;
    734732    const auto saveInstance = getInstance();
     
    793791
    794792    Value * blockBaseMask = kb->CreateNot(kb->getSize(kb->getBitBlockWidth() - 1));
    795 
    796793    //
    797794    // Define and allocate the temporary buffer area.
     
    817814
    818815    //  Make sure that corresponding data is available depending on processing rate
    819     //  for all input stream sets.
    820 
     816    //  for all derived input stream sets.
    821817    for (unsigned i = 1; i < mStreamSetInputs.size(); i++) {
    822818        Value * a = mAvailableItemCount[i];
    823819        auto & rate = mStreamSetInputs[i].rate;
    824         assert (((rate.referenceStreamSet().empty()) || (rate.referenceStreamSet() == mStreamSetInputs[0].name)) && "Multiblock kernel input rate not with respect to principal stream.");
    825         Value * maxItems = rate.CreateMaxReferenceItemsCalculation(kb.get(), a);
    826         itemsAvail = kb->CreateSelect(kb->CreateICmpULT(itemsAvail, maxItems), itemsAvail, maxItems);
     820        if (isDerived[i]) {
     821            Value * maxItems = rate.CreateMaxReferenceItemsCalculation(kb.get(), a);
     822            itemsAvail = kb->CreateSelect(kb->CreateICmpULT(itemsAvail, maxItems), itemsAvail, maxItems);
     823        }
    827824    }
    828825
     
    857854
    858855    Value * linearlyAvailStrides = stridesRemaining;
    859 
    860856    for (unsigned i = 0; i < inputSetCount; i++) {
    861857        Value * p = kb->getProcessedItemCount(mStreamSetInputs[i].name);
     
    867863            auto & rate = mStreamSetInputs[i].rate;
    868864            Value * maxReferenceItems = nullptr;
    869             if ((rate.isFixedRatio()) && (rate.getRatioNumerator() == rate.getRatioDenominator()) && (rate.referenceStreamSet() == "")) {
     865            if ((rate.isFixedRatio()) && (rate.getRatioNumerator() == rate.getRatioDenominator())) {
    870866                maxReferenceItems = kb->CreateMul(kb->getLinearlyAccessibleBlocks(mStreamSetInputs[i].name, blkNo), blockSize);
    871867
     
    881877    //  by limitations of output buffer space.
    882878    Value * linearlyWritableStrides = linearlyAvailStrides;
    883 
    884879    for (unsigned i = 0; i < outputSetCount; i++) {
    885880        Value * p = kb->getProducedItemCount(mStreamSetOutputs[i].name);
     
    901896        }
    902897    }
    903 
    904898    Value * haveStrides = kb->CreateICmpUGT(linearlyWritableStrides, kb->getSize(0));
    905899    kb->CreateCondBr(haveStrides, doMultiBlockCall, tempBlockCheck);
     
    999993    tempArgs.push_back(getInstance());
    1000994    tempArgs.push_back(tempBlockItems);
    1001    
    1002995    // For non-derived inputs, add the available items.
    1003996    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
     
    10071000        }
    10081001    }
    1009 
    10101002    // Prepare the temporary buffer area.
    10111003    //
     
    10151007    // For each input and output buffer, copy over necessary data starting from the last
    10161008    // block boundary.
    1017     std::vector<Value *> finalItemCountNeeded;
    1018     finalItemCountNeeded.push_back(kb->CreateAdd(processedItemCount[0], tempBlockItems));
     1009    Value * finalItemCountNeeded[inputSetCount];
     1010    finalItemCountNeeded[0] = kb->CreateAdd(processedItemCount[0], tempBlockItems);
    10191011
    10201012    for (unsigned i = 0; i < mStreamSetInputBuffers.size(); i++) {
    1021         Value * tempBufPtr = kb->CreateGEP(tempParameterArea, {kb->getInt32(0), kb->getInt32(i)});
    10221013        Type * bufPtrType = mStreamSetInputBuffers[i]->getPointerType();
    1023         tempBufPtr = kb->CreatePointerCast(tempBufPtr, bufPtrType);
    1024         ConstantInt * strideItems = kb->getSize(itemsPerStride[i]);
    1025         Value * strideBasePos = kb->CreateSub(processedItemCount[i], kb->CreateURem(processedItemCount[i], strideItems));
    1026         Value * blockBasePos = (itemsPerStride[i] % bitBlockWidth == 0) ? strideBasePos : kb->CreateAnd(strideBasePos, blockBaseMask);
    1027 
    1028         // The number of items to copy is determined by the processing rate requirements.
    1029         if (i > 1) {
    1030             auto & rate = mStreamSetInputs[i].rate;
    1031             std::string refSet = mStreamSetInputs[i].rate.referenceStreamSet();
    1032             if (refSet.empty()) {
    1033                 finalItemCountNeeded.push_back(rate.CreateRatioCalculation(kb.get(), finalItemCountNeeded[0], doFinal));
    1034             }
    1035             else if (isDerived[i]) {
     1014        if (isDerived[i]) {
     1015            Value * tempBufPtr = kb->CreateGEP(tempParameterArea, {kb->getInt32(0), kb->getInt32(i)});
     1016            tempBufPtr = kb->CreatePointerCast(tempBufPtr, bufPtrType);
     1017            ConstantInt * strideItems = kb->getSize(itemsPerStride[i]);
     1018            Value * strideBasePos = kb->CreateSub(processedItemCount[i], kb->CreateURem(processedItemCount[i], strideItems));
     1019            Value * blockBasePos = (itemsPerStride[i] % bitBlockWidth == 0) ? strideBasePos : kb->CreateAnd(strideBasePos, blockBaseMask);
     1020
     1021            // The number of items to copy is determined by the processing rate requirements.
     1022            if (i > 1) {
     1023                auto & rate = mStreamSetInputs[i].rate;
     1024                std::string refSet = mStreamSetInputs[i].rate.referenceStreamSet();
    10361025                Port port; unsigned ssIdx;
    10371026                std::tie(port, ssIdx) = getStreamPort(mStreamSetInputs[i].name);
    1038                 assert (port == Port::Input && ssIdx < i);
    1039                 finalItemCountNeeded.push_back(rate.CreateRatioCalculation(kb.get(), finalItemCountNeeded[ssIdx], doFinal));
     1027                finalItemCountNeeded[i] = rate.CreateRatioCalculation(kb.get(), finalItemCountNeeded[ssIdx], doFinal);
     1028            }
     1029           
     1030            Value * inputPtr = kb->CreatePointerCast(kb->getRawInputPointer(mStreamSetInputs[i].name, kb->getInt32(0), blockBasePos), bufPtrType);
     1031           
     1032            if (maxBlocksToCopy[i] == 1) {
     1033                // copy one block
     1034                mStreamSetInputBuffers[i]->createBlockCopy(kb.get(), tempBufPtr, inputPtr, kb->getSize(1));
    10401035            }
    10411036            else {
    1042                 // Ensure that there is up to a full stride of items, if available.
    1043                 Value * avail = kb->CreateSub(mAvailableItemCount[i], processedItemCount[i]);
    1044                 finalItemCountNeeded.push_back(kb->CreateSelect(kb->CreateICmpULT(avail, strideItems), avail, strideItems));
     1037                Value * neededItems = kb->CreateSub(finalItemCountNeeded[i], blockBasePos);
     1038                Value * availFromBase = kb->getLinearlyAccessibleItems(mStreamSetInputs[i].name, blockBasePos);
     1039                Value * allAvail = kb->CreateICmpULE(neededItems, availFromBase);
     1040                Value * copyItems1 = kb->CreateSelect(allAvail, neededItems, availFromBase);
     1041                mStreamSetInputBuffers[i]->createBlockAlignedCopy(kb.get(), tempBufPtr, inputPtr, copyItems1);
     1042                BasicBlock * copyRemaining = kb->CreateBasicBlock("copyRemaining");
     1043                BasicBlock * copyDone = kb->CreateBasicBlock("copyDone");
     1044                kb->CreateCondBr(allAvail, copyDone, copyRemaining);
     1045                kb->SetInsertPoint(copyRemaining);
     1046                Value * copyItems2 = kb->CreateSub(neededItems, copyItems1);
     1047                Value * nextBasePos = kb->CreateAdd(blockBasePos, copyItems1);
     1048                Value * nextInputPtr = kb->CreatePointerCast(kb->getRawInputPointer(mStreamSetInputs[i].name, kb->getInt32(0), nextBasePos), bufPtrType);
     1049                Value * nextBufPtr = kb->CreateGEP(tempBufPtr, kb->CreateUDiv(copyItems1, blockSize));
     1050                mStreamSetInputBuffers[i]->createBlockAlignedCopy(kb.get(), nextBufPtr, nextInputPtr, copyItems2);
     1051                kb->CreateBr(copyDone);
     1052                kb->SetInsertPoint(copyDone);
    10451053            }
    1046         }
    1047        
    1048         Value * inputPtr = kb->CreatePointerCast(kb->getRawInputPointer(mStreamSetInputs[i].name, kb->getInt32(0), blockBasePos), bufPtrType);
    1049        
    1050         if (maxBlocksToCopy[i] == 1) {
    1051             // copy one block
    1052             mStreamSetInputBuffers[i]->createBlockCopy(kb.get(), tempBufPtr, inputPtr, kb->getSize(1));
     1054            Value * itemAddress = kb->getRawInputPointer(mStreamSetInputs[i].name, kb->getInt32(0), processedItemCount[i]);
     1055            itemAddress = kb->CreatePtrToInt(itemAddress, intAddrTy);
     1056            Value * baseAddress = inputBlockPtr[i];
     1057            baseAddress = kb->CreatePtrToInt(baseAddress, intAddrTy);
     1058            Value * tempAddress = kb->CreateAdd(kb->CreatePtrToInt(tempBufPtr, intAddrTy), kb->CreateSub(itemAddress, baseAddress));
     1059            tempArgs.push_back(kb->CreateIntToPtr(tempAddress, bufPtrType));
    10531060        }
    10541061        else {
    1055             Value * neededItems = kb->CreateSub(finalItemCountNeeded[i], blockBasePos);
    1056             Value * availFromBase = kb->getLinearlyAccessibleItems(mStreamSetInputs[i].name, blockBasePos);
    1057             Value * allAvail = kb->CreateICmpULE(neededItems, availFromBase);
    1058             Value * copyItems1 = kb->CreateSelect(allAvail, neededItems, availFromBase);
    1059             mStreamSetInputBuffers[i]->createBlockAlignedCopy(kb.get(), tempBufPtr, inputPtr, copyItems1);
    1060             BasicBlock * copyRemaining = kb->CreateBasicBlock("copyRemaining");
    1061             BasicBlock * copyDone = kb->CreateBasicBlock("copyDone");
    1062             kb->CreateCondBr(allAvail, copyDone, copyRemaining);
    1063             kb->SetInsertPoint(copyRemaining);
    1064             Value * copyItems2 = kb->CreateSub(neededItems, copyItems1);
    1065             Value * nextBasePos = kb->CreateAdd(blockBasePos, copyItems1);
    1066             Value * nextInputPtr = kb->CreatePointerCast(kb->getRawInputPointer(mStreamSetInputs[i].name, kb->getInt32(0), nextBasePos), bufPtrType);
    1067             Value * nextBufPtr = kb->CreateGEP(tempBufPtr, kb->CreateUDiv(copyItems1, blockSize));
    1068             mStreamSetInputBuffers[i]->createBlockAlignedCopy(kb.get(), nextBufPtr, nextInputPtr, copyItems2);
    1069             kb->CreateBr(copyDone);
    1070             kb->SetInsertPoint(copyDone);
    1071         }
    1072         Value * itemAddress = kb->getRawInputPointer(mStreamSetInputs[i].name, kb->getInt32(0), processedItemCount[i]);
    1073         itemAddress = kb->CreatePtrToInt(itemAddress, intAddrTy);
    1074         Value * baseAddress = inputBlockPtr[i];
    1075         baseAddress = kb->CreatePtrToInt(baseAddress, intAddrTy);
    1076         Value * tempAddress = kb->CreateAdd(kb->CreatePtrToInt(tempBufPtr, intAddrTy), kb->CreateSub(itemAddress, baseAddress));
    1077         tempArgs.push_back(kb->CreateIntToPtr(tempAddress, bufPtrType));
    1078     }
    1079 
     1062            Value * bufPtr = kb->getRawInputPointer(mStreamSetInputs[i].name, kb->getInt32(0), processedItemCount[i]);
     1063            bufPtr = kb->CreatePointerCast(bufPtr, mStreamSetInputBuffers[i]->getPointerType());
     1064            tempArgs.push_back(bufPtr);           
     1065        }
     1066    }
    10801067    Value * outputBasePos[outputSetCount];
    10811068    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); i++) {
  • icGREP/icgrep-devel/icgrep/kernels/kernel_builder.cpp

    r5501 r5522  
    5454    std::tie(port, index) = mKernel->getStreamPort(name);
    5555    assert (port == Kernel::Port::Output);
    56     const auto rate = mKernel->getStreamOutput(index).rate;
    57     if (rate.isExact()) {
    58         const auto & refSet = rate.referenceStreamSet();
    59         std::string principalField;
    60         if (refSet.empty()) {
    61             if (mKernel->getStreamInputs().empty()) {
    62                 principalField = mKernel->getStreamOutput(0).name + Kernel::PRODUCED_ITEM_COUNT_SUFFIX;
    63             } else {
    64                 principalField = mKernel->getStreamInput(0).name + Kernel::PROCESSED_ITEM_COUNT_SUFFIX;
    65             }
     56    const auto & rate = mKernel->getStreamOutput(index).rate;
     57    const auto & refSet = rate.referenceStreamSet();
     58    if ((refSet != name) && rate.isExact()) {
     59        Value * principalCount;
     60        std::tie(port, index) = mKernel->getStreamPort(refSet);
     61        if (port == Kernel::Port::Input) {
     62            principalCount = getProcessedItemCount(refSet);
    6663        } else {
    67             std::tie(port, index) = mKernel->getStreamPort(refSet);
    68             if (port == Kernel::Port::Input) {
    69                principalField = refSet + Kernel::PROCESSED_ITEM_COUNT_SUFFIX;
    70             } else {
    71                principalField = refSet + Kernel::PRODUCED_ITEM_COUNT_SUFFIX;
    72             }
    73         }
    74         Value * const principleCount = getScalarField(principalField);
    75         return rate.CreateRatioCalculation(this, principleCount, doFinal);
     64            principalCount = getProducedItemCount(refSet);
     65        }
     66        return rate.CreateRatioCalculation(this, principalCount, doFinal);
    7667    }
    7768    return getScalarField(name + Kernel::PRODUCED_ITEM_COUNT_SUFFIX);
     
    8374    assert (port == Kernel::Port::Input);
    8475    const auto & rate = mKernel->getStreamInput(index).rate;
    85     if (rate.isExact()) {
    86         std::string refSet = rate.referenceStreamSet();
    87         if (refSet.empty()) {
    88             refSet = mKernel->getStreamInput(0).name;
    89         }
    90         Value * const principleCount = getScalarField(refSet + Kernel::PROCESSED_ITEM_COUNT_SUFFIX);
    91         return rate.CreateRatioCalculation(this, principleCount);
     76    const auto & refSet = rate.referenceStreamSet();
     77    if ((refSet != name) && rate.isExact()) {
     78        Value * const principalCount = getProcessedItemCount(refSet);
     79        return rate.CreateRatioCalculation(this, principalCount);
    9280    }
    9381    return getScalarField(name + Kernel::PROCESSED_ITEM_COUNT_SUFFIX);
Note: See TracChangeset for help on using the changeset viewer.