Changeset 5479 for icGREP


Ignore:
Timestamp:
May 27, 2017, 10:36:18 AM (2 years ago)
Author:
cameron
Message:

Multiblock kernels use whole block copy to/from temp buffers; allow input stream sets with variable rates

Location:
icGREP/icgrep-devel/icgrep/kernels
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5464 r5479  
    645645void MultiBlockKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & kb) {
    646646
     647    // Stream set and buffer analysis.  When near the end of buffers
     648    // or for final block processing, data for each streamset may need
     649    // to be copied into temporary buffers to ensure linear access.
     650    // Data is always copied as a number of whole blocks, dependent
     651    // on the stream set processing rate.
     652   
     653    const unsigned bitBlockWidth = kb->getBitBlockWidth();
     654    const unsigned inputSetCount = mStreamSetInputs.size();
     655    const unsigned outputSetCount = mStreamSetOutputs.size();
     656    const unsigned totalSetCount = inputSetCount + outputSetCount;
     657    bool isDerived[totalSetCount];
     658    int itemsPerPrincipalBlock[totalSetCount];
     659   
     660    for (unsigned i = 0; i < inputSetCount; i++) {
     661        auto & rate = mStreamSetInputs[i].rate;
     662        std::string refSet = mStreamSetInputs[i].rate.referenceStreamSet();
     663        if (rate.isExact()) {
     664            if (refSet.empty()) {
     665                itemsPerPrincipalBlock[i] = rate.calculateRatio(bitBlockWidth);
     666                isDerived[i] = true;
     667                continue;
     668            }
     669            else {
     670                Port port; unsigned ssIdx;
     671                std::tie(port, ssIdx) = getStreamPort(mStreamSetInputs[i].name);
     672                assert (port == Port::Input && ssIdx < i);
     673                if (isDerived[ssIdx]) {
     674                    itemsPerPrincipalBlock[i] = rate.calculateRatio(itemsPerPrincipalBlock[ssIdx]);
     675                    isDerived[i] = true;
     676                    continue;
     677                }
     678            }
     679        }
     680        isDerived[i] = false;
     681    }
     682   
     683    for (auto & ss : mStreamSetOutputs) {
     684        unsigned i = inputSetCount;
     685        auto & rate = ss.rate;
     686        std::string refSet = rate.referenceStreamSet();
     687        if (rate.isExact() || rate.isMaxRatio()) {
     688            if (refSet.empty()) {
     689                itemsPerPrincipalBlock[i] = rate.calculateRatio(bitBlockWidth);
     690                isDerived[i] = rate.isExact();
     691                continue;
     692            }
     693            else {
     694                Port port; unsigned ssIdx;
     695                std::tie(port, ssIdx) = getStreamPort(mStreamSetOutputs[i].name);
     696                if (port == Port::Output) ssIdx += inputSetCount;
     697                if (isDerived[ssIdx]) {
     698                    itemsPerPrincipalBlock[i] = rate.calculateRatio(itemsPerPrincipalBlock[ssIdx]);
     699                    isDerived[i] = rate.isExact();
     700                    continue;
     701                }
     702            }
     703        }
     704        isDerived[i] = false;
     705        i++;
     706    }
     707    int maxBlocksToCopy[totalSetCount];
     708   
     709    for (unsigned i = 0; i < totalSetCount; i++) {
     710        if (isDerived[i]) {
     711            if (itemsPerPrincipalBlock[i] == bitBlockWidth) {
     712                maxBlocksToCopy[i] = 1;
     713            }
     714            else {
     715                // May not be block aligned, can overlap partial blocks at both ends.
     716                maxBlocksToCopy[i] = itemsPerPrincipalBlock[i]/bitBlockWidth + 2;
     717            }
     718        }
     719        else {
     720            // For variable input stream sets, we make a single block of items
     721            // available, if possible, but this block could be nonaligned.
     722            maxBlocksToCopy[i] = 2;
     723        }
     724    }
    647725    auto ip = kb->saveIP();
     726   
    648727    Function * const cp = mCurrentMethod;
    649728    const auto saveInstance = getInstance();
     
    657736    multiBlockParmTypes.push_back(mKernelStateType->getPointerTo());
    658737    multiBlockParmTypes.push_back(kb->getSizeTy());
     738    for (unsigned i = 1; i < mStreamSetInputs.size(); i++) {
     739        if (!isDerived[i]) multiBlockParmTypes.push_back(kb->getSizeTy());
     740    }
    659741    for (auto buffer : mStreamSetInputBuffers) {
    660742        multiBlockParmTypes.push_back(buffer->getPointerType());
     
    672754    setInstance(&*args);
    673755    (++args)->setName("itemsToDo");
     756    for (unsigned i = 1; i < mStreamSetInputs.size(); i++) {
     757        if (!isDerived[i]) (++args)->setName(mStreamSetInputs[i].name + "_availItems");
     758    }
    674759    for (auto binding : mStreamSetInputs) {
    675760        (++args)->setName(binding.name + "BufPtr");
     
    703788
    704789    //
    705     //  A. Temporary Buffer Area Determination
     790    // Define and allocate the temporary buffer area.
    706791    //
    707     // For final block processing and for processing near the end of physical buffer
    708     // boundaries, we need to allocate temporary space for processing a full block of input.
    709     // Compute the size requirements to store stream set data at the declared processing
    710     // rates in reference to one block of the principal input stream.
    711     //
    712 
    713     unsigned bitBlockWidth = kb->getBitBlockWidth();
    714     std::vector<Type *> tempBuffers;
    715     std::vector<unsigned> itemsPerPrincipalBlock;
    716     for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    717         auto & rate = mStreamSetInputs[i].rate;
    718         std::string refSet = mStreamSetInputs[i].rate.referenceStreamSet();
    719         if (refSet.empty()) {
    720             itemsPerPrincipalBlock.push_back(rate.calculateRatio(bitBlockWidth));
     792    Type * tempBuffers[totalSetCount];
     793    for (unsigned i = 0; i < totalSetCount; i++) {
     794        unsigned blocks = maxBlocksToCopy[i];
     795        Type * bufType = i < inputSetCount ? mStreamSetInputBuffers[i]->getType() : mStreamSetOutputBuffers[i -inputSetCount]->getType();
     796        if (blocks > 1) {
     797            tempBuffers[i] = ArrayType::get(bufType, blocks);
    721798        }
    722799        else {
    723             Port port; unsigned ssIdx;
    724             std::tie(port, ssIdx) = getStreamPort(mStreamSetInputs[i].name);
    725             assert (port == Port::Input && ssIdx < i);
    726             itemsPerPrincipalBlock.push_back(rate.calculateRatio(itemsPerPrincipalBlock[ssIdx]));
    727         }
    728         //
    729         unsigned blocks = (itemsPerPrincipalBlock.back() + bitBlockWidth - 1)/bitBlockWidth +2;
    730         if (blocks > 1) {
    731             tempBuffers.push_back(ArrayType::get(mStreamSetInputBuffers[i]->getType(), blocks));
    732         }
    733         else {
    734             tempBuffers.push_back(mStreamSetInputBuffers[i]->getType());
    735         }
    736     }
    737 
    738     for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    739         auto & rate = mStreamSetOutputs[i].rate;
    740         std::string refSet = mStreamSetOutputs[i].rate.referenceStreamSet();
    741         if (refSet.empty()) {
    742             itemsPerPrincipalBlock.push_back(rate.calculateRatio(bitBlockWidth));
    743         }
    744         else {
    745             Port port; unsigned ssIdx;
    746             std::tie(port, ssIdx) = getStreamPort(mStreamSetOutputs[i].name);
    747             if (port == Port::Output) ssIdx += mStreamSetInputs.size();
    748             itemsPerPrincipalBlock.push_back(rate.calculateRatio(itemsPerPrincipalBlock[ssIdx]));
    749         }
    750         unsigned blocks = (itemsPerPrincipalBlock.back() + bitBlockWidth - 1)/bitBlockWidth +2;
    751         if (blocks > 1) {
    752             tempBuffers.push_back(ArrayType::get(mStreamSetOutputBuffers[i]->getType(), blocks));
    753         }
    754         else {
    755             tempBuffers.push_back(mStreamSetOutputBuffers[i]->getType());
    756         }
    757     }
    758 
    759     Type * tempParameterStructType = StructType::create(kb->getContext(), tempBuffers);
     800            tempBuffers[i] = bufType;
     801        }
     802    }
     803    Type * tempParameterStructType = StructType::create(kb->getContext(), ArrayRef<Type *>(tempBuffers, totalSetCount), "tempBuf");
    760804    Value * tempParameterArea = kb->CreateCacheAlignedAlloca(tempParameterStructType);
    761805
     
    806850
    807851    Value * linearlyAvailBlocks = blocksRemaining;
    808     for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
     852    for (unsigned i = 0; i < inputSetCount; i++) {
    809853        Value * p = kb->getProcessedItemCount(mStreamSetInputs[i].name);
    810854        Value * blkNo = kb->CreateUDiv(p, blockSize);
     
    812856        processedItemCount.push_back(p);
    813857        inputBlockPtr.push_back(b);
    814         auto & rate = mStreamSetInputs[i].rate;
    815         Value * blocks = nullptr;
    816         if ((rate.isFixedRatio()) && (rate.getRatioNumerator() == rate.getRatioDenominator()) && (rate.referenceStreamSet() == "")) {
    817             blocks = mStreamSetInputBuffers[i]->getLinearlyAccessibleBlocks(kb.get(), blkNo);
    818         } else {
    819             Value * linearlyAvailItems = mStreamSetInputBuffers[i]->getLinearlyAccessibleItems(kb.get(), p);
    820             Value * items = rate.CreateMaxReferenceItemsCalculation(kb.get(), linearlyAvailItems);
    821             blocks = kb->CreateUDiv(items, blockSize);
    822         }
    823         linearlyAvailBlocks = kb->CreateSelect(kb->CreateICmpULT(blocks, linearlyAvailBlocks), blocks, linearlyAvailBlocks);
     858        if (isDerived[i]) {
     859            auto & rate = mStreamSetInputs[i].rate;
     860            Value * blocks = nullptr;
     861            if ((rate.isFixedRatio()) && (rate.getRatioNumerator() == rate.getRatioDenominator()) && (rate.referenceStreamSet() == "")) {
     862                blocks = mStreamSetInputBuffers[i]->getLinearlyAccessibleBlocks(kb.get(), blkNo);
     863            } else {
     864                Value * linearlyAvailItems = mStreamSetInputBuffers[i]->getLinearlyAccessibleItems(kb.get(), p);
     865                Value * items = rate.CreateMaxReferenceItemsCalculation(kb.get(), linearlyAvailItems);
     866                blocks = kb->CreateUDiv(items, blockSize);
     867            }
     868            linearlyAvailBlocks = kb->CreateSelect(kb->CreateICmpULT(blocks, linearlyAvailBlocks), blocks, linearlyAvailBlocks);
     869        }
    824870    }
    825871    //  Now determine the linearly writeable blocks, based on available blocks reduced
     
    827873    Value * linearlyWritableBlocks = linearlyAvailBlocks;
    828874
    829     for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
     875    for (unsigned i = 0; i < outputSetCount; i++) {
    830876        Value * p = kb->getProducedItemCount(mStreamSetOutputs[i].name);
    831877        Value * blkNo = kb->CreateUDiv(p, blockSize);
     
    833879        producedItemCount.push_back(p);
    834880        outputBlockPtr.push_back(b);
    835         auto & rate = mStreamSetOutputs[i].rate;
    836         Value * blocks = nullptr;
    837         if ((rate.isFixedRatio()) && (rate.getRatioNumerator() == rate.getRatioDenominator())) {
    838             blocks = mStreamSetOutputBuffers[0]->getLinearlyWritableBlocks(kb.get(), blkNo);
    839         } else {
    840             Value * writableItems = mStreamSetOutputBuffers[0]->getLinearlyWritableItems(kb.get(), p);
    841             blocks = kb->CreateUDiv(writableItems, blockSize);
    842         }
    843         linearlyWritableBlocks = kb->CreateSelect(kb->CreateICmpULT(blocks, linearlyWritableBlocks), blocks, linearlyWritableBlocks);
     881        if (isDerived[inputSetCount + i]) {
     882            auto & rate = mStreamSetOutputs[i].rate;
     883            Value * blocks = nullptr;
     884            if ((rate.isFixedRatio()) && (rate.getRatioNumerator() == rate.getRatioDenominator())) {
     885                blocks = mStreamSetOutputBuffers[0]->getLinearlyWritableBlocks(kb.get(), blkNo);
     886            } else {
     887                Value * writableItems = mStreamSetOutputBuffers[0]->getLinearlyWritableItems(kb.get(), p);
     888                blocks = kb->CreateUDiv(writableItems, blockSize);
     889            }
     890            linearlyWritableBlocks = kb->CreateSelect(kb->CreateICmpULT(blocks, linearlyWritableBlocks), blocks, linearlyWritableBlocks);
     891        }
    844892    }
    845893
     
    856904    doMultiBlockArgs.push_back(getInstance());
    857905    doMultiBlockArgs.push_back(linearlyAvailItems);
     906    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
     907        if (!isDerived[i]) {
     908            Value * avail = kb->CreateSub(mAvailableItemCount[i], processedItemCount[i]);
     909            Value * linearlyAvail = mStreamSetInputBuffers[i]->getLinearlyAccessibleItems(kb.get(), processedItemCount[i]);
     910            doMultiBlockArgs.push_back(kb->CreateSelect(kb->CreateICmpULT(avail, linearlyAvail), avail, linearlyAvail));
     911        }
     912    }
    858913    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    859914        Value * bufPtr = kb->getRawInputPointer(mStreamSetInputs[i].name, kb->getInt32(0), processedItemCount[i]);
     
    926981    kb->CreateCondBr(kb->CreateOr(mIsFinal, haveBlocks), doTempBufferBlock, segmentDone);
    927982
    928     // We use temporary buffers in 3 different cases that preclude full block processing.
    929     // (a) One or more input buffers does not have a sufficient number of input items linearly available.
    930     // (b) One or more output buffers does not have sufficient linearly available buffer space.
    931     // (c) We have processed all the full blocks of input and only the excessItems remain.
    932     // In each case we set up temporary buffers for input and output and then
    933     // call the Multiblock routine.
    934     //
    935983    kb->SetInsertPoint(doTempBufferBlock);
    936984    Value * tempBlockItems = kb->CreateSelect(haveBlocks, blockSize, excessItems);
     
    941989    tempArgs.push_back(getInstance());
    942990    tempArgs.push_back(tempBlockItems);
     991   
     992    // For non-derived inputs, add the available items.
     993    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
     994        if (!isDerived[i]) {
     995            Value * avail = kb->CreateSub(mAvailableItemCount[i], processedItemCount[i]);
     996            doMultiBlockArgs.push_back(kb->CreateSelect(kb->CreateICmpULT(avail, blockSize), avail, blockSize));
     997        }
     998    }
    943999
    9441000    // Prepare the temporary buffer area.
     
    9651021                finalItemCountNeeded.push_back(rate.CreateRatioCalculation(kb.get(), finalItemCountNeeded[0], doFinal));
    9661022            }
    967             else {
     1023            else if (isDerived[i]) {
    9681024                Port port; unsigned ssIdx;
    9691025                std::tie(port, ssIdx) = getStreamPort(mStreamSetInputs[i].name);
     
    9711027                finalItemCountNeeded.push_back(rate.CreateRatioCalculation(kb.get(), finalItemCountNeeded[ssIdx], doFinal));
    9721028            }
    973         }
    974         Value * neededItems = kb->CreateSub(finalItemCountNeeded[i], blockBasePos);
    975         Value * availFromBase = mStreamSetInputBuffers[i]->getLinearlyAccessibleItems(kb.get(), blockBasePos);
    976         Value * copyItems1 = kb->CreateSelect(kb->CreateICmpULT(neededItems, availFromBase), neededItems, availFromBase);
    977         Value * copyItems2 = kb->CreateSub(neededItems, copyItems1);
     1029            else {
     1030                // Ensure that there is up to a full block of items, if available.
     1031                Value * avail = kb->CreateSub(mAvailableItemCount[i], processedItemCount[i]);
     1032                finalItemCountNeeded.push_back(kb->CreateSelect(kb->CreateICmpULT(avail, blockSize), avail, blockSize));
     1033            }
     1034        }
    9781035        Value * inputPtr = kb->CreatePointerCast(kb->getRawInputPointer(mStreamSetInputs[i].name, kb->getInt32(0), blockBasePos), bufPtrType);
    979         mStreamSetInputBuffers[i]->createBlockAlignedCopy(kb.get(), tempBufPtr, inputPtr, copyItems1);
    980         Value * nextBufPtr = kb->CreateGEP(tempBufPtr, kb->CreateUDiv(copyItems1, blockSize));
    981         mStreamSetInputBuffers[i]->createBlockAlignedCopy(kb.get(), nextBufPtr, kb->getStreamSetBufferPtr(mStreamSetInputs[i].name), copyItems2);
     1036        if (maxBlocksToCopy[i] == 1) {
     1037            // copy one block
     1038            mStreamSetInputBuffers[i]->createBlockCopy(kb.get(), tempBufPtr, inputPtr, kb->getSize(1));
     1039        }
     1040        else {
     1041            Value * neededItems = kb->CreateSub(finalItemCountNeeded[i], blockBasePos);
     1042            // Round up to exact multiple of block size.
     1043            neededItems = kb->CreateAnd(kb->CreateAdd(neededItems, kb->getSize(bitBlockWidth - 1)), blockBaseMask);
     1044            Value * availFromBase = mStreamSetInputBuffers[i]->getLinearlyAccessibleItems(kb.get(), blockBasePos);
     1045            Value * copyItems1 = kb->CreateSelect(kb->CreateICmpULT(neededItems, availFromBase), neededItems, availFromBase);
     1046            Value * copyItems2 = kb->CreateSub(neededItems, copyItems1);
     1047            mStreamSetInputBuffers[i]->createBlockAlignedCopy(kb.get(), tempBufPtr, inputPtr, copyItems1);
     1048            Value * nextBufPtr = kb->CreateGEP(tempBufPtr, kb->CreateUDiv(copyItems1, blockSize));
     1049            mStreamSetInputBuffers[i]->createBlockAlignedCopy(kb.get(), nextBufPtr, kb->getStreamSetBufferPtr(mStreamSetInputs[i].name), copyItems2);
     1050        }
    9821051        Value * itemAddress = kb->getRawInputPointer(mStreamSetInputs[i].name, kb->getInt32(0), processedItemCount[i]);
    9831052        itemAddress = kb->CreatePtrToInt(itemAddress, intAddrTy);
     
    10031072    }
    10041073
    1005 
    10061074    kb->CreateCall(multiBlockFunction, tempArgs);
    10071075
     
    10131081        Value * finalItems = kb->getProducedItemCount(mStreamSetOutputs[i].name);
    10141082        Value * copyItems = kb->CreateSub(finalItems, blockBasePos[i]);
    1015        
     1083        // Round up to exact multiple of block size.
     1084        copyItems = kb->CreateAnd(kb->CreateAdd(copyItems, kb->getSize(bitBlockWidth - 1)), blockBaseMask);
    10161085        Value * writableFromBase = mStreamSetOutputBuffers[i]->getLinearlyWritableItems(kb.get(), blockBasePos[i]); // must be a whole number of blocks.
    10171086        Value * copyItems1 = kb->CreateSelect(kb->CreateICmpULT(copyItems, writableFromBase), copyItems, writableFromBase);
    1018         mStreamSetOutputBuffers[i]->createBlockAlignedCopy(kb.get(), outputBlockPtr[i], tempBufPtr, copyItems1);
     1087        Value * copyBlocks1 = kb->CreateUDiv(copyItems1, blockSize);
     1088        mStreamSetOutputBuffers[i]->createBlockCopy(kb.get(), outputBlockPtr[i], tempBufPtr, copyBlocks1);
    10191089        Value * copyItems2 = kb->CreateSub(copyItems, copyItems1);
    1020         tempBufPtr = kb->CreateGEP(tempBufPtr, kb->CreateUDiv(copyItems1, blockSize));
     1090        tempBufPtr = kb->CreateGEP(tempBufPtr, copyBlocks1);
    10211091        Value * outputBaseBlockPtr = kb->CreateGEP(kb->getBaseAddress(mStreamSetOutputs[i].name), kb->getInt32(0));
    1022         mStreamSetOutputBuffers[i]->createBlockAlignedCopy(kb.get(), outputBaseBlockPtr, tempBufPtr, copyItems2);
     1092        mStreamSetOutputBuffers[i]->createBlockAlignedCopy(kb.get(), outputBaseBlockPtr, tempBufPtr, kb->CreateUDiv(copyItems2, blockSize));
    10231093    }
    10241094
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5464 r5479  
    299299    known as the principal input stream set.
    300300
    301 #.  If there is more than one input stream set, the additional stream sets must
     301#.  If there is more than one input stream set, the additional stream sets
     302    are first classified as having either a derived processing rate or
     303    a variable processing rate.   Stream sets with a derived processing rate
    302304    have a processing rate defined with respect to the input stream set of one
    303305    of the following types:  FixedRate, Add1 or RoundUp.    Note that stream sets
    304306    declared without a processing rate attribute have the FixedRate(1) attribute
    305     by default and therefore satisfy this constraint.
     307    by default and therefore satisfy this constraint.  All other processing rate
     308    types are classified as variable rate.
    306309
    307310#.  All output stream sets must be declared with processing rate attributes
     
    317320#.  The doMultiBlockMethod will be called with the following parameters:
    318321    * the number of items of the principal input stream to process (itemsToDo),
     322    * an additional "items available" parameter for each additional input stream set
     323      that is classified as a variable rate stream set,
    319324    * pointers to linear contiguous buffer areas for each of the input stream sets, and
    320325    * pointers to linear contiguous output buffer areas for each of the output stream sets.
    321     * pointers are to the address of the first item of the first stream of the stream set.
     326 
     327    Notes:
     328    * if the kernel has a Lookahead dependency declared on any input stream set, then
     329      there will be two buffer pointers for that stream set, one for accessing stream set
     330      items without lookahead and one for accessing the items with lookahead.   
     331    * pointers are to the address of the first unprocessed item of the first stream
     332      of the stream set.
     333    * the base type of each pointer is the StreamSetBlockType of that streamset
    322334
    323335#.  The Multi-Block Kernel Builder will arrange that these input parameters may be
     
    325337    * the number of itemsToDo will either be an exact multiple of the BlockSize,
    326338      or, for processing the final block, a value less than BlockSize
    327     * all input buffers will be safe to access and have data available in
     339    * the input buffer of the principal stream set and all input buffers of stream sets
     340      with derived processing rates will be safe to access and have data available in
    328341      accord with their processing rates based on the given number of itemsToDo
    329342      of the principal input stream set; no further bounds checking is needed.
     343    * the kernel programmer is responsible for safe access and bounds checking for any
     344      input stream set classified as variable rate.
    330345    * all output buffers will be safe to access and have space available
    331346      for the given maximum output generation rates based on the given number
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r5464 r5479  
    137137}
    138138
     139void StreamSetBuffer::createBlockCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * blocksToCopy) const {
     140    Type * i8ptr = iBuilder->getInt8PtrTy();
     141    unsigned alignment = iBuilder->getBitBlockWidth() / 8;
     142    Constant * blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
     143    unsigned numStreams = getType()->getArrayNumElements();
     144    auto elemTy = getType()->getArrayElementType();
     145    unsigned fieldWidth = isa<ArrayType>(elemTy) ? elemTy->getArrayNumElements() : 1;
     146    Value * blockCopyBytes = iBuilder->CreateMul(blocksToCopy, iBuilder->getSize(iBuilder->getBitBlockWidth() * numStreams * fieldWidth/8));
     147    iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, i8ptr), iBuilder->CreateBitCast(sourceBlockPtr, i8ptr), blockCopyBytes, alignment);
     148}
     149
    139150void StreamSetBuffer::createBlockAlignedCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * itemsToCopy) const {
    140151    Type * i8ptr = iBuilder->getInt8PtrTy();
  • icGREP/icgrep-devel/icgrep/kernels/streamset.h

    r5457 r5479  
    8080    virtual llvm::Value * getLinearlyAccessibleBlocks(IDISA::IDISA_Builder * const iBuilder, llvm::Value * fromBlock) const;
    8181   
     82    void createBlockCopy(IDISA::IDISA_Builder * const iBuilder, llvm::Value * targetBlockPtr, llvm::Value * sourceBlockPtr, llvm::Value * blocksToCopy) const;
     83
    8284    virtual void createBlockAlignedCopy(IDISA::IDISA_Builder * const iBuilder, llvm::Value * targetBlockPtr, llvm::Value * sourceBlockPtr, llvm::Value * itemsToCopy) const;
    8385
Note: See TracChangeset for help on using the changeset viewer.