Ignore:
Timestamp:
May 15, 2017, 4:20:45 PM (2 years ago)
Author:
cameron
Message:

Fix some issues with MultiBlockKernel builder

Location:
icGREP/icgrep-devel/icgrep/kernels
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5449 r5452  
    676676            itemsPerPrincipalBlock.push_back(rate.calculateRatio(itemsPerPrincipalBlock[ssIdx]));
    677677        }
     678        //
    678679        unsigned blocks = (itemsPerPrincipalBlock.back() + bitBlockWidth - 1)/bitBlockWidth +2;
    679680        if (blocks > 1) {
     
    792793        linearlyWritableBlocks = kb->CreateSelect(kb->CreateICmpULT(blocks, linearlyWritableBlocks), blocks, linearlyWritableBlocks);
    793794    }
     795
    794796    Value * haveBlocks = kb->CreateICmpUGT(linearlyWritableBlocks, kb->getSize(0));
    795797    kb->CreateCondBr(haveBlocks, doMultiBlockCall, tempBlockCheck);
     
    854856        }
    855857    }
    856     kb->setProcessedItemCount(mStreamSetInputs[0].name, kb->CreateAdd(processedItemCount[0], linearlyAvailItems));
     858
     859    Value * nowProcessed = kb->CreateAdd(processedItemCount[0], linearlyAvailItems);
     860    kb->setProcessedItemCount(mStreamSetInputs[0].name, nowProcessed);
    857861    Value * reducedBlocksToDo = kb->CreateSub(blocksRemaining, linearlyWritableBlocks);
    858     Value * fullBlocksRemain = kb->CreateICmpUGT(reducedBlocksToDo, kb->getSize(0));
    859862    BasicBlock * multiBlockFinal = kb->GetInsertBlock();
    860863    blocksRemaining->addIncoming(reducedBlocksToDo, multiBlockFinal);
    861     kb->CreateCondBr(fullBlocksRemain, doSegmentOuterLoop, tempBlockCheck);
    862     //iBuilder->CreateBr(doSegmentOuterLoop);
     864    kb->CreateBr(doSegmentOuterLoop);
    863865    //
    864866    // We use temporary buffers in 3 different cases that preclude full block processing.
     
    871873
    872874    kb->SetInsertPoint(tempBlockCheck);
    873     PHINode * const tempBlocksRemain = kb->CreatePHI(kb->getSizeTy(), 2, "tempBlocksRemain");
    874     tempBlocksRemain->addIncoming(blocksRemaining, doSegmentOuterLoop);
    875     tempBlocksRemain->addIncoming(reducedBlocksToDo, multiBlockFinal);
    876 
    877     haveBlocks = kb->CreateICmpUGT(tempBlocksRemain, kb->getSize(0));
     875    PHINode * itemsProcessedSoFar = kb->CreatePHI(kb->getSizeTy(), 2);
     876    itemsProcessedSoFar->addIncoming(processedItemCount[0], doSegmentOuterLoop);
     877    itemsProcessedSoFar->addIncoming(nowProcessed, multiBlockFinal);
     878   
     879    haveBlocks = kb->CreateICmpUGT(blocksRemaining, kb->getSize(0));
    878880    kb->CreateCondBr(kb->CreateOr(mIsFinal, haveBlocks), doTempBufferBlock, segmentDone);
    879881
    880     //
    881882    // We use temporary buffers in 3 different cases that preclude full block processing.
    882883    // (a) One or more input buffers does not have a sufficient number of input items linearly available.
     
    888889    kb->SetInsertPoint(doTempBufferBlock);
    889890    Value * tempBlockItems = kb->CreateSelect(haveBlocks, blockSize, excessItems);
     891    Value * doFinal = kb->CreateNot(haveBlocks);
    890892
    891893    // Begin constructing the doMultiBlock args.
     
    902904    // For each input and output buffer, copy over necessary data starting from the last
    903905    // block boundary.
    904     std::vector<Value *> finalItemPos;
    905     finalItemPos.push_back(kb->CreateAdd(processedItemCount[0], tempBlockItems));
     906    std::vector<Value *> finalItemCountNeeded;
     907    finalItemCountNeeded.push_back(kb->CreateAdd(itemsProcessedSoFar, tempBlockItems));
    906908
    907909    for (unsigned i = 0; i < mStreamSetInputBuffers.size(); i++) {
    908910        Value * tempBufPtr = kb->CreateGEP(tempParameterArea, kb->getInt32(i));
    909911        tempBufPtr = kb->CreatePointerCast(tempBufPtr, mStreamSetInputBuffers[i]->getPointerType());
    910 
    911         Value * blockItemPos = kb->CreateAnd(processedItemCount[i], blockBaseMask);
    912 
     912        Value * blockBasePos = kb->CreateAnd(processedItemCount[i], blockBaseMask);
    913913        // The number of items to copy is determined by the processing rate requirements.
    914914        if (i > 1) {
     
    916916            std::string refSet = mStreamSetInputs[i].rate.referenceStreamSet();
    917917            if (refSet.empty()) {
    918                 finalItemPos.push_back(rate.CreateRatioCalculation(kb.get(), finalItemPos[0], kb->CreateNot(haveBlocks)));
     918                finalItemCountNeeded.push_back(rate.CreateRatioCalculation(kb.get(), finalItemCountNeeded[0], doFinal));
    919919            }
    920920            else {
     
    922922                std::tie(port, ssIdx) = getStreamPort(mStreamSetInputs[i].name);
    923923                assert (port == Port::Input && ssIdx < i);
    924                 finalItemPos.push_back(rate.CreateRatioCalculation(kb.get(), finalItemPos[ssIdx], kb->CreateNot(haveBlocks)));
     924                finalItemCountNeeded.push_back(rate.CreateRatioCalculation(kb.get(), finalItemCountNeeded[ssIdx], doFinal));
    925925            }
    926926        }
    927         Value * neededItems = kb->CreateSub(finalItemPos[i], blockItemPos);
    928         Value * availFromBase = mStreamSetInputBuffers[i]->getLinearlyAccessibleItems(kb.get(), blockItemPos);
     927        Value * neededItems = kb->CreateSub(finalItemCountNeeded[i], blockBasePos);
     928        Value * availFromBase = mStreamSetInputBuffers[i]->getLinearlyAccessibleItems(kb.get(), blockBasePos);
    929929        Value * copyItems1 = kb->CreateSelect(kb->CreateICmpULT(neededItems, availFromBase), neededItems, availFromBase);
    930930        Value * copyItems2 = kb->CreateSub(neededItems, copyItems1);
    931         Value * inputPtr = kb->getInputStreamBlockPtr(mStreamSetInputs[i].name, kb->getInt32(0));
     931        Value * inputPtr = kb->getRawInputPointer(mStreamSetInputs[i].name, kb->getInt32(0), blockBasePos);
    932932        mStreamSetInputBuffers[i]->createBlockAlignedCopy(kb.get(), tempBufPtr, inputPtr, copyItems1);
    933         Value * nextBufPtr = kb->CreateGEP(tempBufPtr, kb->CreateUDiv(availFromBase, blockSize));
     933        Value * nextBufPtr = kb->CreateGEP(tempBufPtr, kb->CreateUDiv(copyItems1, blockSize));
    934934        mStreamSetInputBuffers[i]->createBlockAlignedCopy(kb.get(), nextBufPtr, kb->getStreamSetBufferPtr(mStreamSetInputs[i].name), copyItems2);
    935 
    936935        Value * itemAddress = kb->getRawInputPointer(mStreamSetInputs[i].name, kb->getInt32(0), processedItemCount[i]);
    937936        itemAddress = kb->CreatePtrToInt(itemAddress, intAddressTy);
    938 
    939937        Value * baseAddress = inputBlockPtr[i];
    940938        baseAddress = kb->CreatePtrToInt(baseAddress, intAddressTy);
    941 
    942 
    943939        Value * tempAddress = kb->CreateAdd(kb->CreatePtrToInt(tempBufPtr, intAddressTy), kb->CreateSub(itemAddress, baseAddress));
    944940        tempArgs.push_back(kb->CreateIntToPtr(tempAddress, mStreamSetInputBuffers[i]->getPointerType()));
    945941    }
    946942
    947     std::vector<Value *> blockItemPos;
     943    std::vector<Value *> blockBasePos;
    948944    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); i++) {
    949945        Value * tempBufPtr = kb->CreateGEP(tempParameterArea, kb->getInt32(mStreamSetInputs.size() + i));
    950946        tempBufPtr = kb->CreatePointerCast(tempBufPtr, mStreamSetOutputBuffers[i]->getPointerType());
    951         blockItemPos.push_back(kb->CreateAnd(producedItemCount[i], blockBaseMask));
    952         mStreamSetOutputBuffers[i]->createBlockAlignedCopy(kb.get(), tempBufPtr, outputBlockPtr[i], kb->CreateSub(producedItemCount[i], blockItemPos[i]));
    953         Value * itemAddress = kb->CreatePtrToInt(kb->getRawOutputPointer(mStreamSetInputs[i].name, kb->getInt32(0), producedItemCount[i]), intAddressTy);
     947        producedItemCount[i] = kb->getProducedItemCount(mStreamSetOutputs[i].name);
     948        blockBasePos.push_back(kb->CreateAnd(producedItemCount[i], blockBaseMask));
     949        mStreamSetOutputBuffers[i]->createBlockAlignedCopy(kb.get(), tempBufPtr, outputBlockPtr[i], kb->CreateSub(producedItemCount[i], blockBasePos[i]));
     950        Value * itemAddress = kb->CreatePtrToInt(kb->getRawOutputPointer(mStreamSetOutputs[i].name, kb->getInt32(0), producedItemCount[i]), intAddressTy);
    954951        Value * outputPtr = kb->getOutputStreamBlockPtr(mStreamSetOutputs[i].name, kb->getInt32(0));
    955952        Value * baseAddress = kb->CreatePtrToInt(outputPtr, intAddressTy);
     
    966963        Value * tempBufPtr = kb->CreateGEP(tempParameterArea, kb->getInt32(mStreamSetInputs.size() + i));
    967964        tempBufPtr = kb->CreatePointerCast(tempBufPtr, mStreamSetOutputBuffers[i]->getPointerType());
    968         Value * final_items = kb->getProducedItemCount(mStreamSetOutputs[i].name);
    969         Value * copyItems = kb->CreateSub(final_items, blockItemPos[i]);
    970         Value * copyItems1 = mStreamSetOutputBuffers[i]->getLinearlyWritableItems(kb.get(), blockItemPos[i]); // must be a whole number of blocks.
    971         Value * outputPtr = kb->getOutputStreamBlockPtr(mStreamSetOutputs[i].name, kb->getInt32(0));
    972         mStreamSetOutputBuffers[i]->createBlockAlignedCopy(kb.get(), outputPtr, tempBufPtr, copyItems1);
    973         Value * copyItems2 = kb->CreateSelect(kb->CreateICmpULT(copyItems, copyItems), kb->getSize(0), kb->CreateSub(copyItems, copyItems1));
     965        Value * finalItems = kb->getProducedItemCount(mStreamSetOutputs[i].name);
     966        Value * copyItems = kb->CreateSub(finalItems, blockBasePos[i]);
     967       
     968        Value * writableFromBase = mStreamSetOutputBuffers[i]->getLinearlyWritableItems(kb.get(), blockBasePos[i]); // must be a whole number of blocks.
     969        Value * copyItems1 = kb->CreateSelect(kb->CreateICmpULT(copyItems, writableFromBase), copyItems, writableFromBase);
     970        mStreamSetOutputBuffers[i]->createBlockAlignedCopy(kb.get(), outputBlockPtr[i], tempBufPtr, copyItems1);
     971        Value * copyItems2 = kb->CreateSub(copyItems, copyItems1);
    974972        tempBufPtr = kb->CreateGEP(tempBufPtr, kb->CreateUDiv(copyItems1, blockSize));
    975         mStreamSetOutputBuffers[i]->createBlockAlignedCopy(kb.get(), kb->getStreamSetBufferPtr(mStreamSetOutputs[i].name), tempBufPtr, copyItems2);
    976     }
    977 
    978     kb->setProcessedItemCount(mStreamSetInputs[0].name, finalItemPos[0]);
     973        Value * outputBaseBlockPtr = kb->CreateGEP(kb->getBaseAddress(mStreamSetOutputs[i].name), kb->getInt32(0));
     974        mStreamSetOutputBuffers[i]->createBlockAlignedCopy(kb.get(), outputBaseBlockPtr, tempBufPtr, copyItems2);
     975    }
     976
     977    kb->setProcessedItemCount(mStreamSetInputs[0].name, finalItemCountNeeded[0]);
    979978
    980979    //  We've dealt with the partial block processing and copied information back into the
    981980    //  actual buffers.  If this isn't the final block, loop back for more multiblock processing.
    982981    //
    983     blocksRemaining->addIncoming(kb->CreateSub(tempBlocksRemain, kb->CreateZExt(haveBlocks, kb->getSizeTy())), kb->GetInsertBlock());
     982    blocksRemaining->addIncoming(kb->CreateSub(blocksRemaining, kb->CreateZExt(haveBlocks, kb->getSizeTy())), kb->GetInsertBlock());
    984983    kb->CreateCondBr(haveBlocks, doSegmentOuterLoop, segmentDone);
    985984    kb->SetInsertPoint(segmentDone);
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r5448 r5452  
    156156    Value * blockCopyBytes = iBuilder->CreateMul(blocksToCopy, iBuilder->getSize(iBuilder->getBitBlockWidth() * numStreams * fieldWidth/8));
    157157    iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, i8ptr), iBuilder->CreateBitCast(sourceBlockPtr, i8ptr), blockCopyBytes, alignment);
    158     Value * partialCopyBitsPreStream = iBuilder->CreateMul(partialItems, iBuilder->getSize(fieldWidth));
    159     Value * partialCopyBytesPreStream = iBuilder->CreateLShr(iBuilder->CreateAdd(partialCopyBitsPreStream, iBuilder->getSize(7)), iBuilder->getSize(3));
     158    Value * partialCopyBitsPerStream = iBuilder->CreateMul(partialItems, iBuilder->getSize(fieldWidth));
     159    Value * partialCopyBytesPerStream = iBuilder->CreateLShr(iBuilder->CreateAdd(partialCopyBitsPerStream, iBuilder->getSize(7)), iBuilder->getSize(3));
    160160    for (unsigned strm = 0; strm < numStreams; strm++) {
    161161        Value * strmTargetPtr = iBuilder->CreateGEP(partialBlockTargetPtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
    162162        Value * strmSourcePtr = iBuilder->CreateGEP(partialBlockSourcePtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
    163         iBuilder->CreateMemMove(iBuilder->CreateBitCast(strmTargetPtr, i8ptr), iBuilder->CreateBitCast(strmSourcePtr, i8ptr), partialCopyBytesPreStream, alignment);
     163        iBuilder->CreateMemMove(iBuilder->CreateBitCast(strmTargetPtr, i8ptr), iBuilder->CreateBitCast(strmSourcePtr, i8ptr), partialCopyBytesPerStream, alignment);
    164164    }
    165165}
  • icGREP/icgrep-devel/icgrep/kernels/streamset.h

    r5446 r5452  
    101101    StreamSetBuffer(BufferKind k, llvm::Type * baseType, llvm::Type * resolvedType, unsigned BufferBlocks, unsigned AddressSpace);
    102102
    103     // Get the buffer pointer for a given block of the stream.
     103    // Get the buffer pointer for a given block of the stream set.
    104104    virtual llvm::Value * getStreamSetBlockPtr(IDISA::IDISA_Builder * const iBuilder, llvm::Value * self, llvm::Value * blockNo) const = 0;
    105105
Note: See TracChangeset for help on using the changeset viewer.