Changeset 5442


Ignore:
Timestamp:
May 11, 2017, 7:58:32 PM (22 months ago)
Author:
cameron
Message:

Bug fixes for MultiBlockKernel, StdOutKernel

Location:
icGREP/icgrep-devel/icgrep/kernels
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5441 r5442  
    257257    mAvailableItemCount.clear();
    258258    idb->CreateRetVoid();
     259    //CurrentMethod->dump();
    259260}
    260261
     
    604605void MultiBlockKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & kb) {
    605606
     607    KernelBuilder * const iBuilder = kb.get();
     608    auto ip = iBuilder->saveIP();
     609    Function * const cp = mCurrentMethod;
     610   
    606611    // First prepare the multi-block method that will be used.
    607     KernelBuilder * const iBuilder = kb.get();
    608612
    609613    std::vector<Type *> multiBlockParmTypes;
    610614    multiBlockParmTypes.push_back(mKernelStateType->getPointerTo());
     615    multiBlockParmTypes.push_back(iBuilder->getSizeTy());
    611616    for (auto buffer : mStreamSetInputBuffers) {
    612617        multiBlockParmTypes.push_back(buffer->getPointerType());
     
    615620        multiBlockParmTypes.push_back(buffer->getPointerType());
    616621    }
     622   
    617623    FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), multiBlockParmTypes, false);
    618624    Function * multiBlockFunction = Function::Create(type, GlobalValue::InternalLinkage, getName() + MULTI_BLOCK_SUFFIX, iBuilder->getModule());
     
    631637    // Now use the generateMultiBlockLogic method of the MultiBlockKernelBuilder subtype to
    632638    // provide the required multi-block kernel logic.
    633     auto ip = iBuilder->saveIP();
     639    mCurrentMethod = multiBlockFunction;
    634640    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "multiBlockEntry", multiBlockFunction, 0));
    635 
    636641    generateMultiBlockLogic(kb);
    637642
    638643    iBuilder->CreateRetVoid();
     644   
    639645    iBuilder->restoreIP(ip);
    640 
     646    mCurrentMethod = cp;
     647   
    641648    // Now proceed with creation of the doSegment method.
    642649
     
    644651    BasicBlock * const doSegmentOuterLoop = iBuilder->CreateBasicBlock(getName() + "_doSegmentOuterLoop");
    645652    BasicBlock * const doMultiBlockCall = iBuilder->CreateBasicBlock(getName() + "_doMultiBlockCall");
    646     BasicBlock * const finalBlockCheck = iBuilder->CreateBasicBlock(getName() + "_finalBlockCheck");
     653    BasicBlock * const tempBlockCheck = iBuilder->CreateBasicBlock(getName() + "_tempBlockCheck");
    647654    BasicBlock * const doTempBufferBlock = iBuilder->CreateBasicBlock(getName() + "_doTempBufferBlock");
    648655    BasicBlock * const segmentDone = iBuilder->CreateBasicBlock(getName() + "_segmentDone");
     
    674681            itemsPerPrincipalBlock.push_back(rate.calculateRatio(itemsPerPrincipalBlock[ssIdx]));
    675682        }
    676         unsigned blocks = (itemsPerPrincipalBlock.back() + bitBlockWidth - 1)/bitBlockWidth;
     683        unsigned blocks = (itemsPerPrincipalBlock.back() + bitBlockWidth - 1)/bitBlockWidth +2;
    677684        if (blocks > 1) {
    678685            tempBuffers.push_back(ArrayType::get(mStreamSetInputBuffers[i]->getType(), blocks));
     
    694701            itemsPerPrincipalBlock.push_back(rate.calculateRatio(itemsPerPrincipalBlock[ssIdx]));
    695702        }
    696         unsigned blocks = (itemsPerPrincipalBlock.back() + bitBlockWidth - 1)/bitBlockWidth;
     703        unsigned blocks = (itemsPerPrincipalBlock.back() + bitBlockWidth - 1)/bitBlockWidth +2;
    697704        if (blocks > 1) {
    698705            tempBuffers.push_back(ArrayType::get(mStreamSetOutputBuffers[i]->getType(), blocks));
     
    706713
    707714    ConstantInt * blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
     715
    708716    Value * availablePos = mAvailableItemCount[0];
    709717    Value * itemsAvail = availablePos;
     
    732740
    733741    //iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(fullBlocksToDo, iBuilder->getSize(0)), doSegmentOuterLoop, finalBlockCheck);
     742   
    734743    iBuilder->CreateBr(doSegmentOuterLoop);
    735 
    736744    iBuilder->SetInsertPoint(doSegmentOuterLoop);
    737745    PHINode * const blocksRemaining = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "blocksRemaining");
    738746    blocksRemaining->addIncoming(fullBlocksToDo, entry);
    739 
    740 
    741747    // For each input buffer, determine the processedItemCount, the block pointer for the
    742748    // buffer block containing the next item, and the number of linearly available items.
     
    747753    std::vector<Value *> outputBlockPtr;
    748754
    749     //  Calculate linearly available blocks for all input stream sets.
    750     Value * linearlyAvailBlocks = nullptr;
     755    //  Now determine the linearly available blocks, based on blocks remaining reduced
     756    //  by limitations of linearly available input buffer space.
     757    Value * linearlyAvailBlocks = blocksRemaining;
    751758    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    752759        Value * p = iBuilder->getProcessedItemCount(mStreamSetInputs[i].name);
     
    764771            blocks = iBuilder->CreateUDiv(items, blockSize);
    765772        }
    766         if (i == 0) {
    767             linearlyAvailBlocks = blocks;
    768         } else {
    769             linearlyAvailBlocks = iBuilder->CreateSelect(iBuilder->CreateICmpULT(blocks, linearlyAvailBlocks), blocks, linearlyAvailBlocks);
    770         }
    771     }
    772 
     773        linearlyAvailBlocks = iBuilder->CreateSelect(iBuilder->CreateICmpULT(blocks, linearlyAvailBlocks), blocks, linearlyAvailBlocks);
     774    }
    773775    //  Now determine the linearly writeable blocks, based on available blocks reduced
    774776    //  by limitations of output buffer space.
     
    792794    }
    793795    Value * haveBlocks = iBuilder->CreateICmpUGT(linearlyWritableBlocks, iBuilder->getSize(0));
    794 
    795     iBuilder->CreateCondBr(haveBlocks, doMultiBlockCall, doTempBufferBlock);
     796    iBuilder->CreateCondBr(haveBlocks, doMultiBlockCall, tempBlockCheck);
    796797
    797798    //  At this point we have verified the availability of one or more blocks of input data and output buffer space for all stream sets.
     
    805806    doMultiBlockArgs.push_back(linearlyAvailItems);
    806807    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    807         doMultiBlockArgs.push_back(iBuilder->getRawInputPointer(mStreamSetInputs[i].name, iBuilder->getInt32(0), processedItemCount[i]));
     808        Value * bufPtr = iBuilder->getRawInputPointer(mStreamSetInputs[i].name, iBuilder->getInt32(0), processedItemCount[i]);
     809        bufPtr = iBuilder->CreatePointerCast(bufPtr, mStreamSetInputBuffers[i]->getPointerType());
     810        doMultiBlockArgs.push_back(bufPtr);
    808811    }
    809812    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    810         doMultiBlockArgs.push_back(iBuilder->getRawOutputPointer(mStreamSetOutputs[i].name, iBuilder->getInt32(0), producedItemCount[i]));
     813        Value * bufPtr = iBuilder->getRawOutputPointer(mStreamSetOutputs[i].name, iBuilder->getInt32(0), producedItemCount[i]);
     814        bufPtr = iBuilder->CreatePointerCast(bufPtr, mStreamSetOutputBuffers[i]->getPointerType());
     815        doMultiBlockArgs.push_back(bufPtr);
    811816    }
    812817
    813818    iBuilder->CreateCall(multiBlockFunction, doMultiBlockArgs);
    814 
    815819    // Do copybacks if necessary.
    816820    unsigned priorIdx = 0;
     
    856860    BasicBlock * multiBlockFinal = iBuilder->GetInsertBlock();
    857861    blocksRemaining->addIncoming(reducedBlocksToDo, multiBlockFinal);
    858     iBuilder->CreateCondBr(fullBlocksRemain, doSegmentOuterLoop, finalBlockCheck);
    859 
    860     // All the full blocks of input have been processed.  If mIsFinal is true,
    861     // we should process the remaining partial block (i.e., excessItems as determined at entry).
    862     iBuilder->SetInsertPoint(finalBlockCheck);
    863     iBuilder->CreateCondBr(mIsFinal, doTempBufferBlock, segmentDone);
     862    iBuilder->CreateCondBr(fullBlocksRemain, doSegmentOuterLoop, tempBlockCheck);
     863    //iBuilder->CreateBr(doSegmentOuterLoop);
     864    //
     865    // We use temporary buffers in 3 different cases that preclude full block processing.
     866    // (a) One or more input buffers does not have a sufficient number of input items linearly available.
     867    // (b) One or more output buffers does not have sufficient linearly available buffer space.
     868    // (c) We have processed all the full blocks of input and only the excessItems remain.
     869    // In each case we set up temporary buffers for input and output and then
     870    // call the Multiblock routine.
     871    //
     872
     873    iBuilder->SetInsertPoint(tempBlockCheck);
     874    PHINode * const tempBlocksRemain = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "tempBlocksRemain");
     875    tempBlocksRemain->addIncoming(blocksRemaining, doSegmentOuterLoop);
     876    tempBlocksRemain->addIncoming(reducedBlocksToDo, multiBlockFinal);
     877   
     878    haveBlocks = iBuilder->CreateICmpUGT(tempBlocksRemain, iBuilder->getSize(0));
     879    iBuilder->CreateCondBr(iBuilder->CreateOr(mIsFinal, haveBlocks), doTempBufferBlock, segmentDone);
    864880
    865881    //
     
    872888    //
    873889    iBuilder->SetInsertPoint(doTempBufferBlock);
    874     PHINode * const tempBlockItems = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "tempBlockItems");
    875     tempBlockItems->addIncoming(blockSize, doSegmentOuterLoop);
    876     tempBlockItems->addIncoming(excessItems, finalBlockCheck);
    877 
    878     // Will this be the final block processing?
    879     Value * doFinal = iBuilder->CreateICmpULT(tempBlockItems, blockSize);
     890    Value * tempBlockItems = iBuilder->CreateSelect(haveBlocks, blockSize, excessItems);
    880891
    881892    // Begin constructing the doMultiBlock args.
     
    889900    Constant * const tempAreaSize = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(tempParameterStructType), iBuilder->getSizeTy(), false);
    890901    iBuilder->CreateMemZero(tempParameterArea, tempAreaSize);
    891 
     902   
    892903    // For each input and output buffer, copy over necessary data starting from the last
    893904    // block boundary.
     
    899910        tempBufPtr = iBuilder->CreatePointerCast(tempBufPtr, mStreamSetInputBuffers[i]->getPointerType());
    900911
    901         auto & rate = mStreamSetInputs[i].rate;
    902912        Value * blockItemPos = iBuilder->CreateAnd(processedItemCount[i], blockBaseMask);
    903913
    904914        // The number of items to copy is determined by the processing rate requirements.
    905915        if (i > 1) {
     916            auto & rate = mStreamSetInputs[i].rate;
    906917            std::string refSet = mStreamSetInputs[i].rate.referenceStreamSet();
    907918            if (refSet.empty()) {
    908                 finalItemPos.push_back(rate.CreateRatioCalculation(iBuilder, finalItemPos[0], doFinal));
     919                finalItemPos.push_back(rate.CreateRatioCalculation(iBuilder, finalItemPos[0], iBuilder->CreateNot(haveBlocks)));
    909920            }
    910921            else {
     
    912923                std::tie(port, ssIdx) = getStreamPort(mStreamSetInputs[i].name);
    913924                assert (port == Port::Input && ssIdx < i);
    914                 finalItemPos.push_back(rate.CreateRatioCalculation(iBuilder, finalItemPos[ssIdx], doFinal));
     925                finalItemPos.push_back(rate.CreateRatioCalculation(iBuilder, finalItemPos[ssIdx], iBuilder->CreateNot(haveBlocks)));
    915926            }
    916927        }
     
    919930        Value * copyItems1 = iBuilder->CreateSelect(iBuilder->CreateICmpULT(neededItems, availFromBase), neededItems, availFromBase);
    920931        Value * copyItems2 = iBuilder->CreateSub(neededItems, copyItems1);
    921         mStreamSetInputBuffers[i]->createBlockAlignedCopy(iBuilder, tempBufPtr, inputBlockPtr[i], copyItems1);
     932        Value * inputPtr = iBuilder->getInputStreamBlockPtr(mStreamSetInputs[i].name, iBuilder->getInt32(0));
     933        mStreamSetInputBuffers[i]->createBlockAlignedCopy(iBuilder, tempBufPtr, inputPtr, copyItems1);
    922934        Value * nextBufPtr = iBuilder->CreateGEP(tempBufPtr, iBuilder->CreateUDiv(availFromBase, blockSize));
    923935        mStreamSetInputBuffers[i]->createBlockAlignedCopy(iBuilder, nextBufPtr, iBuilder->getStreamSetBufferPtr(mStreamSetInputs[i].name), copyItems2);
    924         Value * itemAddress = iBuilder->CreatePtrToInt(iBuilder->getRawOutputPointer(mStreamSetInputs[i].name, iBuilder->getInt32(0), processedItemCount[i]), iBuilder->getSizeTy());
     936        Value * itemAddress = iBuilder->CreatePtrToInt(iBuilder->getRawInputPointer(mStreamSetInputs[i].name, iBuilder->getInt32(0), processedItemCount[i]), iBuilder->getSizeTy());
    925937        Value * baseAddress = iBuilder->CreatePtrToInt(inputBlockPtr[i], iBuilder->getSizeTy());
    926938        Value * tempAddress = iBuilder->CreateAdd(iBuilder->CreatePtrToInt(tempBufPtr, iBuilder->getSizeTy()), iBuilder->CreateSub(itemAddress, baseAddress));
    927         tempArgs.push_back(iBuilder->CreateBitCast(tempAddress, mStreamSetInputBuffers[i]->getPointerType()));
     939        tempArgs.push_back(iBuilder->CreateIntToPtr(tempAddress, mStreamSetInputBuffers[i]->getPointerType()));
    928940    }
    929941
     
    935947        mStreamSetOutputBuffers[i]->createBlockAlignedCopy(iBuilder, tempBufPtr, outputBlockPtr[i], iBuilder->CreateSub(producedItemCount[i], blockItemPos[i]));
    936948        Value * itemAddress = iBuilder->CreatePtrToInt(iBuilder->getRawOutputPointer(mStreamSetInputs[i].name, iBuilder->getInt32(0), producedItemCount[i]), iBuilder->getSizeTy());
    937         Value * baseAddress = iBuilder->CreatePtrToInt(outputBlockPtr[i], iBuilder->getSizeTy());
     949        Value * outputPtr = iBuilder->getOutputStreamBlockPtr(mStreamSetOutputs[i].name, iBuilder->getInt32(0));
     950        Value * baseAddress = iBuilder->CreatePtrToInt(outputPtr, iBuilder->getSizeTy());
    938951        Value * tempAddress = iBuilder->CreateAdd(iBuilder->CreatePtrToInt(tempBufPtr, iBuilder->getSizeTy()), iBuilder->CreateSub(itemAddress, baseAddress));
    939         tempArgs.push_back(iBuilder->CreateBitCast(tempAddress, mStreamSetOutputBuffers[i]->getPointerType()));
    940     }
    941 
     952        tempArgs.push_back(iBuilder->CreateIntToPtr(tempAddress, mStreamSetOutputBuffers[i]->getPointerType()));
     953    }
     954
     955   
    942956    iBuilder->CreateCall(multiBlockFunction, tempArgs);
    943957
     
    950964        Value * copyItems = iBuilder->CreateSub(final_items, blockItemPos[i]);
    951965        Value * copyItems1 = mStreamSetOutputBuffers[i]->getLinearlyWritableItems(iBuilder, blockItemPos[i]); // must be a whole number of blocks.
    952         mStreamSetOutputBuffers[i]->createBlockAlignedCopy(iBuilder, outputBlockPtr[i], tempBufPtr, copyItems1);
     966        Value * outputPtr = iBuilder->getOutputStreamBlockPtr(mStreamSetOutputs[i].name, iBuilder->getInt32(0));
     967        mStreamSetOutputBuffers[i]->createBlockAlignedCopy(iBuilder, outputPtr, tempBufPtr, copyItems1);
    953968        Value * copyItems2 = iBuilder->CreateSelect(iBuilder->CreateICmpULT(copyItems, copyItems), iBuilder->getSize(0), iBuilder->CreateSub(copyItems, copyItems1));
    954969        tempBufPtr = iBuilder->CreateGEP(tempBufPtr, iBuilder->CreateUDiv(copyItems1, blockSize));
     
    961976    //  actual buffers.  If this isn't the final block, loop back for more multiblock processing.
    962977    //
    963     iBuilder->CreateCondBr(doFinal, segmentDone, doSegmentOuterLoop);
     978    blocksRemaining->addIncoming(iBuilder->CreateSub(tempBlocksRemain, iBuilder->CreateZExt(haveBlocks, iBuilder->getSizeTy())), iBuilder->GetInsertBlock());
     979    iBuilder->CreateCondBr(haveBlocks, doSegmentOuterLoop, segmentDone);
    964980    iBuilder->SetInsertPoint(segmentDone);
    965981}
  • icGREP/icgrep-devel/icgrep/kernels/stdout_kernel.cpp

    r5441 r5442  
    2424   
    2525    Function::arg_iterator args = mCurrentMethod->arg_begin();
    26     Value * self = &*(args++);
     26    /* self = */ args++;
    2727    Value * itemsToDo = &*(args++);
    2828    Value * codeUnitBuffer = &*(args++);
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r5440 r5442  
    9393Value * StreamSetBuffer::getRawItemPointer(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * streamIndex, Value * absolutePosition) const {
    9494    Value * ptr = getBaseAddress(iBuilder, self);
     95
    9596    if (!isa<ConstantInt>(streamIndex) || !cast<ConstantInt>(streamIndex)->isZero()) {
    9697        ptr = iBuilder->CreateGEP(ptr, {iBuilder->getInt32(0), streamIndex});
    9798    }
     99    Value * bufferItemPosition = modByBufferBlocks(iBuilder, absolutePosition);
    98100    IntegerType * const ty = cast<IntegerType>(mBaseType->getArrayElementType()->getVectorElementType());
    99101    ptr = iBuilder->CreatePointerCast(ptr, ty->getPointerTo());
     
    101103        const auto bw = ty->getBitWidth();
    102104        if (LLVM_LIKELY((bw & (bw - 1)) == 0)) { // is power of 2
    103             absolutePosition = iBuilder->CreateUDiv(absolutePosition, ConstantInt::get(absolutePosition->getType(), 8 / bw));
     105            bufferItemPosition = iBuilder->CreateUDiv(bufferItemPosition, ConstantInt::get(absolutePosition->getType(), 8 / bw));
    104106        } else {
    105             absolutePosition = iBuilder->CreateMul(absolutePosition, ConstantInt::get(absolutePosition->getType(), bw));
    106             absolutePosition = iBuilder->CreateUDiv(absolutePosition, ConstantInt::get(absolutePosition->getType(), 8));
     107            bufferItemPosition = iBuilder->CreateMul(bufferItemPosition, ConstantInt::get(absolutePosition->getType(), bw));
     108            bufferItemPosition = iBuilder->CreateUDiv(bufferItemPosition, ConstantInt::get(absolutePosition->getType(), 8));
    107109        }
    108110    }
    109     return iBuilder->CreateGEP(ptr, absolutePosition);
     111    Value * rawPointer = iBuilder->CreateGEP(ptr, bufferItemPosition);
     112    return rawPointer;
    110113}
    111114
Note: See TracChangeset for help on using the changeset viewer.