Ignore:
Timestamp:
Jun 11, 2017, 9:41:57 PM (2 years ago)
Author:
cameron
Message:

Progress on multi-block kernels, including support for SourceBuffers

Location:
icGREP/icgrep-devel/icgrep/kernels
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5501 r5503  
    857857
    858858    Value * linearlyAvailStrides = stridesRemaining;
     859
    859860    for (unsigned i = 0; i < inputSetCount; i++) {
    860861        Value * p = kb->getProcessedItemCount(mStreamSetInputs[i].name);
     
    868869            if ((rate.isFixedRatio()) && (rate.getRatioNumerator() == rate.getRatioDenominator()) && (rate.referenceStreamSet() == "")) {
    869870                maxReferenceItems = kb->CreateMul(kb->getLinearlyAccessibleBlocks(mStreamSetInputs[i].name, blkNo), blockSize);
     871
    870872            } else {
    871873                Value * linearlyAvailItems = kb->getLinearlyAccessibleItems(mStreamSetInputs[i].name, p);
     
    10111013    Constant * const tempAreaSize = ConstantExpr::getIntegerCast(ConstantExpr::getSizeOf(tempParameterStructType), kb->getSizeTy(), false);
    10121014    kb->CreateMemZero(tempParameterArea, tempAreaSize);
    1013 
    10141015    // For each input and output buffer, copy over necessary data starting from the last
    10151016    // block boundary.
     
    10211022        Type * bufPtrType = mStreamSetInputBuffers[i]->getPointerType();
    10221023        tempBufPtr = kb->CreatePointerCast(tempBufPtr, bufPtrType);
    1023         Value * blockBasePos = kb->CreateAnd(processedItemCount[i], blockBaseMask);
     1024        ConstantInt * strideItems = kb->getSize(itemsPerStride[i]);
     1025        Value * strideBasePos = kb->CreateSub(processedItemCount[i], kb->CreateURem(processedItemCount[i], strideItems));
     1026        Value * blockBasePos = (itemsPerStride[i] % bitBlockWidth == 0) ? strideBasePos : kb->CreateAnd(strideBasePos, blockBaseMask);
     1027
    10241028        // The number of items to copy is determined by the processing rate requirements.
    10251029        if (i > 1) {
     
    10361040            }
    10371041            else {
    1038                 // Ensure that there is up to a full block of items, if available.
     1042                // Ensure that there is up to a full stride of items, if available.
    10391043                Value * avail = kb->CreateSub(mAvailableItemCount[i], processedItemCount[i]);
    1040                 finalItemCountNeeded.push_back(kb->CreateSelect(kb->CreateICmpULT(avail, blockSize), avail, blockSize));
     1044                finalItemCountNeeded.push_back(kb->CreateSelect(kb->CreateICmpULT(avail, strideItems), avail, strideItems));
    10411045            }
    10421046        }
     1047       
    10431048        Value * inputPtr = kb->CreatePointerCast(kb->getRawInputPointer(mStreamSetInputs[i].name, kb->getInt32(0), blockBasePos), bufPtrType);
     1049       
    10441050        if (maxBlocksToCopy[i] == 1) {
    10451051            // copy one block
     
    10481054        else {
    10491055            Value * neededItems = kb->CreateSub(finalItemCountNeeded[i], blockBasePos);
    1050             // Round up to exact multiple of block size.
    1051             neededItems = kb->CreateAnd(kb->CreateAdd(neededItems, kb->getSize(bitBlockWidth - 1)), blockBaseMask);
    10521056            Value * availFromBase = kb->getLinearlyAccessibleItems(mStreamSetInputs[i].name, blockBasePos);
    1053             Value * copyItems1 = kb->CreateSelect(kb->CreateICmpULT(neededItems, availFromBase), neededItems, availFromBase);
     1057            Value * allAvail = kb->CreateICmpULE(neededItems, availFromBase);
     1058            Value * copyItems1 = kb->CreateSelect(allAvail, neededItems, availFromBase);
     1059            mStreamSetInputBuffers[i]->createBlockAlignedCopy(kb.get(), tempBufPtr, inputPtr, copyItems1);
     1060            BasicBlock * copyRemaining = kb->CreateBasicBlock("copyRemaining");
     1061            BasicBlock * copyDone = kb->CreateBasicBlock("copyDone");
     1062            kb->CreateCondBr(allAvail, copyDone, copyRemaining);
     1063            kb->SetInsertPoint(copyRemaining);
    10541064            Value * copyItems2 = kb->CreateSub(neededItems, copyItems1);
    1055             mStreamSetInputBuffers[i]->createBlockAlignedCopy(kb.get(), tempBufPtr, inputPtr, copyItems1);
     1065            Value * nextBasePos = kb->CreateAdd(blockBasePos, copyItems1);
     1066            Value * nextInputPtr = kb->CreatePointerCast(kb->getRawInputPointer(mStreamSetInputs[i].name, kb->getInt32(0), nextBasePos), bufPtrType);
    10561067            Value * nextBufPtr = kb->CreateGEP(tempBufPtr, kb->CreateUDiv(copyItems1, blockSize));
    1057             mStreamSetInputBuffers[i]->createBlockAlignedCopy(kb.get(), nextBufPtr, kb->getStreamSetBufferPtr(mStreamSetInputs[i].name), copyItems2);
     1068            mStreamSetInputBuffers[i]->createBlockAlignedCopy(kb.get(), nextBufPtr, nextInputPtr, copyItems2);
     1069            kb->CreateBr(copyDone);
     1070            kb->SetInsertPoint(copyDone);
    10581071        }
    10591072        Value * itemAddress = kb->getRawInputPointer(mStreamSetInputs[i].name, kb->getInt32(0), processedItemCount[i]);
     
    10901103        Value * copyItems = kb->CreateSub(finalItems, blockBasePos[i]);
    10911104        // Round up to exact multiple of block size.
    1092         copyItems = kb->CreateAnd(kb->CreateAdd(copyItems, kb->getSize(bitBlockWidth - 1)), blockBaseMask);
     1105        //copyItems = kb->CreateAnd(kb->CreateAdd(copyItems, kb->getSize(bitBlockWidth - 1)), blockBaseMask);
    10931106        Value * writableFromBase = kb->getLinearlyWritableItems(mStreamSetOutputs[i].name, blockBasePos[i]); // must be a whole number of blocks.
    1094         Value * copyItems1 = kb->CreateSelect(kb->CreateICmpULT(copyItems, writableFromBase), copyItems, writableFromBase);
    1095         Value * copyBlocks1 = kb->CreateUDiv(copyItems1, blockSize);
    1096         mStreamSetOutputBuffers[i]->createBlockCopy(kb.get(), outputBlockPtr[i], tempBufPtr, copyBlocks1);
     1107        Value * allWritable = kb->CreateICmpULE(copyItems, writableFromBase);
     1108        Value * copyItems1 = kb->CreateSelect(allWritable, copyItems, writableFromBase);
     1109        mStreamSetOutputBuffers[i]->createBlockAlignedCopy(kb.get(), outputBlockPtr[i], tempBufPtr, copyItems1);
     1110        BasicBlock * copyRemaining = kb->CreateBasicBlock("copyRemaining");
     1111        BasicBlock * copyDone = kb->CreateBasicBlock("copyDone");
     1112        kb->CreateCondBr(allWritable, copyDone, copyRemaining);
     1113        kb->SetInsertPoint(copyRemaining);
    10971114        Value * copyItems2 = kb->CreateSub(copyItems, copyItems1);
    1098         tempBufPtr = kb->CreateGEP(tempBufPtr, copyBlocks1);
    1099         Value * outputBaseBlockPtr = kb->CreateGEP(kb->getBaseAddress(mStreamSetOutputs[i].name), kb->getInt32(0));
    1100         mStreamSetOutputBuffers[i]->createBlockAlignedCopy(kb.get(), outputBaseBlockPtr, tempBufPtr, kb->CreateUDiv(copyItems2, blockSize));
     1115        Value * nextBasePos = kb->CreateAdd(blockBasePos[i], copyItems1);
     1116        Type * bufPtrType = mStreamSetOutputBuffers[i]->getPointerType();
     1117        Value * nextOutputPtr = kb->CreatePointerCast(kb->getRawOutputPointer(mStreamSetOutputs[i].name, kb->getInt32(0), nextBasePos), bufPtrType);
     1118        tempBufPtr = kb->CreateGEP(tempBufPtr, kb->CreateUDiv(copyItems1, blockSize));
     1119        mStreamSetOutputBuffers[i]->createBlockAlignedCopy(kb.get(), nextOutputPtr, tempBufPtr, copyItems2);
     1120        kb->CreateBr(copyDone);
     1121        kb->SetInsertPoint(copyDone);
    11011122    }
    11021123
  • icGREP/icgrep-devel/icgrep/kernels/source_kernel.cpp

    r5501 r5503  
    6464    size->addIncoming(pageSize, emptyFile);
    6565    size->addIncoming(fileSize, nonEmptyFile);
    66 
    6766    kb->setBaseAddress("sourceBuffer", buffer);
    6867    kb->setBufferedSize("sourceBuffer", size);
    6968    kb->setScalarField("readableBuffer", buffer);
    7069    kb->setScalarField("fileSize", fileSize);
     70    kb->setCapacity("sourceBuffer", fileSize);
    7171    kb->CreateMAdvise(buffer, fileSize, CBuilder::ADVICE_WILLNEED);
    7272
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r5501 r5503  
    179179        Value * copyBits = iBuilder->CreateMul(itemsToCopy, iBuilder->getSize(fieldWidth));
    180180        Value * copyBytes = iBuilder->CreateLShr(iBuilder->CreateAdd(copyBits, iBuilder->getSize(7)), iBuilder->getSize(3));
     181       
    181182        iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, int8PtrTy), iBuilder->CreateBitCast(sourceBlockPtr, int8PtrTy), copyBytes, alignment);
    182183    } else {
     
    222223void SourceBuffer::setBaseAddress(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * addr) const {
    223224    Value * const ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(int(SourceBuffer::Field::BaseAddress))});
     225
    224226    iBuilder->CreateStore(iBuilder->CreatePointerCast(addr, ptr->getType()->getPointerElementType()), ptr);
    225227}
     
    239241Value * SourceBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromPosition) const {
    240242    return iBuilder->CreateSub(getCapacity(iBuilder, self), fromPosition);
     243}
     244
     245Value * SourceBuffer::getLinearlyAccessibleBlocks(IDISA::IDISA_Builder * const iBuilder, Value * self, Value * fromBlock) const {
     246    return iBuilder->CreateSub(iBuilder->CreateUDiv(getCapacity(iBuilder, self), iBuilder->getSize(iBuilder->getBitBlockWidth())), fromBlock);
    241247}
    242248
  • icGREP/icgrep-devel/icgrep/kernels/streamset.h

    r5501 r5503  
    156156   
    157157    llvm::Value * getLinearlyAccessibleItems(IDISA::IDISA_Builder * const iBuilder, llvm::Value * self, llvm::Value * fromPosition) const override;
     158   
     159    llvm::Value * getLinearlyAccessibleBlocks(IDISA::IDISA_Builder * const iBuilder, llvm::Value * self, llvm::Value * fromBlock) const override;
     160
    158161
    159162protected:
Note: See TracChangeset for help on using the changeset viewer.