Changeset 5451 for icGREP/icgrep-devel


Ignore:
Timestamp:
May 15, 2017, 11:30:27 AM (2 years ago)
Author:
cameron
Message:

Fix off-by-one error in termination position; mask off bits after N in output

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/until_n.cpp

    r5450 r5451  
    4747    BasicBlock * notFoundYet = kb->CreateBasicBlock("notFoundYet");
    4848    BasicBlock * findNth = kb->CreateBasicBlock("findNth");
    49     BasicBlock * getNthBitPosn = kb->CreateBasicBlock("getNthBitPosn");
     49    BasicBlock * getPosnAfterNth = kb->CreateBasicBlock("getPosnAfterNth");
    5050    BasicBlock * nthPosFound = kb->CreateBasicBlock("nthPosFound");
    5151    BasicBlock * doSegmentReturn = kb->CreateBasicBlock("doSegmentReturn");
     
    140140    // set of itemsToDo.
    141141   
    142     Value * finalPosition = kb->CreateAdd(kb->getProducedItemCount("uptoN"), itemsToDo);
    143     kb->setProducedItemCount("uptoN", finalPosition);
     142    Value * finalCount = kb->CreateAdd(kb->getProducedItemCount("uptoN"), itemsToDo);
     143    kb->setProducedItemCount("uptoN", finalCount);
    144144    kb->CreateBr(doSegmentReturn);
    145145
    146146    //
    147     // With the last input scanMask loaded, the count of bits reaches or
    148     // exceeds N.  Find the exact position of the Nth bit in this pack.
     147    // With the last input scanMask loaded, the count of one bits seen reaches or
     148    // exceeds N.  Determine the position immediately after the Nth one bit.
    149149    //
    150150    kb->SetInsertPoint(findNth);
     
    158158    seen1->addIncoming(oneMoreSeen, findNth);
    159159    remainingBits->addIncoming(clearLowest, findNth);
    160     kb->CreateCondBr(kb->CreateICmpULT(oneMoreSeen, N), findNth, getNthBitPosn);
     160    kb->CreateCondBr(kb->CreateICmpULT(oneMoreSeen, N), findNth, getPosnAfterNth);
    161161
    162162    //
    163163    // We have cleared the low bits of scanMask up to and including the Nth one bit in the stream.
    164     kb->SetInsertPoint(getNthBitPosn);
     164    kb->SetInsertPoint(getPosnAfterNth);
    165165    Value * scanMaskUpToN = kb->CreateXor(scanMask, clearLowest);
    166     Value * posnInPack = kb->CreateSub(ConstantInt::get(iPackTy, packSize-1), kb->CreateCountReverseZeroes(scanMaskUpToN));
     166    Value * posnInPack = kb->CreateSub(ConstantInt::get(iPackTy, packSize), kb->CreateCountReverseZeroes(scanMaskUpToN));
    167167    Value * posnInGroup = kb->CreateAdd(kb->CreateMul(nonZeroPack, kb->getSize(packSize)), posnInPack);
    168168    Value * posnInItemsToDo = kb->CreateAdd(kb->CreateMul(blockGroupBase, blockSize), posnInGroup);
     
    173173   
    174174    kb->SetInsertPoint(nthPosFound);
    175     finalPosition = kb->CreateAdd(kb->getProcessedItemCount("bits"), posnInItemsToDo);
    176     kb->setProcessedItemCount("bits", finalPosition);
    177     kb->setProducedItemCount("uptoN", finalPosition);
     175    finalCount = kb->CreateAdd(kb->getProcessedItemCount("bits"), posnInItemsToDo);
     176    Value * finalBlock = kb->CreateUDiv(posnInItemsToDo, blockSize);
     177    blk = kb->CreateBlockAlignedLoad(kb->CreateGEP(sourceBitstream, {finalBlock, kb->getInt32(0)}));
     178    blk = kb->CreateAnd(blk, kb->CreateNot(kb->bitblock_mask_from(kb->CreateURem(posnInItemsToDo, blockSize))));
     179    Value * outputPtr = kb->CreateGEP(uptoN_bitstream, {finalBlock, kb->getInt32(0)});
     180    kb->CreateBlockAlignedStore(blk, outputPtr);
     181    kb->setProcessedItemCount("bits", finalCount);
     182    kb->setProducedItemCount("uptoN", finalCount);
    178183    kb->setTerminationSignal();
    179184    kb->CreateBr(doSegmentReturn);
Note: See TracChangeset for help on using the changeset viewer.