Ignore:
Timestamp:
Dec 3, 2017, 12:40:40 PM (20 months ago)
Author:
nmedfort
Message:

Bug fixes and simplified MultiBlockKernel logic

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/lz4_bytestream_decoder.cpp

    r5706 r5755  
    1111using namespace kernel;
    1212
    13 Value * getInputPtr(const std::unique_ptr<KernelBuilder> & iBuilder, Value * blockStartPtr, Value * offset) {
    14     return iBuilder->CreateGEP(
    15             iBuilder->CreatePointerCast(blockStartPtr, iBuilder->getInt32Ty()->getPointerTo()),
    16             offset
    17             );
    18 }
     13Value * LZ4ByteStreamDecoderKernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & b, Value * numOfStrides) {
    1914
    20 Value * selectMin(const std::unique_ptr<KernelBuilder> & iBuilder, Value * a, Value * b) {
    21     return iBuilder->CreateSelect(iBuilder->CreateICmpULT(a, b), a, b);
    22 }
     15    BasicBlock * entry_block = b->GetInsertBlock();
     16    BasicBlock * loopBody = b->CreateBasicBlock("bytestream_block_loop_body");
     17    BasicBlock * loopExit = b->CreateBasicBlock("bytestream_block_loop_exit");
     18    Type * const i32PtrTy = b->getInt32Ty()->getPointerTo();
     19    Type * const sizeTy = b->getSizeTy();
     20    assert (mBufferSize > 0);
     21    Value * bufferSize = b->getSize(mBufferSize);
     22    Value * bufferSizeMask = b->getSize(mBufferSize - 1);
     23    Value * const iterations = b->getAvailableItemCount("literalIndexes");
     24    Value * const inputBufferBasePtr = b->getRawInputPointer("inputStream", b->getInt32(0));
     25    Value * const outputBufferBasePtr = b->getRawOutputPointer("outputStream", b->getInt32(0));
     26    Value * baseLiteralStartPtr = b->getInputStreamBlockPtr("literalIndexes", b->getSize(0));
     27    baseLiteralStartPtr = b->CreatePointerCast(baseLiteralStartPtr, i32PtrTy);
     28    Value * baseLiteralLengthPtr = b->getInputStreamBlockPtr("literalIndexes", b->getSize(1));
     29    baseLiteralLengthPtr = b->CreatePointerCast(baseLiteralLengthPtr, i32PtrTy);
     30    Value * baseMatchOffsetPtr = b->getInputStreamBlockPtr("matchIndexes", b->getSize(0));
     31    baseMatchOffsetPtr = b->CreatePointerCast(baseMatchOffsetPtr, i32PtrTy);
     32    Value * baseMatchLengthPtr = b->getInputStreamBlockPtr("matchIndexes", b->getSize(1));
     33    baseMatchLengthPtr = b->CreatePointerCast(baseMatchLengthPtr, i32PtrTy);
     34    b->CreateBr(loopBody);
    2335
    24 void LZ4ByteStreamDecoderKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    25     BasicBlock * entry_block = iBuilder->GetInsertBlock();
    26     BasicBlock * loopBody = iBuilder->CreateBasicBlock("bytestream_block_loop_body");
    27     BasicBlock * loopExit = iBuilder->CreateBasicBlock("bytestream_block_loop_exit");
    28 
    29     Value * bufferSize = iBuilder->getSize(mBufferSize);
    30     Value * bufferSizeMask = iBuilder->CreateSub(bufferSize, iBuilder->getSize(1));
    31     Value * iterations = selectMin(iBuilder,
    32             iBuilder->getSize(iBuilder->getBitBlockWidth()),
    33             iBuilder->CreateSub(iBuilder->getAvailableItemCount("literalIndexes"), iBuilder->getProcessedItemCount("literalIndexes")));
    34     Value * inputBufferBasePtr = iBuilder->getRawInputPointer("inputStream", iBuilder->getSize(0));
    35     Value * outputBufferBasePtr = iBuilder->getRawOutputPointer("outputStream", iBuilder->getSize(0));
    36     iBuilder->CreateBr(loopBody);
    37 
    38     iBuilder->SetInsertPoint(loopBody);
    39     PHINode * phiInputIndex = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "inputIndex");
    40     phiInputIndex->addIncoming(iBuilder->getSize(0), entry_block);
     36    b->SetInsertPoint(loopBody);
     37    PHINode * phiInputIndex = b->CreatePHI(sizeTy, 2, "inputIndex");
     38    phiInputIndex->addIncoming(b->getSize(0), entry_block);
    4139
    4240    // =================================================
    4341    // Indexes extraction.
    44     Value * literalStartPtr = getInputPtr(iBuilder,
    45             iBuilder->getInputStreamBlockPtr("literalIndexes", iBuilder->getSize(0)), phiInputIndex);
    46     Value * literalLengthPtr = getInputPtr(iBuilder,
    47             iBuilder->getInputStreamBlockPtr("literalIndexes", iBuilder->getSize(1)), phiInputIndex);
    48     Value * matchOffsetPtr = getInputPtr(iBuilder,
    49             iBuilder->getInputStreamBlockPtr("matchIndexes", iBuilder->getSize(0)), phiInputIndex);
    50     Value * matchLengthPtr = getInputPtr(iBuilder,
    51             iBuilder->getInputStreamBlockPtr("matchIndexes", iBuilder->getSize(1)), phiInputIndex);
    52     Value * literalStart = iBuilder->CreateZExt(iBuilder->CreateLoad(literalStartPtr), iBuilder->getSizeTy());
    53     Value * literalLength = iBuilder->CreateZExt(iBuilder->CreateLoad(literalLengthPtr), iBuilder->getSizeTy());
    54     Value * matchOffset = iBuilder->CreateZExt(iBuilder->CreateLoad(matchOffsetPtr), iBuilder->getSizeTy());
    55     Value * matchLength = iBuilder->CreateZExt(iBuilder->CreateLoad(matchLengthPtr), iBuilder->getSizeTy());
    5642
    57 //    iBuilder->CallPrintInt(" ----- literalStart", literalStart);
    58 //    iBuilder->CallPrintInt(" ----- literalLength", literalLength);
    59 //    iBuilder->CallPrintInt(" ----- matchOffset", matchOffset);
    60 //    iBuilder->CallPrintInt(" ----- matchLength", matchLength);
    6143
    62 //#if 0
    63 //    Value * processedItem = iBuilder->CreateAdd(iBuilder->getProcessedItemCount("literalIndexes"), phiInputIndex);
    64 //    iBuilder->CallPrintInt("ProccessedItem", processedItem);
    65 //    iBuilder->CallPrintInt("LiteralStart", literalStart);
    66 //    iBuilder->CallPrintInt("LiteralLength", literalLength);
    67 //    iBuilder->CallPrintInt("MatchOffset", matchOffset);
    68 //    iBuilder->CallPrintInt("MatchLength", matchLength);
    69 //#endif
     44    Value * literalStartPtr = b->CreateGEP(baseLiteralStartPtr, phiInputIndex);
     45    Value * literalLengthPtr = b->CreateGEP(baseLiteralLengthPtr, phiInputIndex);
     46    Value * matchOffsetPtr = b->CreateGEP(baseMatchOffsetPtr, phiInputIndex);
     47    Value * matchLengthPtr = b->CreateGEP(baseMatchLengthPtr, phiInputIndex);
     48
     49    Value * literalStart = b->CreateZExt(b->CreateLoad(literalStartPtr), sizeTy);
     50    Value * literalLength = b->CreateZExt(b->CreateLoad(literalLengthPtr), sizeTy);
     51    Value * matchOffset = b->CreateZExt(b->CreateLoad(matchOffsetPtr), sizeTy);
     52    Value * matchLength = b->CreateZExt(b->CreateLoad(matchLengthPtr), sizeTy);
    7053
    7154    // =================================================
    7255    // Literals.
    73     Value * outputItems = iBuilder->getProducedItemCount("outputStream");
    74     Value * bufferOffset = iBuilder->CreateAnd(outputItems, bufferSizeMask);
    75     Value * remainingBuffer = iBuilder->CreateSub(bufferSize, bufferOffset);
    76     Value * copyLength1 = selectMin(iBuilder, remainingBuffer, literalLength);
    77     iBuilder->CreateMemCpy(
    78             iBuilder->CreateGEP(outputBufferBasePtr, bufferOffset),
    79             iBuilder->CreateGEP(inputBufferBasePtr, literalStart),
     56    Value * outputItems = b->getProducedItemCount("outputStream");
     57    Value * bufferOffset = b->CreateAnd(outputItems, bufferSizeMask);
     58    Value * remainingBuffer = b->CreateSub(bufferSize, bufferOffset);
     59    Value * copyLength1 = b->CreateUMin(remainingBuffer, literalLength);
     60    b->CreateMemCpy(
     61            b->CreateGEP(outputBufferBasePtr, bufferOffset),
     62            b->CreateGEP(inputBufferBasePtr, literalStart),
    8063            copyLength1, 1);    // no alignment guaranteed
    8164    // Potential wrap around.
    82     iBuilder->CreateMemCpy(
     65    b->CreateMemCpy(
    8366            outputBufferBasePtr,
    84             iBuilder->CreateGEP(inputBufferBasePtr, iBuilder->CreateAdd(literalStart, copyLength1)),
    85             iBuilder->CreateSub(literalLength, copyLength1), 1); // Buffer start is aligned.
     67            b->CreateGEP(inputBufferBasePtr, b->CreateAdd(literalStart, copyLength1)),
     68            b->CreateSub(literalLength, copyLength1), 1); // Buffer start is aligned.
    8669    // NOTE: Test case reported non-8-byte alignment
    87     outputItems = iBuilder->CreateAdd(outputItems, literalLength);
     70    outputItems = b->CreateAdd(outputItems, literalLength);
    8871
    8972    // =================================================
     
    9275    // [cur, cur+matchLength] sequentially, with two ranges potentially overlapping.
    9376    // If matchOffset is at least 4, we copy 4 bytes at a time; otherwise, one byte at a time.
    94     Value * matchStart = iBuilder->CreateSub(outputItems, matchOffset);
    95     Value * baseSrcOffset = iBuilder->CreateAnd(matchStart, bufferSizeMask);
    96     Value * baseDstOffset = iBuilder->CreateAnd(outputItems, bufferSizeMask);
    97     Value * copyStep = iBuilder->CreateSelect(
    98             iBuilder->CreateICmpULT(matchOffset, iBuilder->getSize(4)),
    99             iBuilder->getSize(1),
    100             iBuilder->getSize(4)
    101             );
    102     BasicBlock * cpyLoopCond = iBuilder->CreateBasicBlock("matchcopy_loop_cond");
    103     BasicBlock * cpyLoopBody = iBuilder->CreateBasicBlock("matchcopy_loop_body");
    104     BasicBlock * cpyLoopExit = iBuilder->CreateBasicBlock("matchcopy_loop_exit");
    105     iBuilder->CreateBr(cpyLoopCond);
     77    Value * matchStart = b->CreateSub(outputItems, matchOffset);
     78    Value * baseSrcOffset = b->CreateAnd(matchStart, bufferSizeMask);
     79    Value * baseDstOffset = b->CreateAnd(outputItems, bufferSizeMask);
     80    Value * const copyStep = b->CreateSelect(
     81            b->CreateICmpULT(matchOffset, b->getSize(4)),
     82            b->getSize(1),
     83            b->getSize(4));
     84    BasicBlock * cpyLoopCond = b->CreateBasicBlock("matchcopy_loop_cond");
     85    BasicBlock * cpyLoopBody = b->CreateBasicBlock("matchcopy_loop_body");
     86    BasicBlock * cpyLoopExit = b->CreateBasicBlock("matchcopy_loop_exit");
     87    b->CreateBr(cpyLoopCond);
    10688
    107     iBuilder->SetInsertPoint(cpyLoopCond);
    108     PHINode * phiSrcOffset = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3, "srcOffset");
    109     PHINode * phiDstOffset = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3, "dstOffset");
    110     PHINode * phiIter = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3, "iterator");
     89    b->SetInsertPoint(cpyLoopCond);
     90    PHINode * phiSrcOffset = b->CreatePHI(sizeTy, 3, "srcOffset");
     91    PHINode * phiDstOffset = b->CreatePHI(sizeTy, 3, "dstOffset");
     92    PHINode * phiIter = b->CreatePHI(sizeTy, 3, "iterator");
    11193    phiSrcOffset->addIncoming(baseSrcOffset, loopBody);
    11294    phiDstOffset->addIncoming(baseDstOffset, loopBody);
    113     phiIter->addIncoming(iBuilder->getSize(0), loopBody);
    114     iBuilder->CreateCondBr(
    115             iBuilder->CreateICmpUGE(phiIter, matchLength),
     95    phiIter->addIncoming(b->getSize(0), loopBody);
     96    b->CreateCondBr(
     97            b->CreateICmpUGE(phiIter, matchLength),
    11698            cpyLoopExit,
    11799            cpyLoopBody
    118100            );
    119101
    120     iBuilder->SetInsertPoint(cpyLoopBody);
     102    b->SetInsertPoint(cpyLoopBody);
    121103//#ifndef NDEBUG
    122104//    iBuilder->CallPrintIntToStderr("srcOffset", phiSrcOffset);
    123105//    iBuilder->CallPrintIntToStderr("dstOffset", phiDstOffset);
    124106//#endif
    125     BasicBlock * reachingBufferEnd_then = iBuilder->CreateBasicBlock("matchcopy_reaching_buf_end_then");
    126     BasicBlock * reachingBufferEnd_else = iBuilder->CreateBasicBlock("matchcopy_reaching_buf_end_else");
    127     Value * distSrcEnd = iBuilder->CreateSub(bufferSize, phiSrcOffset);
    128     Value * distDstEnd = iBuilder->CreateSub(bufferSize, phiDstOffset);
    129     Value * minDist = selectMin(iBuilder, distSrcEnd, distDstEnd);
    130     iBuilder->CreateUnlikelyCondBr(
    131             iBuilder->CreateICmpULE(minDist, iBuilder->getSize(4)),
     107    BasicBlock * reachingBufferEnd_then = b->CreateBasicBlock("matchcopy_reaching_buf_end_then");
     108    BasicBlock * reachingBufferEnd_else = b->CreateBasicBlock("matchcopy_reaching_buf_end_else");
     109    Value * distSrcEnd = b->CreateSub(bufferSize, phiSrcOffset);
     110    Value * distDstEnd = b->CreateSub(bufferSize, phiDstOffset);
     111    Value * minDist = b->CreateUMin(distSrcEnd, distDstEnd);
     112    b->CreateUnlikelyCondBr(
     113            b->CreateICmpULE(minDist, b->getSize(4)),
    132114            reachingBufferEnd_then,
    133115            reachingBufferEnd_else
    134116            );
    135117
    136     iBuilder->SetInsertPoint(reachingBufferEnd_then);
    137     Value * src8 = iBuilder->CreateGEP(outputBufferBasePtr, phiSrcOffset);
    138     Value * dst8 = iBuilder->CreateGEP(outputBufferBasePtr, phiDstOffset);
    139     iBuilder->CreateStore(iBuilder->CreateLoad(src8), dst8);
    140     Value * newSrcOffset = iBuilder->CreateAnd(
    141             iBuilder->CreateAdd(phiSrcOffset, iBuilder->getSize(1)),
     118    b->SetInsertPoint(reachingBufferEnd_then);
     119    Value * src8 = b->CreateGEP(outputBufferBasePtr, phiSrcOffset);
     120    Value * dst8 = b->CreateGEP(outputBufferBasePtr, phiDstOffset);
     121    b->CreateStore(b->CreateLoad(src8), dst8);
     122    Value * newSrcOffset = b->CreateAnd(
     123            b->CreateAdd(phiSrcOffset, b->getSize(1)),
    142124            bufferSizeMask
    143125            );
    144     Value * newDstOffset = iBuilder->CreateAnd(
    145             iBuilder->CreateAdd(phiDstOffset, iBuilder->getSize(1)),
     126    Value * newDstOffset = b->CreateAnd(
     127            b->CreateAdd(phiDstOffset, b->getSize(1)),
    146128            bufferSizeMask
    147129            );
    148130    phiSrcOffset->addIncoming(newSrcOffset, reachingBufferEnd_then);
    149131    phiDstOffset->addIncoming(newDstOffset, reachingBufferEnd_then);
    150     phiIter->addIncoming(iBuilder->CreateAdd(phiIter, iBuilder->getSize(1)), reachingBufferEnd_then);
    151     iBuilder->CreateBr(cpyLoopCond);
     132    phiIter->addIncoming(b->CreateAdd(phiIter, b->getSize(1)), reachingBufferEnd_then);
     133    b->CreateBr(cpyLoopCond);
    152134
    153     iBuilder->SetInsertPoint(reachingBufferEnd_else);
     135    b->SetInsertPoint(reachingBufferEnd_else);
    154136    // Copy 4 bytes at a time (regardless of step length).
    155     Value * src32 = iBuilder->CreatePointerCast(
    156             iBuilder->CreateGEP(outputBufferBasePtr, phiSrcOffset),
    157             iBuilder->getInt32Ty()->getPointerTo());
    158     Value * dst32 = iBuilder->CreatePointerCast(
    159             iBuilder->CreateGEP(outputBufferBasePtr, phiDstOffset),
    160             iBuilder->getInt32Ty()->getPointerTo());
     137    Value * src32 = b->CreatePointerCast(
     138            b->CreateGEP(outputBufferBasePtr, phiSrcOffset),
     139            b->getInt32Ty()->getPointerTo());
     140    Value * dst32 = b->CreatePointerCast(
     141            b->CreateGEP(outputBufferBasePtr, phiDstOffset),
     142            b->getInt32Ty()->getPointerTo());
    161143    // Force unaligned load/store of an int32.
    162     iBuilder->CreateAlignedStore(iBuilder->CreateAlignedLoad(src32, 1), dst32, 1);
    163     newSrcOffset = iBuilder->CreateAnd(
    164             iBuilder->CreateAdd(phiSrcOffset, copyStep),
     144    b->CreateAlignedStore(b->CreateAlignedLoad(src32, 1), dst32, 1);
     145    newSrcOffset = b->CreateAnd(
     146            b->CreateAdd(phiSrcOffset, copyStep),
    165147            bufferSizeMask
    166148            );
    167     newDstOffset = iBuilder->CreateAnd(
    168             iBuilder->CreateAdd(phiDstOffset, copyStep),
     149    newDstOffset = b->CreateAnd(
     150            b->CreateAdd(phiDstOffset, copyStep),
    169151            bufferSizeMask
    170152            );
    171153    phiSrcOffset->addIncoming(newSrcOffset, reachingBufferEnd_else);
    172154    phiDstOffset->addIncoming(newDstOffset, reachingBufferEnd_else);
    173     phiIter->addIncoming(iBuilder->CreateAdd(phiIter, copyStep), reachingBufferEnd_else);
    174     iBuilder->CreateBr(cpyLoopCond);
     155    phiIter->addIncoming(b->CreateAdd(phiIter, copyStep), reachingBufferEnd_else);
     156    b->CreateBr(cpyLoopCond);
    175157
    176     iBuilder->SetInsertPoint(cpyLoopExit);
    177     outputItems = iBuilder->CreateAdd(outputItems, matchLength);
    178     iBuilder->setProducedItemCount("outputStream", outputItems);
     158    b->SetInsertPoint(cpyLoopExit);
     159    outputItems = b->CreateAdd(outputItems, matchLength);
     160    b->setProducedItemCount("outputStream", outputItems);
    179161
    180     Value * newInputIndex = iBuilder->CreateAdd(phiInputIndex, iBuilder->getSize(1));
     162    Value * newInputIndex = b->CreateAdd(phiInputIndex, b->getSize(1));
    181163    phiInputIndex->addIncoming(newInputIndex, cpyLoopExit);
    182     iBuilder->CreateUnlikelyCondBr(
    183             iBuilder->CreateICmpEQ(newInputIndex, iterations),
     164    b->CreateUnlikelyCondBr(
     165            b->CreateICmpEQ(newInputIndex, iterations),
    184166            loopExit,
    185167            loopBody
    186168            );
    187169
    188     iBuilder->SetInsertPoint(loopExit);
    189 //#ifndef NDEBUG
    190 //    iBuilder->CallPrintInt("Decompressed bytes", iBuilder->getProducedItemCount("outputStream"));
    191 //#endif
     170    b->SetInsertPoint(loopExit);
     171    return numOfStrides;
    192172}
    193173
    194174
    195175LZ4ByteStreamDecoderKernel::LZ4ByteStreamDecoderKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, size_t bufferSize)
    196 : BlockOrientedKernel("lz4ByteStreamDecoder",
     176: MultiBlockKernel("lz4ByteStreamDecoder",
    197177    // Inputs
    198178    {Binding{iBuilder->getStreamSetTy(2, 32), "literalIndexes"},
    199179     Binding{iBuilder->getStreamSetTy(2, 32), "matchIndexes"},
    200      Binding{iBuilder->getStreamSetTy(1, 8), "inputStream", UnknownRate(), LookBehind(65536)}},
     180     Binding{iBuilder->getStreamSetTy(1, 8), "inputStream", FixedRate(), { Deferred(), LookBehind(65536) }}},
    201181    // Outputs
    202182    {Binding{iBuilder->getStreamSetTy(1, 8), "outputStream", UnknownRate()}},
Note: See TracChangeset for help on using the changeset viewer.