source: icGREP/icgrep-devel/icgrep/kernels/lz4_bytestream_decoder.cpp @ 5822

Last change on this file since 5822 was 5793, checked in by nmedfort, 18 months ago

Bug fix for pipeline: it was terminating too early when there was insufficient output space to process all of the input for a kernel.

File size: 8.6 KB
RevLine 
[5422]1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "lz4_bytestream_decoder.h"
[5436]8#include <kernels/kernel_builder.h>
[5422]9
10using namespace llvm;
11using namespace kernel;
12
[5755]13Value * LZ4ByteStreamDecoderKernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & b, Value * numOfStrides) {
[5422]14
[5755]15    BasicBlock * entry_block = b->GetInsertBlock();
16    BasicBlock * loopBody = b->CreateBasicBlock("bytestream_block_loop_body");
17    BasicBlock * loopExit = b->CreateBasicBlock("bytestream_block_loop_exit");
18    Type * const i32PtrTy = b->getInt32Ty()->getPointerTo();
19    Type * const sizeTy = b->getSizeTy();
20    assert (mBufferSize > 0);
21    Value * bufferSize = b->getSize(mBufferSize);
22    Value * bufferSizeMask = b->getSize(mBufferSize - 1);
23    Value * const iterations = b->getAvailableItemCount("literalIndexes");
24    Value * const inputBufferBasePtr = b->getRawInputPointer("inputStream", b->getInt32(0));
25    Value * const outputBufferBasePtr = b->getRawOutputPointer("outputStream", b->getInt32(0));
26    Value * baseLiteralStartPtr = b->getInputStreamBlockPtr("literalIndexes", b->getSize(0));
27    baseLiteralStartPtr = b->CreatePointerCast(baseLiteralStartPtr, i32PtrTy);
28    Value * baseLiteralLengthPtr = b->getInputStreamBlockPtr("literalIndexes", b->getSize(1));
29    baseLiteralLengthPtr = b->CreatePointerCast(baseLiteralLengthPtr, i32PtrTy);
30    Value * baseMatchOffsetPtr = b->getInputStreamBlockPtr("matchIndexes", b->getSize(0));
31    baseMatchOffsetPtr = b->CreatePointerCast(baseMatchOffsetPtr, i32PtrTy);
32    Value * baseMatchLengthPtr = b->getInputStreamBlockPtr("matchIndexes", b->getSize(1));
33    baseMatchLengthPtr = b->CreatePointerCast(baseMatchLengthPtr, i32PtrTy);
34    b->CreateBr(loopBody);
[5645]35
[5755]36    b->SetInsertPoint(loopBody);
37    PHINode * phiInputIndex = b->CreatePHI(sizeTy, 2, "inputIndex");
38    phiInputIndex->addIncoming(b->getSize(0), entry_block);
[5422]39
40    // =================================================
41    // Indexes extraction.
42
43
[5755]44    Value * literalStartPtr = b->CreateGEP(baseLiteralStartPtr, phiInputIndex);
45    Value * literalLengthPtr = b->CreateGEP(baseLiteralLengthPtr, phiInputIndex);
46    Value * matchOffsetPtr = b->CreateGEP(baseMatchOffsetPtr, phiInputIndex);
47    Value * matchLengthPtr = b->CreateGEP(baseMatchLengthPtr, phiInputIndex);
[5706]48
[5755]49    Value * literalStart = b->CreateZExt(b->CreateLoad(literalStartPtr), sizeTy);
50    Value * literalLength = b->CreateZExt(b->CreateLoad(literalLengthPtr), sizeTy);
51    Value * matchOffset = b->CreateZExt(b->CreateLoad(matchOffsetPtr), sizeTy);
52    Value * matchLength = b->CreateZExt(b->CreateLoad(matchLengthPtr), sizeTy);
53
[5422]54    // =================================================
55    // Literals.
[5755]56    Value * outputItems = b->getProducedItemCount("outputStream");
57    Value * bufferOffset = b->CreateAnd(outputItems, bufferSizeMask);
58    Value * remainingBuffer = b->CreateSub(bufferSize, bufferOffset);
59    Value * copyLength1 = b->CreateUMin(remainingBuffer, literalLength);
60    b->CreateMemCpy(
61            b->CreateGEP(outputBufferBasePtr, bufferOffset),
62            b->CreateGEP(inputBufferBasePtr, literalStart),
[5422]63            copyLength1, 1);    // no alignment guaranteed
64    // Potential wrap around.
[5755]65    b->CreateMemCpy(
[5422]66            outputBufferBasePtr,
[5755]67            b->CreateGEP(inputBufferBasePtr, b->CreateAdd(literalStart, copyLength1)),
68            b->CreateSub(literalLength, copyLength1), 1); // Buffer start is aligned.
[5645]69    // NOTE: Test case reported non-8-byte alignment
[5755]70    outputItems = b->CreateAdd(outputItems, literalLength);
[5422]71
72    // =================================================
73    // Match copy.
74    // Conceptually, copy [cur-matchOffset, cur-matchOffset+matchLength] to
75    // [cur, cur+matchLength] sequentially, with two ranges potentially overlapping.
76    // If matchOffset is larger than 4, we copy 4 bytes at a time; otherwise, one byte a time.
[5755]77    Value * matchStart = b->CreateSub(outputItems, matchOffset);
78    Value * baseSrcOffset = b->CreateAnd(matchStart, bufferSizeMask);
79    Value * baseDstOffset = b->CreateAnd(outputItems, bufferSizeMask);
80    Value * const copyStep = b->CreateSelect(
81            b->CreateICmpULT(matchOffset, b->getSize(4)),
82            b->getSize(1),
83            b->getSize(4));
84    BasicBlock * cpyLoopCond = b->CreateBasicBlock("matchcopy_loop_cond");
85    BasicBlock * cpyLoopBody = b->CreateBasicBlock("matchcopy_loop_body");
86    BasicBlock * cpyLoopExit = b->CreateBasicBlock("matchcopy_loop_exit");
87    b->CreateBr(cpyLoopCond);
[5422]88
[5755]89    b->SetInsertPoint(cpyLoopCond);
90    PHINode * phiSrcOffset = b->CreatePHI(sizeTy, 3, "srcOffset");
91    PHINode * phiDstOffset = b->CreatePHI(sizeTy, 3, "dstOffset");
92    PHINode * phiIter = b->CreatePHI(sizeTy, 3, "iterator");
[5422]93    phiSrcOffset->addIncoming(baseSrcOffset, loopBody);
94    phiDstOffset->addIncoming(baseDstOffset, loopBody);
[5755]95    phiIter->addIncoming(b->getSize(0), loopBody);
96    b->CreateCondBr(
97            b->CreateICmpUGE(phiIter, matchLength),
[5422]98            cpyLoopExit,
99            cpyLoopBody
100            );
101
[5755]102    b->SetInsertPoint(cpyLoopBody);
[5706]103//#ifndef NDEBUG
104//    iBuilder->CallPrintIntToStderr("srcOffset", phiSrcOffset);
105//    iBuilder->CallPrintIntToStderr("dstOffset", phiDstOffset);
106//#endif
[5755]107    BasicBlock * reachingBufferEnd_then = b->CreateBasicBlock("matchcopy_reaching_buf_end_then");
108    BasicBlock * reachingBufferEnd_else = b->CreateBasicBlock("matchcopy_reaching_buf_end_else");
109    Value * distSrcEnd = b->CreateSub(bufferSize, phiSrcOffset);
110    Value * distDstEnd = b->CreateSub(bufferSize, phiDstOffset);
111    Value * minDist = b->CreateUMin(distSrcEnd, distDstEnd);
112    b->CreateUnlikelyCondBr(
113            b->CreateICmpULE(minDist, b->getSize(4)),
[5422]114            reachingBufferEnd_then,
115            reachingBufferEnd_else
116            );
117
[5755]118    b->SetInsertPoint(reachingBufferEnd_then);
119    Value * src8 = b->CreateGEP(outputBufferBasePtr, phiSrcOffset);
120    Value * dst8 = b->CreateGEP(outputBufferBasePtr, phiDstOffset);
121    b->CreateStore(b->CreateLoad(src8), dst8);
122    Value * newSrcOffset = b->CreateAnd(
123            b->CreateAdd(phiSrcOffset, b->getSize(1)),
[5422]124            bufferSizeMask
125            );
[5755]126    Value * newDstOffset = b->CreateAnd(
127            b->CreateAdd(phiDstOffset, b->getSize(1)),
[5422]128            bufferSizeMask
129            );
130    phiSrcOffset->addIncoming(newSrcOffset, reachingBufferEnd_then);
131    phiDstOffset->addIncoming(newDstOffset, reachingBufferEnd_then);
[5755]132    phiIter->addIncoming(b->CreateAdd(phiIter, b->getSize(1)), reachingBufferEnd_then);
133    b->CreateBr(cpyLoopCond);
[5422]134
[5755]135    b->SetInsertPoint(reachingBufferEnd_else);
[5422]136    // Copy 4 bytes at a time (regardless of step length).
[5755]137    Value * src32 = b->CreatePointerCast(
138            b->CreateGEP(outputBufferBasePtr, phiSrcOffset),
139            b->getInt32Ty()->getPointerTo());
140    Value * dst32 = b->CreatePointerCast(
141            b->CreateGEP(outputBufferBasePtr, phiDstOffset),
142            b->getInt32Ty()->getPointerTo());
[5422]143    // Force unaligned load/store of an int32.
[5755]144    b->CreateAlignedStore(b->CreateAlignedLoad(src32, 1), dst32, 1);
145    newSrcOffset = b->CreateAnd(
146            b->CreateAdd(phiSrcOffset, copyStep),
[5422]147            bufferSizeMask
148            );
[5755]149    newDstOffset = b->CreateAnd(
150            b->CreateAdd(phiDstOffset, copyStep),
[5422]151            bufferSizeMask
152            );
153    phiSrcOffset->addIncoming(newSrcOffset, reachingBufferEnd_else);
154    phiDstOffset->addIncoming(newDstOffset, reachingBufferEnd_else);
[5755]155    phiIter->addIncoming(b->CreateAdd(phiIter, copyStep), reachingBufferEnd_else);
156    b->CreateBr(cpyLoopCond);
[5422]157
[5755]158    b->SetInsertPoint(cpyLoopExit);
159    outputItems = b->CreateAdd(outputItems, matchLength);
160    b->setProducedItemCount("outputStream", outputItems);
[5422]161
[5755]162    Value * newInputIndex = b->CreateAdd(phiInputIndex, b->getSize(1));
[5422]163    phiInputIndex->addIncoming(newInputIndex, cpyLoopExit);
[5755]164    b->CreateUnlikelyCondBr(
165            b->CreateICmpEQ(newInputIndex, iterations),
[5422]166            loopExit,
167            loopBody
168            );
169
[5755]170    b->SetInsertPoint(loopExit);
171    return numOfStrides;
[5422]172}
173
174
[5436]175LZ4ByteStreamDecoderKernel::LZ4ByteStreamDecoderKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, size_t bufferSize)
[5755]176: MultiBlockKernel("lz4ByteStreamDecoder",
[5793]177// Inputs
178{Binding{iBuilder->getStreamSetTy(2, 32), "literalIndexes"},
179 Binding{iBuilder->getStreamSetTy(2, 32), "matchIndexes"},
180 Binding{iBuilder->getStreamSetTy(1, 8), "inputStream", FixedRate(), { Deferred(), Misaligned(), LookBehind(65536) }}},
181// Outputs
182{Binding{iBuilder->getStreamSetTy(1, 8), "outputStream", UnknownRate()}},
183// Arguments
184{},
185{},
186{})
187, mBufferSize(bufferSize) {
188
[5422]189}
[5793]190
191
Note: See TracBrowser for help on using the repository browser.