source: icGREP/icgrep-devel/icgrep/kernels/lz4_bytestream_decoder.cpp @ 5816

Last change on this file since 5816 was 5793, checked in by nmedfort, 21 months ago

Bug fix for pipeline: it was terminating too early when there was insufficient output space to process all of the input for a kernel.

File size: 8.6 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "lz4_bytestream_decoder.h"
8#include <kernels/kernel_builder.h>
9
10using namespace llvm;
11using namespace kernel;
12
13Value * LZ4ByteStreamDecoderKernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & b, Value * numOfStrides) {
14
15    BasicBlock * entry_block = b->GetInsertBlock();
16    BasicBlock * loopBody = b->CreateBasicBlock("bytestream_block_loop_body");
17    BasicBlock * loopExit = b->CreateBasicBlock("bytestream_block_loop_exit");
18    Type * const i32PtrTy = b->getInt32Ty()->getPointerTo();
19    Type * const sizeTy = b->getSizeTy();
20    assert (mBufferSize > 0);
21    Value * bufferSize = b->getSize(mBufferSize);
22    Value * bufferSizeMask = b->getSize(mBufferSize - 1);
23    Value * const iterations = b->getAvailableItemCount("literalIndexes");
24    Value * const inputBufferBasePtr = b->getRawInputPointer("inputStream", b->getInt32(0));
25    Value * const outputBufferBasePtr = b->getRawOutputPointer("outputStream", b->getInt32(0));
26    Value * baseLiteralStartPtr = b->getInputStreamBlockPtr("literalIndexes", b->getSize(0));
27    baseLiteralStartPtr = b->CreatePointerCast(baseLiteralStartPtr, i32PtrTy);
28    Value * baseLiteralLengthPtr = b->getInputStreamBlockPtr("literalIndexes", b->getSize(1));
29    baseLiteralLengthPtr = b->CreatePointerCast(baseLiteralLengthPtr, i32PtrTy);
30    Value * baseMatchOffsetPtr = b->getInputStreamBlockPtr("matchIndexes", b->getSize(0));
31    baseMatchOffsetPtr = b->CreatePointerCast(baseMatchOffsetPtr, i32PtrTy);
32    Value * baseMatchLengthPtr = b->getInputStreamBlockPtr("matchIndexes", b->getSize(1));
33    baseMatchLengthPtr = b->CreatePointerCast(baseMatchLengthPtr, i32PtrTy);
34    b->CreateBr(loopBody);
35
36    b->SetInsertPoint(loopBody);
37    PHINode * phiInputIndex = b->CreatePHI(sizeTy, 2, "inputIndex");
38    phiInputIndex->addIncoming(b->getSize(0), entry_block);
39
40    // =================================================
41    // Indexes extraction.
42
43
44    Value * literalStartPtr = b->CreateGEP(baseLiteralStartPtr, phiInputIndex);
45    Value * literalLengthPtr = b->CreateGEP(baseLiteralLengthPtr, phiInputIndex);
46    Value * matchOffsetPtr = b->CreateGEP(baseMatchOffsetPtr, phiInputIndex);
47    Value * matchLengthPtr = b->CreateGEP(baseMatchLengthPtr, phiInputIndex);
48
49    Value * literalStart = b->CreateZExt(b->CreateLoad(literalStartPtr), sizeTy);
50    Value * literalLength = b->CreateZExt(b->CreateLoad(literalLengthPtr), sizeTy);
51    Value * matchOffset = b->CreateZExt(b->CreateLoad(matchOffsetPtr), sizeTy);
52    Value * matchLength = b->CreateZExt(b->CreateLoad(matchLengthPtr), sizeTy);
53
54    // =================================================
55    // Literals.
56    Value * outputItems = b->getProducedItemCount("outputStream");
57    Value * bufferOffset = b->CreateAnd(outputItems, bufferSizeMask);
58    Value * remainingBuffer = b->CreateSub(bufferSize, bufferOffset);
59    Value * copyLength1 = b->CreateUMin(remainingBuffer, literalLength);
60    b->CreateMemCpy(
61            b->CreateGEP(outputBufferBasePtr, bufferOffset),
62            b->CreateGEP(inputBufferBasePtr, literalStart),
63            copyLength1, 1);    // no alignment guaranteed
64    // Potential wrap around.
65    b->CreateMemCpy(
66            outputBufferBasePtr,
67            b->CreateGEP(inputBufferBasePtr, b->CreateAdd(literalStart, copyLength1)),
68            b->CreateSub(literalLength, copyLength1), 1); // Buffer start is aligned.
69    // NOTE: Test case reported non-8-byte alignment
70    outputItems = b->CreateAdd(outputItems, literalLength);
71
72    // =================================================
73    // Match copy.
74    // Conceptually, copy [cur-matchOffset, cur-matchOffset+matchLength] to
75    // [cur, cur+matchLength] sequentially, with two ranges potentially overlapping.
76    // If matchOffset is larger than 4, we copy 4 bytes at a time; otherwise, one byte a time.
77    Value * matchStart = b->CreateSub(outputItems, matchOffset);
78    Value * baseSrcOffset = b->CreateAnd(matchStart, bufferSizeMask);
79    Value * baseDstOffset = b->CreateAnd(outputItems, bufferSizeMask);
80    Value * const copyStep = b->CreateSelect(
81            b->CreateICmpULT(matchOffset, b->getSize(4)),
82            b->getSize(1),
83            b->getSize(4));
84    BasicBlock * cpyLoopCond = b->CreateBasicBlock("matchcopy_loop_cond");
85    BasicBlock * cpyLoopBody = b->CreateBasicBlock("matchcopy_loop_body");
86    BasicBlock * cpyLoopExit = b->CreateBasicBlock("matchcopy_loop_exit");
87    b->CreateBr(cpyLoopCond);
88
89    b->SetInsertPoint(cpyLoopCond);
90    PHINode * phiSrcOffset = b->CreatePHI(sizeTy, 3, "srcOffset");
91    PHINode * phiDstOffset = b->CreatePHI(sizeTy, 3, "dstOffset");
92    PHINode * phiIter = b->CreatePHI(sizeTy, 3, "iterator");
93    phiSrcOffset->addIncoming(baseSrcOffset, loopBody);
94    phiDstOffset->addIncoming(baseDstOffset, loopBody);
95    phiIter->addIncoming(b->getSize(0), loopBody);
96    b->CreateCondBr(
97            b->CreateICmpUGE(phiIter, matchLength),
98            cpyLoopExit,
99            cpyLoopBody
100            );
101
102    b->SetInsertPoint(cpyLoopBody);
103//#ifndef NDEBUG
104//    iBuilder->CallPrintIntToStderr("srcOffset", phiSrcOffset);
105//    iBuilder->CallPrintIntToStderr("dstOffset", phiDstOffset);
106//#endif
107    BasicBlock * reachingBufferEnd_then = b->CreateBasicBlock("matchcopy_reaching_buf_end_then");
108    BasicBlock * reachingBufferEnd_else = b->CreateBasicBlock("matchcopy_reaching_buf_end_else");
109    Value * distSrcEnd = b->CreateSub(bufferSize, phiSrcOffset);
110    Value * distDstEnd = b->CreateSub(bufferSize, phiDstOffset);
111    Value * minDist = b->CreateUMin(distSrcEnd, distDstEnd);
112    b->CreateUnlikelyCondBr(
113            b->CreateICmpULE(minDist, b->getSize(4)),
114            reachingBufferEnd_then,
115            reachingBufferEnd_else
116            );
117
118    b->SetInsertPoint(reachingBufferEnd_then);
119    Value * src8 = b->CreateGEP(outputBufferBasePtr, phiSrcOffset);
120    Value * dst8 = b->CreateGEP(outputBufferBasePtr, phiDstOffset);
121    b->CreateStore(b->CreateLoad(src8), dst8);
122    Value * newSrcOffset = b->CreateAnd(
123            b->CreateAdd(phiSrcOffset, b->getSize(1)),
124            bufferSizeMask
125            );
126    Value * newDstOffset = b->CreateAnd(
127            b->CreateAdd(phiDstOffset, b->getSize(1)),
128            bufferSizeMask
129            );
130    phiSrcOffset->addIncoming(newSrcOffset, reachingBufferEnd_then);
131    phiDstOffset->addIncoming(newDstOffset, reachingBufferEnd_then);
132    phiIter->addIncoming(b->CreateAdd(phiIter, b->getSize(1)), reachingBufferEnd_then);
133    b->CreateBr(cpyLoopCond);
134
135    b->SetInsertPoint(reachingBufferEnd_else);
136    // Copy 4 bytes at a time (regardless of step length).
137    Value * src32 = b->CreatePointerCast(
138            b->CreateGEP(outputBufferBasePtr, phiSrcOffset),
139            b->getInt32Ty()->getPointerTo());
140    Value * dst32 = b->CreatePointerCast(
141            b->CreateGEP(outputBufferBasePtr, phiDstOffset),
142            b->getInt32Ty()->getPointerTo());
143    // Force unaligned load/store of an int32.
144    b->CreateAlignedStore(b->CreateAlignedLoad(src32, 1), dst32, 1);
145    newSrcOffset = b->CreateAnd(
146            b->CreateAdd(phiSrcOffset, copyStep),
147            bufferSizeMask
148            );
149    newDstOffset = b->CreateAnd(
150            b->CreateAdd(phiDstOffset, copyStep),
151            bufferSizeMask
152            );
153    phiSrcOffset->addIncoming(newSrcOffset, reachingBufferEnd_else);
154    phiDstOffset->addIncoming(newDstOffset, reachingBufferEnd_else);
155    phiIter->addIncoming(b->CreateAdd(phiIter, copyStep), reachingBufferEnd_else);
156    b->CreateBr(cpyLoopCond);
157
158    b->SetInsertPoint(cpyLoopExit);
159    outputItems = b->CreateAdd(outputItems, matchLength);
160    b->setProducedItemCount("outputStream", outputItems);
161
162    Value * newInputIndex = b->CreateAdd(phiInputIndex, b->getSize(1));
163    phiInputIndex->addIncoming(newInputIndex, cpyLoopExit);
164    b->CreateUnlikelyCondBr(
165            b->CreateICmpEQ(newInputIndex, iterations),
166            loopExit,
167            loopBody
168            );
169
170    b->SetInsertPoint(loopExit);
171    return numOfStrides;
172}
173
174
175LZ4ByteStreamDecoderKernel::LZ4ByteStreamDecoderKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, size_t bufferSize)
176: MultiBlockKernel("lz4ByteStreamDecoder",
177// Inputs
178{Binding{iBuilder->getStreamSetTy(2, 32), "literalIndexes"},
179 Binding{iBuilder->getStreamSetTy(2, 32), "matchIndexes"},
180 Binding{iBuilder->getStreamSetTy(1, 8), "inputStream", FixedRate(), { Deferred(), Misaligned(), LookBehind(65536) }}},
181// Outputs
182{Binding{iBuilder->getStreamSetTy(1, 8), "outputStream", UnknownRate()}},
183// Arguments
184{},
185{},
186{})
187, mBufferSize(bufferSize) {
188
189}
190
191
Note: See TracBrowser for help on using the repository browser.