source: icGREP/icgrep-devel/icgrep/kernels/lz4_bytestream_decoder.cpp @ 5706

Last change on this file since 5706 was 5706, checked in by nmedfort, 20 months ago

First stage of MultiBlockKernel and pipeline restructuring

File size: 10.2 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
#include "lz4_bytestream_decoder.h"
#include <cassert>
#include <kernels/kernel_builder.h>
9
10using namespace llvm;
11using namespace kernel;
12
13Value * getInputPtr(const std::unique_ptr<KernelBuilder> & iBuilder, Value * blockStartPtr, Value * offset) {
14    return iBuilder->CreateGEP(
15            iBuilder->CreatePointerCast(blockStartPtr, iBuilder->getInt32Ty()->getPointerTo()),
16            offset
17            );
18}
19
20Value * selectMin(const std::unique_ptr<KernelBuilder> & iBuilder, Value * a, Value * b) {
21    return iBuilder->CreateSelect(iBuilder->CreateICmpULT(a, b), a, b);
22}
23
24void LZ4ByteStreamDecoderKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
25    BasicBlock * entry_block = iBuilder->GetInsertBlock();
26    BasicBlock * loopBody = iBuilder->CreateBasicBlock("bytestream_block_loop_body");
27    BasicBlock * loopExit = iBuilder->CreateBasicBlock("bytestream_block_loop_exit");
28
29    Value * bufferSize = iBuilder->getSize(mBufferSize);
30    Value * bufferSizeMask = iBuilder->CreateSub(bufferSize, iBuilder->getSize(1));
31    Value * iterations = selectMin(iBuilder,
32            iBuilder->getSize(iBuilder->getBitBlockWidth()),
33            iBuilder->CreateSub(iBuilder->getAvailableItemCount("literalIndexes"), iBuilder->getProcessedItemCount("literalIndexes")));
34    Value * inputBufferBasePtr = iBuilder->getRawInputPointer("inputStream", iBuilder->getSize(0));
35    Value * outputBufferBasePtr = iBuilder->getRawOutputPointer("outputStream", iBuilder->getSize(0));
36    iBuilder->CreateBr(loopBody);
37
38    iBuilder->SetInsertPoint(loopBody);
39    PHINode * phiInputIndex = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "inputIndex");
40    phiInputIndex->addIncoming(iBuilder->getSize(0), entry_block);
41
42    // =================================================
43    // Indexes extraction.
44    Value * literalStartPtr = getInputPtr(iBuilder,
45            iBuilder->getInputStreamBlockPtr("literalIndexes", iBuilder->getSize(0)), phiInputIndex);
46    Value * literalLengthPtr = getInputPtr(iBuilder,
47            iBuilder->getInputStreamBlockPtr("literalIndexes", iBuilder->getSize(1)), phiInputIndex);
48    Value * matchOffsetPtr = getInputPtr(iBuilder,
49            iBuilder->getInputStreamBlockPtr("matchIndexes", iBuilder->getSize(0)), phiInputIndex);
50    Value * matchLengthPtr = getInputPtr(iBuilder,
51            iBuilder->getInputStreamBlockPtr("matchIndexes", iBuilder->getSize(1)), phiInputIndex);
52    Value * literalStart = iBuilder->CreateZExt(iBuilder->CreateLoad(literalStartPtr), iBuilder->getSizeTy());
53    Value * literalLength = iBuilder->CreateZExt(iBuilder->CreateLoad(literalLengthPtr), iBuilder->getSizeTy());
54    Value * matchOffset = iBuilder->CreateZExt(iBuilder->CreateLoad(matchOffsetPtr), iBuilder->getSizeTy());
55    Value * matchLength = iBuilder->CreateZExt(iBuilder->CreateLoad(matchLengthPtr), iBuilder->getSizeTy());
56
57//    iBuilder->CallPrintInt(" ----- literalStart", literalStart);
58//    iBuilder->CallPrintInt(" ----- literalLength", literalLength);
59//    iBuilder->CallPrintInt(" ----- matchOffset", matchOffset);
60//    iBuilder->CallPrintInt(" ----- matchLength", matchLength);
61
62//#if 0
63//    Value * processedItem = iBuilder->CreateAdd(iBuilder->getProcessedItemCount("literalIndexes"), phiInputIndex);
64//    iBuilder->CallPrintInt("ProccessedItem", processedItem);
65//    iBuilder->CallPrintInt("LiteralStart", literalStart);
66//    iBuilder->CallPrintInt("LiteralLength", literalLength);
67//    iBuilder->CallPrintInt("MatchOffset", matchOffset);
68//    iBuilder->CallPrintInt("MatchLength", matchLength);
69//#endif
70
71    // =================================================
72    // Literals.
73    Value * outputItems = iBuilder->getProducedItemCount("outputStream");
74    Value * bufferOffset = iBuilder->CreateAnd(outputItems, bufferSizeMask);
75    Value * remainingBuffer = iBuilder->CreateSub(bufferSize, bufferOffset);
76    Value * copyLength1 = selectMin(iBuilder, remainingBuffer, literalLength);
77    iBuilder->CreateMemCpy(
78            iBuilder->CreateGEP(outputBufferBasePtr, bufferOffset),
79            iBuilder->CreateGEP(inputBufferBasePtr, literalStart),
80            copyLength1, 1);    // no alignment guaranteed
81    // Potential wrap around.
82    iBuilder->CreateMemCpy(
83            outputBufferBasePtr,
84            iBuilder->CreateGEP(inputBufferBasePtr, iBuilder->CreateAdd(literalStart, copyLength1)),
85            iBuilder->CreateSub(literalLength, copyLength1), 1); // Buffer start is aligned.
86    // NOTE: Test case reported non-8-byte alignment
87    outputItems = iBuilder->CreateAdd(outputItems, literalLength);
88
89    // =================================================
90    // Match copy.
91    // Conceptually, copy [cur-matchOffset, cur-matchOffset+matchLength] to
92    // [cur, cur+matchLength] sequentially, with two ranges potentially overlapping.
93    // If matchOffset is larger than 4, we copy 4 bytes at a time; otherwise, one byte a time.
94    Value * matchStart = iBuilder->CreateSub(outputItems, matchOffset);
95    Value * baseSrcOffset = iBuilder->CreateAnd(matchStart, bufferSizeMask);
96    Value * baseDstOffset = iBuilder->CreateAnd(outputItems, bufferSizeMask);
97    Value * copyStep = iBuilder->CreateSelect(
98            iBuilder->CreateICmpULT(matchOffset, iBuilder->getSize(4)),
99            iBuilder->getSize(1),
100            iBuilder->getSize(4)
101            );
102    BasicBlock * cpyLoopCond = iBuilder->CreateBasicBlock("matchcopy_loop_cond");
103    BasicBlock * cpyLoopBody = iBuilder->CreateBasicBlock("matchcopy_loop_body");
104    BasicBlock * cpyLoopExit = iBuilder->CreateBasicBlock("matchcopy_loop_exit");
105    iBuilder->CreateBr(cpyLoopCond);
106
107    iBuilder->SetInsertPoint(cpyLoopCond);
108    PHINode * phiSrcOffset = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3, "srcOffset");
109    PHINode * phiDstOffset = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3, "dstOffset");
110    PHINode * phiIter = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3, "iterator");
111    phiSrcOffset->addIncoming(baseSrcOffset, loopBody);
112    phiDstOffset->addIncoming(baseDstOffset, loopBody);
113    phiIter->addIncoming(iBuilder->getSize(0), loopBody);
114    iBuilder->CreateCondBr(
115            iBuilder->CreateICmpUGE(phiIter, matchLength),
116            cpyLoopExit,
117            cpyLoopBody
118            );
119
120    iBuilder->SetInsertPoint(cpyLoopBody);
121//#ifndef NDEBUG
122//    iBuilder->CallPrintIntToStderr("srcOffset", phiSrcOffset);
123//    iBuilder->CallPrintIntToStderr("dstOffset", phiDstOffset);
124//#endif
125    BasicBlock * reachingBufferEnd_then = iBuilder->CreateBasicBlock("matchcopy_reaching_buf_end_then");
126    BasicBlock * reachingBufferEnd_else = iBuilder->CreateBasicBlock("matchcopy_reaching_buf_end_else");
127    Value * distSrcEnd = iBuilder->CreateSub(bufferSize, phiSrcOffset);
128    Value * distDstEnd = iBuilder->CreateSub(bufferSize, phiDstOffset);
129    Value * minDist = selectMin(iBuilder, distSrcEnd, distDstEnd);
130    iBuilder->CreateUnlikelyCondBr(
131            iBuilder->CreateICmpULE(minDist, iBuilder->getSize(4)),
132            reachingBufferEnd_then,
133            reachingBufferEnd_else
134            );
135
136    iBuilder->SetInsertPoint(reachingBufferEnd_then);
137    Value * src8 = iBuilder->CreateGEP(outputBufferBasePtr, phiSrcOffset);
138    Value * dst8 = iBuilder->CreateGEP(outputBufferBasePtr, phiDstOffset);
139    iBuilder->CreateStore(iBuilder->CreateLoad(src8), dst8);
140    Value * newSrcOffset = iBuilder->CreateAnd(
141            iBuilder->CreateAdd(phiSrcOffset, iBuilder->getSize(1)),
142            bufferSizeMask
143            );
144    Value * newDstOffset = iBuilder->CreateAnd(
145            iBuilder->CreateAdd(phiDstOffset, iBuilder->getSize(1)),
146            bufferSizeMask
147            );
148    phiSrcOffset->addIncoming(newSrcOffset, reachingBufferEnd_then);
149    phiDstOffset->addIncoming(newDstOffset, reachingBufferEnd_then);
150    phiIter->addIncoming(iBuilder->CreateAdd(phiIter, iBuilder->getSize(1)), reachingBufferEnd_then);
151    iBuilder->CreateBr(cpyLoopCond);
152
153    iBuilder->SetInsertPoint(reachingBufferEnd_else);
154    // Copy 4 bytes at a time (regardless of step length).
155    Value * src32 = iBuilder->CreatePointerCast(
156            iBuilder->CreateGEP(outputBufferBasePtr, phiSrcOffset),
157            iBuilder->getInt32Ty()->getPointerTo());
158    Value * dst32 = iBuilder->CreatePointerCast(
159            iBuilder->CreateGEP(outputBufferBasePtr, phiDstOffset),
160            iBuilder->getInt32Ty()->getPointerTo());
161    // Force unaligned load/store of an int32.
162    iBuilder->CreateAlignedStore(iBuilder->CreateAlignedLoad(src32, 1), dst32, 1);
163    newSrcOffset = iBuilder->CreateAnd(
164            iBuilder->CreateAdd(phiSrcOffset, copyStep),
165            bufferSizeMask
166            );
167    newDstOffset = iBuilder->CreateAnd(
168            iBuilder->CreateAdd(phiDstOffset, copyStep),
169            bufferSizeMask
170            );
171    phiSrcOffset->addIncoming(newSrcOffset, reachingBufferEnd_else);
172    phiDstOffset->addIncoming(newDstOffset, reachingBufferEnd_else);
173    phiIter->addIncoming(iBuilder->CreateAdd(phiIter, copyStep), reachingBufferEnd_else);
174    iBuilder->CreateBr(cpyLoopCond);
175
176    iBuilder->SetInsertPoint(cpyLoopExit);
177    outputItems = iBuilder->CreateAdd(outputItems, matchLength);
178    iBuilder->setProducedItemCount("outputStream", outputItems);
179
180    Value * newInputIndex = iBuilder->CreateAdd(phiInputIndex, iBuilder->getSize(1));
181    phiInputIndex->addIncoming(newInputIndex, cpyLoopExit);
182    iBuilder->CreateUnlikelyCondBr(
183            iBuilder->CreateICmpEQ(newInputIndex, iterations),
184            loopExit,
185            loopBody
186            );
187
188    iBuilder->SetInsertPoint(loopExit);
189//#ifndef NDEBUG
190//    iBuilder->CallPrintInt("Decompressed bytes", iBuilder->getProducedItemCount("outputStream"));
191//#endif
192}
193
194
195LZ4ByteStreamDecoderKernel::LZ4ByteStreamDecoderKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, size_t bufferSize)
196: BlockOrientedKernel("lz4ByteStreamDecoder",
197    // Inputs
198    {Binding{iBuilder->getStreamSetTy(2, 32), "literalIndexes"},
199     Binding{iBuilder->getStreamSetTy(2, 32), "matchIndexes"},
200     Binding{iBuilder->getStreamSetTy(1, 8), "inputStream", UnknownRate(), LookBehind(65536)}},
201    // Outputs
202    {Binding{iBuilder->getStreamSetTy(1, 8), "outputStream", UnknownRate()}},
203    // Arguments
204    {},
205    {},
206    {}),
207 mBufferSize(bufferSize) {
208    setNoTerminateAttribute(true);
209}
Note: See TracBrowser for help on using the repository browser.