source: icGREP/icgrep-devel/icgrep/kernels/lz4_bytestream_decoder.cpp @ 5549

Last change on this file since 5549 was 5440, checked in by nmedfort, 2 years ago

Large refactoring step. Removed IR generation code from Kernel (formerly KernelBuilder) and moved it into the new KernelBuilder class.

File size: 9.9 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "lz4_bytestream_decoder.h"
8#include <kernels/kernel_builder.h>
9
10using namespace llvm;
11using namespace kernel;
12
13Value * getInputPtr(const std::unique_ptr<KernelBuilder> & iBuilder, Value * blockStartPtr, Value * offset) {
14    return iBuilder->CreateGEP(
15            iBuilder->CreatePointerCast(blockStartPtr, iBuilder->getInt32Ty()->getPointerTo()),
16            offset
17            );
18}
19
20Value * selectMin(const std::unique_ptr<KernelBuilder> & iBuilder, Value * a, Value * b) {
21    return iBuilder->CreateSelect(iBuilder->CreateICmpULT(a, b), a, b);
22}
23
24void LZ4ByteStreamDecoderKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
25    BasicBlock * entry_block = iBuilder->GetInsertBlock();
26    BasicBlock * loopBody = iBuilder->CreateBasicBlock("bytestream_block_loop_body");
27    BasicBlock * loopExit = iBuilder->CreateBasicBlock("bytestream_block_loop_exit");
28
29    Value * bufferSize = iBuilder->getSize(mBufferSize);
30    Value * bufferSizeMask = iBuilder->CreateSub(bufferSize, iBuilder->getSize(1));
31    Value * iterations = selectMin(iBuilder,
32            iBuilder->getSize(iBuilder->getBitBlockWidth()),
33            iBuilder->CreateSub(iBuilder->getAvailableItemCount("literalIndexes"), iBuilder->getProcessedItemCount("literalIndexes")));
34    Value * inputBufferBasePtr = iBuilder->getRawInputPointer("inputStream", iBuilder->getSize(0), iBuilder->getSize(0));
35    Value * outputBufferBasePtr = iBuilder->getRawOutputPointer("outputStream", iBuilder->getSize(0), iBuilder->getSize(0));
36    iBuilder->CreateBr(loopBody);
37
38    iBuilder->SetInsertPoint(loopBody);
39    PHINode * phiInputIndex = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "inputIndex");
40    phiInputIndex->addIncoming(iBuilder->getSize(0), entry_block);
41
42    // =================================================
43    // Indexes extraction.
44    Value * literalStartPtr = getInputPtr(iBuilder,
45            iBuilder->getInputStreamBlockPtr("literalIndexes", iBuilder->getSize(0)), phiInputIndex);
46    Value * literalLengthPtr = getInputPtr(iBuilder,
47            iBuilder->getInputStreamBlockPtr("literalIndexes", iBuilder->getSize(1)), phiInputIndex);
48    Value * matchOffsetPtr = getInputPtr(iBuilder,
49            iBuilder->getInputStreamBlockPtr("matchIndexes", iBuilder->getSize(0)), phiInputIndex);
50    Value * matchLengthPtr = getInputPtr(iBuilder,
51            iBuilder->getInputStreamBlockPtr("matchIndexes", iBuilder->getSize(1)), phiInputIndex);
52    Value * literalStart = iBuilder->CreateZExt(iBuilder->CreateLoad(literalStartPtr), iBuilder->getSizeTy());
53    Value * literalLength = iBuilder->CreateZExt(iBuilder->CreateLoad(literalLengthPtr), iBuilder->getSizeTy());
54    Value * matchOffset = iBuilder->CreateZExt(iBuilder->CreateLoad(matchOffsetPtr), iBuilder->getSizeTy());
55    Value * matchLength = iBuilder->CreateZExt(iBuilder->CreateLoad(matchLengthPtr), iBuilder->getSizeTy());
56
57#if 0
58    Value * processedItem = iBuilder->CreateAdd(iBuilder->getProcessedItemCount("literalIndexes"), phiInputIndex);
59    iBuilder->CallPrintInt("ProccessedItem", processedItem);
60    iBuilder->CallPrintInt("LiteralStart", literalStart);
61    iBuilder->CallPrintInt("LiteralLength", literalLength);
62    iBuilder->CallPrintInt("MatchOffset", matchOffset);
63    iBuilder->CallPrintInt("MatchLength", matchLength);
64#endif
65
66    // =================================================
67    // Literals.
68    Value * outputItems = iBuilder->getProducedItemCount("outputStream");
69    Value * bufferOffset = iBuilder->CreateAnd(outputItems, bufferSizeMask);
70    Value * remainingBuffer = iBuilder->CreateSub(bufferSize, bufferOffset);
71    Value * copyLength1 = selectMin(iBuilder, remainingBuffer, literalLength);
72    iBuilder->CreateMemCpy(
73            iBuilder->CreateGEP(outputBufferBasePtr, bufferOffset),
74            iBuilder->CreateGEP(inputBufferBasePtr, literalStart),
75            copyLength1, 1);    // no alignment guaranteed
76    // Potential wrap around.
77    iBuilder->CreateMemCpy(
78            outputBufferBasePtr,
79            iBuilder->CreateGEP(inputBufferBasePtr, iBuilder->CreateAdd(literalStart, copyLength1)),
80            iBuilder->CreateSub(literalLength, copyLength1), 8);        // Buffer start is aligned.
81    outputItems = iBuilder->CreateAdd(outputItems, literalLength);
82
83    // =================================================
84    // Match copy.
85    // Conceptually, copy [cur-matchOffset, cur-matchOffset+matchLength] to
86    // [cur, cur+matchLength] sequentially, with two ranges potentially overlapping.
87    // If matchOffset is larger than 4, we copy 4 bytes at a time; otherwise, one byte a time.
88    Value * matchStart = iBuilder->CreateSub(outputItems, matchOffset);
89    Value * baseSrcOffset = iBuilder->CreateAnd(matchStart, bufferSizeMask);
90    Value * baseDstOffset = iBuilder->CreateAnd(outputItems, bufferSizeMask);
91    Value * copyStep = iBuilder->CreateSelect(
92            iBuilder->CreateICmpULT(matchOffset, iBuilder->getSize(4)),
93            iBuilder->getSize(1),
94            iBuilder->getSize(4)
95            );
96    BasicBlock * cpyLoopCond = iBuilder->CreateBasicBlock("matchcopy_loop_cond");
97    BasicBlock * cpyLoopBody = iBuilder->CreateBasicBlock("matchcopy_loop_body");
98    BasicBlock * cpyLoopExit = iBuilder->CreateBasicBlock("matchcopy_loop_exit");
99    iBuilder->CreateBr(cpyLoopCond);
100
101    iBuilder->SetInsertPoint(cpyLoopCond);
102    PHINode * phiSrcOffset = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3, "srcOffset");
103    PHINode * phiDstOffset = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3, "dstOffset");
104    PHINode * phiIter = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3, "iterator");
105    phiSrcOffset->addIncoming(baseSrcOffset, loopBody);
106    phiDstOffset->addIncoming(baseDstOffset, loopBody);
107    phiIter->addIncoming(iBuilder->getSize(0), loopBody);
108    iBuilder->CreateCondBr(
109            iBuilder->CreateICmpUGE(phiIter, matchLength),
110            cpyLoopExit,
111            cpyLoopBody
112            );
113
114    iBuilder->SetInsertPoint(cpyLoopBody);
115#ifndef NDEBUG
116    iBuilder->CallPrintIntToStderr("srcOffset", phiSrcOffset);
117    iBuilder->CallPrintIntToStderr("dstOffset", phiDstOffset);
118#endif
119    BasicBlock * reachingBufferEnd_then = iBuilder->CreateBasicBlock("matchcopy_reaching_buf_end_then");
120    BasicBlock * reachingBufferEnd_else = iBuilder->CreateBasicBlock("matchcopy_reaching_buf_end_else");
121    Value * distSrcEnd = iBuilder->CreateSub(bufferSize, phiSrcOffset);
122    Value * distDstEnd = iBuilder->CreateSub(bufferSize, phiDstOffset);
123    Value * minDist = selectMin(iBuilder, distSrcEnd, distDstEnd);
124    iBuilder->CreateUnlikelyCondBr(
125            iBuilder->CreateICmpULE(minDist, iBuilder->getSize(4)),
126            reachingBufferEnd_then,
127            reachingBufferEnd_else
128            );
129
130    iBuilder->SetInsertPoint(reachingBufferEnd_then);
131    Value * src8 = iBuilder->CreateGEP(outputBufferBasePtr, phiSrcOffset);
132    Value * dst8 = iBuilder->CreateGEP(outputBufferBasePtr, phiDstOffset);
133    iBuilder->CreateStore(iBuilder->CreateLoad(src8), dst8);
134    Value * newSrcOffset = iBuilder->CreateAnd(
135            iBuilder->CreateAdd(phiSrcOffset, iBuilder->getSize(1)),
136            bufferSizeMask
137            );
138    Value * newDstOffset = iBuilder->CreateAnd(
139            iBuilder->CreateAdd(phiDstOffset, iBuilder->getSize(1)),
140            bufferSizeMask
141            );
142    phiSrcOffset->addIncoming(newSrcOffset, reachingBufferEnd_then);
143    phiDstOffset->addIncoming(newDstOffset, reachingBufferEnd_then);
144    phiIter->addIncoming(iBuilder->CreateAdd(phiIter, iBuilder->getSize(1)), reachingBufferEnd_then);
145    iBuilder->CreateBr(cpyLoopCond);
146
147    iBuilder->SetInsertPoint(reachingBufferEnd_else);
148    // Copy 4 bytes at a time (regardless of step length).
149    Value * src32 = iBuilder->CreatePointerCast(
150            iBuilder->CreateGEP(outputBufferBasePtr, phiSrcOffset),
151            iBuilder->getInt32Ty()->getPointerTo());
152    Value * dst32 = iBuilder->CreatePointerCast(
153            iBuilder->CreateGEP(outputBufferBasePtr, phiDstOffset),
154            iBuilder->getInt32Ty()->getPointerTo());
155    // Force unaligned load/store of an int32.
156    iBuilder->CreateAlignedStore(iBuilder->CreateAlignedLoad(src32, 1), dst32, 1);
157    newSrcOffset = iBuilder->CreateAnd(
158            iBuilder->CreateAdd(phiSrcOffset, copyStep),
159            bufferSizeMask
160            );
161    newDstOffset = iBuilder->CreateAnd(
162            iBuilder->CreateAdd(phiDstOffset, copyStep),
163            bufferSizeMask
164            );
165    phiSrcOffset->addIncoming(newSrcOffset, reachingBufferEnd_else);
166    phiDstOffset->addIncoming(newDstOffset, reachingBufferEnd_else);
167    phiIter->addIncoming(iBuilder->CreateAdd(phiIter, copyStep), reachingBufferEnd_else);
168    iBuilder->CreateBr(cpyLoopCond);
169
170    iBuilder->SetInsertPoint(cpyLoopExit);
171    outputItems = iBuilder->CreateAdd(outputItems, matchLength);
172    iBuilder->setProducedItemCount("outputStream", outputItems);
173
174    Value * newInputIndex = iBuilder->CreateAdd(phiInputIndex, iBuilder->getSize(1));
175    phiInputIndex->addIncoming(newInputIndex, cpyLoopExit);
176    iBuilder->CreateUnlikelyCondBr(
177            iBuilder->CreateICmpEQ(newInputIndex, iterations),
178            loopExit,
179            loopBody
180            );
181
182    iBuilder->SetInsertPoint(loopExit);
183#ifndef NDEBUG
184    iBuilder->CallPrintInt("Decompressed bytes", iBuilder->getProducedItemCount("outputStream"));
185#endif
186}
187
188
189LZ4ByteStreamDecoderKernel::LZ4ByteStreamDecoderKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, size_t bufferSize)
190: BlockOrientedKernel("lz4ByteStreamDecoder",
191    // Inputs
192    {Binding{iBuilder->getStreamSetTy(2, 32), "literalIndexes"},
193     Binding{iBuilder->getStreamSetTy(2, 32), "matchIndexes"},
194     Binding{iBuilder->getStreamSetTy(1, 8), "inputStream", UnknownRate()}},
195    // Outputs
196    {Binding{iBuilder->getStreamSetTy(1, 8), "outputStream", UnknownRate()}},
197    // Arguments
198    {},
199    {},
200    {}),
201 mBufferSize(bufferSize) {
202    setNoTerminateAttribute(true);
203}
Note: See TracBrowser for help on using the repository browser.