source: icGREP/icgrep-devel/icgrep/kernels/lz4_bytestream_decoder.cpp @ 5644

Last change on this file since 5644 was 5644, checked in by cameron, 19 months ago

Rename UCD-scripts directory

File size: 9.8 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
#include "lz4_bytestream_decoder.h"
#include <cassert>
#include <kernels/kernel_builder.h>
#include "lz4_helper.h"
10
11using namespace llvm;
12using namespace kernel;
13
14Value * getInputPtr(const std::unique_ptr<KernelBuilder> & iBuilder, Value * blockStartPtr, Value * offset) {
15    return iBuilder->CreateGEP(
16            iBuilder->CreatePointerCast(blockStartPtr, iBuilder->getInt32Ty()->getPointerTo()),
17            offset
18            );
19}
20
21void LZ4ByteStreamDecoderKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
22    BasicBlock * entry_block = iBuilder->GetInsertBlock();
23    BasicBlock * loopBody = iBuilder->CreateBasicBlock("bytestream_block_loop_body");
24    BasicBlock * loopExit = iBuilder->CreateBasicBlock("bytestream_block_loop_exit");
25
26    Value * bufferSize = iBuilder->getSize(mBufferSize);
27    Value * bufferSizeMask = iBuilder->CreateSub(bufferSize, iBuilder->getSize(1));
28    Value * iterations = selectMin(iBuilder,
29            iBuilder->getSize(iBuilder->getBitBlockWidth()),
30            iBuilder->CreateSub(iBuilder->getAvailableItemCount("literalIndexes"), iBuilder->getProcessedItemCount("literalIndexes")));
31    Value * inputBufferBasePtr = iBuilder->getRawInputPointer("inputStream", iBuilder->getSize(0), iBuilder->getSize(0));
32    Value * outputBufferBasePtr = iBuilder->getRawOutputPointer("outputStream", iBuilder->getSize(0), iBuilder->getSize(0));
33    iBuilder->CreateBr(loopBody);
34
35    iBuilder->SetInsertPoint(loopBody);
36    PHINode * phiInputIndex = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "inputIndex");
37    phiInputIndex->addIncoming(iBuilder->getSize(0), entry_block);
38
39    // =================================================
40    // Indexes extraction.
41    Value * literalStartPtr = getInputPtr(iBuilder,
42            iBuilder->getInputStreamBlockPtr("literalIndexes", iBuilder->getSize(0)), phiInputIndex);
43    Value * literalLengthPtr = getInputPtr(iBuilder,
44            iBuilder->getInputStreamBlockPtr("literalIndexes", iBuilder->getSize(1)), phiInputIndex);
45    Value * matchOffsetPtr = getInputPtr(iBuilder,
46            iBuilder->getInputStreamBlockPtr("matchIndexes", iBuilder->getSize(0)), phiInputIndex);
47    Value * matchLengthPtr = getInputPtr(iBuilder,
48            iBuilder->getInputStreamBlockPtr("matchIndexes", iBuilder->getSize(1)), phiInputIndex);
49    Value * literalStart = iBuilder->CreateZExt(iBuilder->CreateLoad(literalStartPtr), iBuilder->getSizeTy());
50    Value * literalLength = iBuilder->CreateZExt(iBuilder->CreateLoad(literalLengthPtr), iBuilder->getSizeTy());
51    Value * matchOffset = iBuilder->CreateZExt(iBuilder->CreateLoad(matchOffsetPtr), iBuilder->getSizeTy());
52    Value * matchLength = iBuilder->CreateZExt(iBuilder->CreateLoad(matchLengthPtr), iBuilder->getSizeTy());
53
54#if 0
55    Value * processedItem = iBuilder->CreateAdd(iBuilder->getProcessedItemCount("literalIndexes"), phiInputIndex);
56    iBuilder->CallPrintInt("ProccessedItem", processedItem);
57    iBuilder->CallPrintInt("LiteralStart", literalStart);
58    iBuilder->CallPrintInt("LiteralLength", literalLength);
59    iBuilder->CallPrintInt("MatchOffset", matchOffset);
60    iBuilder->CallPrintInt("MatchLength", matchLength);
61#endif
62
63    // =================================================
64    // Literals.
65    Value * outputItems = iBuilder->getProducedItemCount("outputStream");
66    Value * bufferOffset = iBuilder->CreateAnd(outputItems, bufferSizeMask);
67    Value * remainingBuffer = iBuilder->CreateSub(bufferSize, bufferOffset);
68    Value * copyLength1 = selectMin(iBuilder, remainingBuffer, literalLength);
69    iBuilder->CreateMemCpy(
70            iBuilder->CreateGEP(outputBufferBasePtr, bufferOffset),
71            iBuilder->CreateGEP(inputBufferBasePtr, literalStart),
72            copyLength1, 1);    // no alignment guaranteed
73    // Potential wrap around.
74    iBuilder->CreateMemCpy(
75            outputBufferBasePtr,
76            iBuilder->CreateGEP(inputBufferBasePtr, iBuilder->CreateAdd(literalStart, copyLength1)),
77            iBuilder->CreateSub(literalLength, copyLength1), 8);        // Buffer start is aligned.
78    outputItems = iBuilder->CreateAdd(outputItems, literalLength);
79
80    // =================================================
81    // Match copy.
82    // Conceptually, copy [cur-matchOffset, cur-matchOffset+matchLength] to
83    // [cur, cur+matchLength] sequentially, with two ranges potentially overlapping.
84    // If matchOffset is larger than 4, we copy 4 bytes at a time; otherwise, one byte a time.
85    Value * matchStart = iBuilder->CreateSub(outputItems, matchOffset);
86    Value * baseSrcOffset = iBuilder->CreateAnd(matchStart, bufferSizeMask);
87    Value * baseDstOffset = iBuilder->CreateAnd(outputItems, bufferSizeMask);
88    Value * copyStep = iBuilder->CreateSelect(
89            iBuilder->CreateICmpULT(matchOffset, iBuilder->getSize(4)),
90            iBuilder->getSize(1),
91            iBuilder->getSize(4)
92            );
93    BasicBlock * cpyLoopCond = iBuilder->CreateBasicBlock("matchcopy_loop_cond");
94    BasicBlock * cpyLoopBody = iBuilder->CreateBasicBlock("matchcopy_loop_body");
95    BasicBlock * cpyLoopExit = iBuilder->CreateBasicBlock("matchcopy_loop_exit");
96    iBuilder->CreateBr(cpyLoopCond);
97
98    iBuilder->SetInsertPoint(cpyLoopCond);
99    PHINode * phiSrcOffset = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3, "srcOffset");
100    PHINode * phiDstOffset = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3, "dstOffset");
101    PHINode * phiIter = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3, "iterator");
102    phiSrcOffset->addIncoming(baseSrcOffset, loopBody);
103    phiDstOffset->addIncoming(baseDstOffset, loopBody);
104    phiIter->addIncoming(iBuilder->getSize(0), loopBody);
105    iBuilder->CreateCondBr(
106            iBuilder->CreateICmpUGE(phiIter, matchLength),
107            cpyLoopExit,
108            cpyLoopBody
109            );
110
111    iBuilder->SetInsertPoint(cpyLoopBody);
112#ifndef NDEBUG
113    iBuilder->CallPrintIntToStderr("srcOffset", phiSrcOffset);
114    iBuilder->CallPrintIntToStderr("dstOffset", phiDstOffset);
115#endif
116    BasicBlock * reachingBufferEnd_then = iBuilder->CreateBasicBlock("matchcopy_reaching_buf_end_then");
117    BasicBlock * reachingBufferEnd_else = iBuilder->CreateBasicBlock("matchcopy_reaching_buf_end_else");
118    Value * distSrcEnd = iBuilder->CreateSub(bufferSize, phiSrcOffset);
119    Value * distDstEnd = iBuilder->CreateSub(bufferSize, phiDstOffset);
120    Value * minDist = selectMin(iBuilder, distSrcEnd, distDstEnd);
121    iBuilder->CreateUnlikelyCondBr(
122            iBuilder->CreateICmpULE(minDist, iBuilder->getSize(4)),
123            reachingBufferEnd_then,
124            reachingBufferEnd_else
125            );
126
127    iBuilder->SetInsertPoint(reachingBufferEnd_then);
128    Value * src8 = iBuilder->CreateGEP(outputBufferBasePtr, phiSrcOffset);
129    Value * dst8 = iBuilder->CreateGEP(outputBufferBasePtr, phiDstOffset);
130    iBuilder->CreateStore(iBuilder->CreateLoad(src8), dst8);
131    Value * newSrcOffset = iBuilder->CreateAnd(
132            iBuilder->CreateAdd(phiSrcOffset, iBuilder->getSize(1)),
133            bufferSizeMask
134            );
135    Value * newDstOffset = iBuilder->CreateAnd(
136            iBuilder->CreateAdd(phiDstOffset, iBuilder->getSize(1)),
137            bufferSizeMask
138            );
139    phiSrcOffset->addIncoming(newSrcOffset, reachingBufferEnd_then);
140    phiDstOffset->addIncoming(newDstOffset, reachingBufferEnd_then);
141    phiIter->addIncoming(iBuilder->CreateAdd(phiIter, iBuilder->getSize(1)), reachingBufferEnd_then);
142    iBuilder->CreateBr(cpyLoopCond);
143
144    iBuilder->SetInsertPoint(reachingBufferEnd_else);
145    // Copy 4 bytes at a time (regardless of step length).
146    Value * src32 = iBuilder->CreatePointerCast(
147            iBuilder->CreateGEP(outputBufferBasePtr, phiSrcOffset),
148            iBuilder->getInt32Ty()->getPointerTo());
149    Value * dst32 = iBuilder->CreatePointerCast(
150            iBuilder->CreateGEP(outputBufferBasePtr, phiDstOffset),
151            iBuilder->getInt32Ty()->getPointerTo());
152    // Force unaligned load/store of an int32.
153    iBuilder->CreateAlignedStore(iBuilder->CreateAlignedLoad(src32, 1), dst32, 1);
154    newSrcOffset = iBuilder->CreateAnd(
155            iBuilder->CreateAdd(phiSrcOffset, copyStep),
156            bufferSizeMask
157            );
158    newDstOffset = iBuilder->CreateAnd(
159            iBuilder->CreateAdd(phiDstOffset, copyStep),
160            bufferSizeMask
161            );
162    phiSrcOffset->addIncoming(newSrcOffset, reachingBufferEnd_else);
163    phiDstOffset->addIncoming(newDstOffset, reachingBufferEnd_else);
164    phiIter->addIncoming(iBuilder->CreateAdd(phiIter, copyStep), reachingBufferEnd_else);
165    iBuilder->CreateBr(cpyLoopCond);
166
167    iBuilder->SetInsertPoint(cpyLoopExit);
168    outputItems = iBuilder->CreateAdd(outputItems, matchLength);
169    iBuilder->setProducedItemCount("outputStream", outputItems);
170
171    Value * newInputIndex = iBuilder->CreateAdd(phiInputIndex, iBuilder->getSize(1));
172    phiInputIndex->addIncoming(newInputIndex, cpyLoopExit);
173    iBuilder->CreateUnlikelyCondBr(
174            iBuilder->CreateICmpEQ(newInputIndex, iterations),
175            loopExit,
176            loopBody
177            );
178
179    iBuilder->SetInsertPoint(loopExit);
180#ifndef NDEBUG
181    iBuilder->CallPrintInt("Decompressed bytes", iBuilder->getProducedItemCount("outputStream"));
182#endif
183}
184
185
186LZ4ByteStreamDecoderKernel::LZ4ByteStreamDecoderKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, size_t bufferSize)
187: BlockOrientedKernel("lz4ByteStreamDecoder",
188    // Inputs
189    {Binding{iBuilder->getStreamSetTy(2, 32), "literalIndexes"},
190     Binding{iBuilder->getStreamSetTy(2, 32), "matchIndexes"},
191     Binding{iBuilder->getStreamSetTy(1, 8), "inputStream", UnknownRate()}},
192    // Outputs
193    {Binding{iBuilder->getStreamSetTy(1, 8), "outputStream", UnknownRate()}},
194    // Arguments
195    {},
196    {},
197    {}),
198 mBufferSize(bufferSize) {
199    setNoTerminateAttribute(true);
200}
Note: See TracBrowser for help on using the repository browser.