source: icGREP/icgrep-devel/icgrep/kernels/lz4_bytestream_decoder.cpp @ 5435

Last change on this file since 5435 was 5435, checked in by nmedfort, 2 years ago

Continued refactoring work.

File size: 9.7 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include "lz4_bytestream_decoder.h"
8
9using namespace llvm;
10using namespace kernel;
11
12
13namespace {
14
15Value * getInputPtr(IDISA::IDISA_Builder * const iBuilder, Value * blockStartPtr, Value * offset) {
16    return iBuilder->CreateGEP(
17            iBuilder->CreatePointerCast(blockStartPtr, iBuilder->getInt32Ty()->getPointerTo()),
18            offset
19            );
20}
21
22Value * selectMin(IDISA::IDISA_Builder * const iBuilder, Value * a, Value * b) {
23    return iBuilder->CreateSelect(iBuilder->CreateICmpULT(a, b), a, b);
24}
25
26}
27
28
29void LZ4ByteStreamDecoderKernel::generateDoBlockMethod() {
30    BasicBlock * entry_block = iBuilder->GetInsertBlock();
31    BasicBlock * loopBody = CreateBasicBlock("bytestream_block_loop_body");
32    BasicBlock * loopExit = CreateBasicBlock("bytestream_block_loop_exit");
33
34    Value * bufferSize = iBuilder->getSize(mBufferSize);
35    Value * bufferSizeMask = iBuilder->CreateSub(bufferSize, iBuilder->getSize(1));
36    Value * iterations = selectMin(iBuilder,
37            iBuilder->getSize(iBuilder->getBitBlockWidth()),
38            iBuilder->CreateSub(getAvailableItemCount("literalIndexes"), getProcessedItemCount("literalIndexes")));
39    Value * inputBufferBasePtr = getRawInputPointer("inputStream", iBuilder->getSize(0), iBuilder->getSize(0));
40    Value * outputBufferBasePtr = getRawOutputPointer("outputStream", iBuilder->getSize(0), iBuilder->getSize(0));
41    iBuilder->CreateBr(loopBody);
42
43    iBuilder->SetInsertPoint(loopBody);
44    PHINode * phiInputIndex = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2, "inputIndex");
45    phiInputIndex->addIncoming(iBuilder->getSize(0), entry_block);
46
47    // =================================================
48    // Indexes extraction.
49    Value * literalStartPtr = getInputPtr(iBuilder,
50            getInputStreamBlockPtr("literalIndexes", iBuilder->getSize(0)), phiInputIndex);
51    Value * literalLengthPtr = getInputPtr(iBuilder,
52            getInputStreamBlockPtr("literalIndexes", iBuilder->getSize(1)), phiInputIndex);
53    Value * matchOffsetPtr = getInputPtr(iBuilder,
54            getInputStreamBlockPtr("matchIndexes", iBuilder->getSize(0)), phiInputIndex);
55    Value * matchLengthPtr = getInputPtr(iBuilder,
56            getInputStreamBlockPtr("matchIndexes", iBuilder->getSize(1)), phiInputIndex);
57    Value * literalStart = iBuilder->CreateZExt(iBuilder->CreateLoad(literalStartPtr), iBuilder->getSizeTy());
58    Value * literalLength = iBuilder->CreateZExt(iBuilder->CreateLoad(literalLengthPtr), iBuilder->getSizeTy());
59    Value * matchOffset = iBuilder->CreateZExt(iBuilder->CreateLoad(matchOffsetPtr), iBuilder->getSizeTy());
60    Value * matchLength = iBuilder->CreateZExt(iBuilder->CreateLoad(matchLengthPtr), iBuilder->getSizeTy());
61
62#if 0
63    Value * processedItem = iBuilder->CreateAdd(getProcessedItemCount("literalIndexes"), phiInputIndex);
64    iBuilder->CallPrintInt("ProccessedItem", processedItem);
65    iBuilder->CallPrintInt("LiteralStart", literalStart);
66    iBuilder->CallPrintInt("LiteralLength", literalLength);
67    iBuilder->CallPrintInt("MatchOffset", matchOffset);
68    iBuilder->CallPrintInt("MatchLength", matchLength);
69#endif
70
71    // =================================================
72    // Literals.
73    Value * outputItems = getProducedItemCount("outputStream");
74    Value * bufferOffset = iBuilder->CreateAnd(outputItems, bufferSizeMask);
75    Value * remainingBuffer = iBuilder->CreateSub(bufferSize, bufferOffset);
76    Value * copyLength1 = selectMin(iBuilder, remainingBuffer, literalLength);
77    iBuilder->CreateMemCpy(
78            iBuilder->CreateGEP(outputBufferBasePtr, bufferOffset),
79            iBuilder->CreateGEP(inputBufferBasePtr, literalStart),
80            copyLength1, 1);    // no alignment guaranteed
81    // Potential wrap around.
82    iBuilder->CreateMemCpy(
83            outputBufferBasePtr,
84            iBuilder->CreateGEP(inputBufferBasePtr, iBuilder->CreateAdd(literalStart, copyLength1)),
85            iBuilder->CreateSub(literalLength, copyLength1), 8);        // Buffer start is aligned.
86    outputItems = iBuilder->CreateAdd(outputItems, literalLength);
87
88    // =================================================
89    // Match copy.
90    // Conceptually, copy [cur-matchOffset, cur-matchOffset+matchLength] to
91    // [cur, cur+matchLength] sequentially, with two ranges potentially overlapping.
92    // If matchOffset is larger than 4, we copy 4 bytes at a time; otherwise, one byte a time.
93    Value * matchStart = iBuilder->CreateSub(outputItems, matchOffset);
94    Value * baseSrcOffset = iBuilder->CreateAnd(matchStart, bufferSizeMask);
95    Value * baseDstOffset = iBuilder->CreateAnd(outputItems, bufferSizeMask);
96    Value * copyStep = iBuilder->CreateSelect(
97            iBuilder->CreateICmpULT(matchOffset, iBuilder->getSize(4)),
98            iBuilder->getSize(1),
99            iBuilder->getSize(4)
100            );
101    BasicBlock * cpyLoopCond = CreateBasicBlock("matchcopy_loop_cond");
102    BasicBlock * cpyLoopBody = CreateBasicBlock("matchcopy_loop_body");
103    BasicBlock * cpyLoopExit = CreateBasicBlock("matchcopy_loop_exit");
104    iBuilder->CreateBr(cpyLoopCond);
105
106    iBuilder->SetInsertPoint(cpyLoopCond);
107    PHINode * phiSrcOffset = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3, "srcOffset");
108    PHINode * phiDstOffset = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3, "dstOffset");
109    PHINode * phiIter = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3, "iterator");
110    phiSrcOffset->addIncoming(baseSrcOffset, loopBody);
111    phiDstOffset->addIncoming(baseDstOffset, loopBody);
112    phiIter->addIncoming(iBuilder->getSize(0), loopBody);
113    iBuilder->CreateCondBr(
114            iBuilder->CreateICmpUGE(phiIter, matchLength),
115            cpyLoopExit,
116            cpyLoopBody
117            );
118
119    iBuilder->SetInsertPoint(cpyLoopBody);
120#ifndef NDEBUG
121    iBuilder->CallPrintIntToStderr("srcOffset", phiSrcOffset);
122    iBuilder->CallPrintIntToStderr("dstOffset", phiDstOffset);
123#endif
124    BasicBlock * reachingBufferEnd_then = CreateBasicBlock("matchcopy_reaching_buf_end_then");
125    BasicBlock * reachingBufferEnd_else = CreateBasicBlock("matchcopy_reaching_buf_end_else");
126    Value * distSrcEnd = iBuilder->CreateSub(bufferSize, phiSrcOffset);
127    Value * distDstEnd = iBuilder->CreateSub(bufferSize, phiDstOffset);
128    Value * minDist = selectMin(iBuilder, distSrcEnd, distDstEnd);
129    iBuilder->CreateUnlikelyCondBr(
130            iBuilder->CreateICmpULE(minDist, iBuilder->getSize(4)),
131            reachingBufferEnd_then,
132            reachingBufferEnd_else
133            );
134
135    iBuilder->SetInsertPoint(reachingBufferEnd_then);
136    Value * src8 = iBuilder->CreateGEP(outputBufferBasePtr, phiSrcOffset);
137    Value * dst8 = iBuilder->CreateGEP(outputBufferBasePtr, phiDstOffset);
138    iBuilder->CreateStore(iBuilder->CreateLoad(src8), dst8);
139    Value * newSrcOffset = iBuilder->CreateAnd(
140            iBuilder->CreateAdd(phiSrcOffset, iBuilder->getSize(1)),
141            bufferSizeMask
142            );
143    Value * newDstOffset = iBuilder->CreateAnd(
144            iBuilder->CreateAdd(phiDstOffset, iBuilder->getSize(1)),
145            bufferSizeMask
146            );
147    phiSrcOffset->addIncoming(newSrcOffset, reachingBufferEnd_then);
148    phiDstOffset->addIncoming(newDstOffset, reachingBufferEnd_then);
149    phiIter->addIncoming(iBuilder->CreateAdd(phiIter, iBuilder->getSize(1)), reachingBufferEnd_then);
150    iBuilder->CreateBr(cpyLoopCond);
151
152    iBuilder->SetInsertPoint(reachingBufferEnd_else);
153    // Copy 4 bytes at a time (regardless of step length).
154    Value * src32 = iBuilder->CreatePointerCast(
155            iBuilder->CreateGEP(outputBufferBasePtr, phiSrcOffset),
156            iBuilder->getInt32Ty()->getPointerTo());
157    Value * dst32 = iBuilder->CreatePointerCast(
158            iBuilder->CreateGEP(outputBufferBasePtr, phiDstOffset),
159            iBuilder->getInt32Ty()->getPointerTo());
160    // Force unaligned load/store of an int32.
161    iBuilder->CreateAlignedStore(iBuilder->CreateAlignedLoad(src32, 1), dst32, 1);
162    newSrcOffset = iBuilder->CreateAnd(
163            iBuilder->CreateAdd(phiSrcOffset, copyStep),
164            bufferSizeMask
165            );
166    newDstOffset = iBuilder->CreateAnd(
167            iBuilder->CreateAdd(phiDstOffset, copyStep),
168            bufferSizeMask
169            );
170    phiSrcOffset->addIncoming(newSrcOffset, reachingBufferEnd_else);
171    phiDstOffset->addIncoming(newDstOffset, reachingBufferEnd_else);
172    phiIter->addIncoming(iBuilder->CreateAdd(phiIter, copyStep), reachingBufferEnd_else);
173    iBuilder->CreateBr(cpyLoopCond);
174
175    iBuilder->SetInsertPoint(cpyLoopExit);
176    outputItems = iBuilder->CreateAdd(outputItems, matchLength);
177    setProducedItemCount("outputStream", outputItems);
178
179    Value * newInputIndex = iBuilder->CreateAdd(phiInputIndex, iBuilder->getSize(1));
180    phiInputIndex->addIncoming(newInputIndex, cpyLoopExit);
181    iBuilder->CreateUnlikelyCondBr(
182            iBuilder->CreateICmpEQ(newInputIndex, iterations),
183            loopExit,
184            loopBody
185            );
186
187    iBuilder->SetInsertPoint(loopExit);
188#ifndef NDEBUG
189    iBuilder->CallPrintInt("Decompressed bytes", getProducedItemCount("outputStream"));
190#endif
191}
192
193
194LZ4ByteStreamDecoderKernel::LZ4ByteStreamDecoderKernel(const std::unique_ptr<IDISA::IDISA_Builder> & iBuilder, size_t bufferSize)
195: BlockOrientedKernel("lz4ByteStreamDecoder",
196    // Inputs
197    {Binding{iBuilder->getStreamSetTy(2, 32), "literalIndexes"},
198     Binding{iBuilder->getStreamSetTy(2, 32), "matchIndexes"},
199     Binding{iBuilder->getStreamSetTy(1, 8), "inputStream", UnknownRate()}},
200    // Outputs
201    {Binding{iBuilder->getStreamSetTy(1, 8), "outputStream", UnknownRate()}},
202    // Arguments
203    {},
204    {},
205    {}),
206 mBufferSize(bufferSize) {
207    setNoTerminateAttribute(true);
208}
Note: See TracBrowser for help on using the repository browser.