source: icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_match_copy_kernel.cpp @ 5895

Last change on this file since 5895 was 5895, checked in by xwa163, 12 months ago
  1. Fix match copy kernel in large file for new infrastructure
  2. Enable testing for full LZ4 decode pipeline
File size: 16.9 KB
Line 
1//
2//
3
4#include "lz4_match_copy_kernel.h"
5#include <kernels/kernel_builder.h>
6#include <kernels/streamset.h>
7#include <toolchain/toolchain.h>
8
9#define OUTPUT_BIT_STREAM_NAME "outputStream"
10
11using namespace llvm;
12using namespace kernel;
13using namespace std;
14
15void LZ4MatchCopyKernel::generateOutputCopy(const std::unique_ptr<KernelBuilder> &iBuilder, Value *outputBlocks) {
16
17    Value *SIZE_ZERO = iBuilder->getSize(0);
18    Value *SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
19
20    Value *previousProcessed = iBuilder->getProcessedItemCount("decompressedStream");
21
22//    BasicBlock *entryBlock = iBuilder->GetInsertBlock();
23    Value *inputBasePtr = iBuilder->getInputStreamBlockPtr("decompressedStream", SIZE_ZERO);
24
25    Value *outputBasePtr = iBuilder->getOutputStreamBlockPtr(OUTPUT_BIT_STREAM_NAME, SIZE_ZERO);
26    Value *itemsToDo = mAvailableItemCount[0];
27    Value *copySize = iBuilder->CreateUMin(
28            itemsToDo,
29            iBuilder->CreateMul(outputBlocks, SIZE_BIT_BLOCK_WIDTH)
30    );
31//    iBuilder->CallPrintInt("itemsToDo", itemsToDo);
32//    iBuilder->CallPrintInt("itemsToDo1", mAvailableItemCount[1]);
33//    iBuilder->CallPrintInt("itemsToDo2", mAvailableItemCount[2]);
34//    iBuilder->CallPrintInt("itemsToDo3", mAvailableItemCount[3]);
35//    iBuilder->CallPrintInt("copySize", copySize);
36
37    iBuilder->CreateMemCpy(
38            outputBasePtr,
39            inputBasePtr,
40            copySize,
41            1 // Not align guaranteed in final block
42    );
43//    iBuilder->CallPrintInt("outputCpyPtr", outputBasePtr);
44//    iBuilder->CallPrintInt("outputBlocks", outputBlocks);
45    Value *newProcessed = iBuilder->CreateAdd(previousProcessed, copySize);
46    iBuilder->setProcessedItemCount("decompressedStream", newProcessed);
47    iBuilder->setProducedItemCount(OUTPUT_BIT_STREAM_NAME, newProcessed);
48
49}
50
51Value *LZ4MatchCopyKernel::getMaximumMatchCopyBlock(const unique_ptr<KernelBuilder> &iBuilder) {
52    Value *SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
53    Value *SIZE_ZERO = iBuilder->getSize(0);
54    Value *SIZE_ONE = iBuilder->getSize(1);
55    Value *m0EndInitOffset = iBuilder->CreateURem(iBuilder->getProcessedItemCount("m0End"), SIZE_BIT_BLOCK_WIDTH);
56    Value *m0EndItemsToDo = mAvailableItemCount[2];
57    Value *m0EndBasePtr = iBuilder->getInputStreamBlockPtr("m0End", SIZE_ZERO);
58    m0EndBasePtr = iBuilder->CreatePointerCast(m0EndBasePtr, iBuilder->getInt64Ty()->getPointerTo());
59    Value *lastM0 = iBuilder->CreateLoad(
60            iBuilder->CreateGEP(
61                    m0EndBasePtr,
62                    iBuilder->CreateSub(
63                            iBuilder->CreateAdd(m0EndInitOffset, m0EndItemsToDo),
64                            SIZE_ONE
65                    )
66
67            )
68    );
69    Value *lastDepositPosition = iBuilder->CreateAdd(lastM0, SIZE_ONE);
70
71    // TODO maybe we can not use mIsFinal here
72    Value *currentMaxBlock = iBuilder->CreateSelect(
73            this->mIsFinalBlock,
74            iBuilder->CreateUDivCeil(lastDepositPosition, SIZE_BIT_BLOCK_WIDTH),
75            iBuilder->CreateUDiv(lastDepositPosition, SIZE_BIT_BLOCK_WIDTH)
76    );
77
78    // Produced Item Count will always be full bitblock except for final block
79    Value *previousProducedBlocks = iBuilder->CreateUDiv(
80            iBuilder->getProducedItemCount(OUTPUT_BIT_STREAM_NAME),
81            SIZE_BIT_BLOCK_WIDTH
82    );
83
84    // (m0 + 1) / BitBlockWidth - produceItemCount / BitBlockWidth
85    return iBuilder->CreateSub(currentMaxBlock, previousProducedBlocks);
86}
87
88void LZ4MatchCopyKernel::generateMultiBlockLogic(const unique_ptr<KernelBuilder> &iBuilder, Value *const numOfStrides) {
89    // Const
90    Constant *SIZE_ZERO = iBuilder->getSize(0);
91    Constant *SIZE_ONE = iBuilder->getSize(1);
92    Constant *SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
93
94    Value *itemsToDo = mAvailableItemCount[0];
95
96    Value *isFinalBlock =
97            iBuilder->CreateOr(
98                    iBuilder->CreateICmpULT(itemsToDo, iBuilder->CreateMul(numOfStrides, SIZE_BIT_BLOCK_WIDTH)),
99                    iBuilder->CreateICmpEQ(itemsToDo, iBuilder->getSize(0))
100            );
101
102    this->mIsFinalBlock = isFinalBlock;
103//    iBuilder->CallPrintInt("isFinalBlock", isFinalBlock);
104    iBuilder->setTerminationSignal(isFinalBlock);
105
106
107
108
109    Value *previousProducedItemCount = iBuilder->getProducedItemCount(OUTPUT_BIT_STREAM_NAME);
110
111
112    // Space Calculation
113    Value *outputBufferBlocks = iBuilder->getSize(
114            this->getAnyStreamSetBuffer(OUTPUT_BIT_STREAM_NAME)->getBufferBlocks());
115    // TODO need to take previous produced size into account
116
117
118    Value *outputRawBeginPtr = iBuilder->CreatePointerCast(
119            iBuilder->getRawOutputPointer(OUTPUT_BIT_STREAM_NAME, SIZE_ZERO),
120            iBuilder->getBitBlockType()->getPointerTo());
121    Value *outputCurrentPtr = iBuilder->getOutputStreamBlockPtr(OUTPUT_BIT_STREAM_NAME, SIZE_ZERO);
122    Value *producedOffset = iBuilder->CreatePtrDiff(outputCurrentPtr, outputRawBeginPtr);
123    Value *remainSpace = iBuilder->CreateSub(outputBufferBlocks, producedOffset);
124    Value *matchCopyWindowBlock = iBuilder->getSize(256 * 256 / codegen::BlockSize);
125    Value *remainWindowBlock = iBuilder->CreateSelect(
126            iBuilder->CreateICmpUGE(producedOffset, matchCopyWindowBlock),
127            iBuilder->getSize(0),
128            iBuilder->CreateSub(matchCopyWindowBlock, producedOffset)
129    );
130    Value *writableBlocks = iBuilder->CreateSub(remainSpace,
131                                                remainWindowBlock); //TODO handle beginning, if producedItemCount / bitblockWidth < windowBlock, there is no need for the substraction here
132//    iBuilder->CallPrintInt("remainSpace", remainSpace);
133//    iBuilder->CallPrintInt("writableBlocks", writableBlocks);
134    Value *outputBlocks = iBuilder->CreateUMin(writableBlocks, numOfStrides);
135    // outputBlock === min(writableBlocks, numOfStrides, (matchOffsetPosition + matchLength - producedItemCount) / bitBlockWidth )
136
137    outputBlocks = iBuilder->CreateUMin(outputBlocks, this->getMaximumMatchCopyBlock(iBuilder));
138
139
140//    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
141
142    // Output Copy
143    this->generateOutputCopy(iBuilder, outputBlocks);
144//    return;
145
146    Value *newProducedItemCount = iBuilder->getProducedItemCount(OUTPUT_BIT_STREAM_NAME);
147
148    BasicBlock *copyEndBlock = iBuilder->CreateBasicBlock("copyEnd");
149    iBuilder->CreateBr(copyEndBlock);
150    iBuilder->SetInsertPoint(copyEndBlock);
151
152    // TODO match Copy
153    BasicBlock *exitBlock = iBuilder->CreateBasicBlock("exit_block");
154
155    Value *initM0StartProcessIndex = iBuilder->getProcessedItemCount("m0Start");
156    Value *totalM0StartItemsCount = iBuilder->CreateAdd(initM0StartProcessIndex, mAvailableItemCount[1]);
157
158    Value *initMatchOffset = iBuilder->getScalarField("pendingMatchOffset");
159    Value *initMatchLength = iBuilder->getScalarField("pendingMatchLength");
160    Value *initMatchPos = iBuilder->getScalarField("pendingMatchPos");
161
162
163    BasicBlock *matchCopyLoopCon = iBuilder->CreateBasicBlock("matchCopyLoopCon");
164    iBuilder->CreateBr(matchCopyLoopCon);
165
166    iBuilder->SetInsertPoint(matchCopyLoopCon);
167
168
169    PHINode *phiProcessIndex = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3);
170    phiProcessIndex->addIncoming(initM0StartProcessIndex, copyEndBlock);
171
172    PHINode *phiMatchOffset = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3);
173    phiMatchOffset->addIncoming(initMatchOffset, copyEndBlock);
174
175    PHINode *phiMatchLength = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3);
176    phiMatchLength->addIncoming(initMatchLength, copyEndBlock);
177
178    PHINode *phiMatchPos = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3);
179    phiMatchPos->addIncoming(initMatchPos, copyEndBlock);
180
181    BasicBlock *loadNextMatchInfoConBlock = iBuilder->CreateBasicBlock("loadNewMatchInfoConBlock");
182    BasicBlock *loadNextMatchInfoBodyBlock = iBuilder->CreateBasicBlock("loadNewMatchInfoBodyBlock");
183
184    BasicBlock *matchCopyConBlock = iBuilder->CreateBasicBlock("matchCopyConBlock");
185    BasicBlock *matchCopyBodyBlock = iBuilder->CreateBasicBlock("matchCopyBodyBlock");
186
187
188    iBuilder->CreateCondBr(
189            iBuilder->CreateICmpEQ(phiMatchLength, iBuilder->getSize(0)),
190            loadNextMatchInfoConBlock,
191            matchCopyConBlock
192    );
193
194
195    iBuilder->SetInsertPoint(loadNextMatchInfoConBlock);
196
197    Value *hasMoreMatchInfo = iBuilder->CreateICmpULT(phiProcessIndex, totalM0StartItemsCount);
198    iBuilder->CreateCondBr(hasMoreMatchInfo, loadNextMatchInfoBodyBlock, exitBlock);
199
200    iBuilder->SetInsertPoint(loadNextMatchInfoBodyBlock);
201
202    Value *m0StartBasePtr = iBuilder->CreatePointerCast(iBuilder->getInputStreamBlockPtr("m0Start", SIZE_ZERO), iBuilder->getInt64Ty()->getPointerTo());
203    Value *m0EndBasePtr = iBuilder->CreatePointerCast(iBuilder->getInputStreamBlockPtr("m0End", SIZE_ZERO), iBuilder->getInt64Ty()->getPointerTo());
204    Value *matchOffsetBasePtr = iBuilder->CreatePointerCast(iBuilder->getInputStreamBlockPtr("matchOffset", SIZE_ZERO), iBuilder->getInt64Ty()->getPointerTo());
205
206
207    Value *m0StartBaseOffset = iBuilder->CreateURem(initM0StartProcessIndex, SIZE_BIT_BLOCK_WIDTH);
208//    iBuilder->CallPrintInt("rawPtr", iBuilder->getRawInputPointer("m0Start", SIZE_ZERO));
209//    iBuilder->CallPrintInt("ptr", m0StartBasePtr);
210//    iBuilder->CallPrintInt("initM0StartProcessIndex", initM0StartProcessIndex);
211    Value *m0StartLoadOffset = iBuilder->CreateAdd(m0StartBaseOffset,
212                                                   iBuilder->CreateSub(phiProcessIndex, initM0StartProcessIndex));
213
214    Value *newM0Start = iBuilder->CreateLoad(iBuilder->CreateGEP(m0StartBasePtr, m0StartLoadOffset));
215    Value *newM0End = iBuilder->CreateLoad(iBuilder->CreateGEP(m0EndBasePtr, m0StartLoadOffset));
216    Value *newMatchOffset = iBuilder->CreateLoad(iBuilder->CreateGEP(matchOffsetBasePtr, m0StartLoadOffset));
217
218    Value *depositStart = newM0Start;
219//    iBuilder->CallPrintInt("depositStart", depositStart);
220//    iBuilder->CallPrintInt("newMatchLength", newMatchLength);
221
222    Value *depositEnd = iBuilder->CreateAdd(newM0End, iBuilder->getInt64(1));
223    Value *newMatchLength = iBuilder->CreateSub(depositEnd, depositStart);
224    phiProcessIndex->addIncoming(iBuilder->CreateAdd(phiProcessIndex, SIZE_ONE), iBuilder->GetInsertBlock());
225
226    phiMatchPos->addIncoming(depositStart, iBuilder->GetInsertBlock());
227    phiMatchOffset->addIncoming(newMatchOffset, iBuilder->GetInsertBlock());
228    phiMatchLength->addIncoming(newMatchLength, iBuilder->GetInsertBlock());
229
230    iBuilder->CreateBr(matchCopyLoopCon);
231
232
233    iBuilder->SetInsertPoint(matchCopyConBlock);
234    Value *hasNotReachEnd = iBuilder->CreateICmpULT(phiMatchPos, newProducedItemCount);
235//    iBuilder->CallPrintInt("newProducedItemCount", newProducedItemCount);
236    iBuilder->CreateCondBr(hasNotReachEnd, matchCopyBodyBlock, exitBlock);
237
238    iBuilder->SetInsertPoint(matchCopyBodyBlock);
239    Value* matchCopyFromPos = iBuilder->CreateSub(phiMatchPos, phiMatchOffset);
240    Value* rawOutputBasePtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(OUTPUT_BIT_STREAM_NAME, SIZE_ZERO), iBuilder->getInt8PtrTy());
241//    iBuilder->CallPrintInt("rawOutputBasePtr", rawOutputBasePtr);
242//    iBuilder->CallPrintInt("rawOutputBasePtr1", iBuilder->CreateGEP(
243//            rawOutputBasePtr,
244//            iBuilder->CreateURem(matchCopyFromPos, iBuilder->CreateMul(outputBufferBlocks, SIZE_BIT_BLOCK_WIDTH))
245//    ));
246    Value* matchCopyFromValue = iBuilder->CreateLoad(
247            iBuilder->CreateGEP(
248                    rawOutputBasePtr,
249                    iBuilder->CreateURem(matchCopyFromPos, iBuilder->CreateMul(outputBufferBlocks, SIZE_BIT_BLOCK_WIDTH))
250            ));
251
252    // Output is guranteed to be full bit block except for final block
253    Value* outputBlockBasePtr = iBuilder->CreatePointerCast(iBuilder->getOutputStreamBlockPtr(OUTPUT_BIT_STREAM_NAME, SIZE_ZERO), iBuilder->getInt8PtrTy());
254    Value* outputTargetPtr = iBuilder->CreateGEP(outputBlockBasePtr, iBuilder->CreateSub(phiMatchPos, previousProducedItemCount));
255//    iBuilder->CallPrintInt("matchCopyFromValue", matchCopyFromValue);
256//    iBuilder->CallPrintInt("phiMatchPos", phiMatchPos);
257//    iBuilder->CallPrintInt("aa", iBuilder->CreateSub(phiMatchPos, previousProducedItemCount));
258    iBuilder->CreateStore(matchCopyFromValue, outputTargetPtr);
259
260    phiProcessIndex->addIncoming(phiProcessIndex, iBuilder->GetInsertBlock());
261    phiMatchOffset->addIncoming(phiMatchOffset, iBuilder->GetInsertBlock());
262    phiMatchPos->addIncoming(iBuilder->CreateAdd(phiMatchPos, SIZE_ONE), iBuilder->GetInsertBlock());
263    phiMatchLength->addIncoming(iBuilder->CreateSub(phiMatchLength, SIZE_ONE), iBuilder->GetInsertBlock());
264
265    iBuilder->CreateBr(matchCopyLoopCon);
266
267    iBuilder->SetInsertPoint(exitBlock);
268//    iBuilder->CallPrintInt("test", SIZE_ZERO);
269    iBuilder->setScalarField("pendingMatchOffset", phiMatchOffset);
270    iBuilder->setScalarField("pendingMatchLength", phiMatchLength);
271    iBuilder->setScalarField("pendingMatchPos", phiMatchPos);
272//    iBuilder->CallPrintInt("pendingMatchLength", phiMatchLength);
273    iBuilder->setProcessedItemCount("m0Start", phiProcessIndex);
274    iBuilder->setProcessedItemCount("m0End", phiProcessIndex);
275    iBuilder->setProcessedItemCount("matchOffset", phiProcessIndex);
276}
277
278
279void LZ4MatchCopyKernel::generateStoreCircularOutput(const unique_ptr<KernelBuilder> &iBuilder, string outputBufferName,
280                                                     Value *offset, Type *pointerType, Value *value) {
281    size_t inputSize = this->getOutputBufferSize(iBuilder, outputBufferName);
282    Value *offsetMask = iBuilder->getSize(inputSize - 1);
283    Value *maskedOffset = iBuilder->CreateAnd(offsetMask, offset);
284
285    Value *outputBufferPtr = iBuilder->getRawOutputPointer(outputBufferName, iBuilder->getSize(0));
286
287    outputBufferPtr = iBuilder->CreatePointerCast(outputBufferPtr, pointerType);
288    iBuilder->CreateStore(value, iBuilder->CreateGEP(outputBufferPtr, maskedOffset));
289}
290
291Value *LZ4MatchCopyKernel::generateLoadCircularOutput(const unique_ptr<KernelBuilder> &iBuilder, string inputBufferName,
292                                                      Value *offset, Type *pointerType) {
293    size_t inputSize = this->getOutputBufferSize(iBuilder, inputBufferName);
294    Value *offsetMask = iBuilder->getSize(inputSize - 1);
295    Value *maskedOffset = iBuilder->CreateAnd(offsetMask, offset);
296
297    Value *inputBufferPtr = iBuilder->getRawOutputPointer(inputBufferName, iBuilder->getSize(0));
298
299    inputBufferPtr = iBuilder->CreatePointerCast(inputBufferPtr, pointerType);
300    return iBuilder->CreateLoad(iBuilder->CreateGEP(inputBufferPtr, maskedOffset));
301}
302
303Value *LZ4MatchCopyKernel::generateLoadCircularInput(const unique_ptr<KernelBuilder> &iBuilder, string inputBufferName,
304                                                     Value *offset, Type *pointerType) {
305    size_t inputSize = this->getInputBufferSize(iBuilder, inputBufferName);
306    Value *offsetMask = iBuilder->getSize(inputSize - 1);
307    Value *maskedOffset = iBuilder->CreateAnd(offsetMask, offset);
308
309    Value *inputBufferPtr = iBuilder->getRawInputPointer(inputBufferName, iBuilder->getSize(0));
310
311    inputBufferPtr = iBuilder->CreatePointerCast(inputBufferPtr, pointerType);
312    return iBuilder->CreateLoad(iBuilder->CreateGEP(inputBufferPtr, maskedOffset));
313}
314
315size_t LZ4MatchCopyKernel::getInputBufferSize(const unique_ptr<KernelBuilder> &iBuilder, string bufferName) {
316    return this->getInputStreamSetBuffer(bufferName)->getBufferBlocks() * iBuilder->getStride();
317}
318
319size_t LZ4MatchCopyKernel::getOutputBufferSize(const unique_ptr<KernelBuilder> &iBuilder, string bufferName) {
320    return this->getOutputStreamSetBuffer(bufferName)->getBufferBlocks() * iBuilder->getStride();
321}
322
323LZ4MatchCopyKernel::LZ4MatchCopyKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder)
324        : MultiBlockKernel("lz4MatchCopyKernel",
325        // Inputs
326                           {
327                                   Binding{iBuilder->getStreamSetTy(1, 8), "decompressedStream", BoundedRate(0, 1), AlwaysConsume()},
328                                   Binding{iBuilder->getStreamSetTy(1, 64), "m0Start", BoundedRate(0, 1), AlwaysConsume()},
329                                   Binding{iBuilder->getStreamSetTy(1, 64), "m0End", BoundedRate(0, 1), AlwaysConsume()},
330                                   Binding{iBuilder->getStreamSetTy(1, 64), "matchOffset", BoundedRate(0, 1), AlwaysConsume()},
331
332                           },
333        // Outputs
334                           {Binding{iBuilder->getStreamSetTy(1, 8), OUTPUT_BIT_STREAM_NAME, BoundedRate(0, 1)}},
335        // Arguments
336                           {},
337                           {},
338                           {
339                                   Binding{iBuilder->getSizeTy(), "currentProcessIndex"},
340                                   Binding{iBuilder->getSizeTy(), "pendingMatchPos"},
341                                   Binding{iBuilder->getSizeTy(), "pendingMatchOffset"},
342                                   Binding{iBuilder->getSizeTy(), "pendingMatchLength"},
343                           }) {
344//    setNoTerminateAttribute(true);
345    addAttribute(MustExplicitlyTerminate());
346}
Note: See TracBrowser for help on using the repository browser.