source: icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_match_copy_kernel.cpp @ 5905

Last change on this file since 5905 was 5905, checked in by xwa163, 18 months ago
  1. Fix some bugs in match copy kernel
  2. Remove some legacy code from the match copy kernel and the sequential kernel
File size: 14.6 KB
Line 
1//
2//
3
4#include "lz4_match_copy_kernel.h"
5#include <kernels/kernel_builder.h>
6#include <kernels/streamset.h>
7#include <toolchain/toolchain.h>
8#include <llvm/Support/raw_ostream.h>
9
10#define OUTPUT_STREAM_NAME "outputStream"
11
12using namespace llvm;
13using namespace kernel;
14using namespace std;
15
16void LZ4MatchCopyKernel::generateOutputCopy(const std::unique_ptr<KernelBuilder> &iBuilder, Value *outputBlocks) {
17
18    Value *SIZE_ZERO = iBuilder->getSize(0);
19    Value *SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
20
21    Value *previousProcessed = iBuilder->getProcessedItemCount("decompressedStream");
22
23    Value *inputBasePtr = iBuilder->getInputStreamBlockPtr("decompressedStream", SIZE_ZERO);
24
25    Value *outputBasePtr = iBuilder->getOutputStreamBlockPtr(OUTPUT_STREAM_NAME, SIZE_ZERO);
26    Value *itemsToDo = mAvailableItemCount[0];
27    Value *copySize = iBuilder->CreateMul(outputBlocks, SIZE_BIT_BLOCK_WIDTH);
28    Value* actualCopySize = iBuilder->CreateUMin(itemsToDo, copySize);
29
30    iBuilder->CreateMemCpy(
31            outputBasePtr,
32            inputBasePtr,
33            copySize,
34            iBuilder->getBitBlockWidth()
35    ); //It will be ok to always copy by full block
36
37    Value *newProcessed = iBuilder->CreateAdd(previousProcessed, actualCopySize);
38    iBuilder->setProcessedItemCount("decompressedStream", newProcessed);
39    iBuilder->setProducedItemCount(OUTPUT_STREAM_NAME, newProcessed);
40
41}
42
43Value *LZ4MatchCopyKernel::getMaximumMatchCopyBlock(const unique_ptr<KernelBuilder> &iBuilder) {
44    Value *SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
45    Value *SIZE_ZERO = iBuilder->getSize(0);
46    Value *SIZE_ONE = iBuilder->getSize(1);
47    Value *m0EndInitOffset = iBuilder->CreateURem(iBuilder->getProcessedItemCount("m0End"), SIZE_BIT_BLOCK_WIDTH);
48    Value *m0EndItemsToDo = mAvailableItemCount[2];
49    Value *m0EndBasePtr = iBuilder->getInputStreamBlockPtr("m0End", SIZE_ZERO);
50    m0EndBasePtr = iBuilder->CreatePointerCast(m0EndBasePtr, iBuilder->getInt64Ty()->getPointerTo());
51    Value *lastM0 = iBuilder->CreateLoad(
52            iBuilder->CreateGEP(
53                    m0EndBasePtr,
54                    iBuilder->CreateSub(
55                            iBuilder->CreateAdd(m0EndInitOffset, m0EndItemsToDo),
56                            SIZE_ONE
57                    )
58
59            )
60    );
61    Value *lastDepositPosition = iBuilder->CreateAdd(lastM0, SIZE_ONE);
62
63    Value *currentMaxBlock = iBuilder->CreateSelect(
64            this->mIsFinalBlock,
65            iBuilder->CreateUDivCeil(lastDepositPosition, SIZE_BIT_BLOCK_WIDTH),
66            iBuilder->CreateUDiv(lastDepositPosition, SIZE_BIT_BLOCK_WIDTH)
67    );
68
69    // Produced Item Count will always be full bitblock except for final block
70    Value *previousProducedBlocks = iBuilder->CreateUDiv(
71            iBuilder->getProducedItemCount(OUTPUT_STREAM_NAME),
72            SIZE_BIT_BLOCK_WIDTH
73    );
74
75    // (m0 + 1) / BitBlockWidth - produceItemCount / BitBlockWidth
76    return iBuilder->CreateSub(currentMaxBlock, previousProducedBlocks);
77}
78
79void LZ4MatchCopyKernel::generateMultiBlockLogic(const unique_ptr<KernelBuilder> &iBuilder, Value *const numOfStrides) {
80    // Const
81    Constant *SIZE_ZERO = iBuilder->getSize(0);
82    Constant *SIZE_ONE = iBuilder->getSize(1);
83    Constant *SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
84
85    Value *itemsToDo = mAvailableItemCount[0];
86
87
88//    iBuilder->CallPrintInt("isFinalBlock", isFinalBlock);
89
90//    iBuilder->CallPrintInt("matchCopy:isFinalBlock", isFinalBlock);
91
92    Value *previousProducedItemCount = iBuilder->getProducedItemCount(OUTPUT_STREAM_NAME);
93
94
95    // Space Calculation
96    Value *outputBufferBlocks = iBuilder->getSize(
97            this->getAnyStreamSetBuffer(OUTPUT_STREAM_NAME)->getBufferBlocks());
98
99    Value *outputCurrentPtr = iBuilder->getOutputStreamBlockPtr(OUTPUT_STREAM_NAME, SIZE_ZERO); // [8 x <4 x i64>]*
100    Value *outputRawBeginPtr = iBuilder->CreatePointerCast(
101            iBuilder->getRawOutputPointer(OUTPUT_STREAM_NAME, SIZE_ZERO), outputCurrentPtr->getType());
102    Value *producedOffset = iBuilder->CreatePtrDiff(outputCurrentPtr, outputRawBeginPtr);
103    Value *remainSpace = iBuilder->CreateSub(outputBufferBlocks, producedOffset);
104    Value *matchCopyWindowBlock = iBuilder->getSize(256 * 256 / codegen::BlockSize);
105    Value *remainWindowBlock = iBuilder->CreateSelect(
106            iBuilder->CreateICmpUGE(producedOffset, matchCopyWindowBlock),
107            iBuilder->getSize(0),
108            iBuilder->CreateSub(matchCopyWindowBlock, producedOffset)
109    );
110    Value *writableBlocks = iBuilder->CreateSub(remainSpace,
111                                                remainWindowBlock); //TODO handle beginning, if producedItemCount / bitblockWidth < windowBlock, there is no need for the substraction here
112//    iBuilder->CallPrintInt("remainSpace", remainSpace);
113//    iBuilder->CallPrintInt("writableBlocks", writableBlocks);
114    Value *outputBlocks = iBuilder->CreateUMin(writableBlocks, numOfStrides);
115    // outputBlock === min(writableBlocks, numOfStrides, (matchOffsetPosition + matchLength - producedItemCount) / bitBlockWidth )
116//    iBuilder->CallPrintInt("outputBlocks1", outputBlocks);
117
118//    outputBlocks = iBuilder->CreateUMin(outputBlocks, this->getMaximumMatchCopyBlock(iBuilder)); //TODO need to handle final block, otherwise it may be deadloop when there is not match copy in final block
119//    iBuilder->CallPrintInt("outputBlocks2", outputBlocks);
120
121//    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
122
123
124    Value *isFinalBlock =
125            iBuilder->CreateOr(
126                    iBuilder->CreateICmpULT(itemsToDo, iBuilder->CreateMul(outputBlocks, SIZE_BIT_BLOCK_WIDTH)),
127                    iBuilder->CreateICmpEQ(itemsToDo, iBuilder->getSize(0))
128            );
129
130    this->mIsFinalBlock = isFinalBlock;
131    iBuilder->setTerminationSignal(isFinalBlock);
132    // Output Copy
133    this->generateOutputCopy(iBuilder, outputBlocks);
134
135    Value *newProducedItemCount = iBuilder->getProducedItemCount(OUTPUT_STREAM_NAME);
136
137    BasicBlock *copyEndBlock = iBuilder->CreateBasicBlock("copyEnd");
138    iBuilder->CreateBr(copyEndBlock);
139    iBuilder->SetInsertPoint(copyEndBlock);
140
141    // Match Copy
142    BasicBlock *exitBlock = iBuilder->CreateBasicBlock("exit_block");
143
144    Value *initM0StartProcessIndex = iBuilder->getProcessedItemCount("m0Start");
145    Value *totalM0StartItemsCount = iBuilder->CreateAdd(initM0StartProcessIndex, mAvailableItemCount[1]);
146
147    Value *initMatchOffset = iBuilder->getScalarField("pendingMatchOffset");
148    Value *initMatchLength = iBuilder->getScalarField("pendingMatchLength");
149    Value *initMatchPos = iBuilder->getScalarField("pendingMatchPos");
150
151
152    BasicBlock *matchCopyLoopCon = iBuilder->CreateBasicBlock("matchCopyLoopCon");
153    iBuilder->CreateBr(matchCopyLoopCon);
154
155    iBuilder->SetInsertPoint(matchCopyLoopCon);
156
157
158    PHINode *phiProcessIndex = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3);
159    phiProcessIndex->addIncoming(initM0StartProcessIndex, copyEndBlock);
160
161    PHINode *phiMatchOffset = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3);
162    phiMatchOffset->addIncoming(initMatchOffset, copyEndBlock);
163
164    PHINode *phiMatchLength = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3);
165    phiMatchLength->addIncoming(initMatchLength, copyEndBlock);
166
167    PHINode *phiMatchPos = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3);
168    phiMatchPos->addIncoming(initMatchPos, copyEndBlock);
169
170    BasicBlock *loadNextMatchInfoConBlock = iBuilder->CreateBasicBlock("loadNewMatchInfoConBlock");
171    BasicBlock *loadNextMatchInfoBodyBlock = iBuilder->CreateBasicBlock("loadNewMatchInfoBodyBlock");
172
173    BasicBlock *matchCopyConBlock = iBuilder->CreateBasicBlock("matchCopyConBlock");
174    BasicBlock *matchCopyBodyBlock = iBuilder->CreateBasicBlock("matchCopyBodyBlock");
175
176
177    iBuilder->CreateCondBr(
178            iBuilder->CreateICmpEQ(phiMatchLength, iBuilder->getSize(0)),
179            loadNextMatchInfoConBlock,
180            matchCopyConBlock
181    );
182
183
184    iBuilder->SetInsertPoint(loadNextMatchInfoConBlock);
185
186    Value *hasMoreMatchInfo = iBuilder->CreateICmpULT(phiProcessIndex, totalM0StartItemsCount);
187    iBuilder->CreateCondBr(hasMoreMatchInfo, loadNextMatchInfoBodyBlock, exitBlock);
188
189    iBuilder->SetInsertPoint(loadNextMatchInfoBodyBlock);
190
191    Value *m0StartBasePtr = iBuilder->CreatePointerCast(iBuilder->getInputStreamBlockPtr("m0Start", SIZE_ZERO), iBuilder->getInt64Ty()->getPointerTo());
192    Value *m0EndBasePtr = iBuilder->CreatePointerCast(iBuilder->getInputStreamBlockPtr("m0End", SIZE_ZERO), iBuilder->getInt64Ty()->getPointerTo());
193    Value *matchOffsetBasePtr = iBuilder->CreatePointerCast(iBuilder->getInputStreamBlockPtr("matchOffset", SIZE_ZERO), iBuilder->getInt64Ty()->getPointerTo());
194
195
196    Value *m0StartBaseOffset = iBuilder->CreateURem(initM0StartProcessIndex, SIZE_BIT_BLOCK_WIDTH);
197//    iBuilder->CallPrintInt("rawPtr", iBuilder->getRawInputPointer("m0Start", SIZE_ZERO));
198//    iBuilder->CallPrintInt("ptr", m0StartBasePtr);
199//    iBuilder->CallPrintInt("initM0StartProcessIndex", initM0StartProcessIndex);
200    Value *m0StartLoadOffset = iBuilder->CreateAdd(m0StartBaseOffset,
201                                                   iBuilder->CreateSub(phiProcessIndex, initM0StartProcessIndex));
202
203    Value *newM0Start = iBuilder->CreateLoad(iBuilder->CreateGEP(m0StartBasePtr, m0StartLoadOffset));
204    Value *newM0End = iBuilder->CreateLoad(iBuilder->CreateGEP(m0EndBasePtr, m0StartLoadOffset));
205    Value *newMatchOffset = iBuilder->CreateLoad(iBuilder->CreateGEP(matchOffsetBasePtr, m0StartLoadOffset));
206
207    Value *depositStart = newM0Start;
208//    iBuilder->CallPrintInt("depositStart", depositStart);
209//    iBuilder->CallPrintInt("newMatchLength", newMatchLength);
210
211    Value *depositEnd = iBuilder->CreateAdd(newM0End, iBuilder->getInt64(1));
212    Value *newMatchLength = iBuilder->CreateSub(depositEnd, depositStart);
213    phiProcessIndex->addIncoming(iBuilder->CreateAdd(phiProcessIndex, SIZE_ONE), iBuilder->GetInsertBlock());
214
215    phiMatchPos->addIncoming(depositStart, iBuilder->GetInsertBlock());
216    phiMatchOffset->addIncoming(newMatchOffset, iBuilder->GetInsertBlock());
217    phiMatchLength->addIncoming(newMatchLength, iBuilder->GetInsertBlock());
218
219    iBuilder->CreateBr(matchCopyLoopCon);
220
221
222    iBuilder->SetInsertPoint(matchCopyConBlock);
223    Value *hasNotReachEnd = iBuilder->CreateICmpULT(phiMatchPos, newProducedItemCount);
224//    iBuilder->CallPrintInt("newProducedItemCount", newProducedItemCount);
225    iBuilder->CreateCondBr(hasNotReachEnd, matchCopyBodyBlock, exitBlock);
226
227    iBuilder->SetInsertPoint(matchCopyBodyBlock);
228    Value* matchCopyFromPos = iBuilder->CreateSub(phiMatchPos, phiMatchOffset);
229    Value* rawOutputBasePtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(OUTPUT_STREAM_NAME, SIZE_ZERO), iBuilder->getInt8PtrTy());
230
231    Value* outputBufferSize = iBuilder->CreateMul(outputBufferBlocks, SIZE_BIT_BLOCK_WIDTH);
232    Value* matchCopyFromOffset = iBuilder->CreateURem(matchCopyFromPos, outputBufferSize);
233    Value* matchCopyFromPtr = iBuilder->CreateGEP(rawOutputBasePtr, matchCopyFromOffset);
234
235    // Output is guranteed to be full bit block except for final block
236    Value* outputBlockBasePtr = iBuilder->CreatePointerCast(iBuilder->getOutputStreamBlockPtr(OUTPUT_STREAM_NAME, SIZE_ZERO), iBuilder->getInt8PtrTy());
237    Value* outputTargetPtr = iBuilder->CreateGEP(outputBlockBasePtr, iBuilder->CreateSub(phiMatchPos, previousProducedItemCount));
238
239    Value* matchCopyFromRemain = iBuilder->CreateSub(outputBufferSize, matchCopyFromOffset);
240    // phiMatchOffset
241    // phiMatchLength
242    Value* currentCopySize = iBuilder->CreateUMin(matchCopyFromRemain, phiMatchOffset);
243    currentCopySize = iBuilder->CreateUMin(currentCopySize, phiMatchLength);
244    currentCopySize = iBuilder->CreateUMin(currentCopySize, iBuilder->CreateSub(newProducedItemCount, phiMatchPos));
245
246    currentCopySize = iBuilder->CreateSelect(iBuilder->CreateICmpEQ(currentCopySize, SIZE_ZERO), SIZE_ONE, currentCopySize); //Workaround for the last byte
247
248//    currentCopySize = SIZE_ONE;
249    iBuilder->CreateMemCpy(outputTargetPtr, matchCopyFromPtr, currentCopySize, 0);
250
251//    iBuilder->CallPrintInt("outputTargetPtr", iBuilder->CreateGEP(iBuilder->CreateLoad(outputTargetPtr), iBuilder->CreateSub(currentCopySize, SIZE_ONE)));
252//    iBuilder->CallPrintInt("matchCopyFromPtr", iBuilder->CreateGEP(iBuilder->CreateLoad(matchCopyFromPtr), iBuilder->CreateSub(currentCopySize, SIZE_ONE)));
253
254    phiProcessIndex->addIncoming(phiProcessIndex, iBuilder->GetInsertBlock());
255    phiMatchOffset->addIncoming(phiMatchOffset, iBuilder->GetInsertBlock());
256    phiMatchPos->addIncoming(iBuilder->CreateAdd(phiMatchPos, currentCopySize), iBuilder->GetInsertBlock());
257    phiMatchLength->addIncoming(iBuilder->CreateSub(phiMatchLength, currentCopySize), iBuilder->GetInsertBlock());
258
259    iBuilder->CreateBr(matchCopyLoopCon);
260
261    iBuilder->SetInsertPoint(exitBlock);
262//    iBuilder->CallPrintInt("test", SIZE_ZERO);
263    iBuilder->setScalarField("pendingMatchOffset", phiMatchOffset);
264    iBuilder->setScalarField("pendingMatchLength", phiMatchLength);
265    iBuilder->setScalarField("pendingMatchPos", phiMatchPos);
266//    iBuilder->CallPrintInt("pendingMatchLength", phiMatchLength);
267    iBuilder->setProcessedItemCount("m0Start", phiProcessIndex);
268    iBuilder->setProcessedItemCount("m0End", phiProcessIndex);
269    iBuilder->setProcessedItemCount("matchOffset", phiProcessIndex);
270}
271
272LZ4MatchCopyKernel::LZ4MatchCopyKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder)
273        : MultiBlockKernel("lz4MatchCopyKernel",
274        // Inputs
275                           {
276                                   Binding{iBuilder->getStreamSetTy(1, 8), "decompressedStream", BoundedRate(0, 1), AlwaysConsume()},
277                                   Binding{iBuilder->getStreamSetTy(1, 64), "m0Start", BoundedRate(0, 1), AlwaysConsume()},
278                                   Binding{iBuilder->getStreamSetTy(1, 64), "m0End", BoundedRate(0, 1), AlwaysConsume()},
279                                   Binding{iBuilder->getStreamSetTy(1, 64), "matchOffset", BoundedRate(0, 1), AlwaysConsume()},
280
281                           },
282        // Outputs
283                           {Binding{iBuilder->getStreamSetTy(1, 8), OUTPUT_STREAM_NAME, BoundedRate(0, 1)}},
284        // Arguments
285                           {},
286                           {},
287                           {
288                                   Binding{iBuilder->getSizeTy(), "currentProcessIndex"},
289                                   Binding{iBuilder->getSizeTy(), "pendingMatchPos"},
290                                   Binding{iBuilder->getSizeTy(), "pendingMatchOffset"},
291                                   Binding{iBuilder->getSizeTy(), "pendingMatchLength"},
292                           }) {
293//    setNoTerminateAttribute(true);
294    addAttribute(MustExplicitlyTerminate());
295}
Note: See TracBrowser for help on using the repository browser.