source: icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_match_copy_kernel.cpp @ 5864

Last change on this file since 5864 was 5864, checked in by xwa163, 13 months ago

Add LZ4D extract deposit related kernel, target and test cases

File size: 12.4 KB
Line 
1//
2//
3
4#include "lz4_match_copy_kernel.h"
5#include <kernels/kernel_builder.h>
6#include <kernels/streamset.h>
7
8using namespace llvm;
9using namespace kernel;
10using namespace std;
11
12void LZ4MatchCopyKernel::generateOutputCopy(const std::unique_ptr<KernelBuilder> &iBuilder) {
13    BasicBlock *entryBlock = iBuilder->GetInsertBlock();
14    Value *previousCopy = iBuilder->getScalarField("previousCopy");
15    Value* previousProduced = iBuilder->getProducedItemCount("outputStream");
16    Value* copyStart = iBuilder->CreateSelect(
17            iBuilder->CreateICmpULT(previousCopy, previousProduced),
18            previousProduced,
19            previousCopy
20    );
21
22
23    Value * itemsToDo = mAvailableItemCount[0];
24    Value *itemsAvailable = iBuilder->CreateAdd(iBuilder->getAvailableItemCount("decompressedStream"), previousCopy);
25//    iBuilder->CallPrintInt("itemsAvailable", iBuilder->getAvailableItemCount("decompressedStream"));
26
27//    iBuilder->CallPrintInt("itemsToDo", itemsToDo);
28
29
30    size_t decompressedStreamBufferSize = this->getInputBufferSize(iBuilder, "decompressedStream");
31    Value *bufferSize = iBuilder->getSize(decompressedStreamBufferSize);
32
33    Value *inputBasePtr = iBuilder->getRawInputPointer("decompressedStream", iBuilder->getSize(0));
34    Value *outputBasePtr = iBuilder->getRawOutputPointer("outputStream", iBuilder->getSize(0));
35//    iBuilder->CallPrintInt("copyStart", copyStart);
36
37
38    Value *previousRound = iBuilder->CreateUDiv(copyStart, bufferSize);
39    Value *previousOffset = iBuilder->CreateURem(copyStart, bufferSize);
40
41    Value *curRound = iBuilder->CreateUDiv(itemsAvailable, bufferSize);
42    Value *curOffset = iBuilder->CreateURem(itemsAvailable, bufferSize);
43
44
45//    iBuilder->CallPrintInt("previousRound", previousRound);
46//    iBuilder->CallPrintInt("previousOffset", previousOffset);
47//    iBuilder->CallPrintInt("curRound", curRound);
48//    iBuilder->CallPrintInt("curOffset", curOffset);
49
50    Value *notReachEnd = iBuilder->CreateICmpEQ(previousRound, curRound);
51    Value *copyEndOffset1 = iBuilder->CreateSelect(notReachEnd, curOffset, bufferSize);
52    Value *copySize1 = iBuilder->CreateSub(copyEndOffset1, previousOffset);
53
54
55    iBuilder->CreateMemCpy(
56            iBuilder->CreateGEP(outputBasePtr, previousOffset),
57            iBuilder->CreateGEP(inputBasePtr, previousOffset),
58            copySize1,
59            1 // Not align guaranteed
60    );
61//    iBuilder->CallPrintInt("bbb", iBuilder->getSize(0));
62    iBuilder->CreateMemCpy(
63            iBuilder->CreateGEP(outputBasePtr, iBuilder->getSize(0)),
64            iBuilder->CreateGEP(inputBasePtr, iBuilder->getSize(0)),
65            iBuilder->CreateSelect(notReachEnd, iBuilder->getSize(0), curOffset),
66            1 // Not align guaranteed
67    );
68//    iBuilder->CallPrintInt("ccc", iBuilder->getSize(0));
69
70    iBuilder->setProcessedItemCount("decompressedStream", itemsAvailable);
71//    iBuilder->setProducedItemCount("outputStream", itemsAvailable);
72//    iBuilder->CallPrintInt("producedItemCount", iBuilder->getProducedItemCount("outputStream"));
73    iBuilder->setScalarField("previousCopy", itemsAvailable);
74
75}
76
77void LZ4MatchCopyKernel::generateMultiBlockLogic(const unique_ptr<KernelBuilder> &iBuilder, Value * const numOfStrides) {
78//    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
79
80    // Copy
81//    iBuilder->CallPrintInt("available", iBuilder->getAvailableItemCount("decompressedStream"));
82    this->generateOutputCopy(iBuilder);
83
84
85    BasicBlock *copyEndBlock = iBuilder->CreateBasicBlock("copyEnd");
86    iBuilder->CreateBr(copyEndBlock);
87    iBuilder->SetInsertPoint(copyEndBlock);
88//    return;
89    BasicBlock *exitBlock = iBuilder->CreateBasicBlock("exit_block");
90    Value *initProcessIndex = iBuilder->getScalarField("currentProcessIndex");
91    Value *itemsAvailable = iBuilder->CreateAdd(
92            iBuilder->getProcessedItemCount("m0Start"),
93            iBuilder->getAvailableItemCount("m0Start")
94    );
95
96
97    BasicBlock *iterLoopCon = iBuilder->CreateBasicBlock("iter_loop_con");
98    BasicBlock *iterLoopBody = iBuilder->CreateBasicBlock("iter_loop_body");
99    BasicBlock *iterLoopExit = iBuilder->CreateBasicBlock("iter_loop_exit");
100
101    iBuilder->CreateBr(iterLoopCon);
102
103    // Con
104    iBuilder->SetInsertPoint(iterLoopCon);
105    PHINode *currentProcessIndex = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
106    currentProcessIndex->addIncoming(initProcessIndex, copyEndBlock);
107
108    iBuilder->CreateCondBr(
109            iBuilder->CreateICmpULT(currentProcessIndex, itemsAvailable),
110            iterLoopBody,
111            iterLoopExit
112    );
113
114    // Body
115    iBuilder->SetInsertPoint(iterLoopBody);
116
117
118    Value *currentM0Start = this->generateLoadCircularInput(iBuilder, "m0Start", currentProcessIndex,
119                                                            iBuilder->getInt64Ty()->getPointerTo());
120    Value *currentDepositStart = currentM0Start;
121
122    BasicBlock *matchCopyBody = iBuilder->CreateBasicBlock("match_copy_body");
123    Value *producedItemsCount = iBuilder->getProcessedItemCount("decompressedStream");
124
125    iBuilder->CreateCondBr(
126            iBuilder->CreateICmpULE(
127                    iBuilder->CreateSub(currentDepositStart, iBuilder->getInt64(1)),
128                    producedItemsCount
129            ),
130            matchCopyBody,
131            iterLoopExit
132    );
133
134    // matchCopyBody
135    iBuilder->SetInsertPoint(matchCopyBody);
136    this->generateMatchCopy(iBuilder, currentProcessIndex); // TODO main logic here
137    BasicBlock *matchCopyFinishBlock = iBuilder->CreateBasicBlock("match_copy_finish");
138    iBuilder->CreateBr(matchCopyFinishBlock);
139    iBuilder->SetInsertPoint(matchCopyFinishBlock);
140
141
142    Value *m0End = this->generateLoadCircularInput(iBuilder, "m0End", currentProcessIndex,
143                                                   iBuilder->getInt64Ty()->getPointerTo());
144    Value *depositEnd = iBuilder->CreateAdd(m0End, iBuilder->getInt64(1));
145    Value *maxProducedCount = iBuilder->CreateSelect(
146            iBuilder->CreateICmpUGT(
147                    producedItemsCount,
148                    depositEnd
149            ),
150            producedItemsCount,
151            depositEnd
152    );
153    iBuilder->setProducedItemCount("outputStream", maxProducedCount);
154    currentProcessIndex->addIncoming(
155            iBuilder->CreateAdd(currentProcessIndex, iBuilder->getSize(1)),
156            matchCopyFinishBlock
157    );
158    iBuilder->CreateBr(iterLoopCon);
159
160
161    // loop exit
162    iBuilder->SetInsertPoint(iterLoopExit);
163    iBuilder->setScalarField("currentProcessIndex", currentProcessIndex);
164
165    iBuilder->CreateBr(exitBlock);
166
167    // Exit
168    iBuilder->SetInsertPoint(exitBlock);
169}
170
171Value *LZ4MatchCopyKernel::generateMatchCopy(const unique_ptr<KernelBuilder> &iBuilder, Value *currentProcessIndex) {
172
173    BasicBlock *entryBlock = iBuilder->GetInsertBlock();
174
175
176    Value *m0Start = this->generateLoadCircularInput(iBuilder, "m0Start", currentProcessIndex,
177                                                     iBuilder->getInt64Ty()->getPointerTo());
178    Value *depositStart = m0Start;
179    Value *m0End = this->generateLoadCircularInput(iBuilder, "m0End", currentProcessIndex,
180                                                   iBuilder->getInt64Ty()->getPointerTo());
181    Value *depositEnd = iBuilder->CreateAdd(m0End, iBuilder->getInt64(1));
182
183    Value *matchOffset = this->generateLoadCircularInput(iBuilder, "matchOffset", currentProcessIndex,
184                                                         iBuilder->getInt64Ty()->getPointerTo());
185
186    Value *matchLength = iBuilder->CreateSub(depositEnd, depositStart);
187
188    Value *matchStart = iBuilder->CreateSub(depositStart, matchOffset);
189//    iBuilder->CallPrintInt("depositStart", depositStart);
190//    iBuilder->CallPrintInt("matchOffset", matchOffset);
191//    iBuilder->CallPrintInt("matchStart", matchStart);
192//    iBuilder->CallPrintInt("matchLength", matchLength);
193
194
195    BasicBlock* copyLoopCon = iBuilder->CreateBasicBlock("copy_loop_con");
196    BasicBlock* copyLoopBody = iBuilder->CreateBasicBlock("copy_loop_body");
197    BasicBlock* copyLoopExit = iBuilder->CreateBasicBlock("copy_loop_exit");
198
199    iBuilder->CreateBr(copyLoopCon);
200    iBuilder->SetInsertPoint(copyLoopCon);
201
202    PHINode* currentCopyIndex = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
203    currentCopyIndex->addIncoming(iBuilder->getSize(0), entryBlock);
204
205    iBuilder->CreateCondBr(iBuilder->CreateICmpULT(currentCopyIndex, matchLength), copyLoopBody, copyLoopExit);
206
207    iBuilder->SetInsertPoint(copyLoopBody);
208    Value* value = this->generateLoadCircularOutput(iBuilder, "outputStream", iBuilder->CreateAdd(matchStart, currentCopyIndex), iBuilder->getInt8Ty()->getPointerTo());
209//    iBuilder->CallPrintInt("value", value);
210//    iBuilder->CallPrintInt("storePos", iBuilder->CreateAdd(currentCopyIndex, depositStart));
211    this->generateStoreCircularOutput(iBuilder, "outputStream", iBuilder->CreateAdd(currentCopyIndex, depositStart),iBuilder->getInt8Ty()->getPointerTo(), value);
212    currentCopyIndex->addIncoming(iBuilder->CreateAdd(currentCopyIndex, iBuilder->getSize(1)), copyLoopBody);
213
214    iBuilder->CreateBr(copyLoopCon);
215
216    iBuilder->SetInsertPoint(copyLoopExit);
217
218}
219
220void LZ4MatchCopyKernel::generateStoreCircularOutput(const unique_ptr<KernelBuilder> &iBuilder, string outputBufferName, Value* offset, Type* pointerType, Value* value) {
221    size_t inputSize = this->getOutputBufferSize(iBuilder, outputBufferName);
222    Value* offsetMask = iBuilder->getSize(inputSize - 1);
223    Value* maskedOffset = iBuilder->CreateAnd(offsetMask, offset);
224
225    Value* outputBufferPtr = iBuilder->getRawOutputPointer(outputBufferName, iBuilder->getSize(0));
226
227    outputBufferPtr = iBuilder->CreatePointerCast(outputBufferPtr, pointerType);
228    iBuilder->CreateStore(value, iBuilder->CreateGEP(outputBufferPtr, maskedOffset));
229}
230
231Value* LZ4MatchCopyKernel::generateLoadCircularOutput(const unique_ptr<KernelBuilder> &iBuilder, string inputBufferName, Value* offset, Type* pointerType) {
232    size_t inputSize = this->getOutputBufferSize(iBuilder, inputBufferName);
233    Value* offsetMask = iBuilder->getSize(inputSize - 1);
234    Value* maskedOffset = iBuilder->CreateAnd(offsetMask, offset);
235
236    Value* inputBufferPtr = iBuilder->getRawOutputPointer(inputBufferName, iBuilder->getSize(0));
237
238    inputBufferPtr = iBuilder->CreatePointerCast(inputBufferPtr, pointerType);
239    return iBuilder->CreateLoad(iBuilder->CreateGEP(inputBufferPtr, maskedOffset));
240}
241
242Value *LZ4MatchCopyKernel::generateLoadCircularInput(const unique_ptr<KernelBuilder> &iBuilder, string inputBufferName,
243                                                     Value *offset, Type *pointerType) {
244    size_t inputSize = this->getInputBufferSize(iBuilder, inputBufferName);
245    Value *offsetMask = iBuilder->getSize(inputSize - 1);
246    Value *maskedOffset = iBuilder->CreateAnd(offsetMask, offset);
247
248    Value *inputBufferPtr = iBuilder->getRawInputPointer(inputBufferName, iBuilder->getSize(0));
249
250    inputBufferPtr = iBuilder->CreatePointerCast(inputBufferPtr, pointerType);
251    return iBuilder->CreateLoad(iBuilder->CreateGEP(inputBufferPtr, maskedOffset));
252}
253
254size_t LZ4MatchCopyKernel::getInputBufferSize(const unique_ptr<KernelBuilder> &iBuilder, string bufferName) {
255    return this->getInputStreamSetBuffer(bufferName)->getBufferBlocks() * iBuilder->getStride();
256}
257
258size_t LZ4MatchCopyKernel::getOutputBufferSize(const unique_ptr<KernelBuilder> &iBuilder, string bufferName) {
259    return this->getOutputStreamSetBuffer(bufferName)->getBufferBlocks() * iBuilder->getStride();
260}
261
262LZ4MatchCopyKernel::LZ4MatchCopyKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder)
263        : MultiBlockKernel("lz4MatchCopyKernel",
264        // Inputs
265                              {
266                                      Binding{iBuilder->getStreamSetTy(1, 8), "decompressedStream"},
267                                      Binding{iBuilder->getStreamSetTy(1, 64), "m0Start", BoundedRate(0, 1)},
268                                      Binding{iBuilder->getStreamSetTy(1, 64), "m0End", BoundedRate(0, 1)},
269                                      Binding{iBuilder->getStreamSetTy(1, 64), "matchOffset", BoundedRate(0, 1)},
270
271                              },
272        // Outputs
273                              {Binding{iBuilder->getStreamSetTy(1, 8), "outputStream", UnknownRate()}},
274        // Arguments
275                              {},
276                              {},
277                              {
278                                      Binding{iBuilder->getSizeTy(), "currentProcessIndex"},
279                                      Binding{iBuilder->getSizeTy(), "previousCopy"}
280                              }) {
281//    setNoTerminateAttribute(true);
282}
Note: See TracBrowser for help on using the repository browser.