source: icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_swizzled_match_copy_kernel.cpp @ 5985

Last change on this file since 5985 was 5985, checked in by nmedfort, 8 weeks ago

Restructured MultiBlock? kernel. Removal of Swizzled buffers. Inclusion of PopCount? rates / non-linear access. Modifications to several kernels to better align them with the kernel and pipeline changes.

File size: 15.4 KB
Line 
1//
2// Created by wxy325 on 2018/3/9.
3//
4
5#include "lz4_swizzled_match_copy_kernel.h"
6#include <kernels/kernel_builder.h>
7#include <kernels/streamset.h>
8#include <toolchain/toolchain.h>
9#include <vector>
10#include <llvm/Support/raw_ostream.h>
11
12using namespace llvm;
13using namespace std;
14namespace kernel {
15
16Value *LZ4SwizzledMatchCopyKernel::advanceUntilNextBit(const std::unique_ptr<KernelBuilder> &iBuilder, string inputName, Value *startPos, bool isNextOne) {
17    BasicBlock* entryBlock = iBuilder->GetInsertBlock();
18
19    Constant* SIZE_0 = iBuilder->getSize(0);
20    Constant* SIZE_1 = iBuilder->getSize(1);
21    Value* SIZE_64 = iBuilder->getSize(64); // maybe need to handle 32 bit machine
22    Value* SIZE_INPUT_64_COUNT = iBuilder->getSize(this->getInputStreamSetBuffer(inputName)->getBufferBlocks() * iBuilder->getBitBlockWidth() / 64);
23
24    Value* initCurrentPos = startPos;
25
26    Value* offsetMarkerRawPtr = iBuilder->CreatePointerCast(iBuilder->getRawInputPointer(inputName, SIZE_0), iBuilder->getInt64Ty()->getPointerTo());
27
28    BasicBlock* findNextMatchOffsetConBlock = iBuilder->CreateBasicBlock("findNextMatchOffsetConBlock");
29    BasicBlock* findNextMatchOffsetBodyBlock = iBuilder->CreateBasicBlock("findNextMatchOffsetBodyBlock");
30
31    iBuilder->CreateBr(findNextMatchOffsetConBlock);
32    iBuilder->SetInsertPoint(findNextMatchOffsetConBlock);
33    // Find position marker bit of next 1 bit
34
35    PHINode* phiCurrentPos = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
36    phiCurrentPos->addIncoming(initCurrentPos, entryBlock);
37
38    Value* currentPosGlobalBlockIndex = iBuilder->CreateUDiv(phiCurrentPos, SIZE_64);
39    Value* currentPosLocalBlockIndex = iBuilder->CreateURem(currentPosGlobalBlockIndex, SIZE_INPUT_64_COUNT);
40    Value* currentPosBlockOffset = iBuilder->CreateURem(phiCurrentPos, SIZE_64);
41    Value* currentValue = iBuilder->CreateLoad(iBuilder->CreateGEP(offsetMarkerRawPtr, currentPosLocalBlockIndex));
42
43    Value* countValue = iBuilder->CreateLShr(currentValue, currentPosBlockOffset);
44    if (!isNextOne) {
45        countValue = iBuilder->CreateNot(countValue);
46    }
47    Value* forwardZero = iBuilder->CreateCountForwardZeroes(countValue);
48    Value* realForwardZero = iBuilder->CreateAdd(currentPosBlockOffset, forwardZero);
49
50    // If targetMarker == 0, move to next block, otherwise count forward zero
51    phiCurrentPos->addIncoming(iBuilder->CreateMul(SIZE_64, iBuilder->CreateAdd(currentPosGlobalBlockIndex, SIZE_1)), iBuilder->GetInsertBlock());
52    iBuilder->CreateCondBr(iBuilder->CreateICmpUGE(realForwardZero, SIZE_64), findNextMatchOffsetConBlock, findNextMatchOffsetBodyBlock);
53
54    iBuilder->SetInsertPoint(findNextMatchOffsetBodyBlock);
55
56    Value* newPosition = iBuilder->CreateAdd(iBuilder->CreateMul(currentPosGlobalBlockIndex, SIZE_64), realForwardZero);
57
58    return newPosition;
59}
60
61pair<Value*, Value*> LZ4SwizzledMatchCopyKernel::loadNextMatchOffset(const unique_ptr<KernelBuilder> &iBuilder) {
62    Value* initCurrentPos = iBuilder->CreateAdd(iBuilder->getScalarField("currentOffsetMarkerPos"), iBuilder->getSize(1));
63    Value* newPosition = this->advanceUntilNextBit(iBuilder, "MatchOffsetMarker", initCurrentPos, true);
64
65    // Load Match Offset from newPosition
66    Value* matchOffsetPtr = iBuilder->getRawInputPointer("byteStream", newPosition);
67    // For now, it is safe to cast matchOffset pointer into i16 since the input byte stream is always linear available
68    matchOffsetPtr = iBuilder->CreatePointerCast(matchOffsetPtr, iBuilder->getInt16Ty()->getPointerTo());
69    Value* matchOffset = iBuilder->CreateZExt(iBuilder->CreateLoad(matchOffsetPtr), iBuilder->getSizeTy());
70
71    return std::make_pair(matchOffset, newPosition);
72}
73
74pair<Value*, Value*> LZ4SwizzledMatchCopyKernel::loadNextM0StartEnd(const unique_ptr<KernelBuilder> &iBuilder) {
75    Value* initCurrentPos = iBuilder->getScalarField("currentM0MarkerPos");
76    Value* m0Start = this->advanceUntilNextBit(iBuilder, "M0Marker", initCurrentPos, true);
77    Value* m0End = this->advanceUntilNextBit(iBuilder, "M0Marker", m0Start, false);
78    return std::make_pair(m0Start, m0End);
79};
80
81
82void LZ4SwizzledMatchCopyKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
83    ConstantInt * const SIZE_4_MEGS = iBuilder->getSize(4 * 1024 * 1024);
84
85    BasicBlock * const entryBlock = iBuilder->GetInsertBlock();
86
87    Value * const available = iBuilder->getAvailableItemCount("sourceStreamSet0");
88    Value * const processed = iBuilder->getProcessedItemCount("sourceStreamSet0");
89
90    Value * const itemsToDo = iBuilder->CreateUMin(iBuilder->CreateSub(available, processed), SIZE_4_MEGS);
91    iBuilder->setTerminationSignal(iBuilder->CreateICmpULT(itemsToDo, SIZE_4_MEGS));
92
93
94    // Output Copy
95    generateOutputCopy(iBuilder);
96
97    Value * const toProcessItemCount = iBuilder->CreateAdd(processed, itemsToDo);
98
99    // Match Copy
100    Value *initM0StartProcessIndex = iBuilder->getProcessedItemCount("M0CountMarker");
101    Value *totalM0StartItemsCount = iBuilder->getAvailableItemCount("M0CountMarker");
102
103    BasicBlock * const matchCopyLoopCon = iBuilder->CreateBasicBlock("matchCopyLoopCon");
104    BasicBlock * const processExitBlock = iBuilder->CreateBasicBlock("exit_block");
105
106    BasicBlock * const loadNextMatchInfoBodyBlock = iBuilder->CreateBasicBlock("loadNewMatchInfoBodyBlock");
107    BasicBlock * const matchCopyConBlock = iBuilder->CreateBasicBlock("matchCopyConBlock");
108    BasicBlock * const matchCopyBodyBlock = iBuilder->CreateBasicBlock("matchCopyBodyBlock");
109
110
111    iBuilder->CreateBr(matchCopyLoopCon);
112
113    iBuilder->SetInsertPoint(matchCopyLoopCon);
114    PHINode * const phiProcessIndex = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
115    phiProcessIndex->addIncoming(initM0StartProcessIndex, entryBlock);
116
117    Value * const hasMoreMatchInfo = iBuilder->CreateICmpULT(phiProcessIndex, totalM0StartItemsCount);
118
119    iBuilder->CreateCondBr(hasMoreMatchInfo, loadNextMatchInfoBodyBlock, processExitBlock);
120
121    iBuilder->SetInsertPoint(loadNextMatchInfoBodyBlock);
122
123    auto ret = this->loadNextM0StartEnd(iBuilder);
124    Value *newM0Start = ret.first;
125    Value *newM0End = ret.second;
126
127    auto matchOffsetRet = this->loadNextMatchOffset(iBuilder);
128    Value *newMatchOffset = matchOffsetRet.first;
129    Value* newMatchOffsetPos = matchOffsetRet.second;
130
131    Value * const newMatchLength = iBuilder->CreateAdd(iBuilder->CreateSub(newM0End, newM0Start), iBuilder->getInt64(1));
132
133    iBuilder->CreateBr(matchCopyConBlock);
134    iBuilder->SetInsertPoint(matchCopyConBlock);
135
136    Value * const hasNotReachEnd = iBuilder->CreateICmpULT(newM0Start, toProcessItemCount);
137    iBuilder->CreateLikelyCondBr(hasNotReachEnd, matchCopyBodyBlock, processExitBlock);
138
139    iBuilder->SetInsertPoint(matchCopyBodyBlock);
140
141    iBuilder->setScalarField("currentOffsetMarkerPos", newMatchOffsetPos);
142    iBuilder->setProcessedItemCount("MatchOffsetMarker", newMatchOffsetPos);
143    iBuilder->setScalarField("currentM0MarkerPos", newM0End);
144    iBuilder->setProcessedItemCount("M0Marker", newM0End);
145
146
147    BasicBlock* copyLoopCon = iBuilder->CreateBasicBlock("copyLoopCon");
148    BasicBlock* copyLoopBody = iBuilder->CreateBasicBlock("copyLoopBody");
149    iBuilder->CreateBr(copyLoopCon);
150    iBuilder->SetInsertPoint(copyLoopCon);
151    PHINode* phiMatchLength = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
152    PHINode* phiMatchPos = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
153
154    phiMatchLength->addIncoming(newMatchLength, matchCopyBodyBlock);
155    phiMatchPos->addIncoming(newM0Start, matchCopyBodyBlock);
156
157    phiProcessIndex->addIncoming(iBuilder->CreateAdd(phiProcessIndex, iBuilder->getSize(1)), iBuilder->GetInsertBlock());
158
159
160    iBuilder->CreateLikelyCondBr(iBuilder->CreateICmpNE(phiMatchLength, iBuilder->getSize(0)), copyLoopBody, matchCopyLoopCon);
161
162    iBuilder->SetInsertPoint(copyLoopBody);
163    Value* copySize = this->doMatchCopy(iBuilder, phiMatchPos, newMatchOffset, phiMatchLength);
164    phiMatchLength->addIncoming(iBuilder->CreateSub(phiMatchLength, copySize), iBuilder->GetInsertBlock());
165    phiMatchPos->addIncoming(iBuilder->CreateAdd(phiMatchPos, copySize), iBuilder->GetInsertBlock());
166    iBuilder->CreateBr(copyLoopCon);
167
168    iBuilder->SetInsertPoint(processExitBlock);
169    iBuilder->setProcessedItemCount("M0CountMarker", phiProcessIndex);
170    iBuilder->setProcessedItemCount("M0Marker", toProcessItemCount);
171    iBuilder->setProcessedItemCount("sourceStreamSet0", toProcessItemCount);
172    iBuilder->setScalarField("currentM0MarkerPos", toProcessItemCount);
173
174}
175
176llvm::Value* LZ4SwizzledMatchCopyKernel::doMatchCopy(const std::unique_ptr<KernelBuilder> & iBuilder, llvm::Value* phiMatchPos, llvm::Value* phiMatchOffset, llvm::Value* phiMatchLength) {
177    ConstantInt * const SIZE_ZERO = iBuilder->getSize(0);
178    ConstantInt * const SIZE_ONE = iBuilder->getSize(1);
179    ConstantInt * const SIZE_PDEP_WIDTH = iBuilder->getSize(mPDEPWidth);
180    ConstantInt * const SIZE_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
181
182    ConstantInt * const outputBufferBlocks = iBuilder->getSize(this->getAnyStreamSetBuffer("outputStreamSet0")->getBufferBlocks());
183
184    Value* matchPosLocalBlockIndex = iBuilder->CreateURem(iBuilder->CreateUDiv(phiMatchPos, SIZE_BLOCK_WIDTH), outputBufferBlocks);
185    Value * const matchCopyTargetStreamIndex = iBuilder->CreateURem(iBuilder->CreateUDiv(phiMatchPos, SIZE_PDEP_WIDTH), iBuilder->getSize(mStreamCount));
186    Value * const matchCopyTargetBlockOffset = iBuilder->CreateURem(phiMatchPos, SIZE_PDEP_WIDTH);
187
188    Value * const matchCopyFromPos = iBuilder->CreateSub(phiMatchPos, phiMatchOffset);
189
190
191    Value* matchCopyFromLocalBlockIndex = iBuilder->CreateURem(iBuilder->CreateUDiv(matchCopyFromPos, SIZE_BLOCK_WIDTH), outputBufferBlocks);
192    Value * const matchCopyFromStreamIndex = iBuilder->CreateURem(iBuilder->CreateUDiv(matchCopyFromPos, SIZE_PDEP_WIDTH), iBuilder->getSize(mStreamCount));
193    Value * const matchCopyFromBlockOffset = iBuilder->CreateURem(matchCopyFromPos, SIZE_PDEP_WIDTH);
194
195    Value* fromBlockRemain = iBuilder->CreateSub(SIZE_PDEP_WIDTH, matchCopyFromBlockOffset);
196
197    Value * currentCopySize = iBuilder->CreateSub(SIZE_PDEP_WIDTH, iBuilder->CreateUMax(matchCopyFromBlockOffset, matchCopyTargetBlockOffset));
198    currentCopySize = iBuilder->CreateUMin(currentCopySize, phiMatchOffset);
199    currentCopySize = iBuilder->CreateUMin(currentCopySize, phiMatchLength);
200    currentCopySize = iBuilder->CreateSelect(iBuilder->CreateICmpEQ(currentCopySize, SIZE_ZERO), SIZE_ONE, currentCopySize); //Workaround for the last byte
201
202    Value * newCurrentCopySize = iBuilder->CreateSub(SIZE_PDEP_WIDTH, matchCopyTargetBlockOffset);
203    newCurrentCopySize = iBuilder->CreateUMin(newCurrentCopySize, phiMatchOffset);
204    newCurrentCopySize = iBuilder->CreateUMin(newCurrentCopySize, phiMatchLength);
205
206    Value * const fromBlockOffsetVector = iBuilder->simd_fill(mPDEPWidth, matchCopyFromBlockOffset);
207    Value * const fromBlockRemainVector = iBuilder->simd_fill(mPDEPWidth, fromBlockRemain);
208
209    Value * const targetLeftShiftVector = iBuilder->simd_fill(mPDEPWidth, iBuilder->CreateSub(SIZE_PDEP_WIDTH, newCurrentCopySize));
210    Value * const targetRightShiftVector = iBuilder->simd_fill(mPDEPWidth, iBuilder->CreateSub(SIZE_PDEP_WIDTH, iBuilder->CreateAdd(newCurrentCopySize, matchCopyTargetBlockOffset)));
211
212    for (unsigned i = 0; i < mStreamSize; i++) {
213        Value* basePtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer("outputStreamSet" + std::to_string(i), SIZE_ZERO), iBuilder->getBitBlockType()->getPointerTo());
214
215        Value * const matchCopyFromBlockPtr = iBuilder->CreateGEP(basePtr, iBuilder->CreateAdd(iBuilder->CreateMul(matchCopyFromLocalBlockIndex, iBuilder->getSize(mStreamCount)), matchCopyFromStreamIndex));
216        Value * const fromBlockValue = iBuilder->CreateBlockAlignedLoad(matchCopyFromBlockPtr);
217        Value * const fromNextBlockValue = iBuilder->CreateBlockAlignedLoad(iBuilder->CreateGEP(matchCopyFromBlockPtr, iBuilder->CreateSelect(iBuilder->CreateICmpULE(newCurrentCopySize, fromBlockRemain), SIZE_ZERO, SIZE_ONE)));
218
219        Value * allFromValue = iBuilder->CreateOr(
220                iBuilder->CreateLShr(fromBlockValue, fromBlockOffsetVector),
221                iBuilder->CreateShl(fromNextBlockValue, fromBlockRemainVector)
222        );
223        Value * allTargetValue = iBuilder->CreateLShr(iBuilder->CreateShl(allFromValue, targetLeftShiftVector), targetRightShiftVector);
224
225        Value * const outputTargetBlockPtr = iBuilder->CreateGEP(basePtr, iBuilder->CreateAdd(iBuilder->CreateMul(matchPosLocalBlockIndex, iBuilder->getSize(mStreamCount)), matchCopyTargetStreamIndex));
226        Value * const targetOriginalValue = iBuilder->CreateBlockAlignedLoad(outputTargetBlockPtr);
227
228        Value * const finalValue = iBuilder->CreateOr(targetOriginalValue, allTargetValue);
229
230        iBuilder->CreateStore(finalValue, outputTargetBlockPtr);
231    }
232    return currentCopySize;
233}
234
235void LZ4SwizzledMatchCopyKernel::generateOutputCopy(const std::unique_ptr<KernelBuilder> & iBuilder) {
236    Constant * SIZE_ZERO = iBuilder->getSize(0);
237    Constant * COPY_BYTES = iBuilder->getSize(4 * 1024 * 1024 * mStreamCount / 8);
238    for (unsigned i = 0; i < mStreamSize; i++) {
239        Value * inputBasePtr = iBuilder->getInputStreamBlockPtr("sourceStreamSet" + std::to_string(i), SIZE_ZERO);
240        Value * outputBasePtr = iBuilder->getOutputStreamBlockPtr("outputStreamSet" + std::to_string(i), SIZE_ZERO);
241        iBuilder->CreateMemCpy(outputBasePtr, inputBasePtr, COPY_BYTES, 1); // Not align guaranteed in final block
242    }
243}
244
245
246LZ4SwizzledMatchCopyKernel::LZ4SwizzledMatchCopyKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder, unsigned streamCount/*=4*/, unsigned streamSize/*=2*/, unsigned swizzleFactor/*=4*/, unsigned PDEP_width/*64*/)
247: SegmentOrientedKernel("LZ4SwizzledMatchCopyKernel",
248// Inputs
249{
250                                   Binding{iBuilder->getStreamSetTy(1, 1), "MatchOffsetMarker", BoundedRate(0, 1)},
251                                   Binding{iBuilder->getStreamSetTy(1, 1), "M0Marker", BoundedRate(0, 1)},
252                                   Binding{iBuilder->getStreamSetTy(1, 1), "M0CountMarker", BoundedRate(0, 1)},
253                                   Binding{iBuilder->getStreamSetTy(1, 8), "byteStream", BoundedRate(0, 1)}
254},
255// Outputs
256{},
257// Arguments
258{
259},
260{},
261{
262       Binding{iBuilder->getSizeTy(), "currentProcessIndex"},
263       Binding{iBuilder->getSizeTy(), "pendingMatchPos"},
264       Binding{iBuilder->getSizeTy(), "pendingMatchOffset"},
265       Binding{iBuilder->getSizeTy(), "pendingMatchLength"},
266       Binding(iBuilder->getSizeTy(), "currentOffsetMarkerPos"),
267       Binding(iBuilder->getSizeTy(), "currentM0MarkerPos")
268})
269, mSwizzleFactor(swizzleFactor)
270, mPDEPWidth(PDEP_width)
271, mStreamSize(streamSize)
272, mStreamCount(streamCount) {
273
274    assert((mSwizzleFactor == (iBuilder->getBitBlockWidth() / PDEP_width)) && "swizzle factor must equal bitBlockWidth / PDEP_width");
275    assert((mPDEPWidth == 64 || mPDEPWidth == 32) && "PDEP width must be 32 or 64");
276    setStride(4 * 1024 * 1024);
277    addAttribute(MustExplicitlyTerminate());
278
279    mStreamSetInputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "sourceStreamSet0", BoundedRate(0, 1), Swizzled()});
280    mStreamSetOutputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "outputStreamSet0", RateEqualTo("sourceStreamSet0")});
281
282    for (unsigned i = 1; i < streamSize; i++) {
283        mStreamSetInputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "sourceStreamSet" + std::to_string(i), RateEqualTo("sourceStreamSet0"), Swizzled()});
284        mStreamSetOutputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "outputStreamSet" + std::to_string(i), RateEqualTo("sourceStreamSet0")});
285    }
286}
287
288}
Note: See TracBrowser for help on using the repository browser.