source: icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_swizzled_match_copy_kernel.cpp @ 6047

Last change on this file since 6047 was 6047, checked in by nmedfort, 13 months ago

Major refactoring of buffer types. Static buffers replace Circular and CircularCopyback. External buffers unify Source/External.

File size: 20.7 KB
Line 
1
2
3#include "lz4_swizzled_match_copy_kernel.h"
4#include <kernels/kernel_builder.h>
5#include <kernels/streamset.h>
6#include <toolchain/toolchain.h>
7#include <vector>
8#include <llvm/Support/raw_ostream.h>
9
10
11using namespace llvm;
12using namespace std;
13namespace kernel {
14
15Value *LZ4SwizzledMatchCopyKernel::advanceUntilNextBit(const std::unique_ptr<KernelBuilder> &iBuilder, string inputName, Value *startPos, bool isNextOne) {
16    BasicBlock* entryBlock = iBuilder->GetInsertBlock();
17
18    Constant* SIZE_0 = iBuilder->getSize(0);
19    Constant* SIZE_1 = iBuilder->getSize(1);
20    Value* SIZE_64 = iBuilder->getSize(64); // maybe need to handle 32 bit machine
21    Value* SIZE_INPUT_64_COUNT = iBuilder->CreateUDiv(iBuilder->getCapacity(inputName), iBuilder->getSize(64));
22    Value* initCurrentPos = startPos;
23    Value* offsetMarkerRawPtr = iBuilder->CreatePointerCast(iBuilder->getRawInputPointer(inputName, SIZE_0), iBuilder->getInt64Ty()->getPointerTo());
24
25    BasicBlock* findNextMatchOffsetConBlock = iBuilder->CreateBasicBlock("findNextMatchOffsetConBlock");
26    BasicBlock* findNextMatchOffsetBodyBlock = iBuilder->CreateBasicBlock("findNextMatchOffsetBodyBlock");
27
28    iBuilder->CreateBr(findNextMatchOffsetConBlock);
29    iBuilder->SetInsertPoint(findNextMatchOffsetConBlock);
30    // Find position marker bit of next 1 bit
31
32    PHINode* phiCurrentPos = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
33    phiCurrentPos->addIncoming(initCurrentPos, entryBlock);
34
35    Value* currentPosGlobalBlockIndex = iBuilder->CreateUDiv(phiCurrentPos, SIZE_64);
36    Value* currentPosLocalBlockIndex = iBuilder->CreateURem(currentPosGlobalBlockIndex, SIZE_INPUT_64_COUNT);
37    Value* currentPosBlockOffset = iBuilder->CreateURem(phiCurrentPos, SIZE_64);
38    Value* currentValue = iBuilder->CreateLoad(iBuilder->CreateGEP(offsetMarkerRawPtr, currentPosLocalBlockIndex));
39
40    Value* countValue = iBuilder->CreateLShr(currentValue, currentPosBlockOffset);
41    if (!isNextOne) {
42        countValue = iBuilder->CreateNot(countValue);
43    }
44    Value* forwardZero = iBuilder->CreateCountForwardZeroes(countValue);
45    Value* realForwardZero = iBuilder->CreateAdd(currentPosBlockOffset, forwardZero);
46
47    // If targetMarker == 0, move to next block, otherwise count forward zero
48    phiCurrentPos->addIncoming(iBuilder->CreateMul(SIZE_64, iBuilder->CreateAdd(currentPosGlobalBlockIndex, SIZE_1)), iBuilder->GetInsertBlock());
49    iBuilder->CreateCondBr(iBuilder->CreateICmpUGE(realForwardZero, SIZE_64), findNextMatchOffsetConBlock, findNextMatchOffsetBodyBlock);
50
51    iBuilder->SetInsertPoint(findNextMatchOffsetBodyBlock);
52
53    Value* newPosition = iBuilder->CreateAdd(iBuilder->CreateMul(currentPosGlobalBlockIndex, SIZE_64), realForwardZero);
54
55    return newPosition;
56}
57
58pair<Value*, Value*> LZ4SwizzledMatchCopyKernel::loadNextMatchOffset(const unique_ptr<KernelBuilder> &iBuilder) {
59    Value* initCurrentPos = iBuilder->CreateAdd(iBuilder->getScalarField("currentOffsetMarkerPos"), iBuilder->getSize(1));
60    Value* newPosition = this->advanceUntilNextBit(iBuilder, "MatchOffsetMarker", initCurrentPos, true);
61
62    // Load Match Offset from newPosition
63    Value* matchOffsetPtr = iBuilder->getRawInputPointer("byteStream", newPosition);
64    // For now, it is safe to cast matchOffset pointer into i16 since the input byte stream is always linear available
65    matchOffsetPtr = iBuilder->CreatePointerCast(matchOffsetPtr, iBuilder->getInt16Ty()->getPointerTo());
66    Value* matchOffset = iBuilder->CreateZExt(iBuilder->CreateLoad(matchOffsetPtr), iBuilder->getSizeTy());
67
68    return std::make_pair(matchOffset, newPosition);
69}
70
71
72void LZ4SwizzledMatchCopyKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
73    // ---- Contant
74    ConstantInt * const SIZE_4_MEGS = iBuilder->getSize(4 * 1024 * 1024);
75    ConstantInt * const SIZE_0 = iBuilder->getSize(0);
76    ConstantInt * const SIZE_1 = iBuilder->getSize(1);
77    ConstantInt * const SIZE_64 = iBuilder->getSize(64);
78    ConstantInt * const INT64_0 = iBuilder->getInt64(0);
79    ConstantInt * const INT64_1 = iBuilder->getInt64(1);
80
81    Value * BITBLOCK_0 = iBuilder->CreateBitCast(ConstantInt::get(iBuilder->getIntNTy(iBuilder->getBitBlockWidth()), 0), iBuilder->getBitBlockType());
82
83    // ---- Type
84    Type* BITBLOCK_TYPE = iBuilder->getBitBlockType();
85    Type* BITBLOCK_PTR_TYPE = BITBLOCK_TYPE->getPointerTo();
86    Type* I64_TY = iBuilder->getInt64Ty();
87    Type* I64_PTR_TY = I64_TY->getPointerTo();
88
89    Value * PDEP_func = Intrinsic::getDeclaration(iBuilder->getModule(), Intrinsic::x86_bmi_pdep_64); //TODO for now only consider 64 bits
90
91    // ---- EntryBlock
92    BasicBlock * const entryBlock = iBuilder->GetInsertBlock();
93    BasicBlock * const exitBlock = iBuilder->CreateBasicBlock("exitBlock");
94
95    Value * const available = iBuilder->getAvailableItemCount("sourceStreamSet0");
96    Value * const processed = iBuilder->getProcessedItemCount("sourceStreamSet0");
97
98    Value * const itemsToDo = iBuilder->CreateUMin(iBuilder->CreateSub(available, processed), SIZE_4_MEGS);
99    iBuilder->setTerminationSignal(iBuilder->CreateICmpULT(itemsToDo, SIZE_4_MEGS));
100
101
102    Value* m0MarkerBasePtr = iBuilder->CreatePointerCast(iBuilder->getInputStreamBlockPtr("M0Marker", SIZE_0), I64_PTR_TY); // i64*
103    vector<Value*> sourceStreamBasePtrs, outputStreamBasePtrs; // <4 * i64>*
104    for (int i = 0; i < mStreamSize; i++) {
105        sourceStreamBasePtrs.push_back(iBuilder->CreatePointerCast(iBuilder->getInputStreamBlockPtr("sourceStreamSet" + std::to_string(i), SIZE_0), BITBLOCK_PTR_TYPE));
106        outputStreamBasePtrs.push_back(iBuilder->CreatePointerCast(iBuilder->getOutputStreamBlockPtr("outputStreamSet" + std::to_string(i), SIZE_0), BITBLOCK_PTR_TYPE));
107    }
108
109
110    BasicBlock * const processLoopCon = iBuilder->CreateBasicBlock("processLoopCon");
111    BasicBlock * const processLoopBody = iBuilder->CreateBasicBlock("processLoopBody");
112    BasicBlock * const processLoopExit = iBuilder->CreateBasicBlock("processLoopExit");
113
114    iBuilder->CreateBr(processLoopCon);
115
116    // ---- ProcessLoopCon
117    // ProcessLoop will process one block of data each time (64bit m0, <4 * i64> input and output data)
118    iBuilder->SetInsertPoint(processLoopCon);
119
120    // carryBit === 0x1 only when the most significant bit of the target M0 block is one, which means the first position of next block need to be deposited (match copy)
121
122    PHINode* phiCarryBit = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
123    PHINode* phiCurrentPosition = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2); // 0~4mb, and all M0 related
124    PHINode* phiCarryMatchOffset = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
125
126    phiCarryBit->addIncoming(INT64_0, entryBlock);
127    phiCurrentPosition->addIncoming(INT64_0, entryBlock);
128    phiCarryMatchOffset->addIncoming(SIZE_0, entryBlock);
129
130    iBuilder->CreateLikelyCondBr(iBuilder->CreateICmpULT(phiCurrentPosition, itemsToDo), processLoopBody, processLoopExit);
131
132    // ---- ProcessLoopBody
133    iBuilder->SetInsertPoint(processLoopBody);
134
135    Value* dataBlockIndex = iBuilder->CreateUDiv(phiCurrentPosition, SIZE_64);
136    Value* currentInitM0 = iBuilder->CreateLoad(iBuilder->CreateGEP(m0MarkerBasePtr, dataBlockIndex));
137    vector<Value*> initSourceData;
138    for (int i = 0; i < mStreamSize; i++) {
139        // Because of swizzled form, the sourceStream can be accessed linearly
140        initSourceData.push_back(iBuilder->CreateLoad(iBuilder->CreateGEP(sourceStreamBasePtrs[i], dataBlockIndex)));
141    }
142
143    BasicBlock* carryBitProcessBlock = iBuilder->CreateBasicBlock("CarryBitProcessBlock");
144
145    BasicBlock* matchCopyLoopCon = iBuilder->CreateBasicBlock("MatchCopyLoopCon");
146    BasicBlock* matchCopyLoopBody = iBuilder->CreateBasicBlock("MatchCopyLoopBody");
147    BasicBlock* matchCopyLoopExit = iBuilder->CreateBasicBlock("MatchCopyLoopExit");
148
149    //
150    // The carry bit will need to be processed specially only when
151    // the most significant bit of previous block is 1 (the carry bit is 0x1) and the
152    // least significant bit of current block is 0
153    // e.g.
154    //   Assume the most significant bit is on the right side
155    //
156    //                    i64_1       i64_2
157    //   M0         ... 0000 0011 | 0111 0000 ...  - Carry bit need to be handle specially
158    //   M0         ... 0000 0011 | 1011 0000 ...  - Carry bit will be handle in the loop of i64_2
159    //   Carry Bit                  1000 0000 ...  - 0x1
160
161    Value* needProcessCarryBit = iBuilder->CreateAnd(phiCarryBit, iBuilder->CreateNot(iBuilder->CreateAnd(currentInitM0, iBuilder->getInt64(1))));
162    needProcessCarryBit = iBuilder->CreateICmpNE(needProcessCarryBit, INT64_0);
163
164    iBuilder->CreateUnlikelyCondBr(needProcessCarryBit, carryBitProcessBlock, matchCopyLoopCon);
165
166    // ---- CarryBitProcessBlock
167    iBuilder->SetInsertPoint(carryBitProcessBlock);
168    vector<Value*> initSourceDataWithCarry;
169    Value* carryCopyFromPos = iBuilder->CreateSub(phiCurrentPosition, phiCarryMatchOffset);
170    Value* carryCopyFromBlockIndex = iBuilder->CreateUDiv(carryCopyFromPos, SIZE_64);
171    Value* carryCopyFromOffset = iBuilder->CreateURem(carryCopyFromPos, SIZE_64);
172    for (int i = 0; i < mStreamSize; i++) {
173        Value* v = iBuilder->CreateLoad(iBuilder->CreateGEP(outputStreamBasePtrs[i], carryCopyFromBlockIndex));
174        v = iBuilder->CreateLShr(v, iBuilder->simd_fill(mPDEPWidth, carryCopyFromOffset));
175        v = iBuilder->CreateAnd(v, iBuilder->simd_fill(mPDEPWidth, INT64_1));
176        initSourceDataWithCarry.push_back(iBuilder->CreateOr(v, initSourceData[i]));
177    }
178    iBuilder->CreateBr(matchCopyLoopCon);
179
180    // ---- MatchCopyLoopCon
181    // MatchCopy Loop will handle one continuous data deposit each time
182    iBuilder->SetInsertPoint(matchCopyLoopCon);
183
184    PHINode* phiLatestMatchOffset = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 3);
185    phiLatestMatchOffset->addIncoming(phiCarryMatchOffset, processLoopBody);
186    phiLatestMatchOffset->addIncoming(phiCarryMatchOffset, carryBitProcessBlock);
187
188    PHINode* phiRemainingM0Marker = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 3);
189    phiRemainingM0Marker->addIncoming(currentInitM0, processLoopBody);
190    phiRemainingM0Marker->addIncoming(currentInitM0, carryBitProcessBlock);
191
192    vector<PHINode*> outputData;
193    for (int i = 0; i < mStreamSize; i++) {
194        PHINode* outputValue = iBuilder->CreatePHI(iBuilder->getBitBlockType(), 3);
195        outputValue->addIncoming(initSourceData[i], processLoopBody);
196        outputValue->addIncoming(initSourceDataWithCarry[i], carryBitProcessBlock);
197        outputData.push_back(outputValue);
198    }
199//    iBuilder->CreateOr()
200    iBuilder->CreateLikelyCondBr(iBuilder->CreateICmpNE(phiRemainingM0Marker, INT64_0), matchCopyLoopBody, matchCopyLoopExit);
201
202    // ---- MatchCopyLoopBody
203    iBuilder->SetInsertPoint(matchCopyLoopBody);
204
205    // Match Offset
206    // M0      0111 1000  - load new match offset
207    // M0      1100 0011  - use carryMatchOffset
208    Value* remainM0ForwardZero = iBuilder->CreateCountForwardZeroes(phiRemainingM0Marker);
209
210    BasicBlock* loadNextMatchOffsetBlock = iBuilder->CreateBasicBlock("loadNextMatchOffsetBlock");
211    BasicBlock* doMatchCopyBlock = iBuilder->CreateBasicBlock("DoMatchCopyBlock");
212
213    iBuilder->CreateLikelyCondBr(
214            iBuilder->CreateOr(iBuilder->CreateICmpEQ(phiLatestMatchOffset, INT64_0),
215                               iBuilder->CreateICmpNE(remainM0ForwardZero, INT64_0)
216            ),
217            loadNextMatchOffsetBlock, doMatchCopyBlock
218    );
219
220    // ---- loadNextMatchOffsetBlock
221    iBuilder->SetInsertPoint(loadNextMatchOffsetBlock);
222    auto matchOffsetRet = this->loadNextMatchOffset(iBuilder);
223    BasicBlock* loadNextMatchOffsetExitBlock = iBuilder->GetInsertBlock();
224    Value* newMatchOffset = matchOffsetRet.first;
225    Value* newMatchOffsetPos = matchOffsetRet.second;
226    iBuilder->setScalarField("currentOffsetMarkerPos", newMatchOffsetPos);
227    iBuilder->setProcessedItemCount("MatchOffsetMarker", newMatchOffsetPos);
228    iBuilder->CreateBr(doMatchCopyBlock);
229
230    // ---- doMatchCopyBlock
231    iBuilder->SetInsertPoint(doMatchCopyBlock);
232
233    PHINode* phiTargetMatchOffset = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
234    phiTargetMatchOffset->addIncoming(phiLatestMatchOffset, matchCopyLoopBody);
235    phiTargetMatchOffset->addIncoming(newMatchOffset, loadNextMatchOffsetExitBlock);
236
237    //
238    // M0            0011 0010
239    // boundary      0000 1000
240    // nextMask      0000 0111
241    // deposit       0011 1000
242    // newM0         0000 0010
243
244    Value* remainStart = iBuilder->CreateShl(INT64_1, remainM0ForwardZero);
245
246    Value* boundaryMarker = iBuilder->CreateAnd(iBuilder->CreateAdd(phiRemainingM0Marker, remainStart), iBuilder->CreateNot(phiRemainingM0Marker));
247
248    Value* nextMask = iBuilder->CreateSub(INT64_0, iBuilder->CreateShl(boundaryMarker, INT64_1));
249    Value* depositMarker = iBuilder->CreateAnd(
250            iBuilder->CreateOr(phiRemainingM0Marker, boundaryMarker),
251            iBuilder->CreateNot(nextMask)
252    );
253    Value* newM0Marker = iBuilder->CreateAnd(phiRemainingM0Marker, nextMask);
254    Value* depositMarkerPopcount = iBuilder->CreatePopcount(depositMarker);
255
256    Value* matchCopyFromStart = iBuilder->CreateSub(iBuilder->CreateAdd(phiCurrentPosition, remainM0ForwardZero), phiTargetMatchOffset);
257    Value* matchCopyFromBlockIndex = iBuilder->CreateUDiv(matchCopyFromStart, SIZE_64);
258
259    Value* matchCopyFromOffset = iBuilder->CreateURem(matchCopyFromStart, SIZE_64);
260    Value* matchCopyFromRemaining = iBuilder->CreateSub(SIZE_64, matchCopyFromOffset);
261    Value* matchCopyFromNextBlockIndex = iBuilder->CreateAdd(matchCopyFromBlockIndex, iBuilder->CreateSelect(iBuilder->CreateICmpULE(depositMarkerPopcount, matchCopyFromRemaining), SIZE_0, SIZE_1));
262
263
264    vector<Value*> pdepSourceData;
265
266    for (int i = 0; i < mStreamSize; i++) {
267        Value* fromPtr = iBuilder->CreateGEP(outputStreamBasePtrs[i], matchCopyFromBlockIndex);
268        Value* fromBlockValue = iBuilder->CreateLoad(fromPtr);
269        // when dataBlockIndex == matchCopyFromBlockIndex, we need to use current output value as input
270        fromBlockValue = iBuilder->CreateSelect(iBuilder->CreateICmpEQ(dataBlockIndex, matchCopyFromBlockIndex), outputData[i], fromBlockValue);
271
272        Value* fromNextPtr = iBuilder->CreateGEP(outputStreamBasePtrs[i], matchCopyFromNextBlockIndex);
273        Value* fromNextBlockValue = iBuilder->CreateLoad(fromNextPtr);
274        fromNextBlockValue = iBuilder->CreateSelect(iBuilder->CreateICmpEQ(dataBlockIndex, matchCopyFromNextBlockIndex), outputData[i], fromNextBlockValue);
275
276
277        Value * allFromValue = iBuilder->CreateOr(
278                iBuilder->CreateLShr(fromBlockValue, iBuilder->simd_fill(mPDEPWidth, matchCopyFromOffset)),
279                iBuilder->CreateShl(fromNextBlockValue, iBuilder->simd_fill(mPDEPWidth, matchCopyFromRemaining))
280        );
281        pdepSourceData.push_back(allFromValue);
282    }
283
284    BasicBlock* doubleSourceDataCon = iBuilder->CreateBasicBlock("doubleSourceDataCon");
285    BasicBlock* doubleSourceDataBody = iBuilder->CreateBasicBlock("doubleSourceDataBody");
286    BasicBlock* doubleSourceDataExit = iBuilder->CreateBasicBlock("doubleSourceDataExit");
287
288    iBuilder->CreateBr(doubleSourceDataCon);
289
290    //
291    // When matchOffset < depositMarkerPopcount, we need to use log2 approach to double the source data
292    // e.g.
293    // Assume that match copy start position is 1, matchOffset is 1, match length is 5
294    //     outputBuffer              a000 0000 0000 0000
295    //     sourceDataBeforeDouble    a000 0000 0000 0000
296    // At this point, only 1 bit of source data is accessable, so it will double the source data 3 times until we have
297    // 1 * 2 ^ 3 = 8 bits accessable
298    //     sourceDataAfterDouble     aaaa aaaa 0000 0000
299    //     outputBuffer(after copy)  aaaa aa00 0000 0000
300    //
301
302    // ---- doubleSourceDataCon
303    iBuilder->SetInsertPoint(doubleSourceDataCon);
304    PHINode* phiSourceDataAccessable = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
305    phiSourceDataAccessable->addIncoming(phiTargetMatchOffset, doMatchCopyBlock);
306    vector<PHINode*> phiPdepSourceData;
307    for (int i = 0; i < mStreamSize; i++) {
308        PHINode* v = iBuilder->CreatePHI(iBuilder->getBitBlockType(), 2);
309        v->addIncoming(pdepSourceData[i], doMatchCopyBlock);
310        phiPdepSourceData.push_back(v);
311    }
312    iBuilder->CreateUnlikelyCondBr(iBuilder->CreateICmpULT(phiSourceDataAccessable, depositMarkerPopcount), doubleSourceDataBody, doubleSourceDataExit);
313
314    // ---- doubleSourceDataBody
315    iBuilder->SetInsertPoint(doubleSourceDataBody);
316    for (int i = 0; i < mStreamSize; i++) {
317        PHINode* v = phiPdepSourceData[i];
318        Value* newValue = iBuilder->CreateOr(v, iBuilder->CreateShl(v, iBuilder->simd_fill(mPDEPWidth, phiSourceDataAccessable)));
319        v->addIncoming(newValue, doubleSourceDataBody);
320    }
321    phiSourceDataAccessable->addIncoming(iBuilder->CreateShl(phiSourceDataAccessable, SIZE_1), doubleSourceDataBody);
322
323    iBuilder->CreateBr(doubleSourceDataCon);
324
325    // ---- doubleSourceDataExit
326    iBuilder->SetInsertPoint(doubleSourceDataExit);
327    // At this point, we can guarantee we have enough data for pdep
328    for (int i = 0; i < mStreamSize; i++) {
329        // Do Match Copy by PDEP
330        Value* allFromValue = phiPdepSourceData[i];
331        Value* newValue = BITBLOCK_0;
332        for (uint64_t j = 0; j < 4; j++) { // For now, we assume bit block type is always <4 * i64>
333            Value* source_field = iBuilder->CreateExtractElement(allFromValue, j);
334            Value * PDEP_field = iBuilder->CreateCall(PDEP_func, {source_field, depositMarker});
335            newValue = iBuilder->CreateInsertElement(newValue, PDEP_field, j);
336        }
337        PHINode* outputValue = outputData[i];
338        Value* newOutputValue = iBuilder->CreateOr(outputValue, newValue);
339        outputValue->addIncoming(newOutputValue, iBuilder->GetInsertBlock());
340    }
341    phiRemainingM0Marker->addIncoming(newM0Marker, iBuilder->GetInsertBlock());
342    phiLatestMatchOffset->addIncoming(phiTargetMatchOffset, iBuilder->GetInsertBlock());
343
344    iBuilder->CreateBr(matchCopyLoopCon);
345
346    // ---- MatchCopyLoopExit
347    iBuilder->SetInsertPoint(matchCopyLoopExit);
348    for (int i = 0; i < mStreamSize; i++) {
349        iBuilder->CreateStore(outputData[i], iBuilder->CreateGEP(outputStreamBasePtrs[i], dataBlockIndex));
350    }
351    Value* hasNewCarryBit = iBuilder->CreateAnd(currentInitM0, iBuilder->CreateShl(INT64_1, iBuilder->getInt64(63)));
352    hasNewCarryBit = iBuilder->CreateICmpNE(hasNewCarryBit, INT64_0);
353    Value* newCarryBit = iBuilder->CreateSelect(hasNewCarryBit, INT64_1, INT64_0);
354    phiCarryBit->addIncoming(newCarryBit, iBuilder->GetInsertBlock());
355
356    phiCarryMatchOffset->addIncoming(iBuilder->CreateSelect(hasNewCarryBit, phiLatestMatchOffset, iBuilder->getSize(0)), iBuilder->GetInsertBlock());
357
358    phiCurrentPosition->addIncoming(iBuilder->CreateAdd(phiCurrentPosition, SIZE_64), iBuilder->GetInsertBlock());
359
360    iBuilder->CreateBr(processLoopCon);
361
362    // ---- ProcessLoopExit
363    iBuilder->SetInsertPoint(processLoopExit);
364    Value * const toProcessItemCount = iBuilder->CreateAdd(processed, itemsToDo);
365    iBuilder->setProcessedItemCount("M0Marker", toProcessItemCount);
366    iBuilder->CreateBr(exitBlock);
367
368    // ---- ExitBlock
369    iBuilder->SetInsertPoint(exitBlock);
370
371
372}
373
374LZ4SwizzledMatchCopyKernel::LZ4SwizzledMatchCopyKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder, unsigned streamCount, unsigned streamSize, unsigned swizzleFactor, unsigned PDEP_width, std::string name)
375: SegmentOrientedKernel(std::move(name),
376// Inputs
377{
378                                   Binding{iBuilder->getStreamSetTy(1, 1), "MatchOffsetMarker", BoundedRate(0, 1)},
379                                   Binding{iBuilder->getStreamSetTy(1, 1), "M0Marker", BoundedRate(0, 1)},
380                                   Binding{iBuilder->getStreamSetTy(1, 8), "byteStream", RateEqualTo("MatchOffsetMarker")}
381},
382// Outputs
383{},
384// Arguments
385{
386},
387{},
388{
389       Binding(iBuilder->getSizeTy(), "currentOffsetMarkerPos"),
390//       Binding(iBuilder->getSizeTy(), "currentOffsetMarkerPos"),
391})
392, mSwizzleFactor(swizzleFactor)
393, mPDEPWidth(PDEP_width)
394, mStreamSize(streamSize)
395, mStreamCount(streamCount) {
396
397    assert((mSwizzleFactor == (iBuilder->getBitBlockWidth() / PDEP_width)) && "swizzle factor must equal bitBlockWidth / PDEP_width");
398    assert((mPDEPWidth == 64 || mPDEPWidth == 32) && "PDEP width must be 32 or 64");
399    setStride(4 * 1024 * 1024);
400    addAttribute(MustExplicitlyTerminate());
401
402    mStreamSetInputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "sourceStreamSet0", RateEqualTo("M0Marker"), Swizzled()});
403    mStreamSetOutputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "outputStreamSet0", RateEqualTo("M0Marker")});
404
405    for (unsigned i = 1; i < streamSize; i++) {
406        mStreamSetInputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "sourceStreamSet" + std::to_string(i), RateEqualTo("M0Marker"), Swizzled()});
407        mStreamSetOutputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "outputStreamSet" + std::to_string(i), RateEqualTo("M0Marker")});
408    }
409}
410
411}
Note: See TracBrowser for help on using the repository browser.