source: icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_swizzled_match_copy_kernel.cpp @ 6066

Last change on this file since 6066 was 6066, checked in by xwa163, 11 months ago

fix some warning in lz4 related kernels

File size: 20.8 KB
Line 
1
2
3#include "lz4_swizzled_match_copy_kernel.h"
4#include <kernels/kernel_builder.h>
5#include <kernels/streamset.h>
6#include <toolchain/toolchain.h>
7#include <vector>
8#include <llvm/Support/raw_ostream.h>
9#include <llvm/IR/Intrinsics.h>
10
11
12using namespace llvm;
13using namespace std;
14namespace kernel {
15
16Value *LZ4SwizzledMatchCopyKernel::advanceUntilNextBit(const std::unique_ptr<KernelBuilder> &iBuilder, string inputName, Value *startPos, bool isNextOne) {
17    BasicBlock* entryBlock = iBuilder->GetInsertBlock();
18
19    Constant* SIZE_0 = iBuilder->getSize(0);
20    Constant* SIZE_1 = iBuilder->getSize(1);
21    Value* SIZE_64 = iBuilder->getSize(64); // maybe need to handle 32 bit machine
22    Value* SIZE_INPUT_64_COUNT = iBuilder->CreateUDiv(iBuilder->getCapacity(inputName), iBuilder->getSize(64));
23    Value* initCurrentPos = startPos;
24    Value* offsetMarkerRawPtr = iBuilder->CreatePointerCast(iBuilder->getRawInputPointer(inputName, SIZE_0), iBuilder->getInt64Ty()->getPointerTo());
25
26    BasicBlock* findNextMatchOffsetConBlock = iBuilder->CreateBasicBlock("findNextMatchOffsetConBlock");
27    BasicBlock* findNextMatchOffsetBodyBlock = iBuilder->CreateBasicBlock("findNextMatchOffsetBodyBlock");
28
29    iBuilder->CreateBr(findNextMatchOffsetConBlock);
30    iBuilder->SetInsertPoint(findNextMatchOffsetConBlock);
31    // Find position marker bit of next 1 bit
32
33    PHINode* phiCurrentPos = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
34    phiCurrentPos->addIncoming(initCurrentPos, entryBlock);
35
36    Value* currentPosGlobalBlockIndex = iBuilder->CreateUDiv(phiCurrentPos, SIZE_64);
37    Value* currentPosLocalBlockIndex = iBuilder->CreateURem(currentPosGlobalBlockIndex, SIZE_INPUT_64_COUNT);
38    Value* currentPosBlockOffset = iBuilder->CreateURem(phiCurrentPos, SIZE_64);
39    Value* currentValue = iBuilder->CreateLoad(iBuilder->CreateGEP(offsetMarkerRawPtr, currentPosLocalBlockIndex));
40
41    Value* countValue = iBuilder->CreateLShr(currentValue, currentPosBlockOffset);
42    if (!isNextOne) {
43        countValue = iBuilder->CreateNot(countValue);
44    }
45    Value* forwardZero = iBuilder->CreateCountForwardZeroes(countValue);
46    Value* realForwardZero = iBuilder->CreateAdd(currentPosBlockOffset, forwardZero);
47
48    // If targetMarker == 0, move to next block, otherwise count forward zero
49    phiCurrentPos->addIncoming(iBuilder->CreateMul(SIZE_64, iBuilder->CreateAdd(currentPosGlobalBlockIndex, SIZE_1)), iBuilder->GetInsertBlock());
50    iBuilder->CreateCondBr(iBuilder->CreateICmpUGE(realForwardZero, SIZE_64), findNextMatchOffsetConBlock, findNextMatchOffsetBodyBlock);
51
52    iBuilder->SetInsertPoint(findNextMatchOffsetBodyBlock);
53
54    Value* newPosition = iBuilder->CreateAdd(iBuilder->CreateMul(currentPosGlobalBlockIndex, SIZE_64), realForwardZero);
55
56    return newPosition;
57}
58
59pair<Value*, Value*> LZ4SwizzledMatchCopyKernel::loadNextMatchOffset(const unique_ptr<KernelBuilder> &iBuilder) {
60    Value* initCurrentPos = iBuilder->CreateAdd(iBuilder->getScalarField("currentOffsetMarkerPos"), iBuilder->getSize(1));
61    Value* newPosition = this->advanceUntilNextBit(iBuilder, "MatchOffsetMarker", initCurrentPos, true);
62
63    // Load Match Offset from newPosition
64    Value* matchOffsetPtr = iBuilder->getRawInputPointer("byteStream", newPosition);
65    // For now, it is safe to cast matchOffset pointer into i16 since the input byte stream is always linear available
66    matchOffsetPtr = iBuilder->CreatePointerCast(matchOffsetPtr, iBuilder->getInt16Ty()->getPointerTo());
67    Value* matchOffset = iBuilder->CreateZExt(iBuilder->CreateLoad(matchOffsetPtr), iBuilder->getSizeTy());
68
69    return std::make_pair(matchOffset, newPosition);
70}
71
72
73void LZ4SwizzledMatchCopyKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
74    // ---- Contant
75    ConstantInt * const SIZE_4_MEGS = iBuilder->getSize(4 * 1024 * 1024);
76    ConstantInt * const SIZE_0 = iBuilder->getSize(0);
77    ConstantInt * const SIZE_1 = iBuilder->getSize(1);
78    ConstantInt * const SIZE_64 = iBuilder->getSize(64);
79    ConstantInt * const INT64_0 = iBuilder->getInt64(0);
80    ConstantInt * const INT64_1 = iBuilder->getInt64(1);
81
82    Value * BITBLOCK_0 = iBuilder->CreateBitCast(ConstantInt::get(iBuilder->getIntNTy(iBuilder->getBitBlockWidth()), 0), iBuilder->getBitBlockType());
83
84    // ---- Type
85    Type* BITBLOCK_TYPE = iBuilder->getBitBlockType();
86    Type* BITBLOCK_PTR_TYPE = BITBLOCK_TYPE->getPointerTo();
87    Type* I64_TY = iBuilder->getInt64Ty();
88    Type* I64_PTR_TY = I64_TY->getPointerTo();
89
90    Value * PDEP_func = Intrinsic::getDeclaration(iBuilder->getModule(), Intrinsic::x86_bmi_pdep_64); //TODO for now only consider 64 bits
91
92    // ---- EntryBlock
93    BasicBlock * const entryBlock = iBuilder->GetInsertBlock();
94    BasicBlock * const exitBlock = iBuilder->CreateBasicBlock("exitBlock");
95
96    Value * const available = iBuilder->getAvailableItemCount("sourceStreamSet0");
97    Value * const processed = iBuilder->getProcessedItemCount("sourceStreamSet0");
98
99    Value * const itemsToDo = iBuilder->CreateUMin(iBuilder->CreateSub(available, processed), SIZE_4_MEGS);
100    iBuilder->setTerminationSignal(iBuilder->CreateICmpULT(itemsToDo, SIZE_4_MEGS));
101
102
103    Value* m0MarkerBasePtr = iBuilder->CreatePointerCast(iBuilder->getInputStreamBlockPtr("M0Marker", SIZE_0), I64_PTR_TY); // i64*
104    vector<Value*> sourceStreamBasePtrs, outputStreamBasePtrs; // <4 * i64>*
105    for (unsigned i = 0; i < mStreamSize; i++) {
106        sourceStreamBasePtrs.push_back(iBuilder->CreatePointerCast(iBuilder->getInputStreamBlockPtr("sourceStreamSet" + std::to_string(i), SIZE_0), BITBLOCK_PTR_TYPE));
107        outputStreamBasePtrs.push_back(iBuilder->CreatePointerCast(iBuilder->getOutputStreamBlockPtr("outputStreamSet" + std::to_string(i), SIZE_0), BITBLOCK_PTR_TYPE));
108    }
109
110
111    BasicBlock * const processLoopCon = iBuilder->CreateBasicBlock("processLoopCon");
112    BasicBlock * const processLoopBody = iBuilder->CreateBasicBlock("processLoopBody");
113    BasicBlock * const processLoopExit = iBuilder->CreateBasicBlock("processLoopExit");
114
115    iBuilder->CreateBr(processLoopCon);
116
117    // ---- ProcessLoopCon
118    // ProcessLoop will process one block of data each time (64bit m0, <4 * i64> input and output data)
119    iBuilder->SetInsertPoint(processLoopCon);
120
121    // carryBit === 0x1 only when the most significant bit of the target M0 block is one, which means the first position of next block need to be deposited (match copy)
122
123    PHINode* phiCarryBit = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
124    PHINode* phiCurrentPosition = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2); // 0~4mb, and all M0 related
125    PHINode* phiCarryMatchOffset = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
126
127    phiCarryBit->addIncoming(INT64_0, entryBlock);
128    phiCurrentPosition->addIncoming(INT64_0, entryBlock);
129    phiCarryMatchOffset->addIncoming(SIZE_0, entryBlock);
130
131    iBuilder->CreateLikelyCondBr(iBuilder->CreateICmpULT(phiCurrentPosition, itemsToDo), processLoopBody, processLoopExit);
132
133    // ---- ProcessLoopBody
134    iBuilder->SetInsertPoint(processLoopBody);
135
136    Value* dataBlockIndex = iBuilder->CreateUDiv(phiCurrentPosition, SIZE_64);
137    Value* currentInitM0 = iBuilder->CreateLoad(iBuilder->CreateGEP(m0MarkerBasePtr, dataBlockIndex));
138    vector<Value*> initSourceData;
139    for (unsigned i = 0; i < mStreamSize; i++) {
140        // Because of swizzled form, the sourceStream can be accessed linearly
141        initSourceData.push_back(iBuilder->CreateLoad(iBuilder->CreateGEP(sourceStreamBasePtrs[i], dataBlockIndex)));
142    }
143
144    BasicBlock* carryBitProcessBlock = iBuilder->CreateBasicBlock("CarryBitProcessBlock");
145
146    BasicBlock* matchCopyLoopCon = iBuilder->CreateBasicBlock("MatchCopyLoopCon");
147    BasicBlock* matchCopyLoopBody = iBuilder->CreateBasicBlock("MatchCopyLoopBody");
148    BasicBlock* matchCopyLoopExit = iBuilder->CreateBasicBlock("MatchCopyLoopExit");
149
150    //
151    // The carry bit will need to be processed specially only when
152    // the most significant bit of previous block is 1 (the carry bit is 0x1) and the
153    // least significant bit of current block is 0
154    // e.g.
155    //   Assume the most significant bit is on the right side
156    //
157    //                    i64_1       i64_2
158    //   M0         ... 0000 0011 | 0111 0000 ...  - Carry bit need to be handle specially
159    //   M0         ... 0000 0011 | 1011 0000 ...  - Carry bit will be handle in the loop of i64_2
160    //   Carry Bit                  1000 0000 ...  - 0x1
161
162    Value* needProcessCarryBit = iBuilder->CreateAnd(phiCarryBit, iBuilder->CreateNot(iBuilder->CreateAnd(currentInitM0, iBuilder->getInt64(1))));
163    needProcessCarryBit = iBuilder->CreateICmpNE(needProcessCarryBit, INT64_0);
164
165    iBuilder->CreateUnlikelyCondBr(needProcessCarryBit, carryBitProcessBlock, matchCopyLoopCon);
166
167    // ---- CarryBitProcessBlock
168    iBuilder->SetInsertPoint(carryBitProcessBlock);
169    vector<Value*> initSourceDataWithCarry;
170    Value* carryCopyFromPos = iBuilder->CreateSub(phiCurrentPosition, phiCarryMatchOffset);
171    Value* carryCopyFromBlockIndex = iBuilder->CreateUDiv(carryCopyFromPos, SIZE_64);
172    Value* carryCopyFromOffset = iBuilder->CreateURem(carryCopyFromPos, SIZE_64);
173    for (unsigned i = 0; i < mStreamSize; i++) {
174        Value* v = iBuilder->CreateLoad(iBuilder->CreateGEP(outputStreamBasePtrs[i], carryCopyFromBlockIndex));
175        v = iBuilder->CreateLShr(v, iBuilder->simd_fill(mPDEPWidth, carryCopyFromOffset));
176        v = iBuilder->CreateAnd(v, iBuilder->simd_fill(mPDEPWidth, INT64_1));
177        initSourceDataWithCarry.push_back(iBuilder->CreateOr(v, initSourceData[i]));
178    }
179    iBuilder->CreateBr(matchCopyLoopCon);
180
181    // ---- MatchCopyLoopCon
182    // MatchCopy Loop will handle one continuous data deposit each time
183    iBuilder->SetInsertPoint(matchCopyLoopCon);
184
185    PHINode* phiLatestMatchOffset = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 3);
186    phiLatestMatchOffset->addIncoming(phiCarryMatchOffset, processLoopBody);
187    phiLatestMatchOffset->addIncoming(phiCarryMatchOffset, carryBitProcessBlock);
188
189    PHINode* phiRemainingM0Marker = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 3);
190    phiRemainingM0Marker->addIncoming(currentInitM0, processLoopBody);
191    phiRemainingM0Marker->addIncoming(currentInitM0, carryBitProcessBlock);
192
193    vector<PHINode*> outputData;
194    for (unsigned i = 0; i < mStreamSize; i++) {
195        PHINode* outputValue = iBuilder->CreatePHI(iBuilder->getBitBlockType(), 3);
196        outputValue->addIncoming(initSourceData[i], processLoopBody);
197        outputValue->addIncoming(initSourceDataWithCarry[i], carryBitProcessBlock);
198        outputData.push_back(outputValue);
199    }
200//    iBuilder->CreateOr()
201    iBuilder->CreateLikelyCondBr(iBuilder->CreateICmpNE(phiRemainingM0Marker, INT64_0), matchCopyLoopBody, matchCopyLoopExit);
202
203    // ---- MatchCopyLoopBody
204    iBuilder->SetInsertPoint(matchCopyLoopBody);
205
206    // Match Offset
207    // M0      0111 1000  - load new match offset
208    // M0      1100 0011  - use carryMatchOffset
209    Value* remainM0ForwardZero = iBuilder->CreateCountForwardZeroes(phiRemainingM0Marker);
210
211    BasicBlock* loadNextMatchOffsetBlock = iBuilder->CreateBasicBlock("loadNextMatchOffsetBlock");
212    BasicBlock* doMatchCopyBlock = iBuilder->CreateBasicBlock("DoMatchCopyBlock");
213
214    iBuilder->CreateLikelyCondBr(
215            iBuilder->CreateOr(iBuilder->CreateICmpEQ(phiLatestMatchOffset, INT64_0),
216                               iBuilder->CreateICmpNE(remainM0ForwardZero, INT64_0)
217            ),
218            loadNextMatchOffsetBlock, doMatchCopyBlock
219    );
220
221    // ---- loadNextMatchOffsetBlock
222    iBuilder->SetInsertPoint(loadNextMatchOffsetBlock);
223    auto matchOffsetRet = this->loadNextMatchOffset(iBuilder);
224    BasicBlock* loadNextMatchOffsetExitBlock = iBuilder->GetInsertBlock();
225    Value* newMatchOffset = matchOffsetRet.first;
226    Value* newMatchOffsetPos = matchOffsetRet.second;
227    iBuilder->setScalarField("currentOffsetMarkerPos", newMatchOffsetPos);
228    iBuilder->setProcessedItemCount("MatchOffsetMarker", newMatchOffsetPos);
229    iBuilder->CreateBr(doMatchCopyBlock);
230
231    // ---- doMatchCopyBlock
232    iBuilder->SetInsertPoint(doMatchCopyBlock);
233
234    PHINode* phiTargetMatchOffset = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
235    phiTargetMatchOffset->addIncoming(phiLatestMatchOffset, matchCopyLoopBody);
236    phiTargetMatchOffset->addIncoming(newMatchOffset, loadNextMatchOffsetExitBlock);
237
238    //
239    // M0            0011 0010
240    // boundary      0000 1000
241    // nextMask      0000 0111
242    // deposit       0011 1000
243    // newM0         0000 0010
244
245    Value* remainStart = iBuilder->CreateShl(INT64_1, remainM0ForwardZero);
246
247    Value* boundaryMarker = iBuilder->CreateAnd(iBuilder->CreateAdd(phiRemainingM0Marker, remainStart), iBuilder->CreateNot(phiRemainingM0Marker));
248
249    Value* nextMask = iBuilder->CreateSub(INT64_0, iBuilder->CreateShl(boundaryMarker, INT64_1));
250    Value* depositMarker = iBuilder->CreateAnd(
251            iBuilder->CreateOr(phiRemainingM0Marker, boundaryMarker),
252            iBuilder->CreateNot(nextMask)
253    );
254    Value* newM0Marker = iBuilder->CreateAnd(phiRemainingM0Marker, nextMask);
255    Value* depositMarkerPopcount = iBuilder->CreatePopcount(depositMarker);
256
257    Value* matchCopyFromStart = iBuilder->CreateSub(iBuilder->CreateAdd(phiCurrentPosition, remainM0ForwardZero), phiTargetMatchOffset);
258    Value* matchCopyFromBlockIndex = iBuilder->CreateUDiv(matchCopyFromStart, SIZE_64);
259
260    Value* matchCopyFromOffset = iBuilder->CreateURem(matchCopyFromStart, SIZE_64);
261    Value* matchCopyFromRemaining = iBuilder->CreateSub(SIZE_64, matchCopyFromOffset);
262    Value* matchCopyFromNextBlockIndex = iBuilder->CreateAdd(matchCopyFromBlockIndex, iBuilder->CreateSelect(iBuilder->CreateICmpULE(depositMarkerPopcount, matchCopyFromRemaining), SIZE_0, SIZE_1));
263
264
265    vector<Value*> pdepSourceData;
266
267    for (unsigned i = 0; i < mStreamSize; i++) {
268        Value* fromPtr = iBuilder->CreateGEP(outputStreamBasePtrs[i], matchCopyFromBlockIndex);
269        Value* fromBlockValue = iBuilder->CreateLoad(fromPtr);
270        // when dataBlockIndex == matchCopyFromBlockIndex, we need to use current output value as input
271        fromBlockValue = iBuilder->CreateSelect(iBuilder->CreateICmpEQ(dataBlockIndex, matchCopyFromBlockIndex), outputData[i], fromBlockValue);
272
273        Value* fromNextPtr = iBuilder->CreateGEP(outputStreamBasePtrs[i], matchCopyFromNextBlockIndex);
274        Value* fromNextBlockValue = iBuilder->CreateLoad(fromNextPtr);
275        fromNextBlockValue = iBuilder->CreateSelect(iBuilder->CreateICmpEQ(dataBlockIndex, matchCopyFromNextBlockIndex), outputData[i], fromNextBlockValue);
276
277
278        Value * allFromValue = iBuilder->CreateOr(
279                iBuilder->CreateLShr(fromBlockValue, iBuilder->simd_fill(mPDEPWidth, matchCopyFromOffset)),
280                iBuilder->CreateShl(fromNextBlockValue, iBuilder->simd_fill(mPDEPWidth, matchCopyFromRemaining))
281        );
282        pdepSourceData.push_back(allFromValue);
283    }
284
285    BasicBlock* doubleSourceDataCon = iBuilder->CreateBasicBlock("doubleSourceDataCon");
286    BasicBlock* doubleSourceDataBody = iBuilder->CreateBasicBlock("doubleSourceDataBody");
287    BasicBlock* doubleSourceDataExit = iBuilder->CreateBasicBlock("doubleSourceDataExit");
288
289    iBuilder->CreateBr(doubleSourceDataCon);
290
291    //
292    // When matchOffset < depositMarkerPopcount, we need to use log2 approach to double the source data
293    // e.g.
294    // Assume that match copy start position is 1, matchOffset is 1, match length is 5
295    //     outputBuffer              a000 0000 0000 0000
296    //     sourceDataBeforeDouble    a000 0000 0000 0000
297    // At this point, only 1 bit of source data is accessable, so it will double the source data 3 times until we have
298    // 1 * 2 ^ 3 = 8 bits accessable
299    //     sourceDataAfterDouble     aaaa aaaa 0000 0000
300    //     outputBuffer(after copy)  aaaa aa00 0000 0000
301    //
302
303    // ---- doubleSourceDataCon
304    iBuilder->SetInsertPoint(doubleSourceDataCon);
305    PHINode* phiSourceDataAccessable = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
306    phiSourceDataAccessable->addIncoming(phiTargetMatchOffset, doMatchCopyBlock);
307    vector<PHINode*> phiPdepSourceData;
308    for (unsigned i = 0; i < mStreamSize; i++) {
309        PHINode* v = iBuilder->CreatePHI(iBuilder->getBitBlockType(), 2);
310        v->addIncoming(pdepSourceData[i], doMatchCopyBlock);
311        phiPdepSourceData.push_back(v);
312    }
313    iBuilder->CreateUnlikelyCondBr(iBuilder->CreateICmpULT(phiSourceDataAccessable, depositMarkerPopcount), doubleSourceDataBody, doubleSourceDataExit);
314
315    // ---- doubleSourceDataBody
316    iBuilder->SetInsertPoint(doubleSourceDataBody);
317    for (unsigned i = 0; i < mStreamSize; i++) {
318        PHINode* v = phiPdepSourceData[i];
319        Value* newValue = iBuilder->CreateOr(v, iBuilder->CreateShl(v, iBuilder->simd_fill(mPDEPWidth, phiSourceDataAccessable)));
320        v->addIncoming(newValue, doubleSourceDataBody);
321    }
322    phiSourceDataAccessable->addIncoming(iBuilder->CreateShl(phiSourceDataAccessable, SIZE_1), doubleSourceDataBody);
323
324    iBuilder->CreateBr(doubleSourceDataCon);
325
326    // ---- doubleSourceDataExit
327    iBuilder->SetInsertPoint(doubleSourceDataExit);
328    // At this point, we can guarantee we have enough data for pdep
329    for (unsigned i = 0; i < mStreamSize; i++) {
330        // Do Match Copy by PDEP
331        Value* allFromValue = phiPdepSourceData[i];
332        Value* newValue = BITBLOCK_0;
333        for (uint64_t j = 0; j < 4; j++) { // For now, we assume bit block type is always <4 * i64>
334            Value* source_field = iBuilder->CreateExtractElement(allFromValue, j);
335            Value * PDEP_field = iBuilder->CreateCall(PDEP_func, {source_field, depositMarker});
336            newValue = iBuilder->CreateInsertElement(newValue, PDEP_field, j);
337        }
338        PHINode* outputValue = outputData[i];
339        Value* newOutputValue = iBuilder->CreateOr(outputValue, newValue);
340        outputValue->addIncoming(newOutputValue, iBuilder->GetInsertBlock());
341    }
342    phiRemainingM0Marker->addIncoming(newM0Marker, iBuilder->GetInsertBlock());
343    phiLatestMatchOffset->addIncoming(phiTargetMatchOffset, iBuilder->GetInsertBlock());
344
345    iBuilder->CreateBr(matchCopyLoopCon);
346
347    // ---- MatchCopyLoopExit
348    iBuilder->SetInsertPoint(matchCopyLoopExit);
349    for (unsigned i = 0; i < mStreamSize; i++) {
350        iBuilder->CreateStore(outputData[i], iBuilder->CreateGEP(outputStreamBasePtrs[i], dataBlockIndex));
351    }
352    Value* hasNewCarryBit = iBuilder->CreateAnd(currentInitM0, iBuilder->CreateShl(INT64_1, iBuilder->getInt64(63)));
353    hasNewCarryBit = iBuilder->CreateICmpNE(hasNewCarryBit, INT64_0);
354    Value* newCarryBit = iBuilder->CreateSelect(hasNewCarryBit, INT64_1, INT64_0);
355    phiCarryBit->addIncoming(newCarryBit, iBuilder->GetInsertBlock());
356
357    phiCarryMatchOffset->addIncoming(iBuilder->CreateSelect(hasNewCarryBit, phiLatestMatchOffset, iBuilder->getSize(0)), iBuilder->GetInsertBlock());
358
359    phiCurrentPosition->addIncoming(iBuilder->CreateAdd(phiCurrentPosition, SIZE_64), iBuilder->GetInsertBlock());
360
361    iBuilder->CreateBr(processLoopCon);
362
363    // ---- ProcessLoopExit
364    iBuilder->SetInsertPoint(processLoopExit);
365    Value * const toProcessItemCount = iBuilder->CreateAdd(processed, itemsToDo);
366    iBuilder->setProcessedItemCount("M0Marker", toProcessItemCount);
367    iBuilder->CreateBr(exitBlock);
368
369    // ---- ExitBlock
370    iBuilder->SetInsertPoint(exitBlock);
371
372
373}
374
375LZ4SwizzledMatchCopyKernel::LZ4SwizzledMatchCopyKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder, unsigned streamCount, unsigned streamSize, unsigned swizzleFactor, unsigned PDEP_width, std::string name)
376: SegmentOrientedKernel(std::move(name),
377// Inputs
378{
379                                   Binding{iBuilder->getStreamSetTy(1, 1), "MatchOffsetMarker", BoundedRate(0, 1)},
380                                   Binding{iBuilder->getStreamSetTy(1, 1), "M0Marker", BoundedRate(0, 1)},
381                                   Binding{iBuilder->getStreamSetTy(1, 8), "byteStream", RateEqualTo("MatchOffsetMarker")}
382},
383// Outputs
384{},
385// Arguments
386{
387},
388{},
389{
390       Binding(iBuilder->getSizeTy(), "currentOffsetMarkerPos"),
391//       Binding(iBuilder->getSizeTy(), "currentOffsetMarkerPos"),
392})
393, mSwizzleFactor(swizzleFactor)
394, mPDEPWidth(PDEP_width)
395, mStreamSize(streamSize)
396, mStreamCount(streamCount) {
397
398    assert((mSwizzleFactor == (iBuilder->getBitBlockWidth() / PDEP_width)) && "swizzle factor must equal bitBlockWidth / PDEP_width");
399    assert((mPDEPWidth == 64 || mPDEPWidth == 32) && "PDEP width must be 32 or 64");
400    setStride(4 * 1024 * 1024);
401    addAttribute(MustExplicitlyTerminate());
402
403    mStreamSetInputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "sourceStreamSet0", RateEqualTo("M0Marker"), Swizzled()});
404    mStreamSetOutputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "outputStreamSet0", RateEqualTo("M0Marker")});
405
406    for (unsigned i = 1; i < streamSize; i++) {
407        mStreamSetInputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "sourceStreamSet" + std::to_string(i), RateEqualTo("M0Marker"), Swizzled()});
408        mStreamSetOutputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "outputStreamSet" + std::to_string(i), RateEqualTo("M0Marker")});
409    }
410}
411
412}
Note: See TracBrowser for help on using the repository browser.