source: icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_swizzled_match_copy_kernel.cpp @ 6020

Last change on this file since 6020 was 6020, checked in by xwa163, 5 months ago
  1. New version of lz4_swizzled_match_copy kernel with higher performance
  2. Adjust related pipeline code
  3. Remove legacy comments
File size: 20.8 KB
Line 
1
2
3#include "lz4_swizzled_match_copy_kernel.h"
4#include <kernels/kernel_builder.h>
5#include <kernels/streamset.h>
6#include <toolchain/toolchain.h>
7#include <vector>
8#include <llvm/Support/raw_ostream.h>
9
10
11using namespace llvm;
12using namespace std;
13namespace kernel {
14
15Value *LZ4SwizzledMatchCopyKernel::advanceUntilNextBit(const std::unique_ptr<KernelBuilder> &iBuilder, string inputName, Value *startPos, bool isNextOne) {
16    BasicBlock* entryBlock = iBuilder->GetInsertBlock();
17
18    Constant* SIZE_0 = iBuilder->getSize(0);
19    Constant* SIZE_1 = iBuilder->getSize(1);
20    Value* SIZE_64 = iBuilder->getSize(64); // maybe need to handle 32 bit machine
21    Value* SIZE_INPUT_64_COUNT = iBuilder->getSize(this->getInputStreamSetBuffer(inputName)->getBufferBlocks() * iBuilder->getBitBlockWidth() / 64);
22
23    Value* initCurrentPos = startPos;
24
25    Value* offsetMarkerRawPtr = iBuilder->CreatePointerCast(iBuilder->getRawInputPointer(inputName, SIZE_0), iBuilder->getInt64Ty()->getPointerTo());
26
27    BasicBlock* findNextMatchOffsetConBlock = iBuilder->CreateBasicBlock("findNextMatchOffsetConBlock");
28    BasicBlock* findNextMatchOffsetBodyBlock = iBuilder->CreateBasicBlock("findNextMatchOffsetBodyBlock");
29
30    iBuilder->CreateBr(findNextMatchOffsetConBlock);
31    iBuilder->SetInsertPoint(findNextMatchOffsetConBlock);
32    // Find position marker bit of next 1 bit
33
34    PHINode* phiCurrentPos = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
35    phiCurrentPos->addIncoming(initCurrentPos, entryBlock);
36
37    Value* currentPosGlobalBlockIndex = iBuilder->CreateUDiv(phiCurrentPos, SIZE_64);
38    Value* currentPosLocalBlockIndex = iBuilder->CreateURem(currentPosGlobalBlockIndex, SIZE_INPUT_64_COUNT);
39    Value* currentPosBlockOffset = iBuilder->CreateURem(phiCurrentPos, SIZE_64);
40    Value* currentValue = iBuilder->CreateLoad(iBuilder->CreateGEP(offsetMarkerRawPtr, currentPosLocalBlockIndex));
41
42    Value* countValue = iBuilder->CreateLShr(currentValue, currentPosBlockOffset);
43    if (!isNextOne) {
44        countValue = iBuilder->CreateNot(countValue);
45    }
46    Value* forwardZero = iBuilder->CreateCountForwardZeroes(countValue);
47    Value* realForwardZero = iBuilder->CreateAdd(currentPosBlockOffset, forwardZero);
48
49    // If targetMarker == 0, move to next block, otherwise count forward zero
50    phiCurrentPos->addIncoming(iBuilder->CreateMul(SIZE_64, iBuilder->CreateAdd(currentPosGlobalBlockIndex, SIZE_1)), iBuilder->GetInsertBlock());
51    iBuilder->CreateCondBr(iBuilder->CreateICmpUGE(realForwardZero, SIZE_64), findNextMatchOffsetConBlock, findNextMatchOffsetBodyBlock);
52
53    iBuilder->SetInsertPoint(findNextMatchOffsetBodyBlock);
54
55    Value* newPosition = iBuilder->CreateAdd(iBuilder->CreateMul(currentPosGlobalBlockIndex, SIZE_64), realForwardZero);
56
57    return newPosition;
58}
59
60pair<Value*, Value*> LZ4SwizzledMatchCopyKernel::loadNextMatchOffset(const unique_ptr<KernelBuilder> &iBuilder) {
61    Value* initCurrentPos = iBuilder->CreateAdd(iBuilder->getScalarField("currentOffsetMarkerPos"), iBuilder->getSize(1));
62    Value* newPosition = this->advanceUntilNextBit(iBuilder, "MatchOffsetMarker", initCurrentPos, true);
63
64    // Load Match Offset from newPosition
65    Value* matchOffsetPtr = iBuilder->getRawInputPointer("byteStream", newPosition);
66    // For now, it is safe to cast matchOffset pointer into i16 since the input byte stream is always linear available
67    matchOffsetPtr = iBuilder->CreatePointerCast(matchOffsetPtr, iBuilder->getInt16Ty()->getPointerTo());
68    Value* matchOffset = iBuilder->CreateZExt(iBuilder->CreateLoad(matchOffsetPtr), iBuilder->getSizeTy());
69
70    return std::make_pair(matchOffset, newPosition);
71}
72
73
74void LZ4SwizzledMatchCopyKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
75    // ---- Contant
76    ConstantInt * const SIZE_4_MEGS = iBuilder->getSize(4 * 1024 * 1024);
77    ConstantInt * const SIZE_0 = iBuilder->getSize(0);
78    ConstantInt * const SIZE_1 = iBuilder->getSize(1);
79    ConstantInt * const SIZE_64 = iBuilder->getSize(64);
80    ConstantInt * const INT64_0 = iBuilder->getInt64(0);
81    ConstantInt * const INT64_1 = iBuilder->getInt64(1);
82
83    Value * BITBLOCK_0 = iBuilder->CreateBitCast(ConstantInt::get(iBuilder->getIntNTy(iBuilder->getBitBlockWidth()), 0), iBuilder->getBitBlockType());
84
85    // ---- Type
86    Type* BITBLOCK_TYPE = iBuilder->getBitBlockType();
87    Type* BITBLOCK_PTR_TYPE = BITBLOCK_TYPE->getPointerTo();
88    Type* I64_TY = iBuilder->getInt64Ty();
89    Type* I64_PTR_TY = I64_TY->getPointerTo();
90
91    Value * PDEP_func = Intrinsic::getDeclaration(iBuilder->getModule(), Intrinsic::x86_bmi_pdep_64); //TODO for now only consider 64 bits
92
93    // ---- EntryBlock
94    BasicBlock * const entryBlock = iBuilder->GetInsertBlock();
95    BasicBlock * const exitBlock = iBuilder->CreateBasicBlock("exitBlock");
96
97    Value * const available = iBuilder->getAvailableItemCount("sourceStreamSet0");
98    Value * const processed = iBuilder->getProcessedItemCount("sourceStreamSet0");
99
100    Value * const itemsToDo = iBuilder->CreateUMin(iBuilder->CreateSub(available, processed), SIZE_4_MEGS);
101    iBuilder->setTerminationSignal(iBuilder->CreateICmpULT(itemsToDo, SIZE_4_MEGS));
102
103
104    Value* m0MarkerBasePtr = iBuilder->CreatePointerCast(iBuilder->getInputStreamBlockPtr("M0Marker", SIZE_0), I64_PTR_TY); // i64*
105    vector<Value*> sourceStreamBasePtrs, outputStreamBasePtrs; // <4 * i64>*
106    for (int i = 0; i < mStreamSize; i++) {
107        sourceStreamBasePtrs.push_back(iBuilder->CreatePointerCast(iBuilder->getInputStreamBlockPtr("sourceStreamSet" + std::to_string(i), SIZE_0), BITBLOCK_PTR_TYPE));
108        outputStreamBasePtrs.push_back(iBuilder->CreatePointerCast(iBuilder->getOutputStreamBlockPtr("outputStreamSet" + std::to_string(i), SIZE_0), BITBLOCK_PTR_TYPE));
109    }
110
111
112    BasicBlock * const processLoopCon = iBuilder->CreateBasicBlock("processLoopCon");
113    BasicBlock * const processLoopBody = iBuilder->CreateBasicBlock("processLoopBody");
114    BasicBlock * const processLoopExit = iBuilder->CreateBasicBlock("processLoopExit");
115
116    iBuilder->CreateBr(processLoopCon);
117
118    // ---- ProcessLoopCon
119    // ProcessLoop will process one block of data each time (64bit m0, <4 * i64> input and output data)
120    iBuilder->SetInsertPoint(processLoopCon);
121
122    // carryBit === 0x1 only when the most significant bit of the target M0 block is one, which means the first position of next block need to be deposited (match copy)
123
124    PHINode* phiCarryBit = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
125    PHINode* phiCurrentPosition = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2); // 0~4mb, and all M0 related
126    PHINode* phiCarryMatchOffset = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
127
128    phiCarryBit->addIncoming(INT64_0, entryBlock);
129    phiCurrentPosition->addIncoming(INT64_0, entryBlock);
130    phiCarryMatchOffset->addIncoming(SIZE_0, entryBlock);
131
132    iBuilder->CreateLikelyCondBr(iBuilder->CreateICmpULT(phiCurrentPosition, itemsToDo), processLoopBody, processLoopExit);
133
134    // ---- ProcessLoopBody
135    iBuilder->SetInsertPoint(processLoopBody);
136
137    Value* dataBlockIndex = iBuilder->CreateUDiv(phiCurrentPosition, SIZE_64);
138    Value* currentInitM0 = iBuilder->CreateLoad(iBuilder->CreateGEP(m0MarkerBasePtr, dataBlockIndex));
139    vector<Value*> initSourceData;
140    for (int i = 0; i < mStreamSize; i++) {
141        // Because of swizzled form, the sourceStream can be accessed linearly
142        initSourceData.push_back(iBuilder->CreateLoad(iBuilder->CreateGEP(sourceStreamBasePtrs[i], dataBlockIndex)));
143    }
144
145    BasicBlock* carryBitProcessBlock = iBuilder->CreateBasicBlock("CarryBitProcessBlock");
146
147    BasicBlock* matchCopyLoopCon = iBuilder->CreateBasicBlock("MatchCopyLoopCon");
148    BasicBlock* matchCopyLoopBody = iBuilder->CreateBasicBlock("MatchCopyLoopBody");
149    BasicBlock* matchCopyLoopExit = iBuilder->CreateBasicBlock("MatchCopyLoopExit");
150
151    //
152    // The carry bit will need to be processed specially only when
153    // the most significant bit of previous block is 1 (the carry bit is 0x1) and the
154    // least significant bit of current block is 0
155    // e.g.
156    //   Assume the most significant bit is on the right side
157    //
158    //                    i64_1       i64_2
159    //   M0         ... 0000 0011 | 0111 0000 ...  - Carry bit need to be handle specially
160    //   M0         ... 0000 0011 | 1011 0000 ...  - Carry bit will be handle in the loop of i64_2
161    //   Carry Bit                  1000 0000 ...  - 0x1
162
163    Value* needProcessCarryBit = iBuilder->CreateAnd(phiCarryBit, iBuilder->CreateNot(iBuilder->CreateAnd(currentInitM0, iBuilder->getInt64(1))));
164    needProcessCarryBit = iBuilder->CreateICmpNE(needProcessCarryBit, INT64_0);
165
166    iBuilder->CreateUnlikelyCondBr(needProcessCarryBit, carryBitProcessBlock, matchCopyLoopCon);
167
168    // ---- CarryBitProcessBlock
169    iBuilder->SetInsertPoint(carryBitProcessBlock);
170    vector<Value*> initSourceDataWithCarry;
171    Value* carryCopyFromPos = iBuilder->CreateSub(phiCurrentPosition, phiCarryMatchOffset);
172    Value* carryCopyFromBlockIndex = iBuilder->CreateUDiv(carryCopyFromPos, SIZE_64);
173    Value* carryCopyFromOffset = iBuilder->CreateURem(carryCopyFromPos, SIZE_64);
174    for (int i = 0; i < mStreamSize; i++) {
175        Value* v = iBuilder->CreateLoad(iBuilder->CreateGEP(outputStreamBasePtrs[i], carryCopyFromBlockIndex));
176        v = iBuilder->CreateLShr(v, iBuilder->simd_fill(mPDEPWidth, carryCopyFromOffset));
177        v = iBuilder->CreateAnd(v, iBuilder->simd_fill(mPDEPWidth, INT64_1));
178        initSourceDataWithCarry.push_back(iBuilder->CreateOr(v, initSourceData[i]));
179    }
180    iBuilder->CreateBr(matchCopyLoopCon);
181
182    // ---- MatchCopyLoopCon
183    // MatchCopy Loop will handle one continuous data deposit each time
184    iBuilder->SetInsertPoint(matchCopyLoopCon);
185
186    PHINode* phiLatestMatchOffset = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 3);
187    phiLatestMatchOffset->addIncoming(phiCarryMatchOffset, processLoopBody);
188    phiLatestMatchOffset->addIncoming(phiCarryMatchOffset, carryBitProcessBlock);
189
190    PHINode* phiRemainingM0Marker = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 3);
191    phiRemainingM0Marker->addIncoming(currentInitM0, processLoopBody);
192    phiRemainingM0Marker->addIncoming(currentInitM0, carryBitProcessBlock);
193
194    vector<PHINode*> outputData;
195    for (int i = 0; i < mStreamSize; i++) {
196        PHINode* outputValue = iBuilder->CreatePHI(iBuilder->getBitBlockType(), 3);
197        outputValue->addIncoming(initSourceData[i], processLoopBody);
198        outputValue->addIncoming(initSourceDataWithCarry[i], carryBitProcessBlock);
199        outputData.push_back(outputValue);
200    }
201//    iBuilder->CreateOr()
202    iBuilder->CreateLikelyCondBr(iBuilder->CreateICmpNE(phiRemainingM0Marker, INT64_0), matchCopyLoopBody, matchCopyLoopExit);
203
204    // ---- MatchCopyLoopBody
205    iBuilder->SetInsertPoint(matchCopyLoopBody);
206
207    // Match Offset
208    // M0      0111 1000  - load new match offset
209    // M0      1100 0011  - use carryMatchOffset
210    Value* remainM0ForwardZero = iBuilder->CreateCountForwardZeroes(phiRemainingM0Marker);
211
212    BasicBlock* loadNextMatchOffsetBlock = iBuilder->CreateBasicBlock("loadNextMatchOffsetBlock");
213    BasicBlock* doMatchCopyBlock = iBuilder->CreateBasicBlock("DoMatchCopyBlock");
214
215    iBuilder->CreateLikelyCondBr(
216            iBuilder->CreateOr(iBuilder->CreateICmpEQ(phiLatestMatchOffset, INT64_0),
217                               iBuilder->CreateICmpNE(remainM0ForwardZero, INT64_0)
218            ),
219            loadNextMatchOffsetBlock, doMatchCopyBlock
220    );
221
222    // ---- loadNextMatchOffsetBlock
223    iBuilder->SetInsertPoint(loadNextMatchOffsetBlock);
224    auto matchOffsetRet = this->loadNextMatchOffset(iBuilder);
225    BasicBlock* loadNextMatchOffsetExitBlock = iBuilder->GetInsertBlock();
226    Value* newMatchOffset = matchOffsetRet.first;
227    Value* newMatchOffsetPos = matchOffsetRet.second;
228    iBuilder->setScalarField("currentOffsetMarkerPos", newMatchOffsetPos);
229    iBuilder->setProcessedItemCount("MatchOffsetMarker", newMatchOffsetPos);
230    iBuilder->CreateBr(doMatchCopyBlock);
231
232    // ---- doMatchCopyBlock
233    iBuilder->SetInsertPoint(doMatchCopyBlock);
234
235    PHINode* phiTargetMatchOffset = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
236    phiTargetMatchOffset->addIncoming(phiLatestMatchOffset, matchCopyLoopBody);
237    phiTargetMatchOffset->addIncoming(newMatchOffset, loadNextMatchOffsetExitBlock);
238
239    //
240    // M0            0011 0010
241    // boundary      0000 1000
242    // nextMask      0000 0111
243    // deposit       0011 1000
244    // newM0         0000 0010
245
246    Value* remainStart = iBuilder->CreateShl(INT64_1, remainM0ForwardZero);
247
248    Value* boundaryMarker = iBuilder->CreateAnd(iBuilder->CreateAdd(phiRemainingM0Marker, remainStart), iBuilder->CreateNot(phiRemainingM0Marker));
249
250    Value* nextMask = iBuilder->CreateSub(INT64_0, iBuilder->CreateShl(boundaryMarker, INT64_1));
251    Value* depositMarker = iBuilder->CreateAnd(
252            iBuilder->CreateOr(phiRemainingM0Marker, boundaryMarker),
253            iBuilder->CreateNot(nextMask)
254    );
255    Value* newM0Marker = iBuilder->CreateAnd(phiRemainingM0Marker, nextMask);
256    Value* depositMarkerPopcount = iBuilder->CreatePopcount(depositMarker);
257
258    Value* matchCopyFromStart = iBuilder->CreateSub(iBuilder->CreateAdd(phiCurrentPosition, remainM0ForwardZero), phiTargetMatchOffset);
259    Value* matchCopyFromBlockIndex = iBuilder->CreateUDiv(matchCopyFromStart, SIZE_64);
260
261    Value* matchCopyFromOffset = iBuilder->CreateURem(matchCopyFromStart, SIZE_64);
262    Value* matchCopyFromRemaining = iBuilder->CreateSub(SIZE_64, matchCopyFromOffset);
263    Value* matchCopyFromNextBlockIndex = iBuilder->CreateAdd(matchCopyFromBlockIndex, iBuilder->CreateSelect(iBuilder->CreateICmpULE(depositMarkerPopcount, matchCopyFromRemaining), SIZE_0, SIZE_1));
264
265
266    vector<Value*> pdepSourceData;
267
268    for (int i = 0; i < mStreamSize; i++) {
269        Value* fromPtr = iBuilder->CreateGEP(outputStreamBasePtrs[i], matchCopyFromBlockIndex);
270        Value* fromBlockValue = iBuilder->CreateLoad(fromPtr);
271        // when dataBlockIndex == matchCopyFromBlockIndex, we need to use current output value as input
272        fromBlockValue = iBuilder->CreateSelect(iBuilder->CreateICmpEQ(dataBlockIndex, matchCopyFromBlockIndex), outputData[i], fromBlockValue);
273
274        Value* fromNextPtr = iBuilder->CreateGEP(outputStreamBasePtrs[i], matchCopyFromNextBlockIndex);
275        Value* fromNextBlockValue = iBuilder->CreateLoad(fromNextPtr);
276        fromNextBlockValue = iBuilder->CreateSelect(iBuilder->CreateICmpEQ(dataBlockIndex, matchCopyFromNextBlockIndex), outputData[i], fromNextBlockValue);
277
278
279        Value * allFromValue = iBuilder->CreateOr(
280                iBuilder->CreateLShr(fromBlockValue, iBuilder->simd_fill(mPDEPWidth, matchCopyFromOffset)),
281                iBuilder->CreateShl(fromNextBlockValue, iBuilder->simd_fill(mPDEPWidth, matchCopyFromRemaining))
282        );
283        pdepSourceData.push_back(allFromValue);
284    }
285
286    BasicBlock* doubleSourceDataCon = iBuilder->CreateBasicBlock("doubleSourceDataCon");
287    BasicBlock* doubleSourceDataBody = iBuilder->CreateBasicBlock("doubleSourceDataBody");
288    BasicBlock* doubleSourceDataExit = iBuilder->CreateBasicBlock("doubleSourceDataExit");
289
290    iBuilder->CreateBr(doubleSourceDataCon);
291
292    //
293    // When matchOffset < depositMarkerPopcount, we need to use log2 approach to double the source data
294    // e.g.
295    // Assume that match copy start position is 1, matchOffset is 1, match length is 5
296    //     outputBuffer              a000 0000 0000 0000
297    //     sourceDataBeforeDouble    a000 0000 0000 0000
298    // At this point, only 1 bit of source data is accessable, so it will double the source data 3 times until we have
299    // 1 * 2 ^ 3 = 8 bits accessable
300    //     sourceDataAfterDouble     aaaa aaaa 0000 0000
301    //     outputBuffer(after copy)  aaaa aa00 0000 0000
302    //
303
304    // ---- doubleSourceDataCon
305    iBuilder->SetInsertPoint(doubleSourceDataCon);
306    PHINode* phiSourceDataAccessable = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
307    phiSourceDataAccessable->addIncoming(phiTargetMatchOffset, doMatchCopyBlock);
308    vector<PHINode*> phiPdepSourceData;
309    for (int i = 0; i < mStreamSize; i++) {
310        PHINode* v = iBuilder->CreatePHI(iBuilder->getBitBlockType(), 2);
311        v->addIncoming(pdepSourceData[i], doMatchCopyBlock);
312        phiPdepSourceData.push_back(v);
313    }
314    iBuilder->CreateUnlikelyCondBr(iBuilder->CreateICmpULT(phiSourceDataAccessable, depositMarkerPopcount), doubleSourceDataBody, doubleSourceDataExit);
315
316    // ---- doubleSourceDataBody
317    iBuilder->SetInsertPoint(doubleSourceDataBody);
318    for (int i = 0; i < mStreamSize; i++) {
319        PHINode* v = phiPdepSourceData[i];
320        Value* newValue = iBuilder->CreateOr(v, iBuilder->CreateShl(v, iBuilder->simd_fill(mPDEPWidth, phiSourceDataAccessable)));
321        v->addIncoming(newValue, doubleSourceDataBody);
322    }
323    phiSourceDataAccessable->addIncoming(iBuilder->CreateShl(phiSourceDataAccessable, SIZE_1), doubleSourceDataBody);
324
325    iBuilder->CreateBr(doubleSourceDataCon);
326
327    // ---- doubleSourceDataExit
328    iBuilder->SetInsertPoint(doubleSourceDataExit);
329    // At this point, we can guarantee we have enough data for pdep
330    for (int i = 0; i < mStreamSize; i++) {
331        // Do Match Copy by PDEP
332        Value* allFromValue = phiPdepSourceData[i];
333        Value* newValue = BITBLOCK_0;
334        for (uint64_t j = 0; j < 4; j++) { // For now, we assume bit block type is always <4 * i64>
335            Value* source_field = iBuilder->CreateExtractElement(allFromValue, j);
336            Value * PDEP_field = iBuilder->CreateCall(PDEP_func, {source_field, depositMarker});
337            newValue = iBuilder->CreateInsertElement(newValue, PDEP_field, j);
338        }
339        PHINode* outputValue = outputData[i];
340        Value* newOutputValue = iBuilder->CreateOr(outputValue, newValue);
341        outputValue->addIncoming(newOutputValue, iBuilder->GetInsertBlock());
342    }
343    phiRemainingM0Marker->addIncoming(newM0Marker, iBuilder->GetInsertBlock());
344    phiLatestMatchOffset->addIncoming(phiTargetMatchOffset, iBuilder->GetInsertBlock());
345
346    iBuilder->CreateBr(matchCopyLoopCon);
347
348    // ---- MatchCopyLoopExit
349    iBuilder->SetInsertPoint(matchCopyLoopExit);
350    for (int i = 0; i < mStreamSize; i++) {
351        iBuilder->CreateStore(outputData[i], iBuilder->CreateGEP(outputStreamBasePtrs[i], dataBlockIndex));
352    }
353    Value* hasNewCarryBit = iBuilder->CreateAnd(currentInitM0, iBuilder->CreateShl(INT64_1, iBuilder->getInt64(63)));
354    hasNewCarryBit = iBuilder->CreateICmpNE(hasNewCarryBit, INT64_0);
355    Value* newCarryBit = iBuilder->CreateSelect(hasNewCarryBit, INT64_1, INT64_0);
356    phiCarryBit->addIncoming(newCarryBit, iBuilder->GetInsertBlock());
357
358    phiCarryMatchOffset->addIncoming(iBuilder->CreateSelect(hasNewCarryBit, phiLatestMatchOffset, iBuilder->getSize(0)), iBuilder->GetInsertBlock());
359
360    phiCurrentPosition->addIncoming(iBuilder->CreateAdd(phiCurrentPosition, SIZE_64), iBuilder->GetInsertBlock());
361
362    iBuilder->CreateBr(processLoopCon);
363
364    // ---- ProcessLoopExit
365    iBuilder->SetInsertPoint(processLoopExit);
366    Value * const toProcessItemCount = iBuilder->CreateAdd(processed, itemsToDo);
367    iBuilder->setProcessedItemCount("M0Marker", toProcessItemCount);
368    iBuilder->CreateBr(exitBlock);
369
370    // ---- ExitBlock
371    iBuilder->SetInsertPoint(exitBlock);
372
373
374}
375
376LZ4SwizzledMatchCopyKernel::LZ4SwizzledMatchCopyKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder, unsigned streamCount/*=4*/, unsigned streamSize/*=2*/, unsigned swizzleFactor/*=4*/, unsigned PDEP_width/*64*/)
377: SegmentOrientedKernel("LZ4SwizzledMatchCopyKernel",
378// Inputs
379{
380                                   Binding{iBuilder->getStreamSetTy(1, 1), "MatchOffsetMarker", BoundedRate(0, 1)},
381                                   Binding{iBuilder->getStreamSetTy(1, 1), "M0Marker", BoundedRate(0, 1)},
382                                   Binding{iBuilder->getStreamSetTy(1, 8), "byteStream", RateEqualTo("MatchOffsetMarker")}
383},
384// Outputs
385{},
386// Arguments
387{
388},
389{},
390{
391       Binding(iBuilder->getSizeTy(), "currentOffsetMarkerPos"),
392//       Binding(iBuilder->getSizeTy(), "currentOffsetMarkerPos"),
393})
394, mSwizzleFactor(swizzleFactor)
395, mPDEPWidth(PDEP_width)
396, mStreamSize(streamSize)
397, mStreamCount(streamCount) {
398
399    assert((mSwizzleFactor == (iBuilder->getBitBlockWidth() / PDEP_width)) && "swizzle factor must equal bitBlockWidth / PDEP_width");
400    assert((mPDEPWidth == 64 || mPDEPWidth == 32) && "PDEP width must be 32 or 64");
401    setStride(4 * 1024 * 1024);
402    addAttribute(MustExplicitlyTerminate());
403
404    mStreamSetInputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "sourceStreamSet0", RateEqualTo("M0Marker"), Swizzled()});
405    mStreamSetOutputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "outputStreamSet0", RateEqualTo("M0Marker")});
406
407    for (unsigned i = 1; i < streamSize; i++) {
408        mStreamSetInputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "sourceStreamSet" + std::to_string(i), RateEqualTo("M0Marker"), Swizzled()});
409        mStreamSetOutputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "outputStreamSet" + std::to_string(i), RateEqualTo("M0Marker")});
410    }
411}
412
413}
Note: See TracBrowser for help on using the repository browser.