source: icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_swizzled_match_copy_kernel.cpp @ 5967

Last change on this file since 5967 was 5967, checked in by nmedfort, 15 months ago

Updated LZ4SwizzledMatchCopy + minor changes

File size: 11.2 KB
Line 
1//
2// Created by wxy325 on 2018/3/9.
3//
4
5#include "lz4_swizzled_match_copy_kernel.h"
6#include <kernels/kernel_builder.h>
7
8using namespace llvm;
9
10namespace kernel {
11
12void LZ4SwizzledMatchCopyKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
13
14    ConstantInt * const SIZE_ZERO = iBuilder->getSize(0);
15    ConstantInt * const SIZE_ONE = iBuilder->getSize(1);
16    ConstantInt * const SIZE_PDEP_WIDTH = iBuilder->getSize(mPDEPWidth);
17    ConstantInt * const SIZE_4_MEGS = iBuilder->getSize(4 * 1024 * 1024);
18    ConstantInt * const SIZE_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
19
20    BasicBlock * const entryBlock = iBuilder->GetInsertBlock();
21
22    Value * const available = iBuilder->getAvailableItemCount("sourceStreamSet0");
23    Value * const processed = iBuilder->getProcessedItemCount("sourceStreamSet0");
24
25    Value * const itemsToDo = iBuilder->CreateUMin(iBuilder->CreateSub(available, processed), SIZE_4_MEGS);
26
27    iBuilder->setTerminationSignal(iBuilder->CreateICmpULT(itemsToDo, SIZE_4_MEGS));
28
29    Value * previousProducedItemCount = iBuilder->getProducedItemCount("outputStreamSet0");
30
31    // Output Copy
32    generateOutputCopy(iBuilder);
33
34    Value * const toProcessItemCount = iBuilder->CreateAdd(processed, itemsToDo);
35
36    // Match Copy
37    Value * const initM0StartProcessIndex = iBuilder->getProcessedItemCount("m0Start");
38    Value * const totalM0StartItemsCount = iBuilder->getAvailableItemCount("m0Start");
39
40    Value * const initMatchOffset = iBuilder->getScalarField("pendingMatchOffset");
41    Value * const initMatchLength = iBuilder->getScalarField("pendingMatchLength");
42    Value * const initMatchPos = iBuilder->getScalarField("pendingMatchPos");
43
44    BasicBlock * const matchCopyLoopCon = iBuilder->CreateBasicBlock("matchCopyLoopCon");
45    iBuilder->CreateBr(matchCopyLoopCon);
46
47    iBuilder->SetInsertPoint(matchCopyLoopCon);
48    PHINode * const phiProcessIndex = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3);
49    phiProcessIndex->addIncoming(initM0StartProcessIndex, entryBlock);
50    PHINode * const phiMatchOffset = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3);
51    phiMatchOffset->addIncoming(initMatchOffset, entryBlock);
52    PHINode * const phiMatchLength = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3);
53    phiMatchLength->addIncoming(initMatchLength, entryBlock);
54    PHINode * const phiMatchPos = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3);
55    phiMatchPos->addIncoming(initMatchPos, entryBlock);
56
57    BasicBlock * const loadNextMatchInfoConBlock = iBuilder->CreateBasicBlock("loadNewMatchInfoConBlock");
58    BasicBlock * const loadNextMatchInfoBodyBlock = iBuilder->CreateBasicBlock("loadNewMatchInfoBodyBlock");
59
60    BasicBlock * const matchCopyConBlock = iBuilder->CreateBasicBlock("matchCopyConBlock");
61    BasicBlock * const matchCopyBodyBlock = iBuilder->CreateBasicBlock("matchCopyBodyBlock");
62
63    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(phiMatchLength, SIZE_ZERO), loadNextMatchInfoConBlock, matchCopyConBlock);
64
65    iBuilder->SetInsertPoint(loadNextMatchInfoConBlock);
66    Value * const hasMoreMatchInfo = iBuilder->CreateICmpULT(phiProcessIndex, totalM0StartItemsCount);
67    BasicBlock * const processExitBlock = iBuilder->CreateBasicBlock("exit_block");
68    iBuilder->CreateCondBr(hasMoreMatchInfo, loadNextMatchInfoBodyBlock, processExitBlock);
69
70    iBuilder->SetInsertPoint(loadNextMatchInfoBodyBlock);
71
72    Value * const newM0Start = loadOffset(iBuilder, "m0Start", phiProcessIndex);
73    Value * const newM0End = loadOffset(iBuilder, "m0End", phiProcessIndex);
74    Value * const newMatchOffset = loadOffset(iBuilder, "matchOffset", phiProcessIndex);
75    Value * const newMatchLength = iBuilder->CreateAdd(iBuilder->CreateSub(newM0End, newM0Start), iBuilder->getInt64(1));
76
77    phiProcessIndex->addIncoming(iBuilder->CreateAdd(phiProcessIndex, SIZE_ONE), loadNextMatchInfoBodyBlock);
78
79    phiMatchPos->addIncoming(newM0Start, loadNextMatchInfoBodyBlock);
80    phiMatchOffset->addIncoming(newMatchOffset, loadNextMatchInfoBodyBlock);
81    phiMatchLength->addIncoming(newMatchLength, loadNextMatchInfoBodyBlock);
82
83    iBuilder->CreateBr(matchCopyLoopCon);
84
85    iBuilder->SetInsertPoint(matchCopyConBlock);
86
87    Value * const hasNotReachEnd = iBuilder->CreateICmpULT(phiMatchPos, toProcessItemCount);
88    iBuilder->CreateCondBr(hasNotReachEnd, matchCopyBodyBlock, processExitBlock);
89
90    iBuilder->SetInsertPoint(matchCopyBodyBlock);
91
92    Value * const matchCopyTargetPos = iBuilder->CreateSub(phiMatchPos, previousProducedItemCount);
93    Value * const matchCopyTargetBlockIndex = iBuilder->CreateUDiv(matchCopyTargetPos, SIZE_BLOCK_WIDTH);
94    Value * const matchCopyTargetStreamIndex = iBuilder->CreateUDiv(iBuilder->CreateURem(matchCopyTargetPos, SIZE_BLOCK_WIDTH), SIZE_PDEP_WIDTH); // should SIZE_PDEP_WIDTH be SIZE_STREAM_COUNT?
95    Value * const matchCopyTargetBlockOffset = iBuilder->CreateURem(phiMatchPos, SIZE_PDEP_WIDTH);
96
97    Value * const matchCopyFromPos = iBuilder->CreateSub(matchCopyTargetPos, phiMatchOffset);
98    Value * const matchCopyFromBlockIndex = iBuilder->CreateUDiv(matchCopyFromPos, SIZE_BLOCK_WIDTH);
99    Value * const matchCopyFromStreamIndex = iBuilder->CreateUDiv(iBuilder->CreateURem(matchCopyFromPos, SIZE_BLOCK_WIDTH), SIZE_PDEP_WIDTH);
100    Value * const matchCopyFromBlockOffset = iBuilder->CreateURem(matchCopyFromPos, SIZE_PDEP_WIDTH);
101
102    Value * currentCopySize = iBuilder->CreateSub(SIZE_PDEP_WIDTH, iBuilder->CreateUMax(matchCopyFromBlockOffset, matchCopyTargetBlockOffset));
103    currentCopySize = iBuilder->CreateUMin(currentCopySize, phiMatchOffset);
104    currentCopySize = iBuilder->CreateUMin(currentCopySize, phiMatchLength);
105    currentCopySize = iBuilder->CreateUMin(currentCopySize, iBuilder->CreateSub(toProcessItemCount, phiMatchPos));
106    currentCopySize = iBuilder->CreateSelect(iBuilder->CreateICmpEQ(currentCopySize, SIZE_ZERO), SIZE_ONE, currentCopySize); //Workaround for the last byte
107
108    Value * const shiftOffset = iBuilder->CreateAdd(matchCopyFromBlockOffset, currentCopySize);
109    Value * highOffset = iBuilder->CreateShl(SIZE_ONE, shiftOffset);
110    highOffset = iBuilder->CreateSelect(iBuilder->CreateICmpEQ(currentCopySize, SIZE_PDEP_WIDTH), SIZE_ZERO, highOffset); // When currentCopySize == SIZE_PDEP_WIDTH, shl will overflow
111    Value * const lowOffset = iBuilder->CreateShl(SIZE_ONE, matchCopyFromBlockOffset);
112    Value * const maskVector = iBuilder->simd_fill(mPDEPWidth, iBuilder->CreateSub(highOffset, lowOffset));
113    Value * const fromBlockOffsetVector = iBuilder->simd_fill(mPDEPWidth, matchCopyFromBlockOffset);
114    Value * const targetBlockOffsetVector = iBuilder->simd_fill(mPDEPWidth, matchCopyTargetBlockOffset);
115
116    for (unsigned i = 0; i < mStreamSize; i++) {
117        Value * const matchCopyFromBlockPtr = iBuilder->getOutputStreamBlockPtr("outputStreamSet" + std::to_string(i), matchCopyFromStreamIndex, matchCopyFromBlockIndex);
118        Value * const fromBlockValue = iBuilder->CreateBlockAlignedLoad(matchCopyFromBlockPtr);
119
120        Value * const outputTargetBlockPtr = iBuilder->getOutputStreamBlockPtr("outputStreamSet" + std::to_string(i), matchCopyTargetStreamIndex, matchCopyTargetBlockIndex);
121        Value * const targetOriginalValue = iBuilder->CreateBlockAlignedLoad(outputTargetBlockPtr);
122
123        Value * copiedValue = iBuilder->simd_and(fromBlockValue, maskVector);
124        copiedValue = iBuilder->CreateLShr(copiedValue, fromBlockOffsetVector);
125        copiedValue = iBuilder->CreateShl(copiedValue, targetBlockOffsetVector);
126        Value * const finalValue = iBuilder->CreateOr(targetOriginalValue, copiedValue);
127
128        iBuilder->CreateStore(finalValue, outputTargetBlockPtr);
129    }
130
131    phiProcessIndex->addIncoming(phiProcessIndex, matchCopyBodyBlock);
132    phiMatchOffset->addIncoming(phiMatchOffset, matchCopyBodyBlock);
133    phiMatchPos->addIncoming(iBuilder->CreateAdd(phiMatchPos, currentCopySize), matchCopyBodyBlock);
134    phiMatchLength->addIncoming(iBuilder->CreateSub(phiMatchLength, currentCopySize), matchCopyBodyBlock);
135
136    iBuilder->CreateBr(matchCopyLoopCon);
137
138    iBuilder->SetInsertPoint(processExitBlock);
139    iBuilder->setScalarField("pendingMatchOffset", phiMatchOffset);
140    iBuilder->setScalarField("pendingMatchLength", phiMatchLength);
141    iBuilder->setScalarField("pendingMatchPos", phiMatchPos);
142    iBuilder->setProcessedItemCount("m0Start", phiProcessIndex);
143    iBuilder->setProcessedItemCount("m0End", phiProcessIndex);
144    iBuilder->setProcessedItemCount("matchOffset", phiProcessIndex);
145    iBuilder->setProcessedItemCount("sourceStreamSet0", toProcessItemCount);
146}
147
148void LZ4SwizzledMatchCopyKernel::generateOutputCopy(const std::unique_ptr<KernelBuilder> & iBuilder) {
149    Constant * SIZE_ZERO = iBuilder->getSize(0);
150    Constant * COPY_BYTES = iBuilder->getSize(4 * 1024 * 1024 * mStreamCount / 8);
151    for (unsigned i = 0; i < mStreamSize; i++) {
152        Value * inputBasePtr = iBuilder->getInputStreamBlockPtr("sourceStreamSet" + std::to_string(i), SIZE_ZERO);
153        Value * outputBasePtr = iBuilder->getOutputStreamBlockPtr("outputStreamSet" + std::to_string(i), SIZE_ZERO);
154        iBuilder->CreateMemCpy(outputBasePtr, inputBasePtr, COPY_BYTES, 1); // Not align guaranteed in final block
155    }
156}
157
158Value* LZ4SwizzledMatchCopyKernel::loadOffset(const std::unique_ptr<KernelBuilder> & iBuilder, const std::string & bufferName, Value* offset) {
159    return iBuilder->CreateLoad(iBuilder->getRawInputPointer(bufferName, offset));
160}
161
162LZ4SwizzledMatchCopyKernel::LZ4SwizzledMatchCopyKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder, unsigned streamCount/*=4*/, unsigned streamSize/*=2*/, unsigned swizzleFactor/*=4*/, unsigned PDEP_width/*64*/)
163: SegmentOrientedKernel("LZ4SwizzledMatchCopyKernel",
164// Inputs
165{
166       Binding{iBuilder->getStreamSetTy(1, 64), "m0Start", BoundedRate(0, 1), DisableSufficientChecking()},
167       Binding{iBuilder->getStreamSetTy(1, 64), "m0End", BoundedRate(0, 1), DisableSufficientChecking()},
168       Binding{iBuilder->getStreamSetTy(1, 64), "matchOffset", BoundedRate(0, 1), DisableSufficientChecking()},
169},
170// Outputs
171{},
172// Arguments
173{
174       Binding{iBuilder->getSizeTy(), "fileSize"} //TODO remove
175},
176{},
177{
178       Binding{iBuilder->getSizeTy(), "currentProcessIndex"},
179       Binding{iBuilder->getSizeTy(), "pendingMatchPos"},
180       Binding{iBuilder->getSizeTy(), "pendingMatchOffset"},
181       Binding{iBuilder->getSizeTy(), "pendingMatchLength"},
182})
183, mSwizzleFactor(swizzleFactor)
184, mPDEPWidth(PDEP_width)
185, mStreamSize(streamSize)
186, mStreamCount(streamCount) {
187
188    assert((mSwizzleFactor == (iBuilder->getBitBlockWidth() / PDEP_width)) && "swizzle factor must equal bitBlockWidth / PDEP_width");
189    assert((mPDEPWidth == 64 || mPDEPWidth == 32) && "PDEP width must be 32 or 64");
190    setStride(4 * 1024 * 1024);
191    addAttribute(MustExplicitlyTerminate());
192
193    mStreamSetInputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "sourceStreamSet0", BoundedRate(0, 1), Swizzled()});
194    mStreamSetOutputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "outputStreamSet0", RateEqualTo("sourceStreamSet0")});
195
196    for (unsigned i = 1; i < streamSize; i++) {
197        mStreamSetInputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "sourceStreamSet" + std::to_string(i), RateEqualTo("sourceStreamSet0"), Swizzled()});
198        mStreamSetOutputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "outputStreamSet" + std::to_string(i), RateEqualTo("sourceStreamSet0")});
199    }
200}
201
202}
Note: See TracBrowser for help on using the repository browser.