source: icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_block_decoder.cpp @ 5948

Last change on this file since 5948 was 5948, checked in by xwa163, 19 months ago
  1. Remove legacy kernels and code for LZ4
  2. Remove the old approach for the LZ4 decoder
  3. Fix some bugs in the new LZ4 decoder approach on large files by adding a workaround attribute
  4. Add related test cases
File size: 10.4 KB
Line 
1//
2// Created by wxy325 on 2018/3/16.
3//
4
5#include "lz4_block_decoder.h"
6
7#include <kernels/kernel_builder.h>
8#include <iostream>
9#include <string>
10#include <llvm/Support/raw_ostream.h>
11#include <kernels/streamset.h>
12
13using namespace llvm;
14using namespace kernel;
15using namespace std;
16
17namespace kernel{
18
19    LZ4BlockDecoderNewKernel::LZ4BlockDecoderNewKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder)
20: MultiBlockKernel("LZ4BlockDecoderNewKernel",
21    // Inputs
22    {
23                           Binding{iBuilder->getStreamSetTy(1, 8), "byteStream", FixedRate(1)},
24                   },
25    //Outputs
26    {
27        Binding{iBuilder->getStreamSetTy(1, 8), "isCompressed", BoundedRate(0, 1)},
28        Binding{iBuilder->getStreamSetTy(1, 64), "blockStart", BoundedRate(0, 1)},
29        Binding{iBuilder->getStreamSetTy(1, 64), "blockEnd", BoundedRate(0, 1)}},
30    //Arguments
31    {
32        Binding{iBuilder->getInt1Ty(), "hasBlockChecksum"},
33        Binding{iBuilder->getSizeTy(), "headerSize"},
34        Binding{iBuilder->getSizeTy(), "fileSize"}
35    },
36    {},
37    //Internal states:
38    {
39    Binding{iBuilder->getInt1Ty(), "hasSkipHeader"},
40    Binding{iBuilder->getSizeTy(), "previousOffset"},
41    Binding{iBuilder->getInt1Ty(), "reachFinalBlock"},
42
43    Binding{iBuilder->getInt1Ty(), "pendingIsCompressed"},
44    Binding{iBuilder->getInt64Ty(), "pendingBlockStart"},
45    Binding{iBuilder->getInt64Ty(), "pendingBlockEnd"},
46    }) {
47        addAttribute(MustExplicitlyTerminate());
48}
49
50void LZ4BlockDecoderNewKernel::resetPreviousProducedMap(const std::unique_ptr<KernelBuilder> &iBuilder,
51                                                        std::vector<std::string> outputList) {
52    previousProducedMap.clear();
53    for (auto iter = outputList.begin(); iter != outputList.end(); ++iter) {
54        previousProducedMap.insert(std::make_pair(*iter, iBuilder->getProducedItemCount(*iter)));
55    }
56}
57
58void LZ4BlockDecoderNewKernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> &iBuilder, Value * const numOfStrides) {
59    // Constant
60    Constant* INT8_0 = iBuilder->getInt8(0);
61    Constant* INT8_1 = iBuilder->getInt8(1);
62    Constant* INT64_0 = iBuilder->getInt64(0);
63
64
65    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
66    BasicBlock * exitBlock = iBuilder->CreateBasicBlock("exit");
67
68    this->resetPreviousProducedMap(iBuilder, {"isCompressed", "blockStart", "blockEnd"});
69
70    // Skip Header
71    Value* hasSkipHeader = iBuilder->getScalarField("hasSkipHeader");
72    iBuilder->setScalarField("hasSkipHeader", iBuilder->getInt1(true));
73    Value* skipLength = iBuilder->CreateSelect(hasSkipHeader, iBuilder->getSize(0), iBuilder->getScalarField("headerSize"));
74    Value* previousOffset = iBuilder->getScalarField("previousOffset");
75    previousOffset = iBuilder->CreateAdd(skipLength, previousOffset);
76    Value* initBlockStart = iBuilder->getScalarField("pendingBlockStart");
77    Value* initBlockEnd = iBuilder->getScalarField("pendingBlockEnd");
78    Value* initIsCompressed = iBuilder->getScalarField("pendingIsCompressed");
79
80
81    Value* availableItemCount = iBuilder->getAvailableItemCount("byteStream");
82    Value* processedItemCount = iBuilder->getProcessedItemCount("byteStream");
83
84    Value* totalItemCount = iBuilder->CreateAdd(availableItemCount, processedItemCount);
85
86    Value* mIsFinalBlock = iBuilder->CreateICmpEQ(totalItemCount, iBuilder->getScalarField("fileSize"));
87
88    iBuilder->setTerminationSignal(mIsFinalBlock);
89
90
91    BasicBlock* processCon = iBuilder->CreateBasicBlock("process_con");
92    iBuilder->CreateBr(processCon);
93
94    iBuilder->SetInsertPoint(processCon);
95
96    PHINode* phiIsCompressed = iBuilder->CreatePHI(iBuilder->getInt8Ty(), 3);
97    PHINode* phiBlockStart = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 3);
98    PHINode* phiBlockEnd = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 3);
99    PHINode* sOffset = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3);
100
101    phiIsCompressed->addIncoming(initIsCompressed, entryBlock);
102    phiBlockStart->addIncoming(initBlockStart, entryBlock);
103    phiBlockEnd->addIncoming(initBlockEnd, entryBlock);
104    sOffset->addIncoming(previousOffset, entryBlock);
105
106    // Store Output
107    BasicBlock* storeOutputBlock = iBuilder->CreateBasicBlock("storeOutputBlock");
108    BasicBlock * block_decoder_con = iBuilder->CreateBasicBlock("block_decoder_con_block");
109
110    iBuilder->CreateUnlikelyCondBr(
111            iBuilder->CreateAnd(
112                    iBuilder->CreateICmpULE(phiBlockEnd, totalItemCount),
113                    iBuilder->CreateNot(iBuilder->CreateICmpEQ(phiBlockEnd, INT64_0))
114            ),
115            storeOutputBlock,
116            block_decoder_con
117    );
118
119    iBuilder->SetInsertPoint(storeOutputBlock);
120    this->appendOutput(iBuilder, phiIsCompressed, phiBlockStart, phiBlockEnd);
121    phiIsCompressed->addIncoming(INT8_0, storeOutputBlock);
122    phiBlockStart->addIncoming(INT64_0, storeOutputBlock);
123    phiBlockEnd->addIncoming(INT64_0, storeOutputBlock);
124    sOffset->addIncoming(sOffset, storeOutputBlock);
125
126
127    iBuilder->CreateBr(processCon);
128
129
130    // block decoder entry
131    iBuilder->SetInsertPoint(block_decoder_con);
132
133    BasicBlock * block_decoder_body = iBuilder->CreateBasicBlock("block_decoder_body_block");
134    BasicBlock * block_decoder_exit = iBuilder->CreateBasicBlock("block_decoder_exit_block");
135
136    Value* reachFinalBlock = iBuilder->getScalarField("reachFinalBlock");
137
138    iBuilder->CreateCondBr(
139        iBuilder->CreateAnd(
140            iBuilder->CreateICmpULT(sOffset, totalItemCount),
141            iBuilder->CreateNot(reachFinalBlock)
142        ),
143        block_decoder_body,
144        block_decoder_exit);
145
146    //block_decoder_body
147    iBuilder->SetInsertPoint(block_decoder_body);
148    Value* currentBlockSize = iBuilder->getSize(0);
149    for (size_t i = 0; i < 4; i++) {
150        Value* offset = iBuilder->CreateAdd(sOffset, iBuilder->getSize(i));
151        Value* rawOffset = iBuilder->CreateZExt(this->generateLoadInput(iBuilder, offset), iBuilder->getSizeTy());
152
153        currentBlockSize = iBuilder->CreateAdd(currentBlockSize, iBuilder->CreateShl(rawOffset, iBuilder->getSize(8 * i)));
154    }
155
156    Value* realBlockSize = iBuilder->CreateAnd(currentBlockSize, 0x7fffffff);
157    Value* highestBit = iBuilder->CreateTrunc(iBuilder->CreateLShr(currentBlockSize, 31), iBuilder->getInt1Ty());
158    Value* isCompressed = iBuilder->CreateNot(highestBit);
159//    iBuilder->CallPrintInt("----realBlockSize", realBlockSize);
160
161    Value* isFinalBlock = iBuilder->CreateICmpEQ(realBlockSize, iBuilder->getSize(0));
162    iBuilder->setScalarField("reachFinalBlock", isFinalBlock);
163
164    Value* blockStart = iBuilder->CreateAdd(sOffset, iBuilder->getSize(4));
165    Value* blockEnd = iBuilder->CreateAdd(blockStart, realBlockSize);
166
167    Value* newOffset = sOffset;
168    newOffset = iBuilder->CreateAdd(newOffset, iBuilder->getSize(4)); // Block Size
169    newOffset = iBuilder->CreateAdd(newOffset, realBlockSize); // Block Content
170    newOffset = iBuilder->CreateAdd(
171            newOffset,
172            iBuilder->CreateSelect(
173                    iBuilder->getScalarField("hasBlockChecksum"),
174                    iBuilder->getSize(4),
175                    iBuilder->getSize(0))
176    ); // Block Checksum
177
178    sOffset->addIncoming(newOffset, iBuilder->GetInsertBlock());
179    phiIsCompressed->addIncoming(iBuilder->CreateSelect(isCompressed, INT8_1, INT8_0), iBuilder->GetInsertBlock());
180    phiBlockStart->addIncoming(blockStart, iBuilder->GetInsertBlock());
181    phiBlockEnd->addIncoming(blockEnd, iBuilder->GetInsertBlock());
182    iBuilder->CreateBr(processCon);
183
184    // block_decoder_exit_block
185    iBuilder->SetInsertPoint(block_decoder_exit);
186
187    iBuilder->setScalarField("pendingIsCompressed", phiIsCompressed);
188    iBuilder->setScalarField("pendingBlockStart", phiBlockStart);
189    iBuilder->setScalarField("pendingBlockEnd", phiBlockEnd);
190    iBuilder->setScalarField("previousOffset", sOffset);
191
192    iBuilder->CreateBr(exitBlock);
193    iBuilder->SetInsertPoint(exitBlock);
194}
195
196
197    Value* LZ4BlockDecoderNewKernel::generateLoadInput(const std::unique_ptr<KernelBuilder> & iBuilder, llvm::Value* offset) {
198        // The external buffer is always linear accessible, so the GEP here is safe
199        Value * inputBufferBasePtr = iBuilder->getRawInputPointer("byteStream", iBuilder->getSize(0));
200        Value* targetPtr = iBuilder->CreateGEP(inputBufferBasePtr, offset);
201        return iBuilder->CreateLoad(targetPtr);
202    }
203
204    void LZ4BlockDecoderNewKernel::appendOutput(const std::unique_ptr<KernelBuilder> & iBuilder, Value* isCompressed, Value* blockStart, Value* blockEnd) {
205        // Constant
206        this->generateStoreNumberOutput(iBuilder, "isCompressed", iBuilder->getInt8Ty()->getPointerTo(), isCompressed);
207        this->generateStoreNumberOutput(iBuilder, "blockStart", iBuilder->getInt64Ty()->getPointerTo(), blockStart);
208        this->generateStoreNumberOutput(iBuilder, "blockEnd", iBuilder->getInt64Ty()->getPointerTo(), blockEnd);
209    }
210
211    void LZ4BlockDecoderNewKernel::generateStoreNumberOutput(const unique_ptr<KernelBuilder> &iBuilder,
212                                                             const string &outputBufferName, Type *pointerType,
213                                                             Value *value) {
214        Value* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
215        Value* SIZE_ZERO = iBuilder->getSize(0);
216        Value* SIZE_ONE = iBuilder->getSize(1);
217
218        Value* previousProduced = previousProducedMap.find(outputBufferName)->second;
219
220        Value* blockIndexBase = iBuilder->CreateUDiv(previousProduced, SIZE_BIT_BLOCK_WIDTH);
221        Value* outputOffset = iBuilder->getProducedItemCount(outputBufferName);
222        Value* blockIndex = iBuilder->CreateUDiv(outputOffset, SIZE_BIT_BLOCK_WIDTH);
223
224        Value* blockOffset = iBuilder->CreateURem(outputOffset, SIZE_BIT_BLOCK_WIDTH);
225
226        // i8, [8 x <4 x i64>]*
227        // i64, [64 x <4 x i64>]*
228        Value* ptr = iBuilder->getOutputStreamBlockPtr(outputBufferName, SIZE_ZERO, iBuilder->CreateSub(blockIndex, blockIndexBase));
229        ptr = iBuilder->CreatePointerCast(ptr, pointerType);
230        // GEP here is safe
231        iBuilder->CreateStore(value, iBuilder->CreateGEP(ptr, blockOffset));
232
233        iBuilder->setProducedItemCount(outputBufferName, iBuilder->CreateAdd(outputOffset, SIZE_ONE));
234    }
235
236    size_t LZ4BlockDecoderNewKernel::getOutputBufferSize(const unique_ptr<KernelBuilder> &iBuilder, const string& bufferName) {
237//        size_t s = this->getOutputStreamSetBuffer(bufferName)->getBufferBlocks();
238        return this->getOutputStreamSetBuffer(bufferName)->getBufferBlocks() * iBuilder->getStride();
239    }
240}
Note: See TracBrowser for help on using the repository browser.