source: icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_block_decoder_new.cpp @ 5921

Last change on this file since 5921 was 5921, checked in by xwa163, 13 months ago
  1. Initial checkin for new approach for lz4 index decoder that always use 4MB buffer
  2. Add test case for new approach (for now test cases will fail when test file is larger than 4MB)
File size: 9.6 KB
Line 
1//
2// Created by wxy325 on 2018/3/16.
3//
4
5#include "lz4_block_decoder_new.h"
6
7#include <kernels/kernel_builder.h>
8#include <iostream>
9#include <string>
10#include <llvm/Support/raw_ostream.h>
11#include <kernels/streamset.h>
12
13using namespace llvm;
14using namespace kernel;
15using namespace std;
16
17namespace kernel{
18
19    LZ4BlockDecoderNewKernel::LZ4BlockDecoderNewKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder)
20: MultiBlockKernel("LZ4BlockDecoderNewKernel",
21    // Inputs
22    {
23                           Binding{iBuilder->getStreamSetTy(1, 8), "byteStream", FixedRate(1), AlwaysConsume()},
24                           Binding{iBuilder->getStreamSetTy(1, 1), "extender", FixedRate(1), AlwaysConsume()}
25                   },
26    //Outputs
27    {
28        Binding{iBuilder->getStreamSetTy(1, 8), "isCompressed", BoundedRate(0, 1)},
29        Binding{iBuilder->getStreamSetTy(1, 64), "blockStart", BoundedRate(0, 1)},
30        Binding{iBuilder->getStreamSetTy(1, 64), "blockEnd", BoundedRate(0, 1)}},
31    //Arguments
32    {
33        Binding{iBuilder->getInt1Ty(), "hasBlockChecksum"},
34        Binding{iBuilder->getSizeTy(), "headerSize"}
35    },
36    {},
37    //Internal states:
38    {
39    Binding{iBuilder->getInt1Ty(), "hasSkipHeader"},
40    Binding{iBuilder->getSizeTy(), "previousOffset"},
41    Binding{iBuilder->getInt1Ty(), "reachFinalBlock"},
42
43    Binding{iBuilder->getInt1Ty(), "pendingIsCompressed"},
44    Binding{iBuilder->getInt64Ty(), "pendingBlockStart"},
45    Binding{iBuilder->getInt64Ty(), "pendingBlockEnd"},
46    }) {
47        addAttribute(MustExplicitlyTerminate());
48}
49
/**
 * Emits the IR that walks the LZ4 block headers found in "byteStream" and
 * appends one (isCompressed, blockStart, blockEnd) entry per block to the
 * three output streams.
 *
 * The generated code is a loop headed by "process_con":
 *   - if a pending entry exists (blockEnd != 0) and its end does not exceed
 *     the item totals of both inputs, it is written out and cleared;
 *   - otherwise, while the read offset is below the "byteStream" total and
 *     the end mark has not been seen, the next 4-byte block-size field is
 *     decoded and becomes the new pending entry;
 *   - otherwise the loop state is saved to the kernel's scalar fields.
 *
 * @param iBuilder     builder used to emit the IR.
 * @param numOfStrides not used in this function.
 */
void LZ4BlockDecoderNewKernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> &iBuilder, Value * const numOfStrides) {
    // Constant
    Constant* INT8_0 = iBuilder->getInt8(0);
    Constant* INT8_1 = iBuilder->getInt8(1);
    Constant* INT64_0 = iBuilder->getInt64(0);


    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
    BasicBlock * exitBlock = iBuilder->CreateBasicBlock("exit");

    // Skip Header
    // "hasSkipHeader" is read and then unconditionally set, so "headerSize"
    // is added to the offset only when the flag was still false.
    Value* hasSkipHeader = iBuilder->getScalarField("hasSkipHeader");
    iBuilder->setScalarField("hasSkipHeader", iBuilder->getInt1(true));
    Value* skipLength = iBuilder->CreateSelect(hasSkipHeader, iBuilder->getSize(0), iBuilder->getScalarField("headerSize"));
    Value* previousOffset = iBuilder->getScalarField("previousOffset");
    previousOffset = iBuilder->CreateAdd(skipLength, previousOffset);
    // Pending entry saved at the end of the previous invocation.
    Value* initBlockStart = iBuilder->getScalarField("pendingBlockStart");
    Value* initBlockEnd = iBuilder->getScalarField("pendingBlockEnd");
    Value* initIsCompressed = iBuilder->getScalarField("pendingIsCompressed");


    Value* availableItemCount = iBuilder->getAvailableItemCount("byteStream");
    Value* processedItemCount = iBuilder->getProcessedItemCount("byteStream");

    // The kernel signals termination when no "byteStream" items are available.
    Value* mIsFinalBlock = iBuilder->CreateICmpEQ(availableItemCount, INT64_0);
    iBuilder->setTerminationSignal(mIsFinalBlock);

    // available + processed for "byteStream".
    Value* totalItemCount = iBuilder->CreateAdd(availableItemCount, processedItemCount);

    // The same sum for the "extender" stream.
    Value* totalItemCount2 = iBuilder->CreateAdd(iBuilder->getAvailableItemCount("extender"), iBuilder->getProcessedItemCount("extender"));

//    iBuilder->CallPrintInt("===totalItemCount2", totalItemCount2);

    BasicBlock* processCon = iBuilder->CreateBasicBlock("process_con");
    iBuilder->CreateBr(processCon);

    // Loop head. Each PHI receives a value from three predecessors: the entry
    // block, storeOutputBlock and the block-decoder body.
    iBuilder->SetInsertPoint(processCon);

    // phiIsCompressed is an i8 PHI seeded from, and later saved to, the
    // "pendingIsCompressed" scalar, so that scalar has to be declared as an
    // 8-bit integer for the incoming value and the store to type-check.
    PHINode* phiIsCompressed = iBuilder->CreatePHI(iBuilder->getInt8Ty(), 3);
    PHINode* phiBlockStart = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 3);
    PHINode* phiBlockEnd = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 3);
    // Offset in "byteStream" of the next block-size field to decode.
    PHINode* sOffset = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3);

    phiIsCompressed->addIncoming(initIsCompressed, entryBlock);
    phiBlockStart->addIncoming(initBlockStart, entryBlock);
    phiBlockEnd->addIncoming(initBlockEnd, entryBlock);
    sOffset->addIncoming(previousOffset, entryBlock);

    // Store Output
    BasicBlock* storeOutputBlock = iBuilder->CreateBasicBlock("storeOutputBlock");
    BasicBlock * block_decoder_con = iBuilder->CreateBasicBlock("block_decoder_con_block");

    // Flush the pending entry only when one exists (blockEnd != 0) and its end
    // is not beyond the item totals of both input streams.
    iBuilder->CreateUnlikelyCondBr(
            iBuilder->CreateAnd(
                    iBuilder->CreateAnd(iBuilder->CreateICmpULE(phiBlockEnd, totalItemCount), iBuilder->CreateICmpULE(phiBlockEnd, totalItemCount2)),
                    iBuilder->CreateNot(iBuilder->CreateICmpEQ(phiBlockEnd, INT64_0))
            ),
            storeOutputBlock,
            block_decoder_con
    );

    iBuilder->SetInsertPoint(storeOutputBlock);
    this->appendOutput(iBuilder, phiIsCompressed, phiBlockStart, phiBlockEnd);
    // Clear the pending entry (blockEnd == 0 means "nothing pending") and keep
    // the read offset unchanged for the next trip through the loop head.
    phiIsCompressed->addIncoming(INT8_0, storeOutputBlock);
    phiBlockStart->addIncoming(INT64_0, storeOutputBlock);
    phiBlockEnd->addIncoming(INT64_0, storeOutputBlock);
    sOffset->addIncoming(sOffset, storeOutputBlock);
    iBuilder->CreateBr(processCon);


    // block decoder entry
    iBuilder->SetInsertPoint(block_decoder_con);

    BasicBlock * block_decoder_body = iBuilder->CreateBasicBlock("block_decoder_body_block");
    BasicBlock * block_decoder_exit = iBuilder->CreateBasicBlock("block_decoder_exit_block");

    // Emitted inside the loop, so the flag written by the body below is
    // re-read on every pass through this block.
    Value* reachFinalBlock = iBuilder->getScalarField("reachFinalBlock");

    // Decode another header only while the offset is below the "byteStream"
    // total and no zero-size block has been seen.
    // NOTE(review): this guard only checks sOffset itself; the body loads the
    // bytes at sOffset+1..sOffset+3 without a separate availability check —
    // confirm the input buffer always makes them readable.
    iBuilder->CreateCondBr(
        iBuilder->CreateAnd(
            iBuilder->CreateICmpULT(sOffset, totalItemCount),
            iBuilder->CreateNot(reachFinalBlock)
        ),
        block_decoder_body,
        block_decoder_exit);

    //block_decoder_body
    iBuilder->SetInsertPoint(block_decoder_body);
    // Assemble the 4-byte block-size field, least significant byte first.
    Value* currentBlockSize = iBuilder->getSize(0);
    for (size_t i = 0; i < 4; i++) {
        Value* offset = iBuilder->CreateAdd(sOffset, iBuilder->getSize(i));
        Value* rawOffset = iBuilder->CreateZExt(this->generateLoadInput(iBuilder, offset), iBuilder->getSizeTy());

        currentBlockSize = iBuilder->CreateAdd(currentBlockSize, iBuilder->CreateShl(rawOffset, iBuilder->getSize(8 * i)));
    }

    // Low 31 bits hold the block length; a set bit 31 means "not compressed".
    Value* realBlockSize = iBuilder->CreateAnd(currentBlockSize, 0x7fffffff);
    Value* highestBit = iBuilder->CreateTrunc(iBuilder->CreateLShr(currentBlockSize, 31), iBuilder->getInt1Ty());
    Value* isCompressed = iBuilder->CreateNot(highestBit);

    // A block length of zero is recorded as the final block.
    Value* isFinalBlock = iBuilder->CreateICmpEQ(realBlockSize, iBuilder->getSize(0));
    iBuilder->setScalarField("reachFinalBlock", isFinalBlock);

    // The block content starts right after the 4-byte size field.
    Value* blockStart = iBuilder->CreateAdd(sOffset, iBuilder->getSize(4));
    Value* blockEnd = iBuilder->CreateAdd(blockStart, realBlockSize);

    // Offset of the next size field: size field + content + optional checksum.
    Value* newOffset = sOffset;
    newOffset = iBuilder->CreateAdd(newOffset, iBuilder->getSize(4)); // Block Size
    newOffset = iBuilder->CreateAdd(newOffset, realBlockSize); // Block Content
    newOffset = iBuilder->CreateAdd(
            newOffset,
            iBuilder->CreateSelect(
                    iBuilder->getScalarField("hasBlockChecksum"),
                    iBuilder->getSize(4),
                    iBuilder->getSize(0))
    ); // Block Checksum

    // The freshly decoded block becomes the pending entry for the loop head.
    sOffset->addIncoming(newOffset, iBuilder->GetInsertBlock());
    phiIsCompressed->addIncoming(iBuilder->CreateSelect(isCompressed, INT8_1, INT8_0), iBuilder->GetInsertBlock());
    phiBlockStart->addIncoming(blockStart, iBuilder->GetInsertBlock());
    phiBlockEnd->addIncoming(blockEnd, iBuilder->GetInsertBlock());
    iBuilder->CreateBr(processCon);

    // block_decoder_exit_block
    iBuilder->SetInsertPoint(block_decoder_exit);

    // Save the loop state into the scalar fields that are read back at the
    // top of this function.
    iBuilder->setScalarField("pendingIsCompressed", phiIsCompressed);
    iBuilder->setScalarField("pendingBlockStart", phiBlockStart);
    iBuilder->setScalarField("pendingBlockEnd", phiBlockEnd);
    iBuilder->setScalarField("previousOffset", sOffset);

    iBuilder->CreateBr(exitBlock);
    iBuilder->SetInsertPoint(exitBlock);
}
184
185
186    Value* LZ4BlockDecoderNewKernel::generateLoadInput(const std::unique_ptr<KernelBuilder> & iBuilder, llvm::Value* offset) {
187        Value * inputBufferBasePtr = iBuilder->getRawInputPointer("byteStream", iBuilder->getSize(0));
188        Value* targetPtr = iBuilder->CreateGEP(inputBufferBasePtr, offset);
189        return iBuilder->CreateLoad(targetPtr);
190    }
191
192    void LZ4BlockDecoderNewKernel::appendOutput(const std::unique_ptr<KernelBuilder> & iBuilder, Value* isCompressed, Value* blockStart, Value* blockEnd) {
193        // Constant
194        this->generateStoreCircularOutput(iBuilder, "isCompressed", iBuilder->getInt8Ty()->getPointerTo(), isCompressed);
195        this->generateStoreCircularOutput(iBuilder, "blockStart", iBuilder->getInt64Ty()->getPointerTo(), blockStart);
196        this->generateStoreCircularOutput(iBuilder, "blockEnd", iBuilder->getInt64Ty()->getPointerTo(), blockEnd);
197    }
198
199    void LZ4BlockDecoderNewKernel::generateStoreCircularOutput(const unique_ptr<KernelBuilder> &iBuilder, const string& outputBufferName, Type* pointerType, Value* value) {
200        Value* offset = iBuilder->getProducedItemCount(outputBufferName);
201
202        size_t inputSize = this->getOutputBufferSize(iBuilder, outputBufferName);
203        Value* offsetMask = iBuilder->getSize(inputSize - 1);
204        Value* maskedOffset = iBuilder->CreateAnd(offsetMask, offset);
205
206        Value* outputBufferPtr = iBuilder->getRawOutputPointer(outputBufferName, iBuilder->getSize(0));
207
208        outputBufferPtr = iBuilder->CreatePointerCast(outputBufferPtr, pointerType);
209        iBuilder->CreateStore(value, iBuilder->CreateGEP(outputBufferPtr, maskedOffset));
210
211        offset = iBuilder->CreateAdd(offset, iBuilder->getSize(1));
212        iBuilder->setProducedItemCount(outputBufferName, offset);
213    }
214
215    size_t LZ4BlockDecoderNewKernel::getOutputBufferSize(const unique_ptr<KernelBuilder> &iBuilder, const string& bufferName) {
216//        size_t s = this->getOutputStreamSetBuffer(bufferName)->getBufferBlocks();
217        return this->getOutputStreamSetBuffer(bufferName)->getBufferBlocks() * iBuilder->getStride();
218    }
219}
Note: See TracBrowser for help on using the repository browser.