source: icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_block_decoder.cpp @ 6111

Last change on this file since 6111 was 6111, checked in by xwa163, 11 months ago
  1. Cleanup LZ4 AIO related kernels
  2. Improve LZ4ParallelByteStreamAIOKernel
  3. Implement simd_cttz
File size: 7.1 KB
RevLine 
[5864]1
[6020]2
#include "lz4_block_decoder.h"

#include <iostream>
#include <string>
#include <utility>

#include <llvm/Support/raw_ostream.h>

#include <kernels/kernel_builder.h>
#include <kernels/streamset.h>
10
11using namespace llvm;
12using namespace kernel;
13using namespace std;
14
15namespace kernel{
16
[6111]17LZ4BlockDecoderKernel::LZ4BlockDecoderKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder, std::string&& kernelName)
[6026]18: SegmentOrientedKernel(std::string(kernelName),
[5984]19// Inputs
20{
21    Binding{iBuilder->getStreamSetTy(1, 8), "byteStream"},
22},
23//Outputs
24{
25    Binding{iBuilder->getStreamSetTy(1, 8), "isCompressed", BoundedRate(0, 1)},
26    Binding{iBuilder->getStreamSetTy(1, 64), "blockStart", RateEqualTo("isCompressed")},
27    Binding{iBuilder->getStreamSetTy(1, 64), "blockEnd", RateEqualTo("isCompressed")}},
28//Arguments
29{
30    Binding{iBuilder->getInt1Ty(), "hasBlockChecksum"},
31    Binding{iBuilder->getSizeTy(), "headerSize"},
32    Binding{iBuilder->getSizeTy(), "fileSize"}
33},
34{},
35//Internal states:
36{
37Binding{iBuilder->getInt1Ty(), "hasSkipHeader"},
38Binding{iBuilder->getSizeTy(), "previousOffset"},
39Binding{iBuilder->getInt1Ty(), "reachFinalBlock"},
[5948]40
[6077]41Binding{iBuilder->getInt8Ty(), "pendingIsCompressed"},
[5984]42Binding{iBuilder->getInt64Ty(), "pendingBlockStart"},
43Binding{iBuilder->getInt64Ty(), "pendingBlockEnd"},
44}) {
[5864]45
[5948]46}
[5864]47
[6111]48void LZ4BlockDecoderKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & b) {
[5984]49
[6081]50    Constant* INT64_0 = b->getInt64(0);
[5864]51
[6081]52    BasicBlock * entryBlock = b->GetInsertBlock();
[5948]53
54    // Skip Header
[6081]55    Value* hasSkipHeader = b->getScalarField("hasSkipHeader");
56    b->setScalarField("hasSkipHeader", b->getTrue());
57    Value* skipLength = b->CreateSelect(hasSkipHeader, b->getSize(0), b->getScalarField("headerSize"));
58    Value* previousOffset = b->getScalarField("previousOffset");
59    previousOffset = b->CreateAdd(skipLength, previousOffset);
60    Value* initBlockStart = b->getScalarField("pendingBlockStart");
61    Value* initBlockEnd = b->getScalarField("pendingBlockEnd");
62    Value* initIsCompressed = b->getScalarField("pendingIsCompressed");
63    Value * availableItemCount = b->getAvailableItemCount("byteStream");
64    BasicBlock * processCon = b->CreateBasicBlock("process_con");
65    b->CreateBr(processCon);
[5864]66
[6081]67    b->SetInsertPoint(processCon);
[5948]68
[6081]69    PHINode* phiIsCompressed = b->CreatePHI(initIsCompressed->getType(), 3);
70    PHINode* phiBlockStart = b->CreatePHI(initBlockStart->getType(), 3);
71    PHINode* phiBlockEnd = b->CreatePHI(initBlockEnd->getType(), 3);
72    PHINode* sOffset = b->CreatePHI(previousOffset->getType(), 3);
[5948]73
74    phiIsCompressed->addIncoming(initIsCompressed, entryBlock);
75    phiBlockStart->addIncoming(initBlockStart, entryBlock);
76    phiBlockEnd->addIncoming(initBlockEnd, entryBlock);
77    sOffset->addIncoming(previousOffset, entryBlock);
78
79    // Store Output
[6081]80    BasicBlock* storeOutputBlock = b->CreateBasicBlock("storeOutputBlock");
81    BasicBlock * block_decoder_con = b->CreateBasicBlock("block_decoder_con_block");
[5948]82
[6081]83    b->CreateUnlikelyCondBr(
84            b->CreateAnd(
85                    b->CreateICmpULE(phiBlockEnd, availableItemCount),
86                    b->CreateNot(b->CreateICmpEQ(phiBlockEnd, INT64_0))
[5948]87            ),
88            storeOutputBlock,
89            block_decoder_con
90    );
91
[6081]92    b->SetInsertPoint(storeOutputBlock);
[5984]93
[6081]94    appendOutput(b, phiIsCompressed, phiBlockStart, phiBlockEnd);
[5984]95
[6077]96
[6081]97    phiIsCompressed->addIncoming(b->getInt8(0), storeOutputBlock);
[5948]98    phiBlockStart->addIncoming(INT64_0, storeOutputBlock);
99    phiBlockEnd->addIncoming(INT64_0, storeOutputBlock);
100    sOffset->addIncoming(sOffset, storeOutputBlock);
101
[6081]102    b->CreateBr(processCon);
[5948]103
104
105    // block decoder entry
[6081]106    b->SetInsertPoint(block_decoder_con);
[5864]107
[6081]108    BasicBlock * block_decoder_body = b->CreateBasicBlock("block_decoder_body_block");
109    BasicBlock * block_decoder_exit = b->CreateBasicBlock("block_decoder_exit_block");
[5864]110
[6081]111    Value * reachFinalBlock = b->getScalarField("reachFinalBlock");
112    b->CreateCondBr(
113        b->CreateAnd(
114            b->CreateICmpULT(sOffset, availableItemCount),
115            b->CreateNot(reachFinalBlock)
[5864]116        ),
117        block_decoder_body,
118        block_decoder_exit);
119
120    //block_decoder_body
[6081]121    b->SetInsertPoint(block_decoder_body);
122    Value* currentBlockSize = b->getSize(0);
[5864]123    for (size_t i = 0; i < 4; i++) {
[6081]124        Value * offset = b->CreateAdd(sOffset, b->getSize(i));
125        Value * rawOffset = b->CreateZExt(generateLoadInput(b, offset), b->getSizeTy());
126        currentBlockSize = b->CreateOr(currentBlockSize, b->CreateShl(rawOffset, b->getSize(8 * i)));
[5864]127    }
128
[6081]129    Value * realBlockSize = b->CreateAnd(currentBlockSize, 0x7fffffff);
[5864]130
[6081]131    Value * isCompressed = b->CreateNot(currentBlockSize);
132    isCompressed = b->CreateLShr(isCompressed, 31);
133    isCompressed = b->CreateTrunc(isCompressed, b->getInt1Ty());
[5984]134
[6081]135    Value * isFinalBlock = b->CreateICmpEQ(realBlockSize, b->getSize(0));
136    b->setScalarField("reachFinalBlock", isFinalBlock);
[5864]137
[6081]138    Value * blockStart = b->CreateAdd(sOffset, b->getSize(4));
139    Value * blockEnd = b->CreateAdd(blockStart, realBlockSize);
[5864]140
[5984]141    Value * newOffset = sOffset;
[6081]142    newOffset = b->CreateAdd(newOffset, b->getSize(4)); // Block Size
143    newOffset = b->CreateAdd(newOffset, realBlockSize); // Block Content
144    Value * const blockChecksumOffset = b->CreateSelect(b->getScalarField("hasBlockChecksum"), b->getSize(4), b->getSize(0));
145    newOffset = b->CreateAdd(newOffset, blockChecksumOffset);
[5864]146
[5984]147    sOffset->addIncoming(newOffset, block_decoder_body);
[6081]148    phiIsCompressed->addIncoming(b->CreateZExt(isCompressed, b->getInt8Ty()), block_decoder_body);
[5984]149    phiBlockStart->addIncoming(blockStart, block_decoder_body);
150    phiBlockEnd->addIncoming(blockEnd, block_decoder_body);
[6081]151    b->CreateBr(processCon);
[5864]152
153    // block_decoder_exit_block
[6081]154    b->SetInsertPoint(block_decoder_exit);
155    b->setScalarField("pendingIsCompressed", phiIsCompressed);
156    b->setScalarField("pendingBlockStart", phiBlockStart);
157    b->setScalarField("pendingBlockEnd", phiBlockEnd);
158    b->setScalarField("previousOffset", sOffset);
159    b->setProcessedItemCount("byteStream", availableItemCount);
160    b->setTerminationSignal(mIsFinal);
[5984]161}
[5864]162
[6111]163void LZ4BlockDecoderKernel::appendOutput(const std::unique_ptr<KernelBuilder> & iBuilder, Value * const isCompressed, Value * const blockStart, Value * const blockEnd) {
[5984]164    Value * const offset = iBuilder->getProducedItemCount("isCompressed");
165    generateStoreNumberOutput(iBuilder, "isCompressed", offset, iBuilder->CreateZExt(isCompressed, iBuilder->getInt8Ty()));
166    generateStoreNumberOutput(iBuilder, "blockStart", offset, blockStart);
167    generateStoreNumberOutput(iBuilder, "blockEnd", offset, blockEnd);
168    iBuilder->setProducedItemCount("isCompressed", iBuilder->CreateAdd(offset, iBuilder->getSize(1)));
[5864]169}
170
[6111]171Value* LZ4BlockDecoderKernel::generateLoadInput(const std::unique_ptr<KernelBuilder> & iBuilder, llvm::Value* offset) {
[5984]172    return iBuilder->CreateLoad(iBuilder->getRawInputPointer("byteStream", offset));
173}
[5864]174
[6111]175void LZ4BlockDecoderKernel::generateStoreNumberOutput(const unique_ptr<KernelBuilder> &iBuilder, const string &outputBufferName, Value * offset, Value *value) {
[5984]176    iBuilder->CreateStore(value, iBuilder->getRawOutputPointer(outputBufferName, offset));
177}
[5864]178
[5957]179}
Note: See TracBrowser for help on using the repository browser.