source: icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_block_decoder.cpp @ 6111

Last change on this file since 6111 was 6111, checked in by xwa163, 10 months ago
  1. Cleanup LZ4 AIO related kernels
  2. Improve LZ4ParallelByteStreamAIOKernel
  3. Implement simd_cttz
File size: 7.1 KB
Line 
1
2
#include "lz4_block_decoder.h"

#include <iostream>
#include <string>
#include <utility>

#include <kernels/kernel_builder.h>
#include <kernels/streamset.h>
#include <llvm/Support/raw_ostream.h>
10
11using namespace llvm;
12using namespace kernel;
13using namespace std;
14
15namespace kernel{
16
17LZ4BlockDecoderKernel::LZ4BlockDecoderKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder, std::string&& kernelName)
18: SegmentOrientedKernel(std::string(kernelName),
19// Inputs
20{
21    Binding{iBuilder->getStreamSetTy(1, 8), "byteStream"},
22},
23//Outputs
24{
25    Binding{iBuilder->getStreamSetTy(1, 8), "isCompressed", BoundedRate(0, 1)},
26    Binding{iBuilder->getStreamSetTy(1, 64), "blockStart", RateEqualTo("isCompressed")},
27    Binding{iBuilder->getStreamSetTy(1, 64), "blockEnd", RateEqualTo("isCompressed")}},
28//Arguments
29{
30    Binding{iBuilder->getInt1Ty(), "hasBlockChecksum"},
31    Binding{iBuilder->getSizeTy(), "headerSize"},
32    Binding{iBuilder->getSizeTy(), "fileSize"}
33},
34{},
35//Internal states:
36{
37Binding{iBuilder->getInt1Ty(), "hasSkipHeader"},
38Binding{iBuilder->getSizeTy(), "previousOffset"},
39Binding{iBuilder->getInt1Ty(), "reachFinalBlock"},
40
41Binding{iBuilder->getInt8Ty(), "pendingIsCompressed"},
42Binding{iBuilder->getInt64Ty(), "pendingBlockStart"},
43Binding{iBuilder->getInt64Ty(), "pendingBlockEnd"},
44}) {
45
46}
47
48void LZ4BlockDecoderKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & b) {
49
50    Constant* INT64_0 = b->getInt64(0);
51
52    BasicBlock * entryBlock = b->GetInsertBlock();
53
54    // Skip Header
55    Value* hasSkipHeader = b->getScalarField("hasSkipHeader");
56    b->setScalarField("hasSkipHeader", b->getTrue());
57    Value* skipLength = b->CreateSelect(hasSkipHeader, b->getSize(0), b->getScalarField("headerSize"));
58    Value* previousOffset = b->getScalarField("previousOffset");
59    previousOffset = b->CreateAdd(skipLength, previousOffset);
60    Value* initBlockStart = b->getScalarField("pendingBlockStart");
61    Value* initBlockEnd = b->getScalarField("pendingBlockEnd");
62    Value* initIsCompressed = b->getScalarField("pendingIsCompressed");
63    Value * availableItemCount = b->getAvailableItemCount("byteStream");
64    BasicBlock * processCon = b->CreateBasicBlock("process_con");
65    b->CreateBr(processCon);
66
67    b->SetInsertPoint(processCon);
68
69    PHINode* phiIsCompressed = b->CreatePHI(initIsCompressed->getType(), 3);
70    PHINode* phiBlockStart = b->CreatePHI(initBlockStart->getType(), 3);
71    PHINode* phiBlockEnd = b->CreatePHI(initBlockEnd->getType(), 3);
72    PHINode* sOffset = b->CreatePHI(previousOffset->getType(), 3);
73
74    phiIsCompressed->addIncoming(initIsCompressed, entryBlock);
75    phiBlockStart->addIncoming(initBlockStart, entryBlock);
76    phiBlockEnd->addIncoming(initBlockEnd, entryBlock);
77    sOffset->addIncoming(previousOffset, entryBlock);
78
79    // Store Output
80    BasicBlock* storeOutputBlock = b->CreateBasicBlock("storeOutputBlock");
81    BasicBlock * block_decoder_con = b->CreateBasicBlock("block_decoder_con_block");
82
83    b->CreateUnlikelyCondBr(
84            b->CreateAnd(
85                    b->CreateICmpULE(phiBlockEnd, availableItemCount),
86                    b->CreateNot(b->CreateICmpEQ(phiBlockEnd, INT64_0))
87            ),
88            storeOutputBlock,
89            block_decoder_con
90    );
91
92    b->SetInsertPoint(storeOutputBlock);
93
94    appendOutput(b, phiIsCompressed, phiBlockStart, phiBlockEnd);
95
96
97    phiIsCompressed->addIncoming(b->getInt8(0), storeOutputBlock);
98    phiBlockStart->addIncoming(INT64_0, storeOutputBlock);
99    phiBlockEnd->addIncoming(INT64_0, storeOutputBlock);
100    sOffset->addIncoming(sOffset, storeOutputBlock);
101
102    b->CreateBr(processCon);
103
104
105    // block decoder entry
106    b->SetInsertPoint(block_decoder_con);
107
108    BasicBlock * block_decoder_body = b->CreateBasicBlock("block_decoder_body_block");
109    BasicBlock * block_decoder_exit = b->CreateBasicBlock("block_decoder_exit_block");
110
111    Value * reachFinalBlock = b->getScalarField("reachFinalBlock");
112    b->CreateCondBr(
113        b->CreateAnd(
114            b->CreateICmpULT(sOffset, availableItemCount),
115            b->CreateNot(reachFinalBlock)
116        ),
117        block_decoder_body,
118        block_decoder_exit);
119
120    //block_decoder_body
121    b->SetInsertPoint(block_decoder_body);
122    Value* currentBlockSize = b->getSize(0);
123    for (size_t i = 0; i < 4; i++) {
124        Value * offset = b->CreateAdd(sOffset, b->getSize(i));
125        Value * rawOffset = b->CreateZExt(generateLoadInput(b, offset), b->getSizeTy());
126        currentBlockSize = b->CreateOr(currentBlockSize, b->CreateShl(rawOffset, b->getSize(8 * i)));
127    }
128
129    Value * realBlockSize = b->CreateAnd(currentBlockSize, 0x7fffffff);
130
131    Value * isCompressed = b->CreateNot(currentBlockSize);
132    isCompressed = b->CreateLShr(isCompressed, 31);
133    isCompressed = b->CreateTrunc(isCompressed, b->getInt1Ty());
134
135    Value * isFinalBlock = b->CreateICmpEQ(realBlockSize, b->getSize(0));
136    b->setScalarField("reachFinalBlock", isFinalBlock);
137
138    Value * blockStart = b->CreateAdd(sOffset, b->getSize(4));
139    Value * blockEnd = b->CreateAdd(blockStart, realBlockSize);
140
141    Value * newOffset = sOffset;
142    newOffset = b->CreateAdd(newOffset, b->getSize(4)); // Block Size
143    newOffset = b->CreateAdd(newOffset, realBlockSize); // Block Content
144    Value * const blockChecksumOffset = b->CreateSelect(b->getScalarField("hasBlockChecksum"), b->getSize(4), b->getSize(0));
145    newOffset = b->CreateAdd(newOffset, blockChecksumOffset);
146
147    sOffset->addIncoming(newOffset, block_decoder_body);
148    phiIsCompressed->addIncoming(b->CreateZExt(isCompressed, b->getInt8Ty()), block_decoder_body);
149    phiBlockStart->addIncoming(blockStart, block_decoder_body);
150    phiBlockEnd->addIncoming(blockEnd, block_decoder_body);
151    b->CreateBr(processCon);
152
153    // block_decoder_exit_block
154    b->SetInsertPoint(block_decoder_exit);
155    b->setScalarField("pendingIsCompressed", phiIsCompressed);
156    b->setScalarField("pendingBlockStart", phiBlockStart);
157    b->setScalarField("pendingBlockEnd", phiBlockEnd);
158    b->setScalarField("previousOffset", sOffset);
159    b->setProcessedItemCount("byteStream", availableItemCount);
160    b->setTerminationSignal(mIsFinal);
161}
162
163void LZ4BlockDecoderKernel::appendOutput(const std::unique_ptr<KernelBuilder> & iBuilder, Value * const isCompressed, Value * const blockStart, Value * const blockEnd) {
164    Value * const offset = iBuilder->getProducedItemCount("isCompressed");
165    generateStoreNumberOutput(iBuilder, "isCompressed", offset, iBuilder->CreateZExt(isCompressed, iBuilder->getInt8Ty()));
166    generateStoreNumberOutput(iBuilder, "blockStart", offset, blockStart);
167    generateStoreNumberOutput(iBuilder, "blockEnd", offset, blockEnd);
168    iBuilder->setProducedItemCount("isCompressed", iBuilder->CreateAdd(offset, iBuilder->getSize(1)));
169}
170
171Value* LZ4BlockDecoderKernel::generateLoadInput(const std::unique_ptr<KernelBuilder> & iBuilder, llvm::Value* offset) {
172    return iBuilder->CreateLoad(iBuilder->getRawInputPointer("byteStream", offset));
173}
174
175void LZ4BlockDecoderKernel::generateStoreNumberOutput(const unique_ptr<KernelBuilder> &iBuilder, const string &outputBufferName, Value * offset, Value *value) {
176    iBuilder->CreateStore(value, iBuilder->getRawOutputPointer(outputBufferName, offset));
177}
178
179}
Note: See TracBrowser for help on using the repository browser.