Ignore:
Timestamp:
Apr 24, 2018, 11:28:48 AM (14 months ago)
Author:
nmedfort
Message:

Made lz4 block decoder a segment-oriented kernel; cleaned up processing rates.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_block_decoder.cpp

    r5957 r5984  
    1717namespace kernel{
    1818
    19     LZ4BlockDecoderNewKernel::LZ4BlockDecoderNewKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder)
    20 : MultiBlockKernel("LZ4BlockDecoderNewKernel",
    21     // Inputs
    22     {
    23                            Binding{iBuilder->getStreamSetTy(1, 8), "byteStream", FixedRate(1)},
    24                    },
    25     //Outputs
    26     {
    27         Binding{iBuilder->getStreamSetTy(1, 8), "isCompressed", BoundedRate(0, 1)},
    28         Binding{iBuilder->getStreamSetTy(1, 64), "blockStart", BoundedRate(0, 1)},
    29         Binding{iBuilder->getStreamSetTy(1, 64), "blockEnd", BoundedRate(0, 1)}},
    30     //Arguments
    31     {
    32         Binding{iBuilder->getInt1Ty(), "hasBlockChecksum"},
    33         Binding{iBuilder->getSizeTy(), "headerSize"},
    34         Binding{iBuilder->getSizeTy(), "fileSize"}
    35     },
    36     {},
    37     //Internal states:
    38     {
    39     Binding{iBuilder->getInt1Ty(), "hasSkipHeader"},
    40     Binding{iBuilder->getSizeTy(), "previousOffset"},
    41     Binding{iBuilder->getInt1Ty(), "reachFinalBlock"},
     19LZ4BlockDecoderNewKernel::LZ4BlockDecoderNewKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder)
     20: SegmentOrientedKernel("LZ4BlockDecoderNewKernel",
     21// Inputs
     22{
     23    Binding{iBuilder->getStreamSetTy(1, 8), "byteStream"},
     24},
     25//Outputs
     26{
     27    Binding{iBuilder->getStreamSetTy(1, 8), "isCompressed", BoundedRate(0, 1)},
     28    Binding{iBuilder->getStreamSetTy(1, 64), "blockStart", RateEqualTo("isCompressed")},
     29    Binding{iBuilder->getStreamSetTy(1, 64), "blockEnd", RateEqualTo("isCompressed")}},
     30//Arguments
     31{
     32    Binding{iBuilder->getInt1Ty(), "hasBlockChecksum"},
     33    Binding{iBuilder->getSizeTy(), "headerSize"},
     34    Binding{iBuilder->getSizeTy(), "fileSize"}
     35},
     36{},
     37//Internal states:
     38{
     39Binding{iBuilder->getInt1Ty(), "hasSkipHeader"},
     40Binding{iBuilder->getSizeTy(), "previousOffset"},
     41Binding{iBuilder->getInt1Ty(), "reachFinalBlock"},
    4242
    43     Binding{iBuilder->getInt1Ty(), "pendingIsCompressed"},
    44     Binding{iBuilder->getInt64Ty(), "pendingBlockStart"},
    45     Binding{iBuilder->getInt64Ty(), "pendingBlockEnd"},
    46     }) {
    47         addAttribute(MustExplicitlyTerminate());
     43Binding{iBuilder->getInt1Ty(), "pendingIsCompressed"},
     44Binding{iBuilder->getInt64Ty(), "pendingBlockStart"},
     45Binding{iBuilder->getInt64Ty(), "pendingBlockEnd"},
     46}) {
     47
    4848}
    4949
    50 void LZ4BlockDecoderNewKernel::resetPreviousProducedMap(const std::unique_ptr<KernelBuilder> &iBuilder,
    51                                                         std::vector<std::string> outputList) {
    52     previousProducedMap.clear();
    53     for (auto iter = outputList.begin(); iter != outputList.end(); ++iter) {
    54         previousProducedMap.insert(std::make_pair(*iter, iBuilder->getProducedItemCount(*iter)));
    55     }
    56 }
     50void LZ4BlockDecoderNewKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    5751
    58 void LZ4BlockDecoderNewKernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> &iBuilder, Value * const numOfStrides) {
    59     // Constant
    6052    Constant* INT64_0 = iBuilder->getInt64(0);
    6153
    6254    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
    63     BasicBlock * exitBlock = iBuilder->CreateBasicBlock("exit");
    64 
    65     this->resetPreviousProducedMap(iBuilder, {"isCompressed", "blockStart", "blockEnd"});
    6655
    6756    // Skip Header
    6857    Value* hasSkipHeader = iBuilder->getScalarField("hasSkipHeader");
    69     iBuilder->setScalarField("hasSkipHeader", iBuilder->getInt1(true));
     58    iBuilder->setScalarField("hasSkipHeader", iBuilder->getTrue());
    7059    Value* skipLength = iBuilder->CreateSelect(hasSkipHeader, iBuilder->getSize(0), iBuilder->getScalarField("headerSize"));
    7160    Value* previousOffset = iBuilder->getScalarField("previousOffset");
     
    7463    Value* initBlockEnd = iBuilder->getScalarField("pendingBlockEnd");
    7564    Value* initIsCompressed = iBuilder->getScalarField("pendingIsCompressed");
    76 
    77 
    78     Value* availableItemCount = iBuilder->getAvailableItemCount("byteStream");
    79     Value* processedItemCount = iBuilder->getProcessedItemCount("byteStream");
    80 
    81     Value* totalItemCount = iBuilder->CreateAdd(availableItemCount, processedItemCount);
    82 
    83     Value* mIsFinalBlock = iBuilder->CreateICmpEQ(totalItemCount, iBuilder->getScalarField("fileSize"));
    84 
    85     iBuilder->setTerminationSignal(mIsFinalBlock);
    86 
    87 
    88     BasicBlock* processCon = iBuilder->CreateBasicBlock("process_con");
     65    Value * availableItemCount = iBuilder->getAvailableItemCount("byteStream");
     66    BasicBlock * processCon = iBuilder->CreateBasicBlock("process_con");
    8967    iBuilder->CreateBr(processCon);
    9068
     
    10785    iBuilder->CreateUnlikelyCondBr(
    10886            iBuilder->CreateAnd(
    109                     iBuilder->CreateICmpULE(phiBlockEnd, totalItemCount),
     87                    iBuilder->CreateICmpULE(phiBlockEnd, availableItemCount),
    11088                    iBuilder->CreateNot(iBuilder->CreateICmpEQ(phiBlockEnd, INT64_0))
    11189            ),
     
    11593
    11694    iBuilder->SetInsertPoint(storeOutputBlock);
    117     this->appendOutput(iBuilder, phiIsCompressed, phiBlockStart, phiBlockEnd);
     95
     96    appendOutput(iBuilder, phiIsCompressed, phiBlockStart, phiBlockEnd);
     97
    11898    phiIsCompressed->addIncoming(iBuilder->getFalse(), storeOutputBlock);
    11999    phiBlockStart->addIncoming(INT64_0, storeOutputBlock);
    120100    phiBlockEnd->addIncoming(INT64_0, storeOutputBlock);
    121101    sOffset->addIncoming(sOffset, storeOutputBlock);
    122 
    123102
    124103    iBuilder->CreateBr(processCon);
     
    131110    BasicBlock * block_decoder_exit = iBuilder->CreateBasicBlock("block_decoder_exit_block");
    132111
    133     Value* reachFinalBlock = iBuilder->getScalarField("reachFinalBlock");
    134 
     112    Value * reachFinalBlock = iBuilder->getScalarField("reachFinalBlock");
    135113    iBuilder->CreateCondBr(
    136114        iBuilder->CreateAnd(
    137             iBuilder->CreateICmpULT(sOffset, totalItemCount),
     115            iBuilder->CreateICmpULT(sOffset, availableItemCount),
    138116            iBuilder->CreateNot(reachFinalBlock)
    139117        ),
     
    145123    Value* currentBlockSize = iBuilder->getSize(0);
    146124    for (size_t i = 0; i < 4; i++) {
    147         Value* offset = iBuilder->CreateAdd(sOffset, iBuilder->getSize(i));
    148         Value* rawOffset = iBuilder->CreateZExt(this->generateLoadInput(iBuilder, offset), iBuilder->getSizeTy());
    149 
    150         currentBlockSize = iBuilder->CreateAdd(currentBlockSize, iBuilder->CreateShl(rawOffset, iBuilder->getSize(8 * i)));
     125        Value * offset = iBuilder->CreateAdd(sOffset, iBuilder->getSize(i));
     126        Value * rawOffset = iBuilder->CreateZExt(generateLoadInput(iBuilder, offset), iBuilder->getSizeTy());
     127        currentBlockSize = iBuilder->CreateOr(currentBlockSize, iBuilder->CreateShl(rawOffset, iBuilder->getSize(8 * i)));
    151128    }
    152129
    153     Value* realBlockSize = iBuilder->CreateAnd(currentBlockSize, 0x7fffffff);
    154     Value* highestBit = iBuilder->CreateTrunc(iBuilder->CreateLShr(currentBlockSize, 31), iBuilder->getInt1Ty());
    155     Value* isCompressed = iBuilder->CreateNot(highestBit);
    156 //    iBuilder->CallPrintInt("----realBlockSize", realBlockSize);
     130    Value * realBlockSize = iBuilder->CreateAnd(currentBlockSize, 0x7fffffff);
    157131
    158     Value* isFinalBlock = iBuilder->CreateICmpEQ(realBlockSize, iBuilder->getSize(0));
     132    Value * isCompressed = iBuilder->CreateNot(currentBlockSize);
     133    isCompressed = iBuilder->CreateLShr(isCompressed, 31);
     134    isCompressed = iBuilder->CreateTrunc(isCompressed, iBuilder->getInt1Ty());
     135
     136    Value * isFinalBlock = iBuilder->CreateICmpEQ(realBlockSize, iBuilder->getSize(0));
    159137    iBuilder->setScalarField("reachFinalBlock", isFinalBlock);
    160138
    161     Value* blockStart = iBuilder->CreateAdd(sOffset, iBuilder->getSize(4));
    162     Value* blockEnd = iBuilder->CreateAdd(blockStart, realBlockSize);
     139    Value * blockStart = iBuilder->CreateAdd(sOffset, iBuilder->getSize(4));
     140    Value * blockEnd = iBuilder->CreateAdd(blockStart, realBlockSize);
    163141
    164     Value* newOffset = sOffset;
     142    Value * newOffset = sOffset;
    165143    newOffset = iBuilder->CreateAdd(newOffset, iBuilder->getSize(4)); // Block Size
    166144    newOffset = iBuilder->CreateAdd(newOffset, realBlockSize); // Block Content
    167     newOffset = iBuilder->CreateAdd(
    168             newOffset,
    169             iBuilder->CreateSelect(
    170                     iBuilder->getScalarField("hasBlockChecksum"),
    171                     iBuilder->getSize(4),
    172                     iBuilder->getSize(0))
    173     ); // Block Checksum
     145    Value * const blockChecksumOffset = iBuilder->CreateSelect(iBuilder->getScalarField("hasBlockChecksum"), iBuilder->getSize(4), iBuilder->getSize(0));
     146    newOffset = iBuilder->CreateAdd(newOffset, blockChecksumOffset);
    174147
    175     sOffset->addIncoming(newOffset, iBuilder->GetInsertBlock());
    176     phiIsCompressed->addIncoming(isCompressed, iBuilder->GetInsertBlock());
    177     phiBlockStart->addIncoming(blockStart, iBuilder->GetInsertBlock());
    178     phiBlockEnd->addIncoming(blockEnd, iBuilder->GetInsertBlock());
     148    sOffset->addIncoming(newOffset, block_decoder_body);
     149    phiIsCompressed->addIncoming(isCompressed, block_decoder_body);
     150    phiBlockStart->addIncoming(blockStart, block_decoder_body);
     151    phiBlockEnd->addIncoming(blockEnd, block_decoder_body);
    179152    iBuilder->CreateBr(processCon);
    180153
    181154    // block_decoder_exit_block
    182155    iBuilder->SetInsertPoint(block_decoder_exit);
    183 
    184156    iBuilder->setScalarField("pendingIsCompressed", phiIsCompressed);
    185157    iBuilder->setScalarField("pendingBlockStart", phiBlockStart);
    186158    iBuilder->setScalarField("pendingBlockEnd", phiBlockEnd);
    187159    iBuilder->setScalarField("previousOffset", sOffset);
    188 
    189     iBuilder->CreateBr(exitBlock);
    190     iBuilder->SetInsertPoint(exitBlock);
     160    iBuilder->setProcessedItemCount("byteStream", availableItemCount);
     161    iBuilder->setTerminationSignal(mIsFinal);
    191162}
    192163
     164void LZ4BlockDecoderNewKernel::appendOutput(const std::unique_ptr<KernelBuilder> & iBuilder, Value * const isCompressed, Value * const blockStart, Value * const blockEnd) {
     165    Value * const offset = iBuilder->getProducedItemCount("isCompressed");
     166    generateStoreNumberOutput(iBuilder, "isCompressed", offset, iBuilder->CreateZExt(isCompressed, iBuilder->getInt8Ty()));
     167    generateStoreNumberOutput(iBuilder, "blockStart", offset, blockStart);
     168    generateStoreNumberOutput(iBuilder, "blockEnd", offset, blockEnd);
     169    iBuilder->setProducedItemCount("isCompressed", iBuilder->CreateAdd(offset, iBuilder->getSize(1)));
     170}
    193171
    194     Value* LZ4BlockDecoderNewKernel::generateLoadInput(const std::unique_ptr<KernelBuilder> & iBuilder, llvm::Value* offset) {
    195         // The external buffer is always linear accessible, so the GEP here is safe
    196         Value * inputBufferBasePtr = iBuilder->getRawInputPointer("byteStream", iBuilder->getSize(0));
    197         Value* targetPtr = iBuilder->CreateGEP(inputBufferBasePtr, offset);
    198         return iBuilder->CreateLoad(targetPtr);
    199     }
     172Value* LZ4BlockDecoderNewKernel::generateLoadInput(const std::unique_ptr<KernelBuilder> & iBuilder, llvm::Value* offset) {
     173    return iBuilder->CreateLoad(iBuilder->getRawInputPointer("byteStream", offset));
     174}
    200175
    201     void LZ4BlockDecoderNewKernel::appendOutput(const std::unique_ptr<KernelBuilder> & iBuilder, Value* isCompressed, Value* blockStart, Value* blockEnd) {
    202         // Constant
    203         this->generateStoreNumberOutput(iBuilder, "isCompressed", iBuilder->getInt1Ty()->getPointerTo(), isCompressed);
    204         this->generateStoreNumberOutput(iBuilder, "blockStart", iBuilder->getInt64Ty()->getPointerTo(), blockStart);
    205         this->generateStoreNumberOutput(iBuilder, "blockEnd", iBuilder->getInt64Ty()->getPointerTo(), blockEnd);
    206     }
     176void LZ4BlockDecoderNewKernel::generateStoreNumberOutput(const unique_ptr<KernelBuilder> &iBuilder, const string &outputBufferName, Value * offset, Value *value) {
     177    iBuilder->CreateStore(value, iBuilder->getRawOutputPointer(outputBufferName, offset));
     178}
    207179
    208     void LZ4BlockDecoderNewKernel::generateStoreNumberOutput(const unique_ptr<KernelBuilder> &iBuilder,
    209                                                              const string &outputBufferName, Type *pointerType,
    210                                                              Value *value) {
    211         Value* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
    212         Value* SIZE_ZERO = iBuilder->getSize(0);
    213         Value* SIZE_ONE = iBuilder->getSize(1);
    214 
    215         Value* previousProduced = previousProducedMap.find(outputBufferName)->second;
    216 
    217         Value* blockIndexBase = iBuilder->CreateUDiv(previousProduced, SIZE_BIT_BLOCK_WIDTH);
    218         Value* outputOffset = iBuilder->getProducedItemCount(outputBufferName);
    219         Value* blockIndex = iBuilder->CreateUDiv(outputOffset, SIZE_BIT_BLOCK_WIDTH);
    220 
    221         Value* blockOffset = iBuilder->CreateURem(outputOffset, SIZE_BIT_BLOCK_WIDTH);
    222 
    223         // i8, [8 x <4 x i64>]*
    224         // i64, [64 x <4 x i64>]*
    225         Value* ptr = iBuilder->getOutputStreamBlockPtr(outputBufferName, SIZE_ZERO, iBuilder->CreateSub(blockIndex, blockIndexBase));
    226         ptr = iBuilder->CreatePointerCast(ptr, pointerType);
    227         // GEP here is safe
    228         iBuilder->CreateStore(value, iBuilder->CreateGEP(ptr, blockOffset));
    229 
    230         iBuilder->setProducedItemCount(outputBufferName, iBuilder->CreateAdd(outputOffset, SIZE_ONE));
    231     }
    232 
    233     size_t LZ4BlockDecoderNewKernel::getOutputBufferSize(const unique_ptr<KernelBuilder> &iBuilder, const string& bufferName) {
    234 //        size_t s = this->getOutputStreamSetBuffer(bufferName)->getBufferBlocks();
    235         return this->getOutputStreamSetBuffer(bufferName)->getBufferBlocks() * iBuilder->getStride();
    236     }
    237180}
Note: See TracChangeset for help on using the changeset viewer.