Changeset 6148


Ignore:
Timestamp:
Aug 14, 2018, 5:10:07 PM (6 weeks ago)
Author:
xwa163
Message:
  1. UTF-8 LZ4 Grep: Avoid decompressing U8NonFinal when the target regex does not contain Unicode
  2. Reduce data loading and branching in lz4_bytestream_decompression
Location:
icGREP/icgrep-devel/icgrep
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/lz4/decompression/lz4_bytestream_decompression.cpp

    r6136 r6148  
    7070
    7171        Value* remBufferSize = b->CreateSub(LZ4_BLOCK_SIZE, b->CreateAdd(phiCopiedLength, outputPosRemBlockSize));
    72         Value* fullCopy = b->CreateICmpULE(SIZE_FW_BYTE, remBufferSize);
    73 
    74 
    75         BasicBlock* fullCopyBlock = b->CreateBasicBlock("fullCopyBlock");
    76         BasicBlock* partCopyBlock = b->CreateBasicBlock("partCopyBlock");
    77 
    78         BasicBlock* literalCopyEnd = b->CreateBasicBlock("literalCopyEnd");
    79 
    80         b->CreateLikelyCondBr(fullCopy, fullCopyBlock, partCopyBlock);
    81 
    82         // ---- fullCopyBlock
    83         b->SetInsertPoint(fullCopyBlock);
    84         b->CreateStore(b->CreateLoad(phiInputPtr), phiOutputPtr);
    85         b->CreateBr(literalCopyEnd);
    86         // ---- partCopyBlock
    87         b->SetInsertPoint(partCopyBlock);
    88         Value* oldOutputValue = b->CreateLoad(phiOutputPtr);
     72
     73
    8974        Value* inputValue = b->CreateLoad(phiInputPtr);
    9075
     
    9580                INT_FW_1
    9681        );
     82        Value* fullCopy = b->CreateICmpEQ(actualCopyLength, SIZE_FW_BYTE);
    9783        mask = b->CreateSelect(
    98                 b->CreateICmpEQ(actualCopyLength, SIZE_FW_BYTE),
     84                fullCopy,
    9985                b->CreateNot(b->getIntN(COPY_FW, 0)),
    10086                mask
    10187        );
    10288
     89        Value* exceedValue = b->CreateSelect(
     90                fullCopy,
     91                b->getIntN(COPY_FW, 0),
     92                b->CreateShl(this->oldOutputExceedFwData, b->CreateMul(actualCopyLength, b->getIntN(COPY_FW, 8)))
     93        );
     94
    10395        Value* actualOutput = b->CreateOr(
    10496                b->CreateAnd(inputValue, mask),
    105                 b->CreateAnd(oldOutputValue, b->CreateNot(mask))
     97                exceedValue
    10698        );
    10799
    108100        b->CreateStore(actualOutput, phiOutputPtr);
    109 
    110         b->CreateBr(literalCopyEnd);
    111         // ---- literalCopyEnd
    112         b->SetInsertPoint(literalCopyEnd);
    113 
    114101
    115102        phiInputPtr->addIncoming(b->CreateGEP(phiInputPtr, b->getSize(1)), b->GetInsertBlock());
     
    125112    void LZ4ByteStreamDecompressionKernel::doMatchCopy(const std::unique_ptr<KernelBuilder> &b, llvm::Value *matchOffset,
    126113                                             llvm::Value *matchLength) {
    127 
    128114        Value* LZ4_BLOCK_SIZE = b->getSize(mBlockSize);
    129115        Type* INT_FW_PTR = b->getIntNTy(COPY_FW)->getPointerTo();
     
    131117        Value* SIZE_FW_BYTE = b->getSize(COPY_FW / BYTE_WIDTH);
    132118
    133 
    134119        BasicBlock* entryBlock = b->GetInsertBlock();
    135120
     
    138123        Value* outputPosRemBlockSize = b->CreateURem(outputPos, b->getSize(mBlockSize));
    139124
    140         Value* copyToPtr = b->getRawOutputPointer("outputStream", b->CreateURem(outputPos, outputBufferSize));
     125        Value* outputPosRem = b->CreateURem(outputPos, outputBufferSize);
     126        Value* copyToPtr = b->getRawOutputPointer("outputStream", outputPosRem);
     127
    141128        Value* copyFromPtr = b->getRawOutputPointer("outputStream", b->CreateURem(b->CreateSub(outputPos, matchOffset), outputBufferSize));
    142129
     
    162149
    163150        Value* remBufferSize = b->CreateSub(LZ4_BLOCK_SIZE, b->CreateAdd(phiCopiedSize, outputPosRemBlockSize));
    164         Value* fullCopy = b->CreateICmpULE(SIZE_FW_BYTE, remBufferSize);
    165151        Value* copyFromFwPtr = b->CreatePointerCast(phiFromPtr, INT_FW_PTR);
    166152        Value* copyToFwPtr = b->CreatePointerCast(phiToPtr, INT_FW_PTR);
    167153
    168         BasicBlock* fullMatchCopyBlock = b->CreateBasicBlock("fullMatchCopyBlock");
    169         BasicBlock* partMatchCopyBlock = b->CreateBasicBlock("partMatchCopyBlock");
    170         BasicBlock* matchCopyEndBlock = b->CreateBasicBlock("matchCopyEndBlock");
    171 
    172         b->CreateLikelyCondBr(fullCopy, fullMatchCopyBlock, partMatchCopyBlock);
    173 
    174         // ---- fullMatchCopyBlock
    175         b->SetInsertPoint(fullMatchCopyBlock);
    176         b->CreateStore(b->CreateLoad(copyFromFwPtr), copyToFwPtr);
    177         b->CreateBr(matchCopyEndBlock);
    178 
    179         // ---- partMatchCopyBlock
    180         b->SetInsertPoint(partMatchCopyBlock);
    181         Value* oldOutputValue = b->CreateLoad(copyToFwPtr);
    182154        Value* actualCopyLength = b->CreateUMin(SIZE_FW_BYTE, remBufferSize);
    183155        Value* mask = b->CreateSub(
     
    185157                INT_FW_1
    186158        );
     159
     160        Value* fullCopy = b->CreateICmpEQ(actualCopyLength, SIZE_FW_BYTE);
    187161        mask = b->CreateSelect(
    188                 b->CreateICmpEQ(actualCopyLength, SIZE_FW_BYTE),
     162                fullCopy,
    189163                b->CreateNot(b->getIntN(COPY_FW, 0)),
    190164                mask
    191165        );
     166        Value* exceedValue = b->CreateSelect(
     167                fullCopy,
     168                b->getIntN(COPY_FW, 0),
     169                b->CreateShl(this->oldOutputExceedFwData, b->CreateMul(actualCopyLength, b->getIntN(COPY_FW, 8)))
     170        );
     171
    192172
    193173        Value* actualOutput = b->CreateOr(
    194174                b->CreateAnd(b->CreateLoad(copyFromFwPtr), mask),
    195                 b->CreateAnd(oldOutputValue, b->CreateNot(mask))
     175                exceedValue
    196176        );
    197177
     
    200180                copyToFwPtr
    201181        );
    202 
    203         b->CreateBr(matchCopyEndBlock);
    204 
    205         // ---- matchCopyEndBlock
    206         b->SetInsertPoint(matchCopyEndBlock);
    207 
    208182
    209183        Value* copySize = b->CreateUMin(matchOffset, b->getSize(COPY_FW / 8));
     
    242216        b->CreateMemCpy(temporayInputPtr, b->CreateGEP(rawInputPtr, blockStartRem), copySize1, 1);
    243217        b->CreateMemCpy(b->CreateGEP(temporayInputPtr, copySize1), rawInputPtr, copySize2, 1);
     218
     219
     220        Value* outputPos = b->getScalarField("outputPos");
     221        Value* outputBufferSize = b->getCapacity("outputStream");
     222        Value* outputPosRem = b->CreateURem(outputPos, outputBufferSize);
     223        Value* LZ4_BLOCK_SIZE = b->getSize(mBlockSize);
     224        Type* INT_FW_PTR = b->getIntNTy(COPY_FW)->getPointerTo();
     225        Value* outputEndPtr = b->CreateGEP(b->getRawOutputPointer("outputStream", b->getSize(0)), b->CreateMul(b->CreateUDiv(b->CreateAdd(outputPosRem, LZ4_BLOCK_SIZE), LZ4_BLOCK_SIZE), LZ4_BLOCK_SIZE));
     226        // Load the old output data located just past the end of the block only once per LZ4 block
     227        this->oldOutputExceedFwData = b->CreateLoad(b->CreatePointerCast(outputEndPtr, INT_FW_PTR));
     228
    244229    }
    245230
  • icGREP/icgrep-devel/icgrep/kernels/lz4/decompression/lz4_bytestream_decompression.h

    r6136 r6148  
    2626        inline std::string getCopyByteStreamName();
    2727        bool mCopyOtherByteStream;
    28 
     28        llvm::Value* oldOutputExceedFwData;
    2929    };
    3030
  • icGREP/icgrep-devel/icgrep/lz4/grep/lz4_grep_base_generator.cpp

    r6147 r6148  
    175175        re::RE* targetRe = mRE;
    176176
     177        bool allCcByteLength = re::isAllCcByteLength(mRE);
     178
    177179        linefeedCC = re::makeCC(0x0A);
    178180
     
    193195        mGrepDriver->makeKernelCall(ccK, {compressedBitStream}, {CharClasses});
    194196
    195 
    196 
    197         StreamSetBuffer* compressedNonFinalStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize, 1);
    198         kernel::Kernel * nonFinalK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, u8NonFinalRe, externalStreamNames, std::vector<cc::Alphabet *>(), cc::BitNumbering::BigEndian);
    199         mGrepDriver->makeKernelCall(nonFinalK, {compressedBitStream}, {compressedNonFinalStream});
    200 
    201 
    202 
    203         auto decompressedStreams = this->decompressBitStreams(compressedByteStream, {CharClasses, compressedNonFinalStream});
    204 
    205 //        uncompressedCharClasses = this->decompressBitStream(compressedByteStream, CharClasses);
    206         uncompressedCharClasses = decompressedStreams[0];
    207         u8NoFinalStream = decompressedStreams[1];
    208 
    209 
    210 
    211         auto fakeStreams = this->generateFakeStreams(idb, uncompressedCharClasses, std::vector<unsigned>{8});
    212         fakeMatchCopiedBits = fakeStreams[0];
    213 
     197        if (allCcByteLength) {
     198            // We do not need to decompress the U8 NonFinal stream if all of the character classes in the target regular expression are byte length
     199            uncompressedCharClasses = this->decompressBitStream(compressedByteStream, CharClasses);
     200            auto fakeStreams = this->generateFakeStreams(idb, uncompressedCharClasses, std::vector<unsigned>{8, 1});
     201            fakeMatchCopiedBits = fakeStreams[0];
     202            u8NoFinalStream = fakeStreams[1];
     203        } else {
     204            StreamSetBuffer* compressedNonFinalStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize, 1);
     205            kernel::Kernel * nonFinalK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, u8NonFinalRe, externalStreamNames, std::vector<cc::Alphabet *>(), cc::BitNumbering::BigEndian);
     206            mGrepDriver->makeKernelCall(nonFinalK, {compressedBitStream}, {compressedNonFinalStream});
     207
     208            auto decompressedStreams = this->decompressBitStreams(compressedByteStream, {CharClasses, compressedNonFinalStream});
     209            uncompressedCharClasses = decompressedStreams[0];
     210            u8NoFinalStream = decompressedStreams[1];
     211
     212            auto fakeStreams = this->generateFakeStreams(idb, uncompressedCharClasses, std::vector<unsigned>{8});
     213            fakeMatchCopiedBits = fakeStreams[0];
     214        }
    214215    } else {
    215216        re::Seq* seq = re::makeSeq();
     
    229230        std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
    230231        auto numOfCharacterClasses = mpx_basis.size();
     232//        llvm::errs() << "numOfCharacterClasses:" << numOfCharacterClasses << "\n";
    231233        StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize, 1);
    232234
Note: See TracChangeset for help on using the changeset viewer.