Ignore:
Timestamp:
Aug 14, 2018, 5:10:07 PM (12 months ago)
Author:
xwa163
Message:
  1. UTF-8 LZ4 Grep: Avoid decompressing U8NonFinal when the target regex does not contain Unicode
  2. Reduce data loading and branching in lz4_bytestream_decompression
File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/lz4/grep/lz4_grep_base_generator.cpp

    r6147 r6148  
    175175        re::RE* targetRe = mRE;
    176176
     177        bool allCcByteLength = re::isAllCcByteLength(mRE);
     178
    177179        linefeedCC = re::makeCC(0x0A);
    178180
     
    193195        mGrepDriver->makeKernelCall(ccK, {compressedBitStream}, {CharClasses});
    194196
    195 
    196 
    197         StreamSetBuffer* compressedNonFinalStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize, 1);
    198         kernel::Kernel * nonFinalK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, u8NonFinalRe, externalStreamNames, std::vector<cc::Alphabet *>(), cc::BitNumbering::BigEndian);
    199         mGrepDriver->makeKernelCall(nonFinalK, {compressedBitStream}, {compressedNonFinalStream});
    200 
    201 
    202 
    203         auto decompressedStreams = this->decompressBitStreams(compressedByteStream, {CharClasses, compressedNonFinalStream});
    204 
    205 //        uncompressedCharClasses = this->decompressBitStream(compressedByteStream, CharClasses);
    206         uncompressedCharClasses = decompressedStreams[0];
    207         u8NoFinalStream = decompressedStreams[1];
    208 
    209 
    210 
    211         auto fakeStreams = this->generateFakeStreams(idb, uncompressedCharClasses, std::vector<unsigned>{8});
    212         fakeMatchCopiedBits = fakeStreams[0];
    213 
     197        if (allCcByteLength) {
     198            // We do not need to decompress the U8 NonFinal stream if all of the character classes in the target regular expression are byte length
     199            uncompressedCharClasses = this->decompressBitStream(compressedByteStream, CharClasses);
     200            auto fakeStreams = this->generateFakeStreams(idb, uncompressedCharClasses, std::vector<unsigned>{8, 1});
     201            fakeMatchCopiedBits = fakeStreams[0];
     202            u8NoFinalStream = fakeStreams[1];
     203        } else {
     204            StreamSetBuffer* compressedNonFinalStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize, 1);
     205            kernel::Kernel * nonFinalK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, u8NonFinalRe, externalStreamNames, std::vector<cc::Alphabet *>(), cc::BitNumbering::BigEndian);
     206            mGrepDriver->makeKernelCall(nonFinalK, {compressedBitStream}, {compressedNonFinalStream});
     207
     208            auto decompressedStreams = this->decompressBitStreams(compressedByteStream, {CharClasses, compressedNonFinalStream});
     209            uncompressedCharClasses = decompressedStreams[0];
     210            u8NoFinalStream = decompressedStreams[1];
     211
     212            auto fakeStreams = this->generateFakeStreams(idb, uncompressedCharClasses, std::vector<unsigned>{8});
     213            fakeMatchCopiedBits = fakeStreams[0];
     214        }
    214215    } else {
    215216        re::Seq* seq = re::makeSeq();
     
    229230        std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
    230231        auto numOfCharacterClasses = mpx_basis.size();
     232//        llvm::errs() << "numOfCharacterClasses:" << numOfCharacterClasses << "\n";
    231233        StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize, 1);
    232234
Note: See TracChangeset for help on using the changeset viewer.