Ignore:
Timestamp:
May 16, 2018, 1:00:19 PM (14 months ago)
Author:
xwa163
Message:
  1. Fix a bug in multiplexing lz4_grep in release builds
  2. Combine the compressed-space LineBreakStream and CharClassStream before doing PDEP and match copy, in order to improve performance
Location:
icGREP/icgrep-devel/icgrep/lz4
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp

    r6043 r6044  
    3333
    3434LZ4Generator::LZ4Generator():mPxDriver("lz4d") {
    35 
     35    mCompressionMarker = NULL;
    3636}
    3737
  • icGREP/icgrep-devel/icgrep/lz4/LZ4GrepGenerator.cpp

    r6043 r6044  
    162162    const auto nREs = mREs.size();
    163163
    164     StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    165164    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
    166165
    167 //    StreamSetBuffer * LineFeedStream = this->linefeedStreamFromDecompressedBits(matchCopiedBasisBits);
    168     StreamSetBuffer * LineFeedStream = this->linefeedStreamFromCompressedBits();
     166
    169167
    170168    if (mGrepRecordBreak == GrepRecordBreakKind::LF) {
    171         LineBreakStream = LineFeedStream;
    172169    } else if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
    173170        // TODO fix here
     
    178175    std::map<std::string, StreamSetBuffer *> propertyStream;
    179176
    180     for(unsigned i = 0; i < nREs; ++i) {
    181         std::vector<std::string> externalStreamNames;
    182 
    183         std::set<re::Name *> UnicodeProperties;
    184 
    185 
    186         const auto UnicodeSets = re::collectCCs(mREs[i], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
    187         StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    188 
    189         mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
    190         mREs[i] = transformCCs(mpx.get(), mREs[i]);
    191         std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
    192         auto numOfCharacterClasses = mpx_basis.size();
    193         StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
    194         kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis));
    195         mGrepDriver->makeKernelCall(ccK, {mCompressedBasisBits}, {CharClasses}); //TODO get it from compression space
    196 
    197         StreamSetBuffer * decompressedCharClasses = this->convertCompressedBitsStream(CharClasses, numOfCharacterClasses, "mpx");
    198         //                kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis), true);
    199         //                mGrepDriver->makeKernelCall(ccK, {mCompressedByteStream}, {CharClasses});
    200 
    201         StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks());
    202         Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<LZ4FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
    203         mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
    204 
    205         kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()});
    206         mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {MatchResults});
    207         MatchResultsBufs[i] = MatchResults;
    208 
    209     }
     177    std::vector<std::string> externalStreamNames;
     178    std::set<re::Name *> UnicodeProperties;
     179
     180    const auto UnicodeSets = re::collectCCs(mREs[0], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
     181    StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     182
     183    mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
     184    mREs[0] = transformCCs(mpx.get(), mREs[0]);
     185    std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
     186    auto numOfCharacterClasses = mpx_basis.size();
     187    StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
     188
     189    kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis));
     190    mGrepDriver->makeKernelCall(ccK, {mCompressedBasisBits}, {CharClasses}); //TODO get it from compression space
     191
     192    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     193    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
     194    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
     195
     196    StreamSetBuffer * combinedStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses + 1), baseBufferSize);
     197    kernel::Kernel* streamCombineKernel = mPxDriver.addKernelInstance<StreamsCombineKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
     198    mPxDriver.makeKernelCall(streamCombineKernel, {CompressedLineFeedStream, CharClasses}, {combinedStream});
     199
     200    StreamSetBuffer * decompressedCombinedStream = this->convertCompressedBitsStream(combinedStream, 1 + numOfCharacterClasses, "combined");
     201    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1), baseBufferSize);
     202    StreamSetBuffer * decompressedCharClasses = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
     203    kernel::Kernel* streamSplitKernel = mPxDriver.addKernelInstance<StreamsSplitKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
     204    mPxDriver.makeKernelCall(streamSplitKernel, {decompressedCombinedStream}, {LineBreakStream, decompressedCharClasses});
     205
     206    /*
     207    StreamSetBuffer * LineBreakStream = this->convertCompressedBitsStream(CompressedLineFeedStream, 1, "LineFeed");
     208    StreamSetBuffer * decompressedCharClasses = this->convertCompressedBitsStream(CharClasses, numOfCharacterClasses, "mpx");
     209     */
     210
     211    StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks());
     212    Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<LZ4FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
     213    mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
     214
     215    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[0], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()});
     216    mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {MatchResults});
     217    MatchResultsBufs[0] = MatchResults;
    210218
    211219    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
Note: See TracChangeset for help on using the changeset viewer.