Ignore:
Timestamp:
Jul 23, 2018, 4:56:33 AM (12 months ago)
Author:
xwa163
Message:
  1. More experiments on lz4 grep
  2. Improve performance of lzparabix grep
File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/lz4/LZ4GrepGenerator.cpp

    r6124 r6132  
    2222#include <kernels/lz4/lz4_bitstream_match_copy_kernel.h>
    2323#include <kernels/lz4/lz4_bitstream_not_kernel.h>
     24#include <kernels/lz4/aio/lz4_i4_bytestream_aio.h>
    2425#include <kernels/fake_stream_generating_kernel.h>
    2526#include <kernels/bitstream_pdep_kernel.h>
     
    2930#include <re/collect_ccs.h>
    3031#include <re/replaceCC.h>
     32
     33#include <re/casing.h>
     34#include <re/exclude_CC.h>
     35#include <re/to_utf8.h>
     36#include <re/re_analysis.h>
     37#include <re/re_name_resolve.h>
     38#include <re/re_name_gather.h>
     39#include <re/re_multiplex.h>
     40#include <re/re_utility.h>
    3141
    3242#include <UCD/resolve_properties.h>
     
    5464#include <kernels/lz4/aio/lz4_swizzled_aio.h>
    5565#include <kernels/lz4/aio/lz4_bitstream_aio.h>
    56 
     66#include <re/re_seq.h>
     67#include <kernels/lz4/aio/lz4_bytestream_aio.h>
    5768
    5869namespace re { class CC; }
     
    108119}
    109120
    110 
     121parabix::StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStreamWithByteStreamAioApproach(
     122        parabix::StreamSetBuffer *compressedBitStream, int numberOfStream, std::string prefix) {
     123    auto mGrepDriver = &mPxDriver;
     124    auto & b = mGrepDriver->getBuilder();
     125
     126    LZ4BlockInfo blockInfo = this->getBlockInfo(b);
     127
     128    StreamSetBuffer * const mtxByteStream = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(b));
     129    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(b, cc::BitNumbering::BigEndian, prefix, numberOfStream);
     130    mPxDriver.makeKernelCall(p2sK, {compressedBitStream}, {mtxByteStream});
     131
     132    StreamSetBuffer * const decompressionMtxByteStream = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(b), 1);
     133    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ByteStreamAioKernel>(b, true);
     134    lz4AioK->setInitialArguments({mFileSize});
     135    mPxDriver.makeKernelCall(
     136            lz4AioK,
     137            {
     138                    mCompressedByteStream,
     139                    // Block Data
     140                    blockInfo.isCompress,
     141                    blockInfo.blockStart,
     142                    blockInfo.blockEnd,
     143                    mtxByteStream
     144            }, {
     145                    decompressionMtxByteStream
     146            });
     147
     148    StreamSetBuffer * const decompressedMtxBitStream = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(8), this->getDecompressedBufferBlocks(b));
     149
     150    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(b, cc::BitNumbering::BigEndian, true, prefix, numberOfStream);
     151    mPxDriver.makeKernelCall(s2pk, {decompressionMtxByteStream}, {decompressedMtxBitStream});
     152
     153    return decompressedMtxBitStream;
     154}
    111155
    112156StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStreamWithSwizzledAioApproach(
    113157        parabix::StreamSetBuffer *compressedBitStream, int numberOfStream, std::string prefix) {
    114158    auto mGrepDriver = &mPxDriver;
    115     auto & iBuilder = mGrepDriver->getBuilder();
    116 
    117     //// Decode Block Information
    118     StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
    119     StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    120     StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    121 
    122     //// Generate Helper Markers Extenders, FX, XF
    123 //    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
    124 //    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
    125 //    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
    126 //    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
    127 
    128 
    129     Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(iBuilder);
    130     blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
    131     mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
    132 
     159    auto & b = mGrepDriver->getBuilder();
     160
     161    LZ4BlockInfo blockInfo = this->getBlockInfo(b);
    133162
    134163    // Produce unswizzled bit streams
    135     StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
    136     Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 4, 1, 1, 64, "source");
     164    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(4), this->getInputBufferBlocks(b), 1);
     165    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(b, 4, 1, 1, 64, "source");
    137166    mPxDriver.makeKernelCall(unSwizzleK, {compressedBitStream}, {u16Swizzle0});
    138167
    139     StreamSetBuffer * decompressedSwizzled0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
    140 
    141 
    142     Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4SwizzledAioKernel>(iBuilder, 4, 1, 4);
     168    StreamSetBuffer * decompressedSwizzled0 = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(4), this->getInputBufferBlocks(b), 1);
     169
     170
     171    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4SwizzledAioKernel>(b, 4, 1, 4);
    143172    lz4AioK->setInitialArguments({mFileSize});
    144173    mPxDriver.makeKernelCall(
     
    149178
    150179                    // Block Data
    151                     BlockData_IsCompressed,
    152                     BlockData_BlockStart,
    153                     BlockData_BlockEnd,
     180                    blockInfo.isCompress,
     181                    blockInfo.blockStart,
     182                    blockInfo.blockEnd,
    154183
    155184                    u16Swizzle0,
     
    160189
    161190
    162     StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
    163     Kernel * unSwizzleK2 = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 4, 1, 1, 64, "dst");
     191    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(b));
     192    Kernel * unSwizzleK2 = mPxDriver.addKernelInstance<SwizzleGenerator>(b, 4, 1, 1, 64, "dst");
    164193    mPxDriver.makeKernelCall(unSwizzleK2, {decompressedSwizzled0}, {decompressionBitStream});
    165194
     
    317346
    318347};
    319 std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::multiplexingGrepPipeline(std::vector<re::RE *> &REs, bool useAio, bool useSwizzled) {
     348std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::multiplexingGrepPipeline(std::vector<re::RE *> &REs, bool useAio, bool useSwizzled, bool useByteStream) {
    320349
    321350    this->initREs(REs);
     
    338367    std::set<re::Name *> UnicodeProperties;
    339368
    340     const auto UnicodeSets = re::collectCCs(mREs[0], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
     369    re::CC* linefeedCC = re::makeCC(0x0A);
     370
     371    re::Seq* seq = re::makeSeq();
     372    seq->push_back(mREs[0]);
     373    seq->push_back(std::move(linefeedCC));
     374
     375
     376    const auto UnicodeSets = re::collectCCs(seq, &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
    341377    StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    342378
     
    350386    mGrepDriver->makeKernelCall(ccK, {mCompressedBasisBits}, {CharClasses});
    351387
    352     StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    353     kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()}, cc::BitNumbering::BigEndian);
    354     mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
    355 
    356 
    357     StreamSetBuffer * LineBreakStream = nullptr;
    358388    StreamSetBuffer * decompressedCharClasses = nullptr;
    359389    if (useSwizzled) {
    360         StreamSetBuffer * combinedStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses + 1), baseBufferSize);
    361         kernel::Kernel* streamCombineKernel = mPxDriver.addKernelInstance<StreamsCombineKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
    362         mPxDriver.makeKernelCall(streamCombineKernel, {CompressedLineFeedStream, CharClasses}, {combinedStream});
    363         StreamSetBuffer * decompressedCombinedStream = nullptr;
    364 
    365390        if (useAio) {
    366             decompressedCombinedStream = this->convertCompressedBitsStreamWithSwizzledAioApproach(combinedStream, 1 + numOfCharacterClasses, "combined");
     391            decompressedCharClasses = this->convertCompressedBitsStreamWithSwizzledAioApproach(CharClasses, numOfCharacterClasses, "combined");
    367392        } else {
    368             decompressedCombinedStream = this->convertCompressedBitsStream(combinedStream, 1 + numOfCharacterClasses, "combined");
     393            decompressedCharClasses = this->convertCompressedBitsStream(CharClasses, numOfCharacterClasses, "combined");
    369394        }
    370 
    371         LineBreakStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), baseBufferSize);
    372         decompressedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
    373         kernel::Kernel* streamSplitKernel = mPxDriver.addKernelInstance<StreamsSplitKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
    374         mPxDriver.makeKernelCall(streamSplitKernel, {decompressedCombinedStream}, {LineBreakStream, decompressedCharClasses});
     395    } else if (useByteStream){
     396        decompressedCharClasses = this->convertCompressedBitsStreamWithByteStreamAioApproach(CharClasses, numOfCharacterClasses, "combined");
    375397    } else {
    376         auto ret = this->convertCompressedBitsStreamWithBitStreamAioApproach({CharClasses, CompressedLineFeedStream}, "combined");
     398        auto ret = this->convertCompressedBitsStreamWithBitStreamAioApproach({CharClasses}, "combined");
    377399        decompressedCharClasses = ret[0];
    378         LineBreakStream = ret[1];
    379400    }
    380401
     
    383404    mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
    384405
    385     kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[0], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::BigEndian);
     406    StreamSetBuffer * LineBreakStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), this->getInputBufferBlocks(idb));
     407    kernel::Kernel * lineFeedGrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, transformCCs(mpx.get(), linefeedCC), externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::BigEndian, true);
     408    mGrepDriver->makeKernelCall(lineFeedGrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {LineBreakStream});
     409
     410
     411    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[0], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::BigEndian, true);
    386412    mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {MatchResults});
    387413    MatchResultsBufs[0] = MatchResults;
     
    437463        std::vector<StreamSetBuffer *> icgrepInputSets = {decompressedBasisBits};
    438464
    439         std::set<re::Name *> UnicodeProperties;
    440 
    441         StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    442         kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames, std::vector<cc::Alphabet *>(), cc::BitNumbering::BigEndian);
    443         mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
    444         MatchResultsBufs[i] = MatchResults;
     465        if (mEnableMultiplexing) {
     466            const auto UnicodeSets = re::collectCCs(mREs[i], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
     467            StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     468
     469            mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
     470            mREs[i] = transformCCs(mpx.get(), mREs[i]);
     471            std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
     472            auto numOfCharacterClasses = mpx_basis.size();
     473            StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
     474            kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis), false, cc::BitNumbering::BigEndian);
     475            mGrepDriver->makeKernelCall(ccK, {decompressedBasisBits}, {CharClasses});
     476
     477            kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::BigEndian, true);
     478            icgrepInputSets.push_back(CharClasses);
     479            mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
     480            MatchResultsBufs[i] = MatchResults;
     481        } else {
     482            std::set<re::Name *> UnicodeProperties;
     483
     484            StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     485            kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames, std::vector<cc::Alphabet *>(), cc::BitNumbering::BigEndian);
     486            mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
     487            MatchResultsBufs[i] = MatchResults;
     488        }
    445489    }
    446490
     
    559603    mPxDriver.finalizeObject();
    560604}
     605
     606void LZ4GrepGenerator::generateByteStreamMultiplexingAioPipeline(re::RE* regex) {
     607    auto & iBuilder = mPxDriver.getBuilder();
     608    this->generateCountOnlyMainFunc(iBuilder);
     609
     610    this->generateLoadByteStreamAndBitStream(iBuilder);
     611    StreamSetBuffer * LineBreakStream;
     612    StreamSetBuffer * Matches;
     613    std::vector<re::RE*> res = {regex};
     614    std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res, true, false, true);
     615
     616    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
     617    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
     618    mPxDriver.generatePipelineIR();
     619
     620    iBuilder->setKernel(matchCountK);
     621    Value * matchedLineCount = iBuilder->getAccumulator("countResult");
     622    matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
     623
     624    mPxDriver.deallocateBuffers();
     625
     626    iBuilder->CreateRet(matchedLineCount);
     627
     628    mPxDriver.finalizeObject();
     629}
     630
    561631
    562632void LZ4GrepGenerator::generateMultiplexingBitStreamAioPipeline(re::RE* regex) {
     
    696766    mPxDriver.finalizeObject();
    697767}
     768
     769
    698770
    699771void LZ4GrepGenerator::generateAioPipeline(re::RE *regex) {
Note: See TracChangeset for help on using the changeset viewer.