Ignore:
Timestamp:
Aug 27, 2018, 2:22:39 PM (13 months ago)
Author:
xwa163
Message:
  1. Remove LZParabix-related code
  2. Enable multiplexing for LZ4 ScanMatch pipeline
  3. Some minor bug fixes
File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/lz4/grep/lz4_grep_base_generator.cpp

    r6148 r6150  
    4242#include <llvm/Support/Debug.h>
    4343#include <kernels/lz4/lz4_block_decoder.h>
     44#include <kernels/lz4/lz4_match_detector.h>
    4445
    4546#include <re/re_seq.h>
    4647#include <kernels/kernel_builder.h>
    4748#include <re/re_alt.h>
     49#include <kernels/lz4/decompression/lz4_bytestream_decompression.h>
    4850
    4951namespace re { class CC; }
     
    6567}
    6668
    67 void LZ4GrepBaseGenerator::generateScanMatchGrepPipeline(re::RE* regex) {
    68     auto & iBuilder = mPxDriver.getBuilder();
    69     this->generateScanMatchMainFunc(iBuilder);
    70 
    71     StreamSetBuffer* compressedByteStream = this->loadByteStream();
    72 
    73     StreamSetBuffer * const uncompressedByteStream = this->byteStreamDecompression(compressedByteStream);
    74     StreamSetBuffer * uncompressedBitStream = this->s2p(uncompressedByteStream);
    75 
    76     StreamSetBuffer * LineBreakStream;
    77     StreamSetBuffer * Matches;
    78     std::tie(LineBreakStream, Matches) = grep(regex, uncompressedBitStream);
    79 
    80     kernel::Kernel * scanMatchK = mPxDriver.addKernelInstance<kernel::ScanMatchKernel>(iBuilder);
    81     scanMatchK->setInitialArguments({match_accumulator});
    82     mPxDriver.makeKernelCall(scanMatchK, {Matches, LineBreakStream, uncompressedByteStream}, {});
    83     mPxDriver.LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
    84     mPxDriver.LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
    85 
    86     mPxDriver.generatePipelineIR();
    87     mPxDriver.deallocateBuffers();
    88 
    89     iBuilder->CreateRetVoid();
    90 
    91     mPxDriver.finalizeObject();
     69void LZ4GrepBaseGenerator::generateScanMatchGrepPipeline(re::RE* regex, bool enableMultiplexing, bool utf8CC) {
     70    if (enableMultiplexing) {
     71        this->generateMultiplexingScanMatchGrepPipeline(regex, utf8CC);
     72    } else {
     73        this->generateFullyDecompressionScanMatchGrepPipeline(regex);
     74    }
    9275}
    9376
     
    190173        std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
    191174        auto numOfCharacterClasses = mpx_basis.size();
     175        llvm::errs() << "numOfUnicodeSet:" << UnicodeSets.size() << "\n";
     176
     177        llvm::errs() << "numOfCharacterClasses:" << numOfCharacterClasses << "\n";
    192178        StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize, 1);
    193179
     
    230216        std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
    231217        auto numOfCharacterClasses = mpx_basis.size();
    232 //        llvm::errs() << "numOfCharacterClasses:" << numOfCharacterClasses << "\n";
     218        llvm::errs() << "numOfUnicodeSet:" << UnicodeSets.size() << "\n";
     219        llvm::errs() << "numOfCharacterClasses:" << numOfCharacterClasses << "\n";
    233220        StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize, 1);
    234221
     
    280267
    281268std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepBaseGenerator::grep(
    282         re::RE *RE, parabix::StreamSetBuffer *uncompressedBasisBits, bool ccMultiplexing) {
     269        re::RE *RE, parabix::StreamSetBuffer *byteStream, parabix::StreamSetBuffer *uncompressedBasisBits, bool ccMultiplexing) {
    283270
    284271    this->initREs(RE);
     
    295282    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
    296283
    297     StreamSetBuffer * LineBreakStream = this->linefeedStreamFromUncompressedBits(uncompressedBasisBits);
     284    StreamSetBuffer * LineBreakStream = nullptr;
    298285
    299286
     
    301288
    302289    for(unsigned i = 0; i < nREs; ++i) {
    303         std::vector<std::string> externalStreamNames;
    304         std::vector<StreamSetBuffer *> icgrepInputSets = {uncompressedBasisBits};
     290
    305291
    306292        if (ccMultiplexing) {
     293
     294            if (uncompressedBasisBits == nullptr) {
     295                uncompressedBasisBits = this->s2p(byteStream);
     296            }
     297            this->linefeedStreamFromUncompressedBits(uncompressedBasisBits);
     298            std::vector<std::string> externalStreamNames;
     299            std::vector<StreamSetBuffer *> icgrepInputSets = {uncompressedBasisBits};
     300
    307301            const auto UnicodeSets = re::collectCCs(mRE, &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
    308302            StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize, 1);
     
    321315            MatchResultsBufs[i] = MatchResults;
    322316        } else {
    323             std::set<re::Name *> UnicodeProperties;
    324 
    325 
    326 
    327 
    328             StreamSetBuffer* nonFinalStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize, 1);
    329             kernel::Kernel * nonFinalK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, u8NonFinalRe, externalStreamNames, std::vector<cc::Alphabet *>(), cc::BitNumbering::BigEndian);
    330             mGrepDriver->makeKernelCall(nonFinalK, icgrepInputSets, {nonFinalStream});
    331             icgrepInputSets.push_back(nonFinalStream);
    332             externalStreamNames.push_back("UTF8_nonfinal");
    333 
    334 
    335 
    336 
    337 
    338             StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize, 1);
    339             kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mRE, externalStreamNames, std::vector<cc::Alphabet *>(), cc::BitNumbering::BigEndian);
    340             mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
    341             MatchResultsBufs[i] = MatchResults;
     317
     318            bool anyGCB = hasGraphemeClusterBoundary(mRE);
     319            bool isSimple = (mGrepRecordBreak != GrepRecordBreakKind::Unicode) && (!anyGCB);
     320            if (isSimple) {
     321                mRE = toUTF8(mRE);
     322            }
     323            const unsigned ByteCClimit = 6;
     324
     325            if (byteTestsWithinLimit(mRE, ByteCClimit)) {
     326                LineBreakStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     327                kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, "breakCC", std::vector<re::CC *>{mBreakCC});
     328                mGrepDriver->makeKernelCall(breakK, {byteStream}, {LineBreakStream});
     329
     330                std::vector<std::string> externalStreamNames;
     331                std::vector<StreamSetBuffer *> icgrepInputSets = {byteStream};
     332                StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     333                kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ByteGrepKernel>(idb, mRE, externalStreamNames);
     334                mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
     335                MatchResultsBufs[i] = MatchResults;
     336
     337            } else {
     338
     339                if (uncompressedBasisBits == nullptr) {
     340                    uncompressedBasisBits = this->s2p(byteStream);
     341                }
     342                this->linefeedStreamFromUncompressedBits(uncompressedBasisBits);
     343                std::vector<std::string> externalStreamNames;
     344                std::vector<StreamSetBuffer *> icgrepInputSets = {uncompressedBasisBits};
     345
     346                std::set<re::Name *> UnicodeProperties;
     347                StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize, 1);
     348                kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mRE, externalStreamNames, std::vector<cc::Alphabet *>(), cc::BitNumbering::BigEndian);
     349                mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
     350                MatchResultsBufs[i] = MatchResults;
     351            }
    342352        }
    343353    }
     
    375385
    376386
     387void LZ4GrepBaseGenerator::generateFullyDecompressionScanMatchGrepPipeline(re::RE *regex) {
     388    auto & iBuilder = mPxDriver.getBuilder();
     389    this->generateScanMatchMainFunc(iBuilder);
     390
     391    StreamSetBuffer* compressedByteStream = this->loadByteStream();
     392
     393    StreamSetBuffer * const uncompressedByteStream = this->byteStreamDecompression(compressedByteStream);
     394    StreamSetBuffer * uncompressedBitStream = this->s2p(uncompressedByteStream);
     395
     396    StreamSetBuffer * LineBreakStream;
     397    StreamSetBuffer * Matches;
     398    std::tie(LineBreakStream, Matches) = grep(regex, uncompressedByteStream, uncompressedBitStream);
     399
     400    kernel::Kernel * scanMatchK = mPxDriver.addKernelInstance<kernel::ScanMatchKernel>(iBuilder);
     401    scanMatchK->setInitialArguments({match_accumulator});
     402    mPxDriver.makeKernelCall(scanMatchK, {Matches, LineBreakStream, uncompressedByteStream}, {});
     403    mPxDriver.LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
     404    mPxDriver.LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
     405
     406    mPxDriver.generatePipelineIR();
     407    mPxDriver.deallocateBuffers();
     408
     409    iBuilder->CreateRetVoid();
     410
     411    mPxDriver.finalizeObject();
     412}
     413
     414void LZ4GrepBaseGenerator::generateMultiplexingScanMatchGrepPipeline(re::RE *regex, bool utf8CC) {
     415    auto & iBuilder = mPxDriver.getBuilder();
     416    this->generateScanMatchMainFunc(iBuilder);
     417
     418    StreamSetBuffer *compressedByteStream = nullptr, *compressedBasisBits = nullptr;
     419    std::tie(compressedByteStream, compressedBasisBits) = this->loadByteStreamAndBitStream();
     420
     421    StreamSetBuffer * LineBreakStream;
     422    StreamSetBuffer * Matches;
     423    std::tie(LineBreakStream, Matches) = multiplexingGrep(regex, compressedByteStream, compressedBasisBits, utf8CC);
     424
     425//    Kernel* matchDetector = mPxDriver.addKernelInstance<LZ4MatchDetectorKernel>(iBuilder);
     426//    StreamSetBuffer* hasMatch = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8),
     427//                                                                  this->getDefaultBufferBlocks(), 1);
     428//    mPxDriver.makeKernelCall(matchDetector, {Matches, LineBreakStream}, {hasMatch});
     429
     430
     431    LZ4BlockInfo blockInfo = this->getBlockInfo(compressedByteStream);
     432
     433    StreamSetBuffer *const uncompressedByteStream =
     434            mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8),
     435                                              this->getDefaultBufferBlocks(), 1);
     436    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ByteStreamDecompressionKernel>(iBuilder, false, 4 * 1024 * 1024, true);
     437    lz4AioK->setInitialArguments({mFileSize});
     438    mPxDriver.makeKernelCall(
     439            lz4AioK,
     440            {
     441                    compressedByteStream,
     442
     443                    // Block Data
     444                    blockInfo.isCompress,
     445                    blockInfo.blockStart,
     446                    blockInfo.blockEnd,
     447                    Matches
     448            }, {
     449                    uncompressedByteStream
     450            });
     451
     452
     453    kernel::Kernel * scanMatchK = mPxDriver.addKernelInstance<kernel::ScanMatchKernel>(iBuilder);
     454    scanMatchK->setInitialArguments({match_accumulator});
     455    mPxDriver.makeKernelCall(scanMatchK, {Matches, LineBreakStream, uncompressedByteStream}, {});
     456    mPxDriver.LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
     457    mPxDriver.LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
     458
     459
     460    mPxDriver.generatePipelineIR();
     461
     462    mPxDriver.deallocateBuffers();
     463    iBuilder->CreateRetVoid();
     464    mPxDriver.finalizeObject();
     465}
     466
    377467
    378468void LZ4GrepBaseGenerator::generateMultiplexingCountOnlyGrepPipeline(re::RE *regex, bool utf8CC) {
     
    407497    this->generateCountOnlyMainFunc(iBuilder);
    408498
    409     StreamSetBuffer * const uncompressedBitStream = this->generateUncompressedBitStreams();
     499    StreamSetBuffer * const uncompressedByteStream = this->generateUncompressedByteStream();
     500//    StreamSetBuffer * const uncompressedBitStream = this->generateUncompressedBitStreams();
    410501
    411502    StreamSetBuffer * LineBreakStream;
    412503    StreamSetBuffer * Matches;
    413504
    414     std::tie(LineBreakStream, Matches) = grep(regex, uncompressedBitStream);
     505    std::tie(LineBreakStream, Matches) = grep(regex, uncompressedByteStream, nullptr);
    415506
    416507    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
     
    511602}
    512603
     604
     605
    513606std::vector<parabix::StreamSetBuffer *>
    514607LZ4GrepBaseGenerator::decompressBitStreams(parabix::StreamSetBuffer *compressedByteStream,
Note: See TracChangeset for help on using the changeset viewer.