Changeset 6043


Ignore:
Timestamp:
May 16, 2018, 11:55:17 AM (3 months ago)
Author:
xwa163
Message:

Init checkin for lz4_grep count-only pipeline with multiplexing

Location:
icGREP/icgrep-devel/icgrep
Files:
2 added
9 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r6040 r6043  
    102102add_library(UCDlib UCD/CaseFolding.cpp utf8_encoder.cpp utf16_encoder.cpp UCD/ucd_compiler.cpp UCD/PropertyObjects.cpp UCD/resolve_properties.cpp)
    103103add_library(GrepEngine  ${GREP_CORE_SRC} grep/grep_engine.cpp kernels/cc_kernel.cpp kernels/cc_scan_kernel.cpp kernels/charclasses.cpp kernels/streams_merge.cpp kernels/until_n.cpp kernels/UCD_property_kernel.cpp kernels/grapheme_kernel.cpp)
    104 add_library(LZ4_Lib lz4FrameDecoder.cpp kernels/cc_kernel.cpp kernels/lz4/lz4_deposit_uncompressed.cpp kernels/lz4/lz4_generate_deposit_stream.cpp kernels/pdep_kernel.cpp lz4/LZ4Generator.cpp kernels/lz4/lz4_block_decoder.cpp kernels/lz4/lz4_index_builder.cpp lz4/LZ4GrepEngine.cpp kernels/lz4/lz4_swizzled_match_copy_kernel.cpp kernels/bitstream_pdep_kernel.cpp kernels/bitstream_gather_pdep_kernel.cpp kernels/swizzled_multiple_pdep_kernel.cpp kernels/lz4/lz4_bitstream_not_kernel.cpp kernels/lz4/lz4_bitstream_match_copy_kernel.cpp)
     104add_library(LZ4_Lib lz4FrameDecoder.cpp kernels/cc_kernel.cpp kernels/lz4/lz4_deposit_uncompressed.cpp kernels/lz4/lz4_generate_deposit_stream.cpp kernels/pdep_kernel.cpp lz4/LZ4Generator.cpp kernels/lz4/lz4_block_decoder.cpp kernels/lz4/lz4_index_builder.cpp lz4/LZ4GrepEngine.cpp kernels/lz4/lz4_swizzled_match_copy_kernel.cpp kernels/bitstream_pdep_kernel.cpp kernels/bitstream_gather_pdep_kernel.cpp kernels/swizzled_multiple_pdep_kernel.cpp kernels/lz4/lz4_bitstream_not_kernel.cpp kernels/lz4/lz4_bitstream_match_copy_kernel.cpp kernels/lz4/lz4_fake_stream_generating_kernel.cpp)
    105105
    106106
  • icGREP/icgrep-devel/icgrep/kernels/bitstream_gather_pdep_kernel.cpp

    r6040 r6043  
    120120                        {b->getInt32(0), b->getInt32(32), b->getInt32(64), b->getInt32(96)});
    121121
    122                 Value *nullAddress = ConstantVector::getNullValue(addresses->getType());
    123                 for (int i = 0; i < 4; i++) {
    124                     nullAddress = b->CreateInsertElement(nullAddress,
    125                                                          b->CreateMul(b->CreateTrunc(swizzleIndex, b->getInt32Ty()),
    126                                                                       b->getInt32(8)), i);
    127                 };
     122                Value *nullAddress = this->fill_address(b, 32, 4, b->CreateMul(b->CreateTrunc(swizzleIndex, b->getInt32Ty()),
     123                                                                        b->getInt32(8)));
     124
    128125                addresses = b->CreateAdd(addresses, nullAddress);
    129126
     
    201198    }
    202199
     200    llvm::Value* BitStreamGatherPDEPKernel::fill_address(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned fw, unsigned field_count, Value * a) {
     201        Type * singleFieldVecTy = VectorType::get(b->getIntNTy(fw), 1);
     202        Value * aVec = b->CreateBitCast(a, singleFieldVecTy);
     203        return b->CreateShuffleVector(aVec, UndefValue::get(singleFieldVecTy), Constant::getNullValue(VectorType::get(b->getInt32Ty(), field_count)));
     204    }
    203205}
  • icGREP/icgrep-devel/icgrep/kernels/bitstream_gather_pdep_kernel.h

    r6040 r6043  
    1111class BitStreamGatherPDEPKernel final : public MultiBlockKernel {
    1212public:
    13     BitStreamGatherPDEPKernel(const std::unique_ptr<kernel::KernelBuilder> & b, const unsigned numberOfStream = 8, std::string name = "BitStreamPDEPKernel");
     13    BitStreamGatherPDEPKernel(const std::unique_ptr<kernel::KernelBuilder> & b, const unsigned numberOfStream = 8, std::string name = "BitStreamGatherPDEPKernel");
    1414    bool isCachable() const override { return true; }
    1515    bool hasSignature() const override { return false; }
     
    1818private:
    1919    const unsigned mNumberOfStream;
     20
     21    llvm::Value* fill_address(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned fw, unsigned field_count, llvm::Value* a);
    2022};
    2123
  • icGREP/icgrep-devel/icgrep/kernels/bitstream_pdep_kernel.cpp

    r6029 r6043  
    114114
    115115            for (int iStreamIndex = 0; iStreamIndex < mNumberOfStream; iStreamIndex++) {
    116                 // TODO improve the performance of these load instructions by gather
    117116                Value * const swizzleBlock = b->CreateBlockAlignedLoad(b->getInputStreamBlockPtr("source", b->getSize(iStreamIndex), blockOffset));
    118117
  • icGREP/icgrep-devel/icgrep/kernels/bitstream_pdep_kernel.h

    r6029 r6043  
    1717    bool isCachable() const override { return true; }
    1818    bool hasSignature() const override { return false; }
    19 private:
     19
     20protected:
    2021    void generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & b, llvm::Value * const numOfStrides) final;
    2122private:
  • icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp

    r6039 r6043  
    388388}
    389389
     390void LZ4Generator::generateCompressionMarker(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
     391    if (!mCompressionMarker) {
     392        mCompressionMarker = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
     393        Kernel * bitstreamNotK = mPxDriver.addKernelInstance<LZ4BitStreamNotKernel>(iBuilder);
     394        mPxDriver.makeKernelCall(bitstreamNotK, {mDeletionMarker}, {mCompressionMarker});
     395    }
     396}
     397
    390398parabix::StreamSetBuffer* LZ4Generator::generateBitStreamExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    391     StreamSetBuffer * const compressionMarker = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
    392     Kernel * bitstreamNotK = mPxDriver.addKernelInstance<LZ4BitStreamNotKernel>(iBuilder);
    393     mPxDriver.makeKernelCall(bitstreamNotK, {mDeletionMarker}, {compressionMarker});
     399    this->generateCompressionMarker(iBuilder);
    394400
    395401    // Deletion
    396402    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
    397     StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
     403    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(), this->getInputBufferBlocks());
    398404
    399405    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(iBuilder, 64, 8);
    400     mPxDriver.makeKernelCall(delK, {mCompressedBasisBits, compressionMarker}, {deletedBits, deletionCounts});
     406    mPxDriver.makeKernelCall(delK, {mCompressedBasisBits, mCompressionMarker}, {deletedBits, deletionCounts});
    401407
    402408    StreamSetBuffer * compressedBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
  • icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.h

    r6039 r6043  
    4242    virtual parabix::StreamSetBuffer* generateBitStreamExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    4343
     44    void generateCompressionMarker(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
     45
    4446    // BufferSize related Helper Function
    4547    virtual int getInputBufferBlocks();
     
    6264    parabix::StreamSetBuffer * mCompressedByteStream;
    6365    parabix::StreamSetBuffer * mCompressedBasisBits;
    64     parabix::StreamSetBuffer * mDeletionMarker; //TODO rename to ExtarctMarker
     66    parabix::StreamSetBuffer * mDeletionMarker;
     67    parabix::StreamSetBuffer * mCompressionMarker;
    6568    parabix::StreamSetBuffer * mDepositMarker;
    6669    parabix::StreamSetBuffer * mMatchOffsetMarker;
  • icGREP/icgrep-devel/icgrep/lz4/LZ4GrepGenerator.cpp

    r6040 r6043  
    2222#include <kernels/lz4/lz4_bitstream_match_copy_kernel.h>
    2323#include <kernels/lz4/lz4_bitstream_not_kernel.h>
     24#include <kernels/lz4/lz4_fake_stream_generating_kernel.h>
    2425#include <kernels/bitstream_pdep_kernel.h>
    2526#include <kernels/bitstream_gather_pdep_kernel.h>
     
    6061using namespace grep;
    6162
    62 
    63 const unsigned ByteCClimit = 6;
    64 
    65 
    6663LZ4GrepGenerator::LZ4GrepGenerator(bool enableMultiplexing): LZ4Generator(), mEnableMultiplexing(enableMultiplexing) {
    6764    mGrepRecordBreak = grep::GrepRecordBreakKind::LF;
     
    109106}
    110107
     108parabix::StreamSetBuffer * LZ4GrepGenerator::convertCompressedBitsStream(parabix::StreamSetBuffer* compressedBitStream, int numberOfStream, std::string prefix) {
     109    auto mGrepDriver = &mPxDriver;
     110    auto & idb = mGrepDriver->getBuilder();
     111
     112    // Extract (Deletion)
     113    this->generateCompressionMarker(idb);
     114
     115    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks());
     116    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(), this->getInputBufferBlocks());
     117
     118    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(idb, 64, numberOfStream);
     119    mPxDriver.makeKernelCall(delK, {compressedBitStream, mCompressionMarker}, {deletedBits, deletionCounts});
     120
     121    StreamSetBuffer * compressedLineStream = mPxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks());
     122    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(idb, 64, numberOfStream);
     123    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedLineStream});
     124
     125    // Deposit
     126    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks());
     127    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(idb, numberOfStream, prefix + "BitStreamPDEPKernel");
     128    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, compressedLineStream}, {depositedBits});
     129
     130    // Match Copy
     131    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks());
     132    Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(idb, numberOfStream, prefix + "BitStreamMatchCopyKernel");
     133    mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
     134
     135    return matchCopiedBits;
     136}
    111137
    112138parabix::StreamSetBuffer * LZ4GrepGenerator::linefeedStreamFromCompressedBits() {
    113     // TODO for now, swizzled form for <1 * i1> input stream is not well defined, so we can not use this pipeline
    114139    auto mGrepDriver = &mPxDriver;
    115140    const unsigned baseBufferSize = this->getInputBufferBlocks();
     
    119144    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
    120145    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
    121 
    122     // Extract (Deletion)
    123     StreamSetBuffer * deletedLineFeedSwizzled = mPxDriver.addBuffer<CircularCopybackBuffer>(idb, idb->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    124     Kernel * delK = mPxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(idb, 1, 64);
    125     mPxDriver.makeKernelCall(delK, {mDeletionMarker, CompressedLineFeedStream}, {deletedLineFeedSwizzled});
    126 
    127     // TODO incomplete
    128     // Deposit
    129     StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(idb, idb->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
    130 //    Kernel * multiplePdepK = mPxDriver.addKernelInstance<LZ4MultiplePDEPkernel>(idb, 4, 1, 4, 64, "lineFeedMultiplePDEP");
    131 //    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, deletedLineFeedSwizzled}, {depositedSwizzle0});
    132 
    133 
    134     // Match Copy
    135     StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(idb, idb->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
    136     Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(idb, 4, 1, 4, 64, "lineFeedSwizzledMatchCopy");
    137     mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0}, {matchCopiedSwizzle0});
    138 
    139     // Unswizzled
    140     StreamSetBuffer * lineFeedStream = mPxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1), this->getDecompressedBufferBlocks());
    141     Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(idb, 1, 1, 1, 64, "linefeed");
    142     mPxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0}, {lineFeedStream});
    143 
    144     return lineFeedStream;
    145 }
    146 
    147 
    148 std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::multiplexingGrepPipeline(std::vector<re::RE *> &REs, parabix::StreamSetBuffer *matchCopiedBasisBits) {
     146    return this->convertCompressedBitsStream(CompressedLineFeedStream, 1, "LineFeed");
     147}
     148
     149
     150std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::multiplexingGrepPipeline(std::vector<re::RE *> &REs) {
    149151
    150152    this->initREs(REs);
    151153    auto mGrepDriver = &mPxDriver;
    152 
    153154
    154155    auto & idb = mGrepDriver->getBuilder();
    155156    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
    156157    const unsigned baseBufferSize = this->getInputBufferBlocks();
    157     bool MultithreadedSimpleRE = false;
    158     bool PropertyKernels = false;
    159     bool CC_Multiplexing = false;
    160     bool InvertMatchFlag = false;
     158    bool CC_Multiplexing = true;
    161159    int MaxCountFlag = 0;
    162 
    163160
    164161    //  Regular Expression Processing and Analysis Phase
    165162    const auto nREs = mREs.size();
    166     bool hasGCB[nREs];
    167     bool anyGCB = false;
    168 
    169     for(unsigned i = 0; i < nREs; ++i) {
    170         hasGCB[i] = hasGraphemeClusterBoundary(mREs[i]);
    171         anyGCB |= hasGCB[i];
    172     }
     163
    173164    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    174165    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
    175166
    176     re::RE * prefixRE;
    177     re::RE * suffixRE;
    178     // For simple regular expressions with a small number of characters, we
    179     // can bypass transposition and use the Direct CC compiler.
    180 //    bool isSimple = (nREs == 1) && (mGrepRecordBreak != GrepRecordBreakKind::Unicode) && (!anyGCB);
    181     bool isSimple = false;
    182     if (isSimple) {
    183         mREs[0] = toUTF8(mREs[0]);
    184     }
    185     if (isSimple && byteTestsWithinLimit(mREs[0], ByteCClimit)) {
     167//    StreamSetBuffer * LineFeedStream = this->linefeedStreamFromDecompressedBits(matchCopiedBasisBits);
     168    StreamSetBuffer * LineFeedStream = this->linefeedStreamFromCompressedBits();
     169
     170    if (mGrepRecordBreak == GrepRecordBreakKind::LF) {
     171        LineBreakStream = LineFeedStream;
     172    } else if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
     173        // TODO fix here
     174//        kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::ParabixCharacterClassKernelBuilder>(idb, "Null", std::vector<re::CC *>{mBreakCC}, 8);
     175//        mGrepDriver->makeKernelCall(breakK, {matchCopiedBasisBits}, {LineBreakStream});
     176    }
     177
     178    std::map<std::string, StreamSetBuffer *> propertyStream;
     179
     180    for(unsigned i = 0; i < nREs; ++i) {
    186181        std::vector<std::string> externalStreamNames;
    187         std::vector<StreamSetBuffer *> icgrepInputSets = {mCompressedByteStream};
    188         if (MultithreadedSimpleRE && hasTriCCwithinLimit(mREs[0], ByteCClimit, prefixRE, suffixRE)) {
    189             auto CCs = re::collectCCs(prefixRE, &cc::Byte);
    190             for (auto cc : CCs) {
    191                 auto ccName = makeName(cc);
    192                 mREs[0] = re::replaceCC(mREs[0], cc, ccName);
    193                 std::string ccNameStr = ccName->getFullName();
    194                 StreamSetBuffer * ccStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    195                 kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, ccNameStr, std::vector<re::CC *>{cc});
    196                 mGrepDriver->makeKernelCall(ccK, {mCompressedByteStream}, {ccStream});
    197                 externalStreamNames.push_back(ccNameStr);
    198                 icgrepInputSets.push_back(ccStream);
    199             }
    200         }
    201         StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    202         kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ByteGrepKernel>(idb, mREs[0], externalStreamNames);
    203         mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
    204         MatchResultsBufs[0] = MatchResults;
    205         kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, "breakCC", std::vector<re::CC *>{mBreakCC});
    206         mGrepDriver->makeKernelCall(breakK, {mCompressedByteStream}, {LineBreakStream});
    207     } else if (isSimple && hasTriCCwithinLimit(mREs[0], ByteCClimit, prefixRE, suffixRE)) {
    208         std::vector<std::string> externalStreamNames;
    209         std::vector<StreamSetBuffer *> icgrepInputSets = {mCompressedByteStream};
    210         if (MultithreadedSimpleRE) {
    211             auto CCs = re::collectCCs(prefixRE, &cc::Byte);
    212             for (auto cc : CCs) {
    213                 auto ccName = makeName(cc);
    214                 mREs[0] = re::replaceCC(mREs[0], cc, ccName);
    215                 std::string ccNameStr = ccName->getFullName();
    216                 StreamSetBuffer * ccStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    217                 kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, ccNameStr, std::vector<re::CC *>{cc});
    218                 mGrepDriver->makeKernelCall(ccK, {mCompressedByteStream}, {ccStream});
    219                 externalStreamNames.push_back(ccNameStr);
    220                 icgrepInputSets.push_back(ccStream);
    221             }
    222         }
    223         StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    224         kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ByteBitGrepKernel>(idb, prefixRE, suffixRE, externalStreamNames);
    225         mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
    226         MatchResultsBufs[0] = MatchResults;
    227         kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, "breakCC", std::vector<re::CC *>{mBreakCC});
    228         mGrepDriver->makeKernelCall(breakK, {mCompressedByteStream}, {LineBreakStream});
    229     } else {
    230         StreamSetBuffer * RequiredStreams = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    231         StreamSetBuffer * UnicodeLB = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    232 
    233 
    234         StreamSetBuffer * LineFeedStream = this->linefeedStreamFromDecompressedBits(matchCopiedBasisBits);
    235 //        StreamSetBuffer * LineFeedStream = this->linefeedStreamFromCompressedBits();
    236 
    237         kernel::Kernel * requiredStreamsK = mGrepDriver->addKernelInstance<kernel::RequiredStreams_UTF8>(idb);
    238         mGrepDriver->makeKernelCall(requiredStreamsK, {matchCopiedBasisBits, LineFeedStream}, {RequiredStreams, UnicodeLB});
    239 
    240         if (mGrepRecordBreak == GrepRecordBreakKind::LF) {
    241             LineBreakStream = LineFeedStream;
    242         } else if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
    243             kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::ParabixCharacterClassKernelBuilder>(idb, "Null", std::vector<re::CC *>{mBreakCC}, 8);
    244             mGrepDriver->makeKernelCall(breakK, {matchCopiedBasisBits}, {LineBreakStream});
    245         } else {
    246             LineBreakStream = UnicodeLB;
    247         }
    248 
    249         std::map<std::string, StreamSetBuffer *> propertyStream;
    250         if (PropertyKernels) {
    251             for (auto p : mUnicodeProperties) {
    252                 auto name = p->getFullName();
    253                 StreamSetBuffer * s = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    254                 propertyStream.emplace(std::make_pair(name, s));
    255                 kernel::Kernel * propertyK = mGrepDriver->addKernelInstance<kernel::UnicodePropertyKernelBuilder>(idb, p);
    256                 mGrepDriver->makeKernelCall(propertyK, {matchCopiedBasisBits}, {s});
    257             }
    258         }
    259         StreamSetBuffer * GCB_stream = nullptr;
    260         if (anyGCB) {
    261             GCB_stream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    262             kernel::Kernel * gcbK = mGrepDriver->addKernelInstance<kernel::GraphemeClusterBreakKernel>(idb);
    263             mGrepDriver->makeKernelCall(gcbK, {matchCopiedBasisBits, RequiredStreams}, {GCB_stream});
    264         }
    265 
    266         for(unsigned i = 0; i < nREs; ++i) {
    267             std::vector<std::string> externalStreamNames;
    268             std::vector<StreamSetBuffer *> icgrepInputSets = {matchCopiedBasisBits};
    269             if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
    270                 externalStreamNames.push_back("UTF8_LB");
    271                 icgrepInputSets.push_back(LineBreakStream);
    272                 externalStreamNames.push_back("UTF8_nonfinal");
    273                 icgrepInputSets.push_back(RequiredStreams);
    274             }
    275             std::set<re::Name *> UnicodeProperties;
    276             if (PropertyKernels) {
    277                 re::gatherUnicodeProperties(mREs[i], UnicodeProperties);
    278                 for (auto p : UnicodeProperties) {
    279                     auto name = p->getFullName();
    280                     auto f = propertyStream.find(name);
    281                     if (f == propertyStream.end()) report_fatal_error(name + " not found\n");
    282                     externalStreamNames.push_back(name);
    283                     icgrepInputSets.push_back(f->second);
    284                 }
    285             }
    286             if (hasGCB[i]) {
    287                 externalStreamNames.push_back("\\b{g}");
    288                 icgrepInputSets.push_back(GCB_stream);
    289             }
    290             if (CC_Multiplexing) {
    291                 const auto UnicodeSets = re::collectCCs(mREs[i], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
    292                 StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    293                 if (UnicodeSets.size() <= 1) {
    294                     kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames);
    295                     mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
    296                     MatchResultsBufs[i] = MatchResults;
    297                 } else {
    298                     mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
    299                     mREs[i] = transformCCs(mpx.get(), mREs[i]);
    300                     std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
    301                     auto numOfCharacterClasses = mpx_basis.size();
    302                     StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
    303                     kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis));
    304                     mGrepDriver->makeKernelCall(ccK, {matchCopiedBasisBits}, {CharClasses});
    305                     //                kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis), true);
    306                     //                mGrepDriver->makeKernelCall(ccK, {mCompressedByteStream}, {CharClasses});
    307                     kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()});
    308                     icgrepInputSets.push_back(CharClasses);
    309                     mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
    310                     MatchResultsBufs[i] = MatchResults;
    311                 }
    312             } else {
    313                 StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    314                 kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames);
    315                 mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
    316                 MatchResultsBufs[i] = MatchResults;
    317             }
    318         }
     182
     183        std::set<re::Name *> UnicodeProperties;
     184
     185
     186        const auto UnicodeSets = re::collectCCs(mREs[i], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
     187        StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     188
     189        mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
     190        mREs[i] = transformCCs(mpx.get(), mREs[i]);
     191        std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
     192        auto numOfCharacterClasses = mpx_basis.size();
     193        StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
     194        kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis));
     195        mGrepDriver->makeKernelCall(ccK, {mCompressedBasisBits}, {CharClasses}); //TODO get it from compression space
     196
     197        StreamSetBuffer * decompressedCharClasses = this->convertCompressedBitsStream(CharClasses, numOfCharacterClasses, "mpx");
     198        //                kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis), true);
     199        //                mGrepDriver->makeKernelCall(ccK, {mCompressedByteStream}, {CharClasses});
     200
     201        StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks());
     202        Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<LZ4FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
     203        mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
     204
     205        kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()});
     206        mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {MatchResults});
     207        MatchResultsBufs[i] = MatchResults;
     208
    319209    }
    320210
     
    332222        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
    333223    }
    334     if (InvertMatchFlag) {
    335         kernel::Kernel * invertK = mGrepDriver->addKernelInstance<kernel::InvertMatchesKernel>(idb);
    336         StreamSetBuffer * OriginalMatches = Matches;
    337         Matches = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    338         mGrepDriver->makeKernelCall(invertK, {OriginalMatches, LineBreakStream}, {Matches});
    339     }
     224
    340225    if (MaxCountFlag > 0) {
    341226        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
     
    355240    auto mGrepDriver = &mPxDriver;
    356241
    357 
    358242    auto & idb = mGrepDriver->getBuilder();
    359243    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
    360244    const unsigned baseBufferSize = this->getInputBufferBlocks();
    361     bool MultithreadedSimpleRE = false;
    362     bool PropertyKernels = false;
    363     bool CC_Multiplexing = false;
    364     bool InvertMatchFlag = false;
    365245    int MaxCountFlag = 0;
    366 
    367 
    368 
    369246
    370247    //  Regular Expression Processing and Analysis Phase
    371248    const auto nREs = mREs.size();
    372     bool hasGCB[nREs];
    373     bool anyGCB = false;
    374 
    375     for(unsigned i = 0; i < nREs; ++i) {
    376         hasGCB[i] = hasGraphemeClusterBoundary(mREs[i]);
    377         anyGCB |= hasGCB[i];
    378     }
     249
    379250    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    380251    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
    381252
    382     re::RE * prefixRE;
    383     re::RE * suffixRE;
    384     // For simple regular expressions with a small number of characters, we
    385     // can bypass transposition and use the Direct CC compiler.
    386 //    bool isSimple = (nREs == 1) && (mGrepRecordBreak != GrepRecordBreakKind::Unicode) && (!anyGCB);
    387     bool isSimple = false;
    388     if (isSimple) {
    389         mREs[0] = toUTF8(mREs[0]);
    390     }
    391     if (isSimple && byteTestsWithinLimit(mREs[0], ByteCClimit)) {
     253    StreamSetBuffer * RequiredStreams = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     254    StreamSetBuffer * UnicodeLB = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     255
     256    StreamSetBuffer * LineFeedStream = this->linefeedStreamFromDecompressedBits(decompressedBasisBits);
     257
     258    kernel::Kernel * requiredStreamsK = mGrepDriver->addKernelInstance<kernel::RequiredStreams_UTF8>(idb);
     259    mGrepDriver->makeKernelCall(requiredStreamsK, {decompressedBasisBits, LineFeedStream}, {RequiredStreams, UnicodeLB});
     260
     261    if (mGrepRecordBreak == GrepRecordBreakKind::LF) {
     262        LineBreakStream = LineFeedStream;
     263    } else if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
     264        kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::ParabixCharacterClassKernelBuilder>(idb, "Null", std::vector<re::CC *>{mBreakCC}, 8);
     265        mGrepDriver->makeKernelCall(breakK, {decompressedBasisBits}, {LineBreakStream});
     266    } else {
     267        LineBreakStream = UnicodeLB;
     268    }
     269
     270    std::map<std::string, StreamSetBuffer *> propertyStream;
     271
     272    for(unsigned i = 0; i < nREs; ++i) {
    392273        std::vector<std::string> externalStreamNames;
    393         std::vector<StreamSetBuffer *> icgrepInputSets = {mCompressedByteStream};
    394         if (MultithreadedSimpleRE && hasTriCCwithinLimit(mREs[0], ByteCClimit, prefixRE, suffixRE)) {
    395             auto CCs = re::collectCCs(prefixRE, &cc::Byte);
    396             for (auto cc : CCs) {
    397                 auto ccName = makeName(cc);
    398                 mREs[0] = re::replaceCC(mREs[0], cc, ccName);
    399                 std::string ccNameStr = ccName->getFullName();
    400                 StreamSetBuffer * ccStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    401                 kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, ccNameStr, std::vector<re::CC *>{cc});
    402                 mGrepDriver->makeKernelCall(ccK, {mCompressedByteStream}, {ccStream});
    403                 externalStreamNames.push_back(ccNameStr);
    404                 icgrepInputSets.push_back(ccStream);
    405             }
     274        std::vector<StreamSetBuffer *> icgrepInputSets = {decompressedBasisBits};
     275        if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
     276            externalStreamNames.push_back("UTF8_LB");
     277            icgrepInputSets.push_back(LineBreakStream);
     278            externalStreamNames.push_back("UTF8_nonfinal");
     279            icgrepInputSets.push_back(RequiredStreams);
    406280        }
     281        std::set<re::Name *> UnicodeProperties;
     282
    407283        StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    408         kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ByteGrepKernel>(idb, mREs[0], externalStreamNames);
     284        kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames);
    409285        mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
    410         MatchResultsBufs[0] = MatchResults;
    411         kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, "breakCC", std::vector<re::CC *>{mBreakCC});
    412         mGrepDriver->makeKernelCall(breakK, {mCompressedByteStream}, {LineBreakStream});
    413     } else if (isSimple && hasTriCCwithinLimit(mREs[0], ByteCClimit, prefixRE, suffixRE)) {
    414         std::vector<std::string> externalStreamNames;
    415         std::vector<StreamSetBuffer *> icgrepInputSets = {mCompressedByteStream};
    416         if (MultithreadedSimpleRE) {
    417             auto CCs = re::collectCCs(prefixRE, &cc::Byte);
    418             for (auto cc : CCs) {
    419                 auto ccName = makeName(cc);
    420                 mREs[0] = re::replaceCC(mREs[0], cc, ccName);
    421                 std::string ccNameStr = ccName->getFullName();
    422                 StreamSetBuffer * ccStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    423                 kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, ccNameStr, std::vector<re::CC *>{cc});
    424                 mGrepDriver->makeKernelCall(ccK, {mCompressedByteStream}, {ccStream});
    425                 externalStreamNames.push_back(ccNameStr);
    426                 icgrepInputSets.push_back(ccStream);
    427             }
    428         }
    429         StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    430         kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ByteBitGrepKernel>(idb, prefixRE, suffixRE, externalStreamNames);
    431         mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
    432         MatchResultsBufs[0] = MatchResults;
    433         kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, "breakCC", std::vector<re::CC *>{mBreakCC});
    434         mGrepDriver->makeKernelCall(breakK, {mCompressedByteStream}, {LineBreakStream});
    435     } else {
    436         StreamSetBuffer * RequiredStreams = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    437         StreamSetBuffer * UnicodeLB = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    438 
    439         StreamSetBuffer * LineFeedStream = this->linefeedStreamFromDecompressedBits(decompressedBasisBits);
    440 
    441         kernel::Kernel * requiredStreamsK = mGrepDriver->addKernelInstance<kernel::RequiredStreams_UTF8>(idb);
    442         mGrepDriver->makeKernelCall(requiredStreamsK, {decompressedBasisBits, LineFeedStream}, {RequiredStreams, UnicodeLB});
    443 
    444         if (mGrepRecordBreak == GrepRecordBreakKind::LF) {
    445             LineBreakStream = LineFeedStream;
    446         } else if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
    447             kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::ParabixCharacterClassKernelBuilder>(idb, "Null", std::vector<re::CC *>{mBreakCC}, 8);
    448             mGrepDriver->makeKernelCall(breakK, {decompressedBasisBits}, {LineBreakStream});
    449         } else {
    450             LineBreakStream = UnicodeLB;
    451         }
    452 
    453         std::map<std::string, StreamSetBuffer *> propertyStream;
    454         if (PropertyKernels) {
    455             for (auto p : mUnicodeProperties) {
    456                 auto name = p->getFullName();
    457                 StreamSetBuffer * s = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    458                 propertyStream.emplace(std::make_pair(name, s));
    459                 kernel::Kernel * propertyK = mGrepDriver->addKernelInstance<kernel::UnicodePropertyKernelBuilder>(idb, p);
    460                 mGrepDriver->makeKernelCall(propertyK, {decompressedBasisBits}, {s});
    461             }
    462         }
    463         StreamSetBuffer * GCB_stream = nullptr;
    464         if (anyGCB) {
    465             GCB_stream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    466             kernel::Kernel * gcbK = mGrepDriver->addKernelInstance<kernel::GraphemeClusterBreakKernel>(idb);
    467             mGrepDriver->makeKernelCall(gcbK, {decompressedBasisBits, RequiredStreams}, {GCB_stream});
    468         }
    469 
    470         for(unsigned i = 0; i < nREs; ++i) {
    471             std::vector<std::string> externalStreamNames;
    472             std::vector<StreamSetBuffer *> icgrepInputSets = {decompressedBasisBits};
    473             if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
    474                 externalStreamNames.push_back("UTF8_LB");
    475                 icgrepInputSets.push_back(LineBreakStream);
    476                 externalStreamNames.push_back("UTF8_nonfinal");
    477                 icgrepInputSets.push_back(RequiredStreams);
    478             }
    479             std::set<re::Name *> UnicodeProperties;
    480             if (PropertyKernels) {
    481                 re::gatherUnicodeProperties(mREs[i], UnicodeProperties);
    482                 for (auto p : UnicodeProperties) {
    483                     auto name = p->getFullName();
    484                     auto f = propertyStream.find(name);
    485                     if (f == propertyStream.end()) report_fatal_error(name + " not found\n");
    486                     externalStreamNames.push_back(name);
    487                     icgrepInputSets.push_back(f->second);
    488                 }
    489             }
    490             if (hasGCB[i]) {
    491                 externalStreamNames.push_back("\\b{g}");
    492                 icgrepInputSets.push_back(GCB_stream);
    493             }
    494             if (CC_Multiplexing) {
    495                 const auto UnicodeSets = re::collectCCs(mREs[i], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
    496                 StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    497                 if (UnicodeSets.size() <= 1) {
    498                     kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames);
    499                     mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
    500                     MatchResultsBufs[i] = MatchResults;
    501                 } else {
    502                     mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
    503                     mREs[i] = transformCCs(mpx.get(), mREs[i]);
    504                     std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
    505                     auto numOfCharacterClasses = mpx_basis.size();
    506                     StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
    507                     kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis));
    508                     mGrepDriver->makeKernelCall(ccK, {decompressedBasisBits}, {CharClasses});
    509                     //                kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis), true);
    510                     //                mGrepDriver->makeKernelCall(ccK, {mCompressedByteStream}, {CharClasses});
    511                     kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()});
    512                     icgrepInputSets.push_back(CharClasses);
    513                     mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
    514                     MatchResultsBufs[i] = MatchResults;
    515                 }
    516             } else {
    517                 StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    518                 kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames);
    519                 mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
    520                 MatchResultsBufs[i] = MatchResults;
    521             }
    522         }
     286        MatchResultsBufs[i] = MatchResults;
    523287    }
    524288
     
    536300        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
    537301    }
    538     if (InvertMatchFlag) {
    539         kernel::Kernel * invertK = mGrepDriver->addKernelInstance<kernel::InvertMatchesKernel>(idb);
    540         StreamSetBuffer * OriginalMatches = Matches;
    541         Matches = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    542         mGrepDriver->makeKernelCall(invertK, {OriginalMatches, LineBreakStream}, {Matches});
    543     }
     302
    544303    if (MaxCountFlag > 0) {
    545304        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
     
    622381    this->generateExtractAndDepositMarkers(iBuilder);
    623382
    624     StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
    625 
    626     StreamSetBuffer * depositedBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks());
    627     Kernel * bitStreamPDEPk = enableGather ? mPxDriver.addKernelInstance<BitStreamGatherPDEPKernel>(iBuilder, 8) : mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
    628     mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
    629 
    630     StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
    631     Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(iBuilder, 8);
    632     mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
    633 
    634383    StreamSetBuffer * LineBreakStream;
    635384    StreamSetBuffer * Matches;
    636385    std::vector<re::RE*> res = {regex};
    637386    if (mEnableMultiplexing) {
    638         std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res, matchCopiedBits);
     387        std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res);
    639388    } else {
     389        StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
     390        StreamSetBuffer * depositedBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks());
     391        Kernel * bitStreamPDEPk = enableGather ? mPxDriver.addKernelInstance<BitStreamGatherPDEPKernel>(iBuilder, 8) : mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
     392        mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
     393
     394        StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
     395        Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(iBuilder, 8);
     396        mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
     397
    640398        std::tie(LineBreakStream, Matches) = grepPipeline(res, matchCopiedBits);
    641399    };
     
    702460    StreamSetBuffer * Matches;
    703461    std::vector<re::RE*> res = {regex};
    704     if (mEnableMultiplexing) {
    705         std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res, matchCopiedbits);
    706     } else {
     462//    if (mEnableMultiplexing) {
     463//        std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res, matchCopiedbits);
     464//    } else {
    707465        std::tie(LineBreakStream, Matches) = grepPipeline(res, matchCopiedbits);
    708     };
     466//    };
    709467
    710468    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
  • icGREP/icgrep-devel/icgrep/lz4/LZ4GrepGenerator.h

    r6040 r6043  
    2424    std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> grepPipeline(std::vector<re::RE *> &REs,
    2525                                                                                   parabix::StreamSetBuffer *decompressedBasisBits);
    26     std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> multiplexingGrepPipeline(std::vector<re::RE *> &REs,
    27                                                                                    parabix::StreamSetBuffer *matchCopiedBasisBits);
     26    std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> multiplexingGrepPipeline(std::vector<re::RE *> &REs);
    2827
    2928
     
    5756    parabix::StreamSetBuffer * linefeedStreamFromDecompressedBits(parabix::StreamSetBuffer *decompressedBasisBits);
    5857    parabix::StreamSetBuffer * linefeedStreamFromCompressedBits();
     58
     59    parabix::StreamSetBuffer * convertCompressedBitsStream(parabix::StreamSetBuffer* compressedBitStream, int numberOfStream, std::string prefix);
    5960};
    6061
Note: See TracChangeset for help on using the changeset viewer.