Changeset 6026


Ignore:
Timestamp:
May 10, 2018, 2:28:16 PM (3 months ago)
Author:
xwa163
Message:
  1. Implement SwizzledMultiplePDEPkernel with the same logic as the new PDEPkernel, remove LZ4MultiplePDEPkernel, and improve performance
  2. Remove some unnecessary includes
  3. Add a prefix for some kernels
  4. Remove a legacy kernel
Location:
icGREP/icgrep-devel/icgrep
Files:
2 added
4 deleted
15 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r6020 r6026  
    102102add_library(UCDlib UCD/CaseFolding.cpp utf8_encoder.cpp utf16_encoder.cpp UCD/ucd_compiler.cpp UCD/PropertyObjects.cpp UCD/resolve_properties.cpp)
    103103add_library(GrepEngine  ${GREP_CORE_SRC} grep/grep_engine.cpp kernels/cc_kernel.cpp kernels/cc_scan_kernel.cpp kernels/charclasses.cpp kernels/streams_merge.cpp kernels/until_n.cpp kernels/UCD_property_kernel.cpp kernels/grapheme_kernel.cpp)
    104 add_library(LZ4_Lib lz4FrameDecoder.cpp kernels/cc_kernel.cpp kernels/lz4/lz4_deposit_uncompressed.cpp kernels/lz4/lz4_generate_deposit_stream.cpp kernels/pdep_kernel.cpp kernels/lz4/lz4_match_copy_kernel.cpp lz4/LZ4Generator.cpp kernels/lz4/lz4_multiple_pdep_kernel.cpp kernels/lz4/lz4_block_decoder.cpp kernels/lz4/lz4_index_builder.cpp lz4/LZ4GrepEngine.cpp kernels/lz4/lz4_swizzled_match_copy_kernel.cpp)
     104add_library(LZ4_Lib lz4FrameDecoder.cpp kernels/cc_kernel.cpp kernels/lz4/lz4_deposit_uncompressed.cpp kernels/lz4/lz4_generate_deposit_stream.cpp kernels/pdep_kernel.cpp lz4/LZ4Generator.cpp kernels/lz4/lz4_block_decoder.cpp kernels/lz4/lz4_index_builder.cpp lz4/LZ4GrepEngine.cpp kernels/lz4/lz4_swizzled_match_copy_kernel.cpp kernels/swizzled_multiple_pdep_kernel.cpp)
    105105
    106106
     
    126126add_executable(core combine/core.cpp combine/regexGen.cpp combine/stringGen.cpp combine/propGen.cpp combine/icgrep-test/icgrep-test.cpp grep_interface.cpp grep/grep_engine.cpp kernels/scanmatchgen.cpp kernels/u8u32_kernel.cpp kernels/delmask_kernel.cpp kernels/cc_kernel.cpp kernels/cc_scan_kernel.cpp kernels/charclasses.cpp kernels/linebreak_kernel.cpp kernels/streams_merge.cpp kernels/grep_kernel.cpp kernels/until_n.cpp)
    127127add_executable(character_deletion character_deletion.cpp kernels/cc_kernel.cpp)
    128 add_executable(character_deposit character_deposit.cpp kernels/cc_kernel.cpp kernels/pdep_kernel.cpp kernels/lz4/lz4_multiple_pdep_kernel.cpp)
     128add_executable(character_deposit character_deposit.cpp kernels/cc_kernel.cpp kernels/pdep_kernel.cpp)
    129129add_executable(lz4d_ext_dep lz4d_ext_dep.cpp)
    130130add_executable(lz4_grep grep_interface.cpp util/file_select.cpp lz4_grep.cpp lz4/LZ4GrepGenerator.cpp)
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_block_decoder.cpp

    r6020 r6026  
    1515namespace kernel{
    1616
    17 LZ4BlockDecoderNewKernel::LZ4BlockDecoderNewKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder)
    18 : SegmentOrientedKernel("LZ4BlockDecoderNewKernel",
     17LZ4BlockDecoderNewKernel::LZ4BlockDecoderNewKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder, std::string&& kernelName)
     18: SegmentOrientedKernel(std::string(kernelName),
    1919// Inputs
    2020{
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_block_decoder.h

    r5984 r6026  
    2020namespace kernel {
    2121
    22 class LZ4BlockDecoderNewKernel final : public SegmentOrientedKernel {
     22class LZ4BlockDecoderNewKernel : public SegmentOrientedKernel {
    2323public:
    24     LZ4BlockDecoderNewKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder);
     24    LZ4BlockDecoderNewKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder, std::string&& kernelName = "LZ4BlockDecoderKernel");
    2525protected:
    2626    void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & iBuilder) override;
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_swizzled_match_copy_kernel.cpp

    r6020 r6026  
    374374}
    375375
    376 LZ4SwizzledMatchCopyKernel::LZ4SwizzledMatchCopyKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder, unsigned streamCount/*=4*/, unsigned streamSize/*=2*/, unsigned swizzleFactor/*=4*/, unsigned PDEP_width/*64*/)
    377 : SegmentOrientedKernel("LZ4SwizzledMatchCopyKernel",
     376LZ4SwizzledMatchCopyKernel::LZ4SwizzledMatchCopyKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder, unsigned streamCount, unsigned streamSize, unsigned swizzleFactor, unsigned PDEP_width, std::string name)
     377: SegmentOrientedKernel(std::move(name),
    378378// Inputs
    379379{
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_swizzled_match_copy_kernel.h

    r6020 r6026  
    1212    class LZ4SwizzledMatchCopyKernel: public SegmentOrientedKernel {
    1313    public:
    14         LZ4SwizzledMatchCopyKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned streamCount, unsigned streamSize, unsigned swizzleFactor, unsigned PDEP_width = 64);
     14        LZ4SwizzledMatchCopyKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned streamCount, unsigned streamSize, unsigned swizzleFactor, unsigned PDEP_width = 64, std::string name = "LZ4SwizzledMatchCopyKernel");
    1515    protected:
    1616        void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & b) override;
  • icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.cpp

    r6003 r6026  
    150150    kb->SetInsertPoint(s2pDone);
    151151}
    152 
    153 S2PKernel::S2PKernel(const std::unique_ptr<KernelBuilder> & b, bool aligned)
    154 : MultiBlockKernel(aligned ? "s2p" : "s2p_unaligned",
     152S2PKernel::S2PKernel(const std::unique_ptr<KernelBuilder> & b, bool aligned, std::string prefix)
     153: MultiBlockKernel(aligned ? prefix + "s2p" : prefix + "s2p_unaligned",
    155154    {Binding{b->getStreamSetTy(1, 8), "byteStream", FixedRate(), Principal()}},
    156155    {Binding{b->getStreamSetTy(8, 1), "basisBits"}}, {}, {}, {}),
  • icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.h

    r6002 r6026  
    99
    1010#include <pablo/pablo_kernel.h>
     11#include <string>
    1112
    1213namespace IDISA { class IDISA_Builder; }  // lines 14-14
     
    1718class S2PKernel final : public MultiBlockKernel {
    1819public:
    19     S2PKernel(const std::unique_ptr<kernel::KernelBuilder> & b, bool aligned = true);
     20    S2PKernel(const std::unique_ptr<kernel::KernelBuilder> & b, bool aligned = true, std::string prefix = "");
    2021    bool isCachable() const override { return true; }
    2122    bool hasSignature() const override { return false; }
  • icGREP/icgrep-devel/icgrep/kernels/swizzle.cpp

    r5985 r6026  
    66#include "swizzle.h"
    77#include <kernels/kernel_builder.h>
     8#include <string>
    89
    910using namespace llvm;
    1011
    1112namespace kernel {
    12 
    13 SwizzleGenerator::SwizzleGenerator(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned bitStreamCount, unsigned outputSets, unsigned inputSets, unsigned fieldWidth)
    14 : BlockOrientedKernel("swizzle" + std::to_string(fieldWidth) + ":" + std::to_string(bitStreamCount), {}, {}, {}, {}, {})
     13SwizzleGenerator::SwizzleGenerator(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned bitStreamCount, unsigned outputSets, unsigned inputSets, unsigned fieldWidth, std::string prefix)
     14: BlockOrientedKernel(prefix + "swizzle" + std::to_string(fieldWidth) + ":" + std::to_string(bitStreamCount) + "_" + std::to_string(outputSets) + "_" + std::to_string(inputSets) , {}, {}, {}, {}, {})
    1515, mBitStreamCount(bitStreamCount)
    1616, mFieldWidth(fieldWidth)
  • icGREP/icgrep-devel/icgrep/kernels/swizzle.h

    r5594 r6026  
    5252public:
    5353   
    54     SwizzleGenerator(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned bitStreamCount, unsigned outputSets = 1, unsigned inputSets = 1, unsigned fieldWidth = 64);
     54    SwizzleGenerator(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned bitStreamCount, unsigned outputSets = 1, unsigned inputSets = 1, unsigned fieldWidth = 64, std::string prefix = "");
    5555   
    5656protected:
  • icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp

    r6020 r6026  
    55#include <boost/iostreams/device/mapped_file.hpp>
    66
    7 #include <llvm/Support/CommandLine.h>
    8 #include <llvm/Support/PrettyStackTrace.h>
    97
    108#include <cc/cc_compiler.h>
    119
    12 #include <lz4FrameDecoder.h>
    13 #include <kernels/streamset.h>
    1410#include <kernels/cc_kernel.h>
    1511#include <kernels/s2p_kernel.h>
     
    2218#include <kernels/swizzle.h>
    2319#include <kernels/pdep_kernel.h>
    24 #include <kernels/lz4/lz4_multiple_pdep_kernel.h>
    25 #include <kernels/lz4/lz4_match_copy_kernel.h>
     20#include <kernels/swizzled_multiple_pdep_kernel.h>
    2621#include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
    2722#include <kernels/lz4/lz4_block_decoder.h>
     
    3429using namespace kernel;
    3530
    36 LZ4Generator::LZ4Generator():pxDriver("lz4d") {
     31LZ4Generator::LZ4Generator():mPxDriver("lz4d") {
    3732
    3833}
    3934
    4035MainFunctionType LZ4Generator::getMainFunc() {
    41     return reinterpret_cast<MainFunctionType>(pxDriver.getMain());
     36    return reinterpret_cast<MainFunctionType>(mPxDriver.getMain());
    4237}
    4338
     
    4540
    4641void LZ4Generator::generateExtractOnlyPipeline(const std::string& outputFile) {
    47     auto & iBuilder = pxDriver.getBuilder();
     42    auto & iBuilder = mPxDriver.getBuilder();
    4843    this->generateMainFunc(iBuilder);
    4944
    50     StreamSetBuffer * const DecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
     45    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
    5146
    5247    // GeneratePipeline
     
    6156
    6257    // Produce unswizzled bit streams
    63     StreamSetBuffer * extractedbits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
    64     Kernel * unSwizzleK = pxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
    65 
    66     pxDriver.makeKernelCall(unSwizzleK, {swizzle.first, swizzle.second}, {extractedbits});
    67 
    68 
    69     Kernel * p2sK = pxDriver.addKernelInstance<P2SKernel>(iBuilder);
    70     pxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
     58    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
     59    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
     60
     61    mPxDriver.makeKernelCall(unSwizzleK, {swizzle.first, swizzle.second}, {extractedbits});
     62
     63
     64    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
     65    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
    7166
    7267    // --------------------------------------------------------
    7368    // End
    74     Kernel * outK = pxDriver.addKernelInstance<FileSink>(iBuilder, 8);
     69    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
    7570
    7671    outK->setInitialArguments({iBuilder->GetString(outputFile)});
    77     pxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
    78 
    79     pxDriver.generatePipelineIR();
    80     pxDriver.deallocateBuffers();
     72    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
     73
     74    mPxDriver.generatePipelineIR();
     75    mPxDriver.deallocateBuffers();
    8176
    8277    iBuilder->CreateRetVoid();
    8378
    84     pxDriver.finalizeObject();
     79    mPxDriver.finalizeObject();
    8580}
    8681
    8782void LZ4Generator::generateExtractAndDepositOnlyPipeline(const std::string &outputFile) {
    88     auto & iBuilder = pxDriver.getBuilder();
     83    auto & iBuilder = mPxDriver.getBuilder();
    8984    this->generateMainFunc(iBuilder);
    9085
    91     StreamSetBuffer * const DecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
     86    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
    9287
    9388    // GeneratePipeline
     
    9792    auto swizzle = this->generateSwizzleExtractData(iBuilder);
    9893
    99     StreamSetBuffer * depositedSwizzle0 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    100     StreamSetBuffer * depositedSwizzle1 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    101 
    102     Kernel * multiplePdepK = pxDriver.addKernelInstance<LZ4MultiplePDEPkernel>(iBuilder, 4, 2, 4);
    103     pxDriver.makeKernelCall(multiplePdepK, {DepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
     94    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     95    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     96
     97    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
     98    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
    10499
    105100    // Produce unswizzled bit streams
    106     StreamSetBuffer * extractedbits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
    107     Kernel * unSwizzleK = pxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
    108     pxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0, depositedSwizzle1}, {extractedbits});
    109 
    110     Kernel * p2sK = pxDriver.addKernelInstance<P2SKernel>(iBuilder);
    111     pxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
     101    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
     102    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
     103    mPxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0, depositedSwizzle1}, {extractedbits});
     104
     105    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
     106    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
    112107
    113108    // --------------------------------------------------------
    114109    // End
    115     Kernel * outK = pxDriver.addKernelInstance<FileSink>(iBuilder, 8);
     110    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
    116111    outK->setInitialArguments({iBuilder->GetString(outputFile)});
    117     pxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
    118 
    119     pxDriver.generatePipelineIR();
    120     pxDriver.deallocateBuffers();
     112    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
     113
     114    mPxDriver.generatePipelineIR();
     115    mPxDriver.deallocateBuffers();
    121116
    122117    iBuilder->CreateRetVoid();
    123118
    124     pxDriver.finalizeObject();
     119    mPxDriver.finalizeObject();
    125120}
    126121
    127122void LZ4Generator::generatePipeline(const std::string& outputFile) {
    128     auto & iBuilder = pxDriver.getBuilder();
     123    auto & iBuilder = mPxDriver.getBuilder();
    129124    this->generateMainFunc(iBuilder);
    130125
    131     StreamSetBuffer * const DecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
     126    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
    132127
    133128    // GeneratePipeline
     
    137132    auto swizzle = this->generateSwizzleExtractData(iBuilder);
    138133
    139     StreamSetBuffer * depositedSwizzle0 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
    140     StreamSetBuffer * depositedSwizzle1 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
    141 
    142     Kernel * multiplePdepK = pxDriver.addKernelInstance<LZ4MultiplePDEPkernel>(iBuilder, 4, 2, 4);
    143     pxDriver.makeKernelCall(multiplePdepK, {DepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
    144 
    145 
    146     StreamSetBuffer * matchCopiedSwizzle0 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
    147     StreamSetBuffer * matchCopiedSwizzle1 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
    148 
    149     Kernel * swizzledMatchCopyK = pxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
    150     pxDriver.makeKernelCall(swizzledMatchCopyK, {MatchOffsetMarker, M0Marker, ByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
     134    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
     135    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
     136
     137    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
     138    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
     139
     140    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
     141    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
     142
     143    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
     144    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
    151145
    152146
    153147    // Produce unswizzled bit streams
    154     StreamSetBuffer * extractedbits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
    155     Kernel * unSwizzleK = pxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
    156     pxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {extractedbits});
    157 
    158 
    159     Kernel * p2sK = pxDriver.addKernelInstance<P2SKernel>(iBuilder);
    160     pxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
     148    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
     149    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
     150    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {extractedbits});
     151
     152
     153    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
     154    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
    161155
    162156    // --------------------------------------------------------
    163157    // End
    164     Kernel * outK = pxDriver.addKernelInstance<FileSink>(iBuilder, 8);
     158    Kernel * outK = mPxDriver.addKernelInstance<FileSink>(iBuilder, 8);
    165159    outK->setInitialArguments({iBuilder->GetString(outputFile)});
    166     pxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
    167 
    168     pxDriver.generatePipelineIR();
    169     pxDriver.deallocateBuffers();
     160    mPxDriver.makeKernelCall(outK, {DecompressedByteStream}, {});
     161
     162    mPxDriver.generatePipelineIR();
     163    mPxDriver.deallocateBuffers();
    170164
    171165    iBuilder->CreateRetVoid();
    172166
    173     pxDriver.finalizeObject();
     167    mPxDriver.finalizeObject();
    174168}
    175169
     
    184178    main->setCallingConv(CallingConv::C);
    185179    Function::arg_iterator args = main->arg_begin();
    186     inputStream = &*(args++);
    187     inputStream->setName("input");
    188 
    189     headerSize = &*(args++);
    190     headerSize->setName("headerSize");
    191 
    192     fileSize = &*(args++);
    193     fileSize->setName("fileSize");
    194 
    195     hasBlockChecksum = &*(args++);
    196     hasBlockChecksum->setName("hasBlockChecksum");
     180    mInputStream = &*(args++);
     181    mInputStream->setName("input");
     182
     183    mHeaderSize = &*(args++);
     184    mHeaderSize->setName("mHeaderSize");
     185
     186    mFileSize = &*(args++);
     187    mFileSize->setName("mFileSize");
     188
     189    mHasBlockChecksum = &*(args++);
     190    mHasBlockChecksum->setName("mHasBlockChecksum");
    197191    // TODO for now, we do not handle blockCheckSum
    198     hasBlockChecksum = iBuilder->getInt1(false);
     192    mHasBlockChecksum = iBuilder->getInt1(false);
    199193
    200194    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
     
    202196
    203197void LZ4Generator::generateLoadByteStreamAndBitStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    204     ByteStream = pxDriver.addBuffer<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
    205     BasisBits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getInputBufferBlocks());
    206 
    207     kernel::Kernel * sourceK = pxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
    208     sourceK->setInitialArguments({inputStream, fileSize});
    209     pxDriver.makeKernelCall(sourceK, {}, {ByteStream});
    210     Kernel * s2pk = pxDriver.addKernelInstance<S2PKernel>(iBuilder, /*aligned = */ true);
     198    mCompressedByteStream = mPxDriver.addBuffer<SourceBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
     199    mCompressedBasisBits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getInputBufferBlocks());
     200
     201    kernel::Kernel * sourceK = mPxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
     202    sourceK->setInitialArguments({mInputStream, mFileSize});
     203    mPxDriver.makeKernelCall(sourceK, {}, {mCompressedByteStream});
     204    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, /*aligned = */ true);
    211205//    s2pk->addAttribute(MustConsumeAll());
    212     pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
     206    mPxDriver.makeKernelCall(s2pk, {mCompressedByteStream}, {mCompressedBasisBits});
    213207}
    214208
    215209void LZ4Generator::generateExtractAndDepositMarkers(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    216210    //// Decode Block Information
    217     StreamSetBuffer * const BlockData_IsCompressed = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(), 1);
    218     StreamSetBuffer * const BlockData_BlockStart = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
    219     StreamSetBuffer * const BlockData_BlockEnd = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
     211    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(), 1);
     212    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
     213    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
    220214
    221215    //// Generate Helper Markers Extenders, FX, XF
    222     StreamSetBuffer * const Extenders = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(), 1);
    223     MatchOffsetMarker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
     216    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(), 1);
     217    mMatchOffsetMarker = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
    224218        // FX and XF streams will be added to IndexBuilderKernel in the future
    225 //    StreamSetBuffer * const CC_0xFX = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
    226 //    StreamSetBuffer * const CC_0xXF = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
    227 
    228     Kernel * extenderK = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
     219//    StreamSetBuffer * const CC_0xFX = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
     220//    StreamSetBuffer * const CC_0xXF = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
     221
     222    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
    229223//    extenderK->addAttribute(MustConsumeAll());
    230     pxDriver.makeKernelCall(extenderK, {BasisBits}, {Extenders});
    231 
    232 
    233     Kernel * blockDecoderK = pxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
    234     blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(hasBlockChecksum, iBuilder->getInt1Ty()), headerSize, fileSize});
    235     pxDriver.makeKernelCall(blockDecoderK, {ByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
     224    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
     225
     226
     227    Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
     228    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(mHasBlockChecksum, iBuilder->getInt1Ty()), mHeaderSize, mFileSize});
     229    mPxDriver.makeKernelCall(blockDecoderK, {mCompressedByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
    236230
    237231//    re::CC* xfCC = re::makeCC(0x0f);
     
    242236//    }
    243237
    244 //    Kernel * CC_0xFXKernel = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "CC_0xFX", std::vector<re::CC *>{fxCC}, 8);
    245 //    pxDriver.makeKernelCall(CC_0xFXKernel, {BasisBits}, {CC_0xFX});
    246 
    247 //    Kernel * CC_0xXFKernel = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "CC_0xXF", std::vector<re::CC *>{xfCC}, 8);
    248 //    pxDriver.makeKernelCall(CC_0xXFKernel, {BasisBits}, {CC_0xXF});
     238//    Kernel * CC_0xFXKernel = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "CC_0xFX", std::vector<re::CC *>{fxCC}, 8);
     239//    mPxDriver.makeKernelCall(CC_0xFXKernel, {mCompressedBasisBits}, {CC_0xFX});
     240
     241//    Kernel * CC_0xXFKernel = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "CC_0xXF", std::vector<re::CC *>{xfCC}, 8);
     242//    mPxDriver.makeKernelCall(CC_0xXFKernel, {mCompressedBasisBits}, {CC_0xXF});
    249243
    250244    //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
    251245
    252246    //TODO handle uncompressed part
    253     StreamSetBuffer * const UncompressedStartPos = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
    254     StreamSetBuffer * const UncompressedLength = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
    255     StreamSetBuffer * const UncompressedOutputPos = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
    256 
    257     DeletionMarker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
    258     M0Marker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
    259     DepositMarker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
    260 
    261     Kernel* Lz4IndexBuilderK = pxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
    262     Lz4IndexBuilderK->setInitialArguments({fileSize});
    263     pxDriver.makeKernelCall(
     247    StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
     248    StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
     249    StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
     250
     251    mDeletionMarker = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
     252    mM0Marker = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
     253    mDepositMarker = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
     254
     255    Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
     256    Lz4IndexBuilderK->setInitialArguments({mFileSize});
     257    mPxDriver.makeKernelCall(
    264258            Lz4IndexBuilderK,
    265259            {
    266                     ByteStream,
     260                    mCompressedByteStream,
    267261                    Extenders,
    268262//                    CC_0xFX,
     
    279273                    UncompressedOutputPos,
    280274
    281                     DeletionMarker,
    282                     M0Marker,
    283                     MatchOffsetMarker
     275                    mDeletionMarker,
     276                    mM0Marker,
     277                    mMatchOffsetMarker
    284278            });
    285279
    286     Kernel * generateDepositK = pxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
    287     pxDriver.makeKernelCall(generateDepositK, {M0Marker}, {DepositMarker});
     280    Kernel * generateDepositK = mPxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
     281    mPxDriver.makeKernelCall(generateDepositK, {mM0Marker}, {mDepositMarker});
    288282
    289283}
    290284
    291285std::pair<StreamSetBuffer*, StreamSetBuffer*> LZ4Generator::generateSwizzleExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    292     StreamSetBuffer * u16Swizzle0 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    293     StreamSetBuffer * u16Swizzle1 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    294 
    295     Kernel * delK = pxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(iBuilder, 8, 64);
    296     pxDriver.makeKernelCall(delK, {DeletionMarker, BasisBits}, {u16Swizzle0, u16Swizzle1});
     286    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     287    StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     288
     289    Kernel * delK = mPxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(iBuilder, 8, 64);
     290    mPxDriver.makeKernelCall(delK, {mDeletionMarker, mCompressedBasisBits}, {u16Swizzle0, u16Swizzle1});
    297291    return std::make_pair(u16Swizzle0, u16Swizzle1);
    298292}
  • icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.h

    r6020 r6026  
    4646    //// Data Member
    4747    // Driver
    48     ParabixDriver pxDriver;
     48    ParabixDriver mPxDriver;
    4949
    5050    // Runtime Arguments
    51     llvm::Value * inputStream;
    52     llvm::Value * headerSize;
    53     llvm::Value * fileSize;
    54     llvm::Value * hasBlockChecksum;
     51    llvm::Value * mInputStream;
     52    llvm::Value * mHeaderSize;
     53    llvm::Value * mFileSize;
     54    llvm::Value * mHasBlockChecksum;
    5555
    5656
    5757    // StreamSetBuffers
    58     parabix::StreamSetBuffer * ByteStream;
    59     parabix::StreamSetBuffer * BasisBits;
    60     parabix::StreamSetBuffer * DeletionMarker; //TODO rename to ExtarctMarker
    61     parabix::StreamSetBuffer * DepositMarker;
    62     parabix::StreamSetBuffer * MatchOffsetMarker;
     58    parabix::StreamSetBuffer * mCompressedByteStream;
     59    parabix::StreamSetBuffer * mCompressedBasisBits;
     60    parabix::StreamSetBuffer * mDeletionMarker; //TODO rename to ExtarctMarker
     61    parabix::StreamSetBuffer * mDepositMarker;
     62    parabix::StreamSetBuffer * mMatchOffsetMarker;
    6363
    6464    // M0CountMarker will not contain anything; it is only used to pass producedItemCount and manage processedItemCount between different kernels
    65     parabix::StreamSetBuffer * M0Marker;
     65    parabix::StreamSetBuffer * mM0Marker;
    6666};
    6767
  • icGREP/icgrep-devel/icgrep/lz4/LZ4GrepGenerator.cpp

    r6020 r6026  
    22#include "LZ4GrepGenerator.h"
    33
    4 #include <boost/filesystem.hpp>
    54#include <boost/iostreams/device/mapped_file.hpp>
    65
    7 #include <llvm/Support/CommandLine.h>
    86#include <llvm/Support/PrettyStackTrace.h>
    97
    108#include <cc/cc_compiler.h>
    119
    12 #include <lz4FrameDecoder.h>
    13 #include <kernels/streamset.h>
    1410#include <kernels/cc_kernel.h>
    1511#include <kernels/s2p_kernel.h>
     
    2218#include <kernels/swizzle.h>
    2319#include <kernels/pdep_kernel.h>
    24 #include <kernels/lz4/lz4_multiple_pdep_kernel.h>
    25 #include <kernels/lz4/lz4_match_copy_kernel.h>
     20#include <kernels/swizzled_multiple_pdep_kernel.h>
    2621#include <kernels/lz4/lz4_swizzled_match_copy_kernel.h>
    2722#include <re/re_toolchain.h>
     
    3025#include <re/replaceCC.h>
    3126
    32 #include <set>
    33 #include "grep/grep_engine.h"
    34 #include "grep_interface.h"
    35 #include <llvm/IR/Module.h>
    36 #include <boost/filesystem.hpp>
    3727#include <UCD/resolve_properties.h>
    3828#include <kernels/charclasses.h>
    39 #include <kernels/cc_kernel.h>
    4029#include <kernels/grep_kernel.h>
    4130#include <kernels/UCD_property_kernel.h>
     
    4332#include <kernels/linebreak_kernel.h>
    4433#include <kernels/streams_merge.h>
    45 #include <kernels/source_kernel.h>
    46 #include <kernels/s2p_kernel.h>
    4734#include <kernels/scanmatchgen.h>
    48 #include <kernels/streamset.h>
    4935#include <kernels/until_n.h>
    50 #include <kernels/kernel_builder.h>
    51 #include <pablo/pablo_kernel.h>
    52 #include <re/re_cc.h>
    53 #include <re/re_name.h>
    5436#include <re/casing.h>
    5537#include <re/exclude_CC.h>
    5638#include <re/to_utf8.h>
    57 #include <re/re_toolchain.h>
    58 #include <toolchain/toolchain.h>
    5939#include <re/re_analysis.h>
    6040#include <re/re_name_resolve.h>
     
    6444#include <re/grapheme_clusters.h>
    6545#include <re/printer_re.h>
    66 #include <toolchain/toolchain.h>
    67 #include <toolchain/cpudriver.h>
    68 #include <iostream>
    69 #include <cc/multiplex_CCs.h>
    7046#include <llvm/Support/raw_ostream.h>
    71 #include <util/aligned_allocator.h>
    72 #include <sys/stat.h>
    73 #include <fcntl.h>
    74 #include <errno.h>
    75 #include <llvm/ADT/STLExtras.h> // for make_unique
    76 #include <llvm/Support/CommandLine.h>
    7747#include <llvm/Support/Debug.h>
    78 #include <sched.h>
    79 #include <cstdio>
    80 #include <cc/multiplex_CCs.h>
    8148
    8249
     
    9259
    9360
    94 LZ4GrepGenerator::LZ4GrepGenerator(): LZ4Generator() {
     61LZ4GrepGenerator::LZ4GrepGenerator(bool enableMultiplexing): LZ4Generator(), mEnableMultiplexing(enableMultiplexing) {
    9562    mGrepRecordBreak = grep::GrepRecordBreakKind::LF;
    9663    mMoveMatchesToEOL = true;
     
    12794
    12895
    129 
    130 std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::grepPipeline(
    131         std::vector<re::RE *> &REs, parabix::StreamSetBuffer *BasisBits) {
     96parabix::StreamSetBuffer * LZ4GrepGenerator::linefeedStreamFromDecompressedBits(parabix::StreamSetBuffer *decompressedBasisBits) {
     97//    auto mGrepDriver = &mPxDriver;
     98    const unsigned baseBufferSize = this->getInputBufferBlocks();
     99    auto & idb = mPxDriver.getBuilder();
     100    StreamSetBuffer * LineFeedStream = mPxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     101    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
     102    mPxDriver.makeKernelCall(linefeedK, {decompressedBasisBits}, {LineFeedStream});
     103    return LineFeedStream;
     104}
     105
     106
     107parabix::StreamSetBuffer * LZ4GrepGenerator::linefeedStreamFromCompressedBits() {
     108    // TODO: for now, the swizzled form of a <1 * i1> input stream is not well defined, so we cannot use this pipeline
     109    auto mGrepDriver = &mPxDriver;
     110    const unsigned baseBufferSize = this->getInputBufferBlocks();
     111    auto & idb = mGrepDriver->getBuilder();
     112
     113    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     114    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
     115    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
     116
     117    // Extract (Deletion)
     118    StreamSetBuffer * deletedLineFeedSwizzled = mPxDriver.addBuffer<CircularCopybackBuffer>(idb, idb->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     119    Kernel * delK = mPxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(idb, 1, 64);
     120    mPxDriver.makeKernelCall(delK, {mDeletionMarker, CompressedLineFeedStream}, {deletedLineFeedSwizzled});
     121
     122    // TODO incomplete
     123    // Deposit
     124    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(idb, idb->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
     125//    Kernel * multiplePdepK = mPxDriver.addKernelInstance<LZ4MultiplePDEPkernel>(idb, 4, 1, 4, 64, "lineFeedMultiplePDEP");
     126//    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, deletedLineFeedSwizzled}, {depositedSwizzle0});
     127
     128
     129    // Match Copy
     130    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(idb, idb->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
     131    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(idb, 4, 1, 4, 64, "lineFeedSwizzledMatchCopy");
     132    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0}, {matchCopiedSwizzle0});
     133
     134    // Unswizzled
     135    StreamSetBuffer * lineFeedStream = mPxDriver.addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1), this->getDecompressedBufferBlocks());
     136    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(idb, 1, 1, 1, 64, "linefeed");
     137    mPxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0}, {lineFeedStream});
     138
     139    return lineFeedStream;
     140}
     141
     142
     143std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::multiplexingGrepPipeline(std::vector<re::RE *> &REs, parabix::StreamSetBuffer *matchCopiedBasisBits) {
    132144
    133145    this->initREs(REs);
    134     auto mGrepDriver = &pxDriver;
     146    auto mGrepDriver = &mPxDriver;
    135147
    136148
     
    145157
    146158
    147 
    148 
    149159    //  Regular Expression Processing and Analysis Phase
    150160    const auto nREs = mREs.size();
     
    170180    if (isSimple && byteTestsWithinLimit(mREs[0], ByteCClimit)) {
    171181        std::vector<std::string> externalStreamNames;
    172         std::vector<StreamSetBuffer *> icgrepInputSets = {ByteStream};
     182        std::vector<StreamSetBuffer *> icgrepInputSets = {mCompressedByteStream};
    173183        if (MultithreadedSimpleRE && hasTriCCwithinLimit(mREs[0], ByteCClimit, prefixRE, suffixRE)) {
    174184            auto CCs = re::collectCCs(prefixRE, &cc::Byte);
     
    179189                StreamSetBuffer * ccStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    180190                kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, ccNameStr, std::vector<re::CC *>{cc});
    181                 mGrepDriver->makeKernelCall(ccK, {ByteStream}, {ccStream});
     191                mGrepDriver->makeKernelCall(ccK, {mCompressedByteStream}, {ccStream});
    182192                externalStreamNames.push_back(ccNameStr);
    183193                icgrepInputSets.push_back(ccStream);
     
    189199        MatchResultsBufs[0] = MatchResults;
    190200        kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, "breakCC", std::vector<re::CC *>{mBreakCC});
    191         mGrepDriver->makeKernelCall(breakK, {ByteStream}, {LineBreakStream});
     201        mGrepDriver->makeKernelCall(breakK, {mCompressedByteStream}, {LineBreakStream});
    192202    } else if (isSimple && hasTriCCwithinLimit(mREs[0], ByteCClimit, prefixRE, suffixRE)) {
    193203        std::vector<std::string> externalStreamNames;
    194         std::vector<StreamSetBuffer *> icgrepInputSets = {ByteStream};
     204        std::vector<StreamSetBuffer *> icgrepInputSets = {mCompressedByteStream};
    195205        if (MultithreadedSimpleRE) {
    196206            auto CCs = re::collectCCs(prefixRE, &cc::Byte);
     
    201211                StreamSetBuffer * ccStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    202212                kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, ccNameStr, std::vector<re::CC *>{cc});
    203                 mGrepDriver->makeKernelCall(ccK, {ByteStream}, {ccStream});
     213                mGrepDriver->makeKernelCall(ccK, {mCompressedByteStream}, {ccStream});
    204214                externalStreamNames.push_back(ccNameStr);
    205215                icgrepInputSets.push_back(ccStream);
     
    211221        MatchResultsBufs[0] = MatchResults;
    212222        kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, "breakCC", std::vector<re::CC *>{mBreakCC});
    213         mGrepDriver->makeKernelCall(breakK, {ByteStream}, {LineBreakStream});
     223        mGrepDriver->makeKernelCall(breakK, {mCompressedByteStream}, {LineBreakStream});
    214224    } else {
    215225        StreamSetBuffer * RequiredStreams = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    216226        StreamSetBuffer * UnicodeLB = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    217227
    218         StreamSetBuffer * LineFeedStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    219         kernel::Kernel * linefeedK = mGrepDriver->addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
    220         mGrepDriver->makeKernelCall(linefeedK, {BasisBits}, {LineFeedStream});
     228
     229        StreamSetBuffer * LineFeedStream = this->linefeedStreamFromDecompressedBits(matchCopiedBasisBits);
     230//        StreamSetBuffer * LineFeedStream = this->linefeedStreamFromCompressedBits();
    221231
    222232        kernel::Kernel * requiredStreamsK = mGrepDriver->addKernelInstance<kernel::RequiredStreams_UTF8>(idb);
    223         mGrepDriver->makeKernelCall(requiredStreamsK, {BasisBits, LineFeedStream}, {RequiredStreams, UnicodeLB});
     233        mGrepDriver->makeKernelCall(requiredStreamsK, {matchCopiedBasisBits, LineFeedStream}, {RequiredStreams, UnicodeLB});
    224234
    225235        if (mGrepRecordBreak == GrepRecordBreakKind::LF) {
     
    227237        } else if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
    228238            kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::ParabixCharacterClassKernelBuilder>(idb, "Null", std::vector<re::CC *>{mBreakCC}, 8);
    229             mGrepDriver->makeKernelCall(breakK, {BasisBits}, {LineBreakStream});
     239            mGrepDriver->makeKernelCall(breakK, {matchCopiedBasisBits}, {LineBreakStream});
    230240        } else {
    231241            LineBreakStream = UnicodeLB;
     
    239249                propertyStream.emplace(std::make_pair(name, s));
    240250                kernel::Kernel * propertyK = mGrepDriver->addKernelInstance<kernel::UnicodePropertyKernelBuilder>(idb, p);
    241                 mGrepDriver->makeKernelCall(propertyK, {BasisBits}, {s});
     251                mGrepDriver->makeKernelCall(propertyK, {matchCopiedBasisBits}, {s});
    242252            }
    243253        }
     
    246256            GCB_stream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    247257            kernel::Kernel * gcbK = mGrepDriver->addKernelInstance<kernel::GraphemeClusterBreakKernel>(idb);
    248             mGrepDriver->makeKernelCall(gcbK, {BasisBits, RequiredStreams}, {GCB_stream});
     258            mGrepDriver->makeKernelCall(gcbK, {matchCopiedBasisBits, RequiredStreams}, {GCB_stream});
    249259        }
    250260
    251261        for(unsigned i = 0; i < nREs; ++i) {
    252262            std::vector<std::string> externalStreamNames;
    253             std::vector<StreamSetBuffer *> icgrepInputSets = {BasisBits};
     263            std::vector<StreamSetBuffer *> icgrepInputSets = {matchCopiedBasisBits};
    254264            if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
    255265                externalStreamNames.push_back("UTF8_LB");
     
    287297                    StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
    288298                    kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis));
    289                     mGrepDriver->makeKernelCall(ccK, {BasisBits}, {CharClasses});
     299                    mGrepDriver->makeKernelCall(ccK, {matchCopiedBasisBits}, {CharClasses});
    290300                    //                kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis), true);
    291                     //                mGrepDriver->makeKernelCall(ccK, {ByteStream}, {CharClasses});
     301                    //                mGrepDriver->makeKernelCall(ccK, {mCompressedByteStream}, {CharClasses});
    292302                    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()});
    293303                    icgrepInputSets.push_back(CharClasses);
     
    332342
    333343    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
     344};
     345
     346std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::grepPipeline(
     347        std::vector<re::RE *> &REs, parabix::StreamSetBuffer *decompressedBasisBits) {
     348
     349    this->initREs(REs);
     350    auto mGrepDriver = &mPxDriver;
     351
     352
     353    auto & idb = mGrepDriver->getBuilder();
     354    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
     355    const unsigned baseBufferSize = this->getInputBufferBlocks();
     356    bool MultithreadedSimpleRE = false;
     357    bool PropertyKernels = false;
     358    bool CC_Multiplexing = false;
     359    bool InvertMatchFlag = false;
     360    int MaxCountFlag = 0;
     361
     362
     363
     364
     365    //  Regular Expression Processing and Analysis Phase
     366    const auto nREs = mREs.size();
     367    bool hasGCB[nREs];
     368    bool anyGCB = false;
     369
     370    for(unsigned i = 0; i < nREs; ++i) {
     371        hasGCB[i] = hasGraphemeClusterBoundary(mREs[i]);
     372        anyGCB |= hasGCB[i];
     373    }
     374    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     375    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
     376
     377    re::RE * prefixRE;
     378    re::RE * suffixRE;
     379    // For simple regular expressions with a small number of characters, we
     380    // can bypass transposition and use the Direct CC compiler.
     381//    bool isSimple = (nREs == 1) && (mGrepRecordBreak != GrepRecordBreakKind::Unicode) && (!anyGCB);
     382    bool isSimple = false;
     383    if (isSimple) {
     384        mREs[0] = toUTF8(mREs[0]);
     385    }
     386    if (isSimple && byteTestsWithinLimit(mREs[0], ByteCClimit)) {
     387        std::vector<std::string> externalStreamNames;
     388        std::vector<StreamSetBuffer *> icgrepInputSets = {mCompressedByteStream};
     389        if (MultithreadedSimpleRE && hasTriCCwithinLimit(mREs[0], ByteCClimit, prefixRE, suffixRE)) {
     390            auto CCs = re::collectCCs(prefixRE, &cc::Byte);
     391            for (auto cc : CCs) {
     392                auto ccName = makeName(cc);
     393                mREs[0] = re::replaceCC(mREs[0], cc, ccName);
     394                std::string ccNameStr = ccName->getFullName();
     395                StreamSetBuffer * ccStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     396                kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, ccNameStr, std::vector<re::CC *>{cc});
     397                mGrepDriver->makeKernelCall(ccK, {mCompressedByteStream}, {ccStream});
     398                externalStreamNames.push_back(ccNameStr);
     399                icgrepInputSets.push_back(ccStream);
     400            }
     401        }
     402        StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     403        kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ByteGrepKernel>(idb, mREs[0], externalStreamNames);
     404        mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
     405        MatchResultsBufs[0] = MatchResults;
     406        kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, "breakCC", std::vector<re::CC *>{mBreakCC});
     407        mGrepDriver->makeKernelCall(breakK, {mCompressedByteStream}, {LineBreakStream});
     408    } else if (isSimple && hasTriCCwithinLimit(mREs[0], ByteCClimit, prefixRE, suffixRE)) {
     409        std::vector<std::string> externalStreamNames;
     410        std::vector<StreamSetBuffer *> icgrepInputSets = {mCompressedByteStream};
     411        if (MultithreadedSimpleRE) {
     412            auto CCs = re::collectCCs(prefixRE, &cc::Byte);
     413            for (auto cc : CCs) {
     414                auto ccName = makeName(cc);
     415                mREs[0] = re::replaceCC(mREs[0], cc, ccName);
     416                std::string ccNameStr = ccName->getFullName();
     417                StreamSetBuffer * ccStream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     418                kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, ccNameStr, std::vector<re::CC *>{cc});
     419                mGrepDriver->makeKernelCall(ccK, {mCompressedByteStream}, {ccStream});
     420                externalStreamNames.push_back(ccNameStr);
     421                icgrepInputSets.push_back(ccStream);
     422            }
     423        }
     424        StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     425        kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ByteBitGrepKernel>(idb, prefixRE, suffixRE, externalStreamNames);
     426        mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
     427        MatchResultsBufs[0] = MatchResults;
     428        kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, "breakCC", std::vector<re::CC *>{mBreakCC});
     429        mGrepDriver->makeKernelCall(breakK, {mCompressedByteStream}, {LineBreakStream});
     430    } else {
     431        StreamSetBuffer * RequiredStreams = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     432        StreamSetBuffer * UnicodeLB = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     433
     434        StreamSetBuffer * LineFeedStream = this->linefeedStreamFromDecompressedBits(decompressedBasisBits);
     435
     436        kernel::Kernel * requiredStreamsK = mGrepDriver->addKernelInstance<kernel::RequiredStreams_UTF8>(idb);
     437        mGrepDriver->makeKernelCall(requiredStreamsK, {decompressedBasisBits, LineFeedStream}, {RequiredStreams, UnicodeLB});
     438
     439        if (mGrepRecordBreak == GrepRecordBreakKind::LF) {
     440            LineBreakStream = LineFeedStream;
     441        } else if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
     442            kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::ParabixCharacterClassKernelBuilder>(idb, "Null", std::vector<re::CC *>{mBreakCC}, 8);
     443            mGrepDriver->makeKernelCall(breakK, {decompressedBasisBits}, {LineBreakStream});
     444        } else {
     445            LineBreakStream = UnicodeLB;
     446        }
     447
     448        std::map<std::string, StreamSetBuffer *> propertyStream;
     449        if (PropertyKernels) {
     450            for (auto p : mUnicodeProperties) {
     451                auto name = p->getFullName();
     452                StreamSetBuffer * s = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     453                propertyStream.emplace(std::make_pair(name, s));
     454                kernel::Kernel * propertyK = mGrepDriver->addKernelInstance<kernel::UnicodePropertyKernelBuilder>(idb, p);
     455                mGrepDriver->makeKernelCall(propertyK, {decompressedBasisBits}, {s});
     456            }
     457        }
     458        StreamSetBuffer * GCB_stream = nullptr;
     459        if (anyGCB) {
     460            GCB_stream = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     461            kernel::Kernel * gcbK = mGrepDriver->addKernelInstance<kernel::GraphemeClusterBreakKernel>(idb);
     462            mGrepDriver->makeKernelCall(gcbK, {decompressedBasisBits, RequiredStreams}, {GCB_stream});
     463        }
     464
     465        for(unsigned i = 0; i < nREs; ++i) {
     466            std::vector<std::string> externalStreamNames;
     467            std::vector<StreamSetBuffer *> icgrepInputSets = {decompressedBasisBits};
     468            if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
     469                externalStreamNames.push_back("UTF8_LB");
     470                icgrepInputSets.push_back(LineBreakStream);
     471                externalStreamNames.push_back("UTF8_nonfinal");
     472                icgrepInputSets.push_back(RequiredStreams);
     473            }
     474            std::set<re::Name *> UnicodeProperties;
     475            if (PropertyKernels) {
     476                re::gatherUnicodeProperties(mREs[i], UnicodeProperties);
     477                for (auto p : UnicodeProperties) {
     478                    auto name = p->getFullName();
     479                    auto f = propertyStream.find(name);
     480                    if (f == propertyStream.end()) report_fatal_error(name + " not found\n");
     481                    externalStreamNames.push_back(name);
     482                    icgrepInputSets.push_back(f->second);
     483                }
     484            }
     485            if (hasGCB[i]) {
     486                externalStreamNames.push_back("\\b{g}");
     487                icgrepInputSets.push_back(GCB_stream);
     488            }
     489            if (CC_Multiplexing) {
     490                const auto UnicodeSets = re::collectCCs(mREs[i], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
     491                StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     492                if (UnicodeSets.size() <= 1) {
     493                    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames);
     494                    mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
     495                    MatchResultsBufs[i] = MatchResults;
     496                } else {
     497                    mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
     498                    mREs[i] = transformCCs(mpx.get(), mREs[i]);
     499                    std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
     500                    auto numOfCharacterClasses = mpx_basis.size();
     501                    StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
     502                    kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis));
     503                    mGrepDriver->makeKernelCall(ccK, {decompressedBasisBits}, {CharClasses});
     504                    //                kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis), true);
     505                    //                mGrepDriver->makeKernelCall(ccK, {mCompressedByteStream}, {CharClasses});
     506                    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()});
     507                    icgrepInputSets.push_back(CharClasses);
     508                    mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
     509                    MatchResultsBufs[i] = MatchResults;
     510                }
     511            } else {
     512                StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     513                kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames);
     514                mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
     515                MatchResultsBufs[i] = MatchResults;
     516            }
     517        }
     518    }
     519
     520    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
     521    if (mREs.size() > 1) {
     522        MergedResults = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     523        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
     524        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
     525    }
     526    StreamSetBuffer * Matches = MergedResults;
     527    if (mMoveMatchesToEOL) {
     528        StreamSetBuffer * OriginalMatches = Matches;
     529        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
     530        Matches = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     531        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
     532    }
     533    if (InvertMatchFlag) {
     534        kernel::Kernel * invertK = mGrepDriver->addKernelInstance<kernel::InvertMatchesKernel>(idb);
     535        StreamSetBuffer * OriginalMatches = Matches;
     536        Matches = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     537        mGrepDriver->makeKernelCall(invertK, {OriginalMatches, LineBreakStream}, {Matches});
     538    }
     539    if (MaxCountFlag > 0) {
     540        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
     541        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
     542        StreamSetBuffer * const AllMatches = Matches;
     543        Matches = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     544        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
     545    }
     546
     547    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
    334548
    335549}
     
    348562
    349563void LZ4GrepGenerator::generateScanMatchGrepPipeline(re::RE* regex) {
    350     auto & iBuilder = pxDriver.getBuilder();
     564    auto & iBuilder = mPxDriver.getBuilder();
    351565    this->generateScanMatchMainFunc(iBuilder);
    352566
    353     StreamSetBuffer * const DecompressedByteStream = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
     567    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
    354568
    355569    // GeneratePipeline
     
    359573    auto swizzle = this->generateSwizzleExtractData(iBuilder);
    360574
    361     //TODO buffer blocks should be decompressedBufferBlocks
    362     StreamSetBuffer * depositedSwizzle0 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    363     StreamSetBuffer * depositedSwizzle1 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    364 
    365     Kernel * multiplePdepK = pxDriver.addKernelInstance<LZ4MultiplePDEPkernel>(iBuilder, 4, 2, 4);
    366     pxDriver.makeKernelCall(multiplePdepK, {DepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
    367 
    368     StreamSetBuffer * matchCopiedSwizzle0 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    369     StreamSetBuffer * matchCopiedSwizzle1 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    370 
    371     Kernel * swizzledMatchCopyK = pxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
    372     pxDriver.makeKernelCall(swizzledMatchCopyK, {MatchOffsetMarker, M0Marker, ByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
     575    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
     576    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
     577
     578    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
     579    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
     580
     581    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     582    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     583
     584    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
     585    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
    373586
    374587
    375588    // Produce unswizzled bit streams
    376     StreamSetBuffer * extractedbits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
    377     Kernel * unSwizzleK = pxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
    378     pxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {extractedbits});
    379 
    380 
    381 
    382     Kernel * p2sK = pxDriver.addKernelInstance<P2SKernel>(iBuilder);
    383     pxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
     589    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
     590    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
     591    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {extractedbits});
     592
     593
     594
     595    Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(iBuilder);
     596    mPxDriver.makeKernelCall(p2sK, {extractedbits}, {DecompressedByteStream});
    384597
    385598    StreamSetBuffer * LineBreakStream;
     
    388601    std::tie(LineBreakStream, Matches) = grepPipeline(res, extractedbits);
    389602
    390     kernel::Kernel * scanMatchK = pxDriver.addKernelInstance<kernel::ScanMatchKernel>(iBuilder);
     603    kernel::Kernel * scanMatchK = mPxDriver.addKernelInstance<kernel::ScanMatchKernel>(iBuilder);
    391604    scanMatchK->setInitialArguments({match_accumulator});
    392     pxDriver.makeKernelCall(scanMatchK, {Matches, LineBreakStream, DecompressedByteStream}, {});
    393     pxDriver.LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
    394     pxDriver.LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
    395 
    396     pxDriver.generatePipelineIR();
    397     pxDriver.deallocateBuffers();
     605    mPxDriver.makeKernelCall(scanMatchK, {Matches, LineBreakStream, DecompressedByteStream}, {});
     606    mPxDriver.LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
     607    mPxDriver.LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
     608
     609    mPxDriver.generatePipelineIR();
     610    mPxDriver.deallocateBuffers();
    398611
    399612    iBuilder->CreateRetVoid();
    400613
    401     pxDriver.finalizeObject();
     614    mPxDriver.finalizeObject();
    402615}
    403616
    404617void LZ4GrepGenerator::generateCountOnlyGrepPipeline(re::RE* regex) {
    405     auto & iBuilder = pxDriver.getBuilder();
     618    auto & iBuilder = mPxDriver.getBuilder();
    406619    this->generateMainFunc(iBuilder);
    407620
     
    413626    auto swizzle = this->generateSwizzleExtractData(iBuilder);
    414627
    415     StreamSetBuffer * depositedSwizzle0 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    416     StreamSetBuffer * depositedSwizzle1 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    417 
    418     Kernel * multiplePdepK = pxDriver.addKernelInstance<LZ4MultiplePDEPkernel>(iBuilder, 4, 2, 4);
    419     pxDriver.makeKernelCall(multiplePdepK, {DepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
    420 
    421 
    422     StreamSetBuffer * matchCopiedSwizzle0 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    423     StreamSetBuffer * matchCopiedSwizzle1 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    424 
    425     Kernel * swizzledMatchCopyK = pxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
    426     pxDriver.makeKernelCall(swizzledMatchCopyK, {MatchOffsetMarker, M0Marker, ByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
     628    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     629    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     630
     631    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
     632    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
     633
     634
     635    // split PDEP into 2 kernel will be a little slower in single thread environment
     636/*
     637    Kernel * pdep1 = mPxDriver.addKernelInstance<PDEPkernel>(iBuilder, 4);
     638    mPxDriver.makeKernelCall(pdep1, {mDepositMarker, swizzle.first}, {depositedSwizzle0});
     639
     640    Kernel * pdep2 = mPxDriver.addKernelInstance<PDEPkernel>(iBuilder, 4);
     641    mPxDriver.makeKernelCall(pdep2, {mDepositMarker, swizzle.second}, {depositedSwizzle1});
     642*/
     643
     644    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     645    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     646
     647    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
     648    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
    427649
    428650    // Produce unswizzled bit streams
    429     StreamSetBuffer * extractedbits = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
    430     Kernel * unSwizzleK = pxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
    431     pxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {extractedbits});
     651    StreamSetBuffer * matchCopiedbits = mPxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
     652    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
     653    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {matchCopiedbits});
    432654
    433655    StreamSetBuffer * LineBreakStream;
    434656    StreamSetBuffer * Matches;
    435657    std::vector<re::RE*> res = {regex};
    436     std::tie(LineBreakStream, Matches) = grepPipeline(res, extractedbits);
    437 
    438 
    439     kernel::Kernel * matchCountK = pxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
    440     pxDriver.makeKernelCall(matchCountK, {Matches}, {});
    441     pxDriver.generatePipelineIR();
     658    if (mEnableMultiplexing) {
     659        std::tie(LineBreakStream, Matches) = multiplexingGrepPipeline(res, matchCopiedbits);
     660    } else {
     661        std::tie(LineBreakStream, Matches) = grepPipeline(res, matchCopiedbits);
     662    };
     663
     664
     665
     666    kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
     667    mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
     668    mPxDriver.generatePipelineIR();
    442669
    443670
     
    447674    iBuilder->CallPrintInt("aaa", matchedLineCount);
    448675
    449     pxDriver.deallocateBuffers();
     676    mPxDriver.deallocateBuffers();
    450677
    451678    // TODO return matchedLineCount
     
    455682    iBuilder->CreateRetVoid();
    456683
    457     pxDriver.finalizeObject();
     684    mPxDriver.finalizeObject();
    458685}
    459686
    460687ScanMatchGrepMainFunctionType LZ4GrepGenerator::getScanMatchGrepMainFunction() {
    461     return reinterpret_cast<ScanMatchGrepMainFunctionType>(pxDriver.getMain());
     688    return reinterpret_cast<ScanMatchGrepMainFunctionType>(mPxDriver.getMain());
    462689}
    463690
     
    473700    main->setCallingConv(CallingConv::C);
    474701    Function::arg_iterator args = main->arg_begin();
    475     inputStream = &*(args++);
    476     inputStream->setName("input");
    477 
    478     headerSize = &*(args++);
    479     headerSize->setName("headerSize");
    480 
    481     fileSize = &*(args++);
    482     fileSize->setName("fileSize");
    483 
    484     hasBlockChecksum = &*(args++);
    485     hasBlockChecksum->setName("hasBlockChecksum");
     702    mInputStream = &*(args++);
     703    mInputStream->setName("input");
     704
     705    mHeaderSize = &*(args++);
     706    mHeaderSize->setName("mHeaderSize");
     707
     708    mFileSize = &*(args++);
     709    mFileSize->setName("mFileSize");
     710
     711    mHasBlockChecksum = &*(args++);
     712    mHasBlockChecksum->setName("mHasBlockChecksum");
    486713
    487714    match_accumulator = &*(args++);
  • icGREP/icgrep-devel/icgrep/lz4/LZ4GrepGenerator.h

    r6020 r6026  
    1717class LZ4GrepGenerator : public LZ4Generator{
    1818public:
    19     LZ4GrepGenerator();
     19    LZ4GrepGenerator(bool enableMultiplexing = false);
    2020    void generateCountOnlyGrepPipeline(re::RE* regex);
    2121    void generateScanMatchGrepPipeline(re::RE* regex);
    2222    std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> grepPipeline(std::vector<re::RE *> &REs,
    23                                                                                    parabix::StreamSetBuffer *ByteStream);
     23                                                                                   parabix::StreamSetBuffer *decompressedBasisBits);
     24    std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> multiplexingGrepPipeline(std::vector<re::RE *> &REs,
     25                                                                                   parabix::StreamSetBuffer *matchCopiedBasisBits);
     26
    2427
    2528    void invokeScanMatchGrep(char* fileBuffer, size_t blockStart, size_t blockEnd, bool hasBlockChecksum);
    2629
    2730private:
     31    bool mEnableMultiplexing;
     32
    2833    grep::GrepRecordBreakKind mGrepRecordBreak;
    2934    void initREs(std::vector<re::RE *> & REs);
     
    4752
    4853    std::unique_ptr<cc::MultiplexedAlphabet> mpx;
     54
     55    parabix::StreamSetBuffer * linefeedStreamFromDecompressedBits(parabix::StreamSetBuffer *decompressedBasisBits);
     56    parabix::StreamSetBuffer * linefeedStreamFromCompressedBits();
    4957};
    5058
  • icGREP/icgrep-devel/icgrep/lz4_grep.cpp

    r6008 r6026  
    2828
    2929
    30 #include <cstdio>
    31 #include <vector>
    32 #include <llvm/Support/CommandLine.h>
    33 #include <llvm/Support/ErrorHandling.h>
    34 #include <llvm/Support/Signals.h>
    35 #include <llvm/Support/raw_ostream.h>
    3630#include <re/re_alt.h>
    37 #include <re/re_seq.h>
    3831#include <re/re_start.h>
    3932#include <re/re_end.h>
    40 #include <re/parsers/parser.h>
    4133#include <re/re_utility.h>
    42 #include <grep/grep_engine.h>
    43 #include <grep_interface.h>
    44 #include <fstream>
    45 #include <string>
    4634#include <re/re_toolchain.h>
    4735#include <pablo/pablo_toolchain.h>
    48 #include <boost/filesystem.hpp>
    49 #include <iostream> // MEEE
    5036
    5137
     
    6046static cl::opt<std::string> inputFile(cl::Positional, cl::desc("<input file>"), cl::Required, cl::cat(lz4GrepFlags));
    6147static cl::opt<bool> countOnly("count-only", cl::desc("Only count the match result"), cl::init(false), cl::cat(lz4GrepFlags));
     48static cl::opt<bool> enableMultiplexing("enable-multiplexing", cl::desc("Enable CC multiplexing."), cl::init(false), cl::cat(lz4GrepFlags));
     49
     50
    6251//static cl::opt<std::string> outputFile(cl::Positional, cl::desc("<output file>"), cl::Required, cl::cat(lz4GrepFlags));
    6352//static cl::opt<bool> overwriteOutput("f", cl::desc("Overwrite existing output file."), cl::init(false), cl::cat(lz4GrepFlags));
     
    9079    char *fileBuffer = const_cast<char *>(mappedFile.data());
    9180    re::RE * re_ast = re::RE_Parser::parse(regexString, re::MULTILINE_MODE_FLAG);
    92     LZ4GrepGenerator g;
     81    LZ4GrepGenerator g(enableMultiplexing);
    9382    if (countOnly) {
    9483        g.generateCountOnlyGrepPipeline(re_ast);
  • icGREP/icgrep-devel/icgrep/lz4d.cpp

    r6008 r6026  
    77
    88#include <llvm/IR/Module.h>
    9 #include <llvm/IR/Function.h>
    109#include <llvm/Linker/Linker.h>
    11 #include <llvm/Support/CommandLine.h>
    1210#include <llvm/Support/PrettyStackTrace.h>
    1311#include <llvm/Support/Signals.h>
     
    3129#include <kernels/kernel_builder.h>
    3230#include <toolchain/cpudriver.h>
    33 #include <llvm/Support/raw_ostream.h>
    34 #include <string>
    3531#include <iostream>
    3632namespace re { class CC; }
Note: See TracChangeset for help on using the changeset viewer.