Changeset 6145


Ignore:
Timestamp:
Aug 11, 2018, 9:19:42 PM (2 months ago)
Author:
xwa163
Message:
  1. LZ4 Grep: complete UTF-8 character classes for the multiplexing pipeline
  2. Implement multiple-stream versions of S2P and P2S
Location:
icGREP/icgrep-devel/icgrep
Files:
10 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/lz4/untwist_kernel.cpp

    r6144 r6145  
    112112        }
    113113    }
    114 
    // Constructor for StreamCompareKernel, a block-oriented kernel with two inputs and no outputs.
    //   b               - kernel builder used to create the stream-set types and the scalar type.
    //   numberOfStream  - count passed to getStreamSetTy for both inputs; also stored in mNumberOfStream.
    // Inputs are "stream1" (fixed rate, principal) and "stream2" (fixed rate).
    // A size-typed scalar field named "pos" is registered on the kernel.
    // NOTE(review): the kernel name string is "UntwistByPEXTKernel" although the class is
    // StreamCompareKernel - looks copy-pasted; confirm it does not clash with another kernel's name.
    115     StreamCompareKernel::StreamCompareKernel(const std::unique_ptr<kernel::KernelBuilder> &b,
    116                                              unsigned int numberOfStream):
    117             BlockOrientedKernel("UntwistByPEXTKernel",
    118                                 {
    119                                         Binding{b->getStreamSetTy(numberOfStream, 1), "stream1", FixedRate(), Principal()},
    120                                         Binding{b->getStreamSetTy(numberOfStream, 1), "stream2", FixedRate()}
    121                                 },
    122                                 {
    123 //                                        Binding{b->getStreamSetTy(numberOfOutputStream, 1), "basisBits"}
    124                                 }, {}, {}, {}),mNumberOfStream(numberOfStream)
    125     {
    126 //        this->setStride(4 * 1024 * 1024);
    127         this->addScalar(b->getSizeTy(), "pos");
    128     }
    129 
    // Emits the per-block body: loads one block from each input, compares them element by element,
    // and conditionally prints diagnostics where they differ. Finally advances the "pos" scalar by
    // the bit-block width.
    // NOTE(review): only stream index 0 of each input is loaded, regardless of mNumberOfStream.
    // NOTE(review): the element count 4 is hard-coded - presumably the block is treated as four
    // lanes; confirm against the bit-block type in use.
    // NOTE(review): both full registers s1 and s2 are printed on every mismatching element, so a
    // block with several differing elements is reported several times.
    130     void StreamCompareKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> &b) {
    131         Value* s1 = b->loadInputStreamBlock("stream1", b->getSize(0));
    132         Value* s2 = b->loadInputStreamBlock("stream2", b->getSize(0));
    133 
    134         for (unsigned i = 0 ; i < 4; i++) {
    135             Value* v1 = b->CreateExtractElement(s1, i);
    136             Value* v2 = b->CreateExtractElement(s2, i);
    137             Value* shouldPrint = b->CreateICmpNE(v1, v2);
    138             b->CallPrintIntCond("---pos", b->getScalarField("pos"), shouldPrint);
    139 
    140 //            b->CallPrintIntCond("s1_available", b->getAvailableItemCount("stream1"), shouldPrint);
    141             b->CallPrintRegisterCond("s1", s1, shouldPrint);
    142             b->CallPrintRegisterCond("s2", s2, shouldPrint);
    143         }
    // Advance the running position by one block's worth of bits.
    144         b->setScalarField("pos", b->CreateAdd(b->getScalarField("pos"), b->getSize(b->getBitBlockWidth())));
    // NOTE(review): the semicolon after the closing brace is redundant.
    145     };
    146114}
  • icGREP/icgrep-devel/icgrep/kernels/lz4/untwist_kernel.h

    r6144 r6145  
    2828        void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & b) override;
    2929    };
    30 
    31 
    // Block-oriented kernel declaration; the constructor takes a builder and a stream count
    // that defaults to 1.
    32     class StreamCompareKernel final : public BlockOrientedKernel{
    33     public:
    34         StreamCompareKernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned numberOfStream = 1);
    35     protected:
    // Stream count supplied at construction; immutable afterwards.
    36         const unsigned mNumberOfStream;
    // Overrides the base-class hook that generates the per-block logic.
    37         void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & b) override;
    38     };
    3930}
    4031
  • icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.cpp

    r6135 r6145  
    6767}
    6868
     69
     // Emits the per-block body: gathers one block from every stream of every input set
     // ("basisBits_0", "basisBits_1", ...) into a flat array of 8 bit blocks, zero-fills the
     // unused tail, runs p2s over the 8 blocks, and stores the 8 resulting packs to "byteStream".
     // NOTE(review): p_bitblock has exactly 8 slots and nothing in this function checks that the
     // sum of mNumsOfStreams is <= 8; a larger total would write past the end of the array.
     // Confirm callers enforce the limit, or add a guard.
     70void P2SMultipleStreamsKernel::generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> &b) {
     71    Value * p_bitblock[8];
     72
     // Flatten the input sets in order: set i contributes mNumsOfStreams[i] consecutive slots.
     73    unsigned iStreamIndex = 0;
     74    for (unsigned i = 0; i < mNumsOfStreams.size(); i++) {
     75        for (unsigned j = 0; j < mNumsOfStreams[i]; j++) {
     76            p_bitblock[iStreamIndex] = b->loadInputStreamBlock("basisBits_" + std::to_string(i), b->getInt32(j));
     77            iStreamIndex++;
     78        }
     79    }
     // Pad the remaining slots with all-zero blocks so p2s always receives 8 inputs.
     80    while (iStreamIndex < 8) {
     81        p_bitblock[iStreamIndex] = ConstantVector::getNullValue(b->getBitBlockType());
     82        iStreamIndex++;
     83    }
     84
     85    Value * s_bytepack[8];
     86    p2s(b, p_bitblock, s_bytepack, mBasisSetNumbering);
     87    for (unsigned j = 0; j < 8; ++j) {
     88        b->storeOutputStreamPack("byteStream", b->getInt32(0), b->getInt32(j), s_bytepack[j]);
     89    }
     90}
     91
     92
    6993inline Value * partial_sum_popcounts(const std::unique_ptr<KernelBuilder> & iBuilder, const unsigned fw, Value * popcounts) {
    7094    Value * summed_counts = popcounts;
     
    192216}
    193217
     // Constructor for P2SMultipleStreamsKernel.
     //   b               - kernel builder used to create stream-set types.
     //   basisNumbering  - bit numbering forwarded to p2s; also used for the kernel-name suffix.
     //   numsOfStreams   - one entry per input stream set, giving that set's stream count.
     // The base class is created with no inputs and one output, "byteStream"; one input binding
     // "basisBits_<i>" per entry of numsOfStreams is then appended to mStreamSetInputs in the body.
     // NOTE(review): the kernel name is "p2sMultipleStreams" + numbering suffix and does not encode
     // numsOfStreams. If kernel names are used as a cache/signature key, two instances with
     // different stream layouts would share a name - confirm.
     218P2SMultipleStreamsKernel::P2SMultipleStreamsKernel(const std::unique_ptr<kernel::KernelBuilder> &b,
     219                                                   cc::BitNumbering basisNumbering,
     220                                                   std::vector<unsigned> numsOfStreams)
     221        : BlockOrientedKernel("p2sMultipleStreams" + cc::numberingSuffix(basisNumbering),
     222                              {/*Binding{b->getStreamSetTy(numOfStreams, 1), "basisBits"}*/},
     223                              {Binding{b->getStreamSetTy(1, 8), "byteStream"}},
     224                              {}, {}, {}),
     225          mBasisSetNumbering(basisNumbering),
     226          mNumsOfStreams(numsOfStreams) {
     227
     228    for (unsigned i = 0; i < numsOfStreams.size(); i++) {
     229        mStreamSetInputs.push_back(Binding{b->getStreamSetTy(numsOfStreams[i], 1), "basisBits_" + std::to_string(i)});
     230    }
     231}
     232
    194233P2SKernelWithCompressedOutput::P2SKernelWithCompressedOutput(const std::unique_ptr<kernel::KernelBuilder> & b, cc::BitNumbering numbering)
    195234: BlockOrientedKernel("p2s_compress" + cc::numberingSuffix(numbering),
     
    219258}
    220259
    221 }
     260
     261}
  • icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.h

    r6135 r6145  
    2222    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & b) override;
    2323    unsigned mNumOfStreams;
     24};
     25
     26
     // Block-oriented kernel taking several input stream sets, described by numsOfStreams.
     // Defaults: little-endian bit numbering and a single input set of 8 streams.
     27class P2SMultipleStreamsKernel final : public BlockOrientedKernel {
     28public:
     29    P2SMultipleStreamsKernel(
     30            const std::unique_ptr<kernel::KernelBuilder> & b,
     31            cc::BitNumbering basisNumbering = cc::BitNumbering::LittleEndian,
     32            std::vector<unsigned> numsOfStreams = std::vector<unsigned>{8}
     33    );
     34private:
     // Bit numbering chosen at construction.
     35    cc::BitNumbering mBasisSetNumbering;
     // Overrides the base-class hook that generates the per-block logic.
     36    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & b) override;
     // Stream count of each input set, in binding order.
     37    std::vector<unsigned> mNumsOfStreams;
    2438};
    2539
  • icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.cpp

    r6135 r6145  
    176176    }
    177177}
    178    
     178
     // Constructor for S2PMultipleStreamsKernel.
     //   b               - kernel builder used to create stream-set types.
     //   basisNumbering  - bit numbering forwarded to s2p; also used for the kernel-name suffix.
     //   aligned         - selects the aligned pack load vs. an explicit 1-byte-aligned load,
     //                     and also selects the kernel-name prefix.
     //   numsOfStreams   - one entry per output stream set, giving that set's stream count.
     // The base class is created with one input "byteStream" (fixed rate, principal) and no
     // outputs; one output binding "basisBits_<i>" per entry is appended to mStreamSetOutputs.
     // NOTE(review): the unaligned name prefix is "s2p_unaligned" rather than a
     // "...MultipleStreams" variant, and neither name encodes numsOfStreams. If names key a
     // kernel cache this may collide with other S2P kernels or other layouts - confirm.
     179S2PMultipleStreamsKernel::S2PMultipleStreamsKernel(
     180        const std::unique_ptr<kernel::KernelBuilder> & b,
     181        cc::BitNumbering basisNumbering,
     182        bool aligned,
     183        std::vector<unsigned> numsOfStreams)
     184: MultiBlockKernel(aligned ? "s2pMultipleStreams" + cc::numberingSuffix(basisNumbering): "s2p_unaligned" + cc::numberingSuffix(basisNumbering),
     185                   {Binding{b->getStreamSetTy(1, 8), "byteStream", FixedRate(), Principal()}},
     186                   {}, {}, {}, {}),
     187  mBasisSetNumbering(basisNumbering),
     188  mAligned(aligned),
     189  mNumsOfStreams(numsOfStreams)
     190{
     191    for (unsigned i = 0; i < numsOfStreams.size(); i++) {
     192        mStreamSetOutputs.push_back(Binding{b->getStreamSetTy(numsOfStreams[i], 1), "basisBits_" + std::to_string(i)});
     193    }
     194}
     195
     // Emits a loop over blocks: each iteration loads 8 packs from "byteStream", runs s2p to get
     // 8 basis-bit blocks, and distributes them in order across the output sets "basisBits_<i>".
     //   kb           - kernel builder used to emit the IR.
     //   numOfBlocks  - loop bound; iteration stops when the next block offset equals it.
     // NOTE(review): the loop body is emitted before the exit test, so it executes at least once
     // and the NE comparison never matches if numOfBlocks is 0 - presumably the framework
     // guarantees a non-zero count; confirm.
     // NOTE(review): basisbits has 8 entries and nothing here checks that the sum of
     // mNumsOfStreams is <= 8; a larger total would read past the end of the array.
     196void S2PMultipleStreamsKernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> &kb,
     197                                                       llvm::Value *const numOfBlocks) {
     198    BasicBlock * entry = kb->GetInsertBlock();
     199    BasicBlock * processBlock = kb->CreateBasicBlock("processBlock");
     200    BasicBlock * s2pDone = kb->CreateBasicBlock("s2pDone");
     201    Constant * const ZERO = kb->getSize(0);
     202
     203    kb->CreateBr(processBlock);
     204
     205    kb->SetInsertPoint(processBlock);
     206    PHINode * blockOffsetPhi = kb->CreatePHI(kb->getSizeTy(), 2); // block offset from the base block, e.g. 0, 1, 2, ...
     207    blockOffsetPhi->addIncoming(ZERO, entry);
     208
     209    Value * bytepack[8];
     210    for (unsigned i = 0; i < 8; i++) {
     211        if (mAligned) {
     212            bytepack[i] = kb->loadInputStreamPack("byteStream", ZERO, kb->getInt32(i), blockOffsetPhi);
     213        } else {
     214            Value * ptr = kb->getInputStreamPackPtr("byteStream", ZERO, kb->getInt32(i), blockOffsetPhi);
     215            // CreateLoad defaults to aligned here, so we need to force the alignment to 1 byte.
     216            bytepack[i] = kb->CreateAlignedLoad(ptr, 1);
     217        }
     218    }
     219    Value * basisbits[8];
     220    s2p(kb, bytepack, basisbits, mBasisSetNumbering);
     // Output set i receives the next mNumsOfStreams[i] consecutive entries of basisbits.
     221    unsigned iStreamIndex = 0;
     222    for (unsigned i = 0; i < mNumsOfStreams.size(); i++) {
     223        for (unsigned j = 0; j < mNumsOfStreams[i]; j++) {
     224            kb->storeOutputStreamBlock("basisBits_" + std::to_string(i), kb->getInt32(j), blockOffsetPhi, basisbits[iStreamIndex]);
     225            iStreamIndex++;
     226        }
     227    }
     228
     // Back edge: feed the incremented offset into the phi and loop until it reaches numOfBlocks.
     229    Value * nextBlk = kb->CreateAdd(blockOffsetPhi, kb->getSize(1));
     230    blockOffsetPhi->addIncoming(nextBlk, processBlock);
     231    Value * moreToDo = kb->CreateICmpNE(nextBlk, numOfBlocks);
     232    kb->CreateCondBr(moreToDo, processBlock, s2pDone);
     233    kb->SetInsertPoint(s2pDone);
     234}
     235
     236
    179237S2P_21Kernel::S2P_21Kernel(const std::unique_ptr<KernelBuilder> & b, cc::BitNumbering numbering)
    180238: MultiBlockKernel("s2p_21" + cc::numberingSuffix(numbering),
  • icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.h

    r6135 r6145  
    2929};
    3030
     // Multi-block kernel with several output stream sets, described by numsOfStreams.
     // Defaults: little-endian bit numbering, aligned loads, and a single output set of 8 streams.
     31class S2PMultipleStreamsKernel final : public MultiBlockKernel {
     32public:
     33    S2PMultipleStreamsKernel(const std::unique_ptr<kernel::KernelBuilder> & b, cc::BitNumbering basisNumbering = cc::BitNumbering::LittleEndian, bool aligned = true, std::vector<unsigned> numsOfStreams = std::vector<unsigned>{8});
     34protected:
     // NOTE(review): the parameter is named numOfStrides here but numOfBlocks in the definition.
     35    void generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & kb, llvm::Value * const numOfStrides) override;
     36private:
     // Bit numbering chosen at construction.
     37    cc::BitNumbering mBasisSetNumbering;
     // Whether input packs are loaded with the aligned load helper.
     38    bool mAligned;
     // Stream count of each output set, in binding order.
     39    std::vector<unsigned> mNumsOfStreams;
     40};
     41
     42
    3143class S2P_21Kernel final : public MultiBlockKernel {
    3244public:
  • icGREP/icgrep-devel/icgrep/lz4/grep/lz4_grep_base_generator.cpp

    r6144 r6145  
    6565}
    6666
    67 
    68 
    6967void LZ4GrepBaseGenerator::generateScanMatchGrepPipeline(re::RE* regex) {
    7068    auto & iBuilder = mPxDriver.getBuilder();
     
    9492}
    9593
    96 
    9794void LZ4GrepBaseGenerator::generateCountOnlyGrepPipeline(re::RE* regex, bool enableMultiplexing, bool utf8CC){
    9895    if (enableMultiplexing) {
     
    10299    }
    103100}
    104 
    105101
    106102void LZ4GrepBaseGenerator::initREs(re::RE * RE) {
     
    132128
    133129}
    134 
    135130
    136131parabix::StreamSetBuffer * LZ4GrepBaseGenerator::linefeedStreamFromUncompressedBits(
     
    144139}
    145140
    146 
    147 
    148 
    149 
    150 
    151141std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepBaseGenerator::multiplexingGrep(
    152142        re::RE *RE,
     
    249239        u8NoFinalStream = fakeStreams[1];
    250240    }
    251 
    252 
    253 
    254 
    255241
    256242    StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize, 1);
     
    508494                                          parabix::StreamSetBuffer *refStream, std::vector<unsigned> numOfStreams) {
    509495
     496    if (!numOfStreams.size()) {
     497        return std::vector<StreamSetBuffer *>();
     498    }
    510499    std::vector<StreamSetBuffer *> outputStreams;
    511500    for (unsigned i = 0; i < numOfStreams.size(); i++) {
  • icGREP/icgrep-devel/icgrep/lz4/grep/lz4_grep_base_generator.h

    r6144 r6145  
    4040    virtual std::vector<parabix::StreamSetBuffer*> decompressBitStreams(parabix::StreamSetBuffer* compressedByteStream, std::vector<parabix::StreamSetBuffer*> compressedBitStreams);
    4141
     42    std::vector<parabix::StreamSetBuffer*> generateFakeStreams(
     43            const std::unique_ptr<kernel::KernelBuilder> & iBuilder,
     44            parabix::StreamSetBuffer* refStream,
     45            std::vector<unsigned> numOfStreams
     46    );
    4247
    4348private:
     
    7984    std::unique_ptr<cc::MultiplexedAlphabet> mpx;
    8085
    81 
    82     std::vector<parabix::StreamSetBuffer*> generateFakeStreams(
    83             const std::unique_ptr<kernel::KernelBuilder> & iBuilder,
    84             parabix::StreamSetBuffer* refStream,
    85             std::vector<unsigned> numOfStreams
    86     );
    87 
    8886};
    8987
  • icGREP/icgrep-devel/icgrep/lz4/grep/lz4_grep_bytestream_generator.cpp

    r6144 r6145  
    11
     2#include <numeric>
    23#include "lz4_grep_bytestream_generator.h"
    34#include <kernels/lz4/twist_kernel.h>
     
    2223LZ4GrepByteStreamGenerator::decompressBitStream(parabix::StreamSetBuffer *compressedByteStream,
    2324                                                parabix::StreamSetBuffer *compressedBitStream) {
    24     return this->convertCompressedBitsStreamWithTwistApproach(compressedByteStream, compressedBitStream, "combined");
     25    return this->decompressBitStreams(compressedByteStream, {compressedBitStream})[0];
    2526}
    2627
    27 
    28 parabix::StreamSetBuffer * LZ4GrepByteStreamGenerator::convertCompressedBitsStreamWithTwistApproach(
    29         parabix::StreamSetBuffer *compressedByteStream,
    30         parabix::StreamSetBuffer *compressedBitStream,
    31         std::string prefix
    32 ) {
    33     auto & b = mPxDriver.getBuilder();
    34 
    35     LZ4BlockInfo blockInfo = this->getBlockInfo(compressedByteStream);
    36 
    37     unsigned numOfStreams = compressedBitStream->getNumOfStreams();
    38 
    39     if (numOfStreams == 1) {
    40 
    41         StreamSetBuffer* uncompressedBitStream = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 1),
    42                                                                                    this->getDefaultBufferBlocks(), 1);
    43         Kernel* lz4I4AioK = mPxDriver.addKernelInstance<LZ4TwistDecompressionKernel>(b, 1);
    44         lz4I4AioK->setInitialArguments({mFileSize});
    45         mPxDriver.makeKernelCall(lz4I4AioK, {
    46                 compressedByteStream,
    47 
    48                 blockInfo.isCompress,
    49                 blockInfo.blockStart,
    50                 blockInfo.blockEnd,
    51 
    52                 compressedBitStream
    53 //                , uncompressedBitStream2
    54         }, {
    55                                          uncompressedBitStream
    56                                  });
    57 
    58 
    59 //        Kernel* streamCmp = mPxDriver.addKernelInstance<StreamCompareKernel>(b, 1);
    60 //        mPxDriver.makeKernelCall(streamCmp, {
    61 //                uncompressedBitStream,
    62 //                uncompressedBitStream2
    63 //        }, {});
    64 
    65         return uncompressedBitStream;
     28unsigned LZ4GrepByteStreamGenerator::calculateTwistWidth(unsigned numOfStreams) {
     29    if (numOfStreams <= 2) {
     30        return numOfStreams;
     31    } else if (numOfStreams <= 4) {
     32        return 4;
     33    } else if (numOfStreams <= 8) {
     34        return 8;
     35    } else {
     36        llvm::report_fatal_error("Twist: Unsupported numOfStreams " + std::to_string(numOfStreams));;
    6637    }
    67     if (numOfStreams <= 2) {
    68         StreamSetBuffer* twistedCharClasses = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 2),
    69                                                                                 this->getDefaultBufferBlocks(), 1);
    70         kernel::Kernel* twistK = mPxDriver.addKernelInstance<kernel::TwistMultipleByPDEPKernel>(b, std::vector<unsigned>{numOfStreams}, 2);
    71         mPxDriver.makeKernelCall(twistK, {compressedBitStream}, {twistedCharClasses});
    72 
    73 
    74         StreamSetBuffer* uncompressedTwistedCharClasses = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 2),
    75                                                                                             this->getDefaultBufferBlocks(), 1);
    76         Kernel* lz4I4AioK = mPxDriver.addKernelInstance<LZ4TwistDecompressionKernel>(b, 2);
    77         lz4I4AioK->setInitialArguments({mFileSize});
    78         mPxDriver.makeKernelCall(lz4I4AioK, {
    79                 compressedByteStream,
    80 
    81                 blockInfo.isCompress,
    82                 blockInfo.blockStart,
    83                 blockInfo.blockEnd,
    84 
    85                 twistedCharClasses
    86         }, {
    87                                          uncompressedTwistedCharClasses
    88                                  });
    89 
    90         StreamSetBuffer* untwistedCharClasses = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(numOfStreams),
    91                                                                                   this->getDefaultBufferBlocks(), 1);
    92         kernel::Kernel* untwistK = mPxDriver.addKernelInstance<kernel::UntwistMultipleByPEXTKernel>(b, std::vector<unsigned>{numOfStreams}, 2);
    93         mPxDriver.makeKernelCall(untwistK, {uncompressedTwistedCharClasses}, {untwistedCharClasses});
    94         return untwistedCharClasses;
    95     }
    96     if (numOfStreams <= 4) {
    97         StreamSetBuffer* twistedCharClasses = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 4),
    98                                                                                 this->getDefaultBufferBlocks(), 1);
    99         kernel::Kernel* twistK = mPxDriver.addKernelInstance<kernel::TwistMultipleByPDEPKernel>(b, std::vector<unsigned>{numOfStreams}, 4);
    100         mPxDriver.makeKernelCall(twistK, {compressedBitStream}, {twistedCharClasses});
    101 
    102 
    103         StreamSetBuffer* uncompressedTwistedCharClasses = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 4),
    104                                                                                             this->getDefaultBufferBlocks(), 1);
    105 
    106         Kernel* lz4I4AioK = mPxDriver.addKernelInstance<LZ4TwistDecompressionKernel>(b, 4);
    107         lz4I4AioK->setInitialArguments({mFileSize});
    108         mPxDriver.makeKernelCall(lz4I4AioK, {
    109                 compressedByteStream,
    110 
    111                 blockInfo.isCompress,
    112                 blockInfo.blockStart,
    113                 blockInfo.blockEnd,
    114 
    115                 twistedCharClasses
    116         }, {
    117                                          uncompressedTwistedCharClasses
    118                                  });
    119 
    120         StreamSetBuffer* untwistedCharClasses = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(numOfStreams),
    121                                                                                   this->getDefaultBufferBlocks(), 1);
    122         kernel::Kernel* untwistK = mPxDriver.addKernelInstance<kernel::UntwistMultipleByPEXTKernel>(b, std::vector<unsigned>{numOfStreams}, 4);
    123         mPxDriver.makeKernelCall(untwistK, {uncompressedTwistedCharClasses}, {untwistedCharClasses});
    124         return untwistedCharClasses;
    125     }
    126 
    127     // <= 8
    128     StreamSetBuffer * const mtxByteStream = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 8),
    129                                                                               this->getDefaultBufferBlocks());
    130     Kernel * p2sK = mPxDriver.addKernelInstance<P2SKernel>(b, cc::BitNumbering::BigEndian, prefix, numOfStreams);
    131     mPxDriver.makeKernelCall(p2sK, {compressedBitStream}, {mtxByteStream});
    132 
    133 
    134     StreamSetBuffer * const decompressionMtxByteStream = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 8),
    135                                                                                            this->getDefaultBufferBlocks(), 1);
    136     Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ByteStreamDecompressionKernel>(b, true);
    137     lz4AioK->setInitialArguments({mFileSize});
    138     mPxDriver.makeKernelCall(
    139             lz4AioK,
    140             {
    141                     compressedByteStream,
    142                     // Block Data
    143                     blockInfo.isCompress,
    144                     blockInfo.blockStart,
    145                     blockInfo.blockEnd,
    146                     mtxByteStream
    147             }, {
    148                     decompressionMtxByteStream
    149             });
    150 
    151     StreamSetBuffer * const uncompressedMtxBitStream = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(numOfStreams),
    152                                                                                          this->getDefaultBufferBlocks());
    153 
    154     Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(b, cc::BitNumbering::BigEndian, true, prefix, numOfStreams);
    155     mPxDriver.makeKernelCall(s2pk, {decompressionMtxByteStream}, {uncompressedMtxBitStream});
    156     return uncompressedMtxBitStream;
    15738}
    15839
     
    16041LZ4GrepByteStreamGenerator::decompressBitStreams(parabix::StreamSetBuffer *compressedByteStream,
    16142                                                 std::vector<parabix::StreamSetBuffer *> compressedBitStreams) {
     43    auto & b = mPxDriver.getBuilder();
     44
     45    std::vector<unsigned> numOfStreams(compressedBitStreams.size());
     46    std::transform(compressedBitStreams.begin(), compressedBitStreams.end(), numOfStreams.begin(), [](StreamSetBuffer* b){return b->getNumOfStreams();});
     47    unsigned totalStreamNum = std::accumulate(numOfStreams.begin(), numOfStreams.end(), 0u);
     48
     49    unsigned twistWidth = this->calculateTwistWidth(totalStreamNum);
     50    StreamSetBuffer* twistedStream = this->twist(b, compressedBitStreams, twistWidth);
     51
     52    LZ4BlockInfo blockInfo = this->getBlockInfo(compressedByteStream);
     53    StreamSetBuffer* uncompressedTwistedStream = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, twistWidth), this->getDefaultBufferBlocks(), 1);
     54    std::vector<StreamSetBuffer*> inputStreams = {
     55            compressedByteStream,
     56
     57            blockInfo.isCompress,
     58            blockInfo.blockStart,
     59            blockInfo.blockEnd,
     60
     61            twistedStream
     62    };
     63    std::vector<StreamSetBuffer*> outputStreams = {
     64            uncompressedTwistedStream
     65    };
     66
     67    if (twistWidth <= 4) {
     68        Kernel* lz4I4AioK = mPxDriver.addKernelInstance<LZ4TwistDecompressionKernel>(b, twistWidth);
     69        lz4I4AioK->setInitialArguments({mFileSize});
     70        mPxDriver.makeKernelCall(lz4I4AioK, inputStreams, outputStreams);
     71
     72    } else {
     73        Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ByteStreamDecompressionKernel>(b, true);
     74        lz4AioK->setInitialArguments({mFileSize});
     75        mPxDriver.makeKernelCall(lz4AioK, inputStreams, outputStreams);
     76    }
     77    return this->untwist(b, uncompressedTwistedStream, twistWidth, numOfStreams);
     78}
     79
     // Combines several input stream sets into one buffer whose type is getStreamSetTy(1, twistWidth).
     //   b            - kernel builder.
     //   inputStreams - the stream sets to combine; their stream counts are summed.
     //   twistWidth   - 1, 2, 4 or 8; any other value is a fatal error.
     // Returns an existing input buffer for width 1, otherwise a newly added buffer fed by a
     // TwistMultipleByPDEPKernel (width 2 or 4) or a big-endian P2SMultipleStreamsKernel (width 8).
     // NOTE(review): in the twistWidth == 1 branch, if no input has exactly one stream the loop
     // ends without returning and control reaches the end of this non-void function.
     // NOTE(review): the total <= twistWidth check is an assert only, so it vanishes when
     // NDEBUG is defined.
     80parabix::StreamSetBuffer* LZ4GrepByteStreamGenerator::twist(const std::unique_ptr<kernel::KernelBuilder> &b,
     81                                                            std::vector<StreamSetBuffer*> inputStreams,
     82                                                            unsigned twistWidth
     83) {
     // The lambda parameter below is also named b and shadows the builder inside the lambda.
     84    std::vector<unsigned> numsOfStreams(inputStreams.size());
     85    std::transform(inputStreams.begin(), inputStreams.end(), numsOfStreams.begin(), [](StreamSetBuffer* b){return b->getNumOfStreams();});
     86    unsigned totalNumOfStreams = std::accumulate(numsOfStreams.begin(), numsOfStreams.end(), 0u);
     87    assert(totalNumOfStreams <= twistWidth);
     88
     89    if (twistWidth == 1) {
     // Width 1: no kernel is added; the first single-stream input is handed back unchanged.
     90        for (unsigned i = 0; i < inputStreams.size(); i++) {
     91            if (inputStreams[i]->getNumOfStreams() == 1) {
     92                return inputStreams[i];
     93            }
     94        }
     95    } else if (twistWidth == 2 || twistWidth == 4) {
     96        StreamSetBuffer* twistedCharClasses = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, twistWidth),
     97                                                                                this->getDefaultBufferBlocks(), 1);
     98        kernel::Kernel* twistK = mPxDriver.addKernelInstance<kernel::TwistMultipleByPDEPKernel>(b, numsOfStreams, twistWidth);
     99        mPxDriver.makeKernelCall(twistK, inputStreams, {twistedCharClasses});
     100        return twistedCharClasses;
     101    } else if (twistWidth == 8) {
     // NOTE(review): unlike the width 2/4 buffer, this addBuffer call omits the trailing
     // argument 1 - confirm that the difference is intentional.
     102        StreamSetBuffer * const mtxByteStream = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, twistWidth),
     103                                                                                  this->getDefaultBufferBlocks());
     104        Kernel * p2sK = mPxDriver.addKernelInstance<P2SMultipleStreamsKernel>(b, cc::BitNumbering::BigEndian, numsOfStreams);
     105        mPxDriver.makeKernelCall(p2sK, inputStreams, {mtxByteStream});
     106        return mtxByteStream;
     107    } else {
     // NOTE(review): the doubled semicolon at the end of the next line is redundant.
     108        llvm::report_fatal_error("Twist: Unsupported twistWidth " + std::to_string(twistWidth));;
     109    }
     110}
     111
     112std::vector<StreamSetBuffer*> LZ4GrepByteStreamGenerator::untwist(const std::unique_ptr<kernel::KernelBuilder> &b,
     113                                                              parabix::StreamSetBuffer *inputStream,
     114                                                              unsigned twistWidth,
     115                                                              std::vector<unsigned> numOfStreams
     116) {
     117    unsigned totalNumOfStreams = std::accumulate(numOfStreams.begin(), numOfStreams.end(), 0u);
     118    assert(totalNumOfStreams <= twistWidth);
     119    if (twistWidth == 1) {
     120        std::vector<unsigned> fakeStreamNums;
     121        for (unsigned i = 0; i < numOfStreams.size(); i++) {
     122            if (numOfStreams[i] == 0) {
     123                fakeStreamNums.push_back(0);
     124            }
     125        }
     126        auto fakeStreams = this->generateFakeStreams(b, inputStream, fakeStreamNums);
     127
     128        std::vector<StreamSetBuffer*> retBuffers;
     129        unsigned j = 0;
     130        for (unsigned i = 0; i < numOfStreams.size(); i++) {
     131            if (numOfStreams[i] == 0) {
     132                retBuffers.push_back(fakeStreams[j]);
     133                j++;
     134            } else {
     135                retBuffers.push_back(inputStream);
     136            }
     137        }
     138        return retBuffers;
     139    } else{
     140        std::vector<StreamSetBuffer*> retBuffers;
     141        for (unsigned i = 0; i < numOfStreams.size(); i++) {
     142            retBuffers.push_back(mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(numOfStreams[i]), this->getDefaultBufferBlocks(), 1));
     143        }
    162144
    163145
    164     auto & b = mPxDriver.getBuilder();
    165     LZ4BlockInfo blockInfo = this->getBlockInfo(compressedByteStream);
     146        if (twistWidth == 2 || twistWidth == 4) {
     147            kernel::Kernel* untwistK = mPxDriver.addKernelInstance<kernel::UntwistMultipleByPEXTKernel>(b, numOfStreams, twistWidth);
     148            mPxDriver.makeKernelCall(untwistK, {inputStream}, retBuffers);
     149            return retBuffers;
     150        } else if (twistWidth == 8) {
     151            Kernel * s2pk = mPxDriver.addKernelInstance<S2PMultipleStreamsKernel>(b, cc::BitNumbering::BigEndian, true, numOfStreams);
     152            mPxDriver.makeKernelCall(s2pk, {inputStream}, retBuffers);
     153            return retBuffers;
     154        } else {
     155            llvm::report_fatal_error("Twist: Unsupported twistWidth " + std::to_string(twistWidth));;
     156        }
     157    }
     158}
    166159
    167     unsigned totalStreamNum = 0;
    168     std::vector<unsigned> numOfStreams;
    169     std::vector<StreamSetBuffer*> retStreams;
    170     for (unsigned i = 0; i < compressedBitStreams.size(); i++) {
    171         unsigned n = compressedBitStreams[i]->getNumOfStreams();
    172160
    173         numOfStreams.push_back(n);
    174         totalStreamNum += n;
    175         retStreams.push_back(mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(n), this->getDefaultBufferBlocks(), 1));
    176     }
    177 
    178     if (totalStreamNum == 1) {
    179         // TODO
    180     } else if (totalStreamNum <= 4) {
    181         unsigned twistWidth = totalStreamNum == 2 ? 2 : 4;
    182 
    183         StreamSetBuffer* twistedStream = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, twistWidth),
    184                                                                                 this->getDefaultBufferBlocks(), 1);
    185         kernel::Kernel* twistK = mPxDriver.addKernelInstance<kernel::TwistMultipleByPDEPKernel>(b, numOfStreams, twistWidth);
    186         mPxDriver.makeKernelCall(twistK, compressedBitStreams, {twistedStream});
    187 
    188         StreamSetBuffer* uncompressedTwistedStream = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, twistWidth),
    189                                                                                             this->getDefaultBufferBlocks(), 1);
    190         Kernel* lz4I4AioK = mPxDriver.addKernelInstance<LZ4TwistDecompressionKernel>(b, twistWidth);
    191         lz4I4AioK->setInitialArguments({mFileSize});
    192         mPxDriver.makeKernelCall(lz4I4AioK, {
    193                 compressedByteStream,
    194 
    195                 blockInfo.isCompress,
    196                 blockInfo.blockStart,
    197                 blockInfo.blockEnd,
    198 
    199                 twistedStream
    200         }, {
    201                 uncompressedTwistedStream
    202                                  });
    203 
    204         kernel::Kernel* untwistK = mPxDriver.addKernelInstance<kernel::UntwistMultipleByPEXTKernel>(b, numOfStreams, twistWidth);
    205         mPxDriver.makeKernelCall(untwistK, {uncompressedTwistedStream}, retStreams);
    206     } else {
    207         // TODO
    208     }
    209     return retStreams;
    210 }
  • icGREP/icgrep-devel/icgrep/lz4/grep/lz4_grep_bytestream_generator.h

    r6144 r6145  
    1212
    1313private:
    14     parabix::StreamSetBuffer *convertCompressedBitsStreamWithTwistApproach(
    15             parabix::StreamSetBuffer *compressedByteStream,
    16             parabix::StreamSetBuffer *compressedBitStream,
    17             std::string prefix
    18     );
    19 
    20 
     14    unsigned calculateTwistWidth(unsigned numOfStreams);
     15    parabix::StreamSetBuffer* twist(const std::unique_ptr<kernel::KernelBuilder> &b,
     16                                    std::vector<parabix::StreamSetBuffer*> inputStreams,
     17                                    unsigned twistWidth);
     18    std::vector<parabix::StreamSetBuffer*> untwist(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, parabix::StreamSetBuffer* inputStream, unsigned twistWidth, std::vector<unsigned> numOfStreams);
    2119};
    2220
Note: See TracChangeset for help on using the changeset viewer.