Ignore:
Timestamp:
Nov 2, 2018, 7:18:31 PM (9 months ago)
Author:
nmedfort
Message:

Initial version of PipelineKernel + revised StreamSet model.

Location:
icGREP/icgrep-devel/icgrep/lz4
Files:
14 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/lz4/grep/lz4_grep_base_generator.cpp

    r6173 r6184  
    4747#include <re/re_seq.h>
    4848#include <kernels/kernel_builder.h>
     49#include <kernels/pipeline_builder.h>
    4950#include <re/re_alt.h>
    5051#include <kernels/lz4/decompression/lz4_bytestream_decompression.h>
     
    5455
    5556using namespace llvm;
    56 using namespace parabix;
    5757using namespace kernel;
    5858using namespace grep;
    5959using namespace re;
    60 
    61 LZ4GrepBaseGenerator::LZ4GrepBaseGenerator()
    62         : LZ4BaseGenerator(),
    63           u8NonFinalRe(makeAlt({makeByte(0xC2, 0xF4),
    64                                 makeSeq({makeByte(0xE0, 0xF4), makeByte(0x80, 0xBF)}),
    65                                 makeSeq({makeByte(0xF0, 0xF4), makeByte(0x80, 0xBF), makeByte(0x80, 0xBF)})})),
    66           u8FinalRe(makeCC(0x0, 0x1FFFFF))
    67 {
     60using namespace cc;
     61
     62using Alphabets = ICGrepKernel::Alphabets;
     63using Externals = ICGrepKernel::Externals;
     64
     65inline RE * makeNonFinal() {
     66    CC * const C2_F4 = makeByte(0xC2, 0xF4);
     67    CC * const E0_F4 = makeByte(0xE0, 0xF4);
     68    CC * const _80_F4 = makeByte(0x80, 0xBF);
     69    RE * E0_F4x80_F4 = makeSeq({E0_F4, _80_F4});
     70    CC * const F0_F4 = makeByte(0xE0, 0xF4);
     71    RE * F0_F4x80_F4x80_F4 = makeSeq({F0_F4, _80_F4, _80_F4});
     72    return makeAlt({C2_F4, E0_F4x80_F4, F0_F4x80_F4x80_F4});
     73}
     74
     75LZ4GrepBaseGenerator::LZ4GrepBaseGenerator(const FunctionType type)
     76: LZ4BaseGenerator()
     77, u8NonFinalRe(makeNonFinal())
     78, u8FinalRe(makeCC(0x0, 0x1FFFFF))
     79, mMainMethod(nullptr) {
    6880    mGrepRecordBreak = grep::GrepRecordBreakKind::LF;
    6981    mMoveMatchesToEOL = true;
    70 }
    71 
    72 void LZ4GrepBaseGenerator::generateScanMatchGrepPipeline(re::RE* regex, bool enableMultiplexing, bool utf8CC) {
     82    mPipeline = std::move(makeInternalPipeline(type));
     83}
     84
     85inline std::unique_ptr<kernel::PipelineBuilder> LZ4GrepBaseGenerator::makeInternalPipeline(const FunctionType type) {
     86    Bindings inputs;
     87    Bindings outputs;
     88
     89    auto & b = mPxDriver.getBuilder();
     90
     91    Type * const inputType = b->getInt8PtrTy();
     92    Type * const sizeTy = b->getSizeTy();
     93    Type * const boolTy = b->getIntNTy(sizeof(bool) * 8);
     94
     95    inputs.emplace_back(inputType, "input");
     96    inputs.emplace_back(sizeTy, "headerSize");
     97    inputs.emplace_back(sizeTy, "fileSize");
     98    inputs.emplace_back(boolTy, "hasBlockChecksum");
     99
     100    if (type == FunctionType::CountOnly) {
     101        outputs.emplace_back(sizeTy, "countResult");
     102    } else if (type == FunctionType::Match) {
     103        Type * const intAddrTy = b->getIntAddrTy();
     104        inputs.emplace_back(intAddrTy, "match_accumulator");
     105    }
     106
     107    return mPxDriver.makePipeline(inputs, outputs);
     108}
     109
     110void LZ4GrepBaseGenerator::generateScanMatchGrepPipeline(RE* regex, bool enableMultiplexing, bool utf8CC) {
    73111    if (enableMultiplexing) {
    74         this->generateMultiplexingScanMatchGrepPipeline(regex, utf8CC);
     112        generateMultiplexingScanMatchGrepPipeline(regex, utf8CC);
    75113    } else {
    76         this->generateFullyDecompressionScanMatchGrepPipeline(regex);
    77     }
    78 }
    79 
    80 void LZ4GrepBaseGenerator::generateCountOnlyGrepPipeline(re::RE* regex, bool enableMultiplexing, bool utf8CC){
     114        generateFullyDecompressionScanMatchGrepPipeline(regex);
     115    }
     116}
     117
     118void LZ4GrepBaseGenerator::generateCountOnlyGrepPipeline(RE* regex, bool enableMultiplexing, bool utf8CC){
    81119    if (enableMultiplexing) {
    82         this->generateMultiplexingCountOnlyGrepPipeline(regex, utf8CC);
     120        generateMultiplexingCountOnlyGrepPipeline(regex, utf8CC);
    83121    } else {
    84         this->generateFullyDecompressionCountOnlyGrepPipeline(regex);
    85     }
    86 }
    87 
    88 void LZ4GrepBaseGenerator::initREs(re::RE * RE) {
     122        generateFullyDecompressionCountOnlyGrepPipeline(regex);
     123    }
     124}
     125
     126void LZ4GrepBaseGenerator::initREs(RE * re) {
    89127    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
    90         mBreakCC = re::makeCC(re::makeCC(0x0A, 0x0D), re::makeCC(re::makeCC(0x85), re::makeCC(0x2028, 0x2029)));
     128        mBreakCC = makeCC(makeCC(0x0A, 0x0D), makeCC(makeCC(0x85), makeCC(0x2028, 0x2029)));
    91129    } else if (mGrepRecordBreak == GrepRecordBreakKind::Null) {
    92         mBreakCC = re::makeByte(0);  // Null
     130        mBreakCC = makeByte(0);  // Null
    93131    } else {
    94         mBreakCC = re::makeByte(0x0A); // LF
    95     }
    96     re::RE * anchorRE = mBreakCC;
     132        mBreakCC = makeByte(0x0A); // LF
     133    }
     134    RE * anchorRE = mBreakCC;
    97135    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
    98         re::Name * anchorName = re::makeName("UTF8_LB", re::Name::Type::Unicode);
    99         anchorName->setDefinition(re::makeUnicodeBreak());
     136        Name * anchorName = makeName("UTF8_LB", Name::Type::Unicode);
     137        anchorName->setDefinition(makeUnicodeBreak());
    100138        anchorRE = anchorName;
    101139    }
    102140
    103     mRE = RE;
     141    mRE = re;
    104142    bool allAnchored = true;
    105143
    106144    if (!hasEndAnchor(mRE)) allAnchored = false;
    107145    mRE = resolveModesAndExternalSymbols(mRE);
    108     mRE = re::exclude_CC(mRE, mBreakCC);
     146    mRE = exclude_CC(mRE, mBreakCC);
    109147    mRE = resolveAnchors(mRE, anchorRE);
    110     re::gatherUnicodeProperties(mRE, mUnicodeProperties);
     148    gatherUnicodeProperties(mRE, mUnicodeProperties);
    111149    mRE = regular_expression_passes(mRE);
    112150
     
    115153}
    116154
    117 parabix::StreamSetBuffer * LZ4GrepBaseGenerator::linefeedStreamFromUncompressedBits(
    118         parabix::StreamSetBuffer *uncompressedBasisBits) {
    119     auto & idb = mPxDriver.getBuilder();
    120     const unsigned baseBufferSize = this->getDefaultBufferBlocks();
    121     StreamSetBuffer * LineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize, 1);
    122     kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()}, cc::BitNumbering::BigEndian);
    123     mPxDriver.makeKernelCall(linefeedK, {uncompressedBasisBits}, {LineFeedStream});
     155StreamSet * LZ4GrepBaseGenerator::linefeedStreamFromUncompressedBits(StreamSet *uncompressedBasisBits) {
     156    StreamSet * const LineFeedStream = mPipeline->CreateStreamSet(1, 1);
     157    mPipeline->CreateKernelCall<LineFeedKernelBuilder>(uncompressedBasisBits, LineFeedStream, BitNumbering::BigEndian);
    124158    return LineFeedStream;
    125159}
     
    137171}
    138172
    139 std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepBaseGenerator::multiplexingGrep(
    140         re::RE *RE,
    141         parabix::StreamSetBuffer *compressedByteStream,
    142         parabix::StreamSetBuffer *compressedBitStream,
    143         bool utf8CC
    144 ) {
    145 
    146     this->initREs(RE);
    147     auto mGrepDriver = &mPxDriver;
    148 
    149     auto & idb = mGrepDriver->getBuilder();
    150     const unsigned baseBufferSize = this->getDefaultBufferBlocks();
    151     int MaxCountFlag = 0;
     173std::pair<StreamSet *, StreamSet *> LZ4GrepBaseGenerator::multiplexingGrep(RE * re, StreamSet * compressedByteStream, StreamSet * compressedBitStream, bool utf8CC) {
     174
     175    initREs(re);
    152176
    153177    //  Regular Expression Processing and Analysis Phase
    154     const auto nREs = 1;
    155 
    156     std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
    157 
    158 
    159     std::map<std::string, StreamSetBuffer *> propertyStream;
    160 
    161     std::vector<std::string> externalStreamNames;
    162     std::set<re::Name *> UnicodeProperties;
    163 
    164     StreamSetBuffer* fakeMatchCopiedBits = nullptr;
    165     StreamSetBuffer* u8NoFinalStream = nullptr;
    166     StreamSetBuffer * uncompressedCharClasses = nullptr;
    167 
    168     re::CC* linefeedCC = nullptr;
    169 
     178
     179    StreamSet * fakeMatchCopiedBits = nullptr;
     180    StreamSet * u8NoFinalStream = nullptr;
     181    StreamSet * uncompressedCharClasses = nullptr;
     182
     183    CC * const linefeedCC = makeCC(0x0A);
     184
     185    std::shared_ptr<MultiplexedAlphabet> mpx;
    170186
    171187    if (utf8CC) {
    172188
    173         re::Seq* seq = re::makeSeq();
    174         re::RE* targetRe = mRE;
    175 
    176         bool requireNonFinal = re::isRequireNonFinal(mRE);
    177         linefeedCC = re::makeCC(0x0A);
    178         seq->push_back(targetRe);
    179         seq->push_back(std::move(linefeedCC));
    180 
    181 
    182 
    183         std::vector<re::CC*> UnicodeSets = re::collectCCs(seq, &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));;
    184         mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
    185         std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
    186         auto numOfCharacterClasses = mpx_basis.size();
     189        const auto requireNonFinal = isRequireNonFinal(mRE);
     190        Seq * const seq = cast<Seq>(makeSeq({mRE, linefeedCC}));
     191
     192        auto UnicodeSets = collectCCs(seq, Unicode, std::set<Name *>({makeZeroWidth("\\b{g}")}));;
     193
     194        mpx = std::make_shared<MultiplexedAlphabet>("mpx", UnicodeSets);
     195
     196        auto mpxCCs = mpx->getMultiplexedCCs();
     197
    187198        bool mpxContainFinal = false;
    188         {
    189             seq->push_back(u8FinalRe);
    190             std::vector<re::CC*> UnicodeSetsWithU8Final = re::collectCCs(seq, &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));;
    191             auto u8FinalMpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSetsWithU8Final);
    192             if (calculateTwistWidth(numOfCharacterClasses + 1) > calculateTwistWidth(u8FinalMpx->getMultiplexedCCs().size())) {
    193                 mpxContainFinal = true;
    194                 UnicodeSets = UnicodeSetsWithU8Final;
    195                 mpx = std::move(u8FinalMpx);
    196                 mpx_basis = mpx->getMultiplexedCCs();
    197                 numOfCharacterClasses = mpx_basis.size();
    198             }
    199 
     199
     200        seq->push_back(u8FinalRe);
     201
     202        auto UnicodeSetsWithU8Final = collectCCs(seq, Unicode, std::set<Name *>({makeZeroWidth("\\b{g}")}));;
     203        auto u8FinalMpx = std::make_shared<MultiplexedAlphabet>("mpx", UnicodeSetsWithU8Final);
     204        auto mpxCCsWithU8Final = u8FinalMpx->getMultiplexedCCs();
     205
     206        if (calculateTwistWidth(mpxCCs.size() + 1) > calculateTwistWidth(mpxCCsWithU8Final.size())) {
     207            mpxContainFinal = true;
     208            UnicodeSets = UnicodeSetsWithU8Final;
     209            mpx = u8FinalMpx;
     210            mpxCCs = mpxCCsWithU8Final;
    200211        }
    201         mRE = transformCCs(mpx.get(), targetRe);
    202 
    203 
    204 //        llvm::errs() << "numOfUnicodeSet:" << UnicodeSets.size() << "\n";
    205 //        llvm::errs() << "numOfCharacterClasses:" << numOfCharacterClasses << "\n";
    206         StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize, 1);
    207 
    208         kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis), false, cc::BitNumbering::BigEndian);
    209         mGrepDriver->makeKernelCall(ccK, {compressedBitStream}, {CharClasses});
     212
     213        mRE = transformCCs(mpx, mRE);
     214
     215        StreamSet * CharClasses = mPipeline->CreateStreamSet(mpxCCs.size());
     216
     217        mPipeline->CreateKernelCall<CharClassesKernel>(std::move(mpxCCs), compressedBitStream, CharClasses, BitNumbering::BigEndian);
    210218
    211219        if (!requireNonFinal) {
    212220            // We do not need to decompress U8 NonFinal Stream is all of the character class in target regular expression is byte length
    213             uncompressedCharClasses = this->decompressBitStream(compressedByteStream, CharClasses);
    214             auto fakeStreams = this->generateFakeStreams(idb, uncompressedCharClasses, std::vector<unsigned>{8, 1});
     221            uncompressedCharClasses = decompressBitStream(compressedByteStream, CharClasses);
     222            auto fakeStreams = generateFakeStreams(uncompressedCharClasses, std::vector<unsigned>{8, 1});
    215223            fakeMatchCopiedBits = fakeStreams[0];
    216224            u8NoFinalStream = fakeStreams[1];
    217225        } else {
    218226            if (mpxContainFinal) {
    219                 auto decompressedStreams = this->decompressBitStreams(compressedByteStream, {CharClasses/*, compressedNonFinalStream*/});
     227                auto decompressedStreams = decompressBitStreams(compressedByteStream, {CharClasses/*, compressedNonFinalStream*/});
    220228                uncompressedCharClasses = decompressedStreams[0];
    221 
    222                 auto fakeStreams = this->generateFakeStreams(idb, uncompressedCharClasses, std::vector<unsigned>{8});
     229                auto fakeStreams = generateFakeStreams(uncompressedCharClasses, std::vector<unsigned>{8});
    223230                fakeMatchCopiedBits = fakeStreams[0];
    224 
    225                 StreamSetBuffer * u8FinalStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), this->getDefaultBufferBlocks(), 1);
    226 
    227                 re::RE* mpxU8FinalRe = transformCCs(mpx.get(), u8FinalRe);
    228 
    229                 ICGrepKernel * u8FinalGrepK = (ICGrepKernel *)mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mpxU8FinalRe, externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::BigEndian);
    230                 u8FinalGrepK->setCachable(false);
    231                 mGrepDriver->makeKernelCall(u8FinalGrepK, {fakeMatchCopiedBits, uncompressedCharClasses}, {u8FinalStream});
    232 
    233                 u8NoFinalStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), this->getDefaultBufferBlocks(), 1);
    234                 Kernel* notK = mGrepDriver->addKernelInstance<LZ4NotKernel>(idb);
    235                 mGrepDriver->makeKernelCall(notK, {u8FinalStream}, {u8NoFinalStream});
     231                StreamSet * u8FinalStream = mPipeline->CreateStreamSet();
     232                RE * const mpxU8FinalRe = transformCCs(mpx, u8FinalRe);
     233                Alphabets alpha;
     234                alpha.emplace_back(mpx, uncompressedCharClasses);
     235                mPipeline->CreateKernelCall<ICGrepKernel>(mpxU8FinalRe, fakeMatchCopiedBits, uncompressedCharClasses, Externals{}, alpha, BitNumbering::BigEndian, false);
     236                u8NoFinalStream = mPipeline->CreateStreamSet(1, 1);
     237                mPipeline->CreateKernelCall<LZ4NotKernel>(u8FinalStream, u8NoFinalStream);
    236238            } else {
    237                 StreamSetBuffer* compressedNonFinalStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize, 1);
    238                 kernel::Kernel * nonFinalK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, u8NonFinalRe, externalStreamNames, std::vector<cc::Alphabet *>(), cc::BitNumbering::BigEndian);
    239                 mGrepDriver->makeKernelCall(nonFinalK, {compressedBitStream}, {compressedNonFinalStream});
    240 
    241                 auto decompressedStreams = this->decompressBitStreams(compressedByteStream, {CharClasses, compressedNonFinalStream});
     239                StreamSet * compressedNonFinalStream = mPipeline->CreateStreamSet(1, 1);
     240                mPipeline->CreateKernelCall<ICGrepKernel>(u8NonFinalRe, compressedBitStream, compressedNonFinalStream, Externals{}, Alphabets{}, BitNumbering::BigEndian);
     241                auto decompressedStreams = decompressBitStreams(compressedByteStream, {CharClasses, compressedNonFinalStream});
    242242                uncompressedCharClasses = decompressedStreams[0];
    243243                u8NoFinalStream = decompressedStreams[1];
    244 
    245                 auto fakeStreams = this->generateFakeStreams(idb, uncompressedCharClasses, std::vector<unsigned>{8});
     244                auto fakeStreams = generateFakeStreams(uncompressedCharClasses, std::vector<unsigned>{8});
    246245                fakeMatchCopiedBits = fakeStreams[0];
    247246            }
    248247        }
    249     } else {
    250         re::Seq* seq = re::makeSeq();
    251         re::RE* targetRe = mRE;
    252         targetRe = re::toUTF8(targetRe, true);
    253 
    254         linefeedCC = re::makeByte(0x0A);
    255 
    256         seq->push_back(targetRe);
    257         seq->push_back(std::move(linefeedCC));
    258 
    259         std::vector<re::CC*> UnicodeSets = re::collectCCs(seq, &cc::Byte, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
    260 
    261         mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
    262         mRE = transformCCs(mpx.get(), targetRe);
    263 
    264         std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
    265         auto numOfCharacterClasses = mpx_basis.size();
    266 //        llvm::errs() << "numOfUnicodeSet:" << UnicodeSets.size() << "\n";
    267 //        llvm::errs() << "numOfCharacterClasses:" << numOfCharacterClasses << "\n";
    268         StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize, 1);
    269 
    270         kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::ByteClassesKernel>(idb, std::move(mpx_basis), false, cc::BitNumbering::BigEndian);
    271         mGrepDriver->makeKernelCall(ccK, {compressedBitStream}, {CharClasses});
    272 
    273         uncompressedCharClasses = this->decompressBitStream(compressedByteStream, CharClasses);
    274         auto fakeStreams = this->generateFakeStreams(idb, uncompressedCharClasses, std::vector<unsigned>{8, 1});
     248
     249    } else { // if (!utf8CC) {
     250
     251        RE * const targetRe = toUTF8(mRE, true);
     252        Seq * const seq = cast<Seq>(makeSeq({targetRe, linefeedCC}));
     253        auto UnicodeSets = collectCCs(seq, Byte, std::set<Name *>({makeZeroWidth("\\b{g}")}));
     254
     255        mpx = std::make_shared<MultiplexedAlphabet>("mpx", UnicodeSets);
     256
     257        mRE = transformCCs(mpx, targetRe);
     258
     259        auto mpx_basis = mpx->getMultiplexedCCs();
     260        StreamSet * const CharClasses = mPipeline->CreateStreamSet(mpx_basis.size());
     261
     262        mPipeline->CreateKernelCall<ByteClassesKernel>(std::move(mpx_basis), compressedBitStream, CharClasses, BitNumbering::BigEndian);
     263
     264        uncompressedCharClasses = decompressBitStream(compressedByteStream, CharClasses);
     265        auto fakeStreams = generateFakeStreams(uncompressedCharClasses, std::vector<unsigned>{8, 1});
    275266        fakeMatchCopiedBits = fakeStreams[0];
    276267        u8NoFinalStream = fakeStreams[1];
    277268    }
    278269
    279     StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize, 1);
     270    StreamSet * const MatchResults = mPipeline->CreateStreamSet(1, 1);
    280271
    281272    // Multiplexing Grep Kernel is not Cachable, since it is possible that two REs with name "mpx_1" have different alphabets
    282     StreamSetBuffer * LineBreakStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), this->getDefaultBufferBlocks(), 1);
    283     ICGrepKernel * lineFeedGrepK = (ICGrepKernel *)mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, transformCCs(mpx.get(), linefeedCC), externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::BigEndian);
    284     lineFeedGrepK->setCachable(false);
    285     mGrepDriver->makeKernelCall(lineFeedGrepK, {fakeMatchCopiedBits, uncompressedCharClasses}, {LineBreakStream});
    286 
    287 
    288     externalStreamNames.push_back("UTF8_nonfinal");
    289 
    290     ICGrepKernel * icgrepK = (ICGrepKernel *)mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mRE, externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::BigEndian);
    291     icgrepK->setCachable(false);
    292     mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, u8NoFinalStream, uncompressedCharClasses}, {MatchResults});
    293     MatchResultsBufs[0] = MatchResults;
    294 
    295     StreamSetBuffer * MergedResults = MatchResultsBufs[0];
    296 
    297     StreamSetBuffer * Matches = MergedResults;
     273    StreamSet * LineBreakStream = mPipeline->CreateStreamSet(1, 1);
     274
     275    RE * const transformedCC = transformCCs(mpx, linefeedCC);
     276
     277    Alphabets alpha;
     278    alpha.emplace_back(mpx, uncompressedCharClasses);
     279
     280    mPipeline->CreateKernelCall<ICGrepKernel>(transformedCC, fakeMatchCopiedBits, LineBreakStream, Externals{}, alpha, BitNumbering::BigEndian, false);
     281
     282    Externals externals;
     283    externals.emplace_back("UTF8_nonfinal", u8NoFinalStream);
     284
     285    mPipeline->CreateKernelCall<ICGrepKernel>(mRE, fakeMatchCopiedBits, MatchResults, externals, alpha, BitNumbering::BigEndian, false);
     286
     287    StreamSet * Matches = MatchResults;
    298288    if (mMoveMatchesToEOL) {
    299         StreamSetBuffer * OriginalMatches = Matches;
    300         kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
    301         Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize, 1);
    302         mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
    303     }
    304 
    305     if (MaxCountFlag > 0) {
    306         kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
    307         untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
    308         StreamSetBuffer * const AllMatches = Matches;
    309         Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize, 1);
    310         mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
    311     }
    312 
    313     return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
    314 };
    315 
    316 std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepBaseGenerator::grep(
    317         re::RE *RE, parabix::StreamSetBuffer *byteStream, parabix::StreamSetBuffer *uncompressedBasisBits, bool ccMultiplexing) {
    318 
    319     this->initREs(RE);
    320     auto mGrepDriver = &mPxDriver;
    321 
    322     auto & idb = mGrepDriver->getBuilder();
    323     // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
    324     const unsigned baseBufferSize = this->getDefaultBufferBlocks();
    325     int MaxCountFlag = 0;
     289        StreamSet * const MovedMatches = mPipeline->CreateStreamSet();
     290        mPipeline->CreateKernelCall<MatchedLinesKernel>(Matches, LineBreakStream, MovedMatches);
     291        Matches = MovedMatches;
     292    }
     293
     294    return std::pair<StreamSet *, StreamSet *>(LineBreakStream, Matches);
     295}
     296
     297std::pair<StreamSet *, StreamSet *> LZ4GrepBaseGenerator::grep(RE * re, StreamSet * byteStream, StreamSet * uncompressedBasisBits, bool ccMultiplexing) {
     298
     299    initREs(re);
    326300
    327301    //  Regular Expression Processing and Analysis Phase
    328     const auto nREs = 1;
    329 
    330     std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
    331 
    332     StreamSetBuffer * LineBreakStream = nullptr;
    333 
    334 
    335     std::map<std::string, StreamSetBuffer *> propertyStream;
    336 
    337     for(unsigned i = 0; i < nREs; ++i) {
    338 
    339         if (ccMultiplexing) {
    340 
    341             if (uncompressedBasisBits == nullptr) {
    342                 uncompressedBasisBits = this->s2p(byteStream);
    343             }
    344             this->linefeedStreamFromUncompressedBits(uncompressedBasisBits);
    345             std::vector<std::string> externalStreamNames;
    346             std::vector<StreamSetBuffer *> icgrepInputSets = {uncompressedBasisBits};
    347 
    348             const auto UnicodeSets = re::collectCCs(mRE, &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
    349             StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize, 1);
    350 
    351             std::unique_ptr<cc::MultiplexedAlphabet> mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
    352             mRE = transformCCs(mpx.get(), mRE);
    353             std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
    354             auto numOfCharacterClasses = mpx_basis.size();
    355             StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize, 1);
    356             kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis), false, cc::BitNumbering::BigEndian);
    357             mGrepDriver->makeKernelCall(ccK, {uncompressedBasisBits}, {CharClasses});
    358 
    359             kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mRE, externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()}, cc::BitNumbering::BigEndian);
    360             icgrepInputSets.push_back(CharClasses);
    361             mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
    362             MatchResultsBufs[i] = MatchResults;
    363         } else {
    364 
    365             bool anyGCB = hasGraphemeClusterBoundary(mRE);
    366             bool isSimple = (mGrepRecordBreak != GrepRecordBreakKind::Unicode) && (!anyGCB);
    367             if (isSimple) {
    368                 mRE = toUTF8(mRE);
    369             }
    370             const unsigned ByteCClimit = 6;
    371 
    372             if (false && byteTestsWithinLimit(mRE, ByteCClimit)) {
    373                 LineBreakStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    374                 kernel::Kernel * breakK = mGrepDriver->addKernelInstance<kernel::DirectCharacterClassKernelBuilder>(idb, "breakCC", std::vector<re::CC *>{mBreakCC});
    375                 mGrepDriver->makeKernelCall(breakK, {byteStream}, {LineBreakStream});
    376 
    377                 std::vector<std::string> externalStreamNames;
    378                 std::vector<StreamSetBuffer *> icgrepInputSets = {byteStream};
    379                 StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    380                 kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ByteGrepKernel>(idb, mRE, externalStreamNames);
    381                 mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
    382                 MatchResultsBufs[i] = MatchResults;
    383 
    384             } else {
    385 
    386                 if (uncompressedBasisBits == nullptr) {
    387                     uncompressedBasisBits = this->s2p(byteStream);
    388                 }
    389                 LineBreakStream = this->linefeedStreamFromUncompressedBits(uncompressedBasisBits);
    390                 std::vector<std::string> externalStreamNames;
    391                 std::vector<StreamSetBuffer *> icgrepInputSets = {uncompressedBasisBits};
    392 
    393                 std::set<re::Name *> UnicodeProperties;
    394                 StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize, 1);
    395                 kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mRE, externalStreamNames, std::vector<cc::Alphabet *>(), cc::BitNumbering::BigEndian);
    396                 mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
    397                 MatchResultsBufs[i] = MatchResults;
    398             }
     302    StreamSet * const MatchResults = mPipeline->CreateStreamSet(1, 1);
     303
     304    if (uncompressedBasisBits == nullptr) {
     305        uncompressedBasisBits = s2p(byteStream);
     306    }
     307
     308    StreamSet * const LineBreakStream = linefeedStreamFromUncompressedBits(uncompressedBasisBits);
     309
     310    if (ccMultiplexing) {
     311
     312        const auto UnicodeSets = collectCCs(mRE, Unicode, std::set<Name *>({makeZeroWidth("\\b{g}")}));
     313
     314        auto mpx = std::make_shared<MultiplexedAlphabet>("mpx", UnicodeSets);
     315        mRE = transformCCs(mpx, mRE);
     316        auto mpx_basis = mpx->getMultiplexedCCs();
     317        StreamSet * const CharClasses = mPipeline->CreateStreamSet(mpx_basis.size());
     318        mPipeline->CreateKernelCall<CharClassesKernel>(std::move(mpx_basis), uncompressedBasisBits, CharClasses, BitNumbering::BigEndian);
     319
     320        Alphabets alphabets;
     321        alphabets.emplace_back(std::move(mpx), CharClasses);
     322        mPipeline->CreateKernelCall<ICGrepKernel>(mRE, uncompressedBasisBits, MatchResults, Externals{}, alphabets, BitNumbering::BigEndian, false);
     323
     324
     325    } else {
     326
     327        bool anyGCB = hasGraphemeClusterBoundary(mRE);
     328        bool isSimple = (mGrepRecordBreak != GrepRecordBreakKind::Unicode) && (!anyGCB);
     329        if (isSimple) {
     330            mRE = toUTF8(mRE);
    399331        }
    400     }
    401 
    402     StreamSetBuffer * MergedResults = MatchResultsBufs[0];
    403 
    404     StreamSetBuffer * Matches = MergedResults;
     332        mPipeline->CreateKernelCall<ICGrepKernel>(mRE, uncompressedBasisBits, MatchResults, Externals{}, Alphabets{}, BitNumbering::BigEndian);
     333    }
     334
     335    StreamSet * Matches = MatchResults;
    405336    if (mMoveMatchesToEOL) {
    406         StreamSetBuffer * OriginalMatches = Matches;
    407         kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
    408         Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize, 1);
    409         mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
    410     }
    411 
    412     if (MaxCountFlag > 0) {
    413         kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
    414         untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
    415         StreamSetBuffer * const AllMatches = Matches;
    416         Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize, 1);
    417         mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
    418     }
    419 
    420     return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
     337        StreamSet * const MovedMatches = mPipeline->CreateStreamSet();
     338        mPipeline->CreateKernelCall<MatchedLinesKernel>(Matches, LineBreakStream, MovedMatches);
     339        Matches = MovedMatches;
     340    }
     341
     342    return std::pair<StreamSet *, StreamSet *>(LineBreakStream, Matches);
    421343
    422344}
    423345
    424346void LZ4GrepBaseGenerator::invokeScanMatchGrep(char* fileBuffer, size_t blockStart, size_t blockEnd, bool hasBlockChecksum) {
    425     auto main = this->getScanMatchGrepMainFunction();
     347    auto main = getScanMatchGrepMainFunction();
    426348    std::ostringstream s;
    427349    EmitMatch accum("", false, false, s);
     
    432354
    433355
    434 void LZ4GrepBaseGenerator::generateFullyDecompressionScanMatchGrepPipeline(re::RE *regex) {
    435     auto & iBuilder = mPxDriver.getBuilder();
    436     this->generateScanMatchMainFunc(iBuilder);
    437 
    438     StreamSetBuffer* compressedByteStream = this->loadByteStream();
    439 
    440     StreamSetBuffer * const uncompressedByteStream = this->byteStreamDecompression(compressedByteStream);
    441     StreamSetBuffer * uncompressedBitStream = this->s2p(uncompressedByteStream);
    442 
    443     StreamSetBuffer * LineBreakStream;
    444     StreamSetBuffer * Matches;
     356void LZ4GrepBaseGenerator::generateFullyDecompressionScanMatchGrepPipeline(RE *regex) {
     357    StreamSet* compressedByteStream = loadByteStream();
     358
     359    StreamSet * const uncompressedByteStream = byteStreamDecompression(compressedByteStream);
     360    StreamSet * uncompressedBitStream = s2p(uncompressedByteStream);
     361
     362    StreamSet * LineBreakStream;
     363    StreamSet * Matches;
    445364    std::tie(LineBreakStream, Matches) = grep(regex, uncompressedByteStream, uncompressedBitStream);
    446365
    447     kernel::Kernel * scanMatchK = mPxDriver.addKernelInstance<kernel::ScanMatchKernel>(iBuilder);
    448     scanMatchK->setInitialArguments({match_accumulator});
    449     mPxDriver.makeKernelCall(scanMatchK, {Matches, LineBreakStream, uncompressedByteStream}, {});
    450     mPxDriver.LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
    451     mPxDriver.LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
    452 
    453     mPxDriver.generatePipelineIR();
    454     mPxDriver.deallocateBuffers();
    455 
    456     iBuilder->CreateRetVoid();
    457 
    458     mPxDriver.finalizeObject();
    459 }
    460 
    461 void LZ4GrepBaseGenerator::generateMultiplexingScanMatchGrepPipeline(re::RE *regex, bool utf8CC) {
    462     auto & iBuilder = mPxDriver.getBuilder();
    463     this->generateScanMatchMainFunc(iBuilder);
    464 
    465     StreamSetBuffer *compressedByteStream = nullptr, *compressedBasisBits = nullptr;
    466     std::tie(compressedByteStream, compressedBasisBits) = this->loadByteStreamAndBitStream();
    467 
    468     StreamSetBuffer * LineBreakStream;
    469     StreamSetBuffer * Matches;
     366
     367    Kernel * scanMatchK = mPipeline->CreateKernelCall<ScanMatchKernel>(Matches, LineBreakStream, uncompressedByteStream, match_accumulator);
     368    mPxDriver.LinkFunction(scanMatchK, "accumulate_match_wrapper", accumulate_match_wrapper);
     369    mPxDriver.LinkFunction(scanMatchK, "finalize_match_wrapper", finalize_match_wrapper);
     370
     371    mMainMethod = mPipeline->compile();
     372
     373}
     374
     375void LZ4GrepBaseGenerator::generateMultiplexingScanMatchGrepPipeline(RE *regex, bool utf8CC) {
     376
     377    StreamSet *compressedByteStream = nullptr, *compressedBasisBits = nullptr;
     378    std::tie(compressedByteStream, compressedBasisBits) = loadByteStreamAndBitStream();
     379
     380    StreamSet * LineBreakStream;
     381    StreamSet * Matches;
    470382    std::tie(LineBreakStream, Matches) = multiplexingGrep(regex, compressedByteStream, compressedBasisBits, utf8CC);
    471383
    472 //    Kernel* matchDetector = mPxDriver.addKernelInstance<LZ4MatchDetectorKernel>(iBuilder);
    473 //    StreamSetBuffer* hasMatch = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8),
    474 //                                                                  this->getDefaultBufferBlocks(), 1);
    475 //    mPxDriver.makeKernelCall(matchDetector, {Matches, LineBreakStream}, {hasMatch});
    476 
    477 
    478     LZ4BlockInfo blockInfo = this->getBlockInfo(compressedByteStream);
    479 
    480     StreamSetBuffer *const uncompressedByteStream =
    481             mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8),
    482                                               this->getDefaultBufferBlocks(), 1);
    483     Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ByteStreamDecompressionKernel>(iBuilder, false, 4 * 1024 * 1024, true);
    484     lz4AioK->setInitialArguments({mFileSize});
    485     mPxDriver.makeKernelCall(
    486             lz4AioK,
    487             {
    488                     compressedByteStream,
    489 
    490                     // Block Data
    491                     blockInfo.isCompress,
    492                     blockInfo.blockStart,
    493                     blockInfo.blockEnd,
    494                     Matches
    495             }, {
    496                     uncompressedByteStream
    497             });
    498 
    499 
    500     kernel::Kernel * scanMatchK = mPxDriver.addKernelInstance<kernel::ScanMatchKernel>(iBuilder);
    501     scanMatchK->setInitialArguments({match_accumulator});
    502     mPxDriver.makeKernelCall(scanMatchK, {Matches, LineBreakStream, uncompressedByteStream}, {});
    503     mPxDriver.LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
    504     mPxDriver.LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
    505 
    506 
    507     mPxDriver.generatePipelineIR();
    508 
    509     mPxDriver.deallocateBuffers();
    510     iBuilder->CreateRetVoid();
    511     mPxDriver.finalizeObject();
    512 }
    513 
    514 
    515 void LZ4GrepBaseGenerator::generateMultiplexingCountOnlyGrepPipeline(re::RE *regex, bool utf8CC) {
    516     auto & iBuilder = mPxDriver.getBuilder();
    517     this->generateCountOnlyMainFunc(iBuilder);
    518 
    519     StreamSetBuffer *compressedByteStream = nullptr, *compressedBasisBits = nullptr;
    520     std::tie(compressedByteStream, compressedBasisBits) = this->loadByteStreamAndBitStream();
    521 
    522     StreamSetBuffer * LineBreakStream;
    523     StreamSetBuffer * Matches;
    524     std::tie(LineBreakStream, Matches) = multiplexingGrep(regex, compressedByteStream, compressedBasisBits, utf8CC);
    525 
    526     kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
    527     mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
    528     mPxDriver.generatePipelineIR();
    529 
    530     iBuilder->setKernel(matchCountK);
    531     Value * matchedLineCount = iBuilder->getAccumulator("countResult");
    532     matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
    533 
    534     mPxDriver.deallocateBuffers();
    535 
    536     iBuilder->CreateRet(matchedLineCount);
    537 
    538     mPxDriver.finalizeObject();
    539 }
    540 
    541 
    542 void LZ4GrepBaseGenerator::generateFullyDecompressionCountOnlyGrepPipeline(re::RE *regex) {
    543     auto & iBuilder = mPxDriver.getBuilder();
    544     this->generateCountOnlyMainFunc(iBuilder);
    545 
    546     StreamSetBuffer * const uncompressedByteStream = this->generateUncompressedByteStream();
    547 //    StreamSetBuffer * const uncompressedBitStream = this->generateUncompressedBitStreams();
    548 
    549     StreamSetBuffer * LineBreakStream;
    550     StreamSetBuffer * Matches;
    551 
     384    LZ4BlockInfo blockInfo = getBlockInfo(compressedByteStream);
     385
     386    StreamSet * const decompressionByteStream = mPipeline->CreateStreamSet(1, 8);
     387    mPipeline->CreateKernelCall<LZ4ByteStreamDecompressionKernel>(mFileSize, compressedByteStream, blockInfo, nullptr, decompressionByteStream );
     388    Kernel * const scanMatchK = mPipeline->CreateKernelCall<ScanMatchKernel>(Matches, LineBreakStream, decompressionByteStream, match_accumulator);
     389    mPxDriver.LinkFunction(scanMatchK, "accumulate_match_wrapper", accumulate_match_wrapper);
     390    mPxDriver.LinkFunction(scanMatchK, "finalize_match_wrapper", finalize_match_wrapper);
     391    mMainMethod = mPipeline->compile();
     392}
     393
     394
     395void LZ4GrepBaseGenerator::generateMultiplexingCountOnlyGrepPipeline(RE *regex, bool utf8CC) {
     396    StreamSet *compressedByteStream = nullptr, *compressedBasisBits = nullptr;
     397    std::tie(compressedByteStream, compressedBasisBits) = loadByteStreamAndBitStream();
     398    StreamSet * Matches = multiplexingGrep(regex, compressedByteStream, compressedBasisBits, utf8CC).second;
     399    mPipeline->CreateKernelCall<PopcountKernel>(Matches, mPipeline->getOutputScalar("countResult"));
     400    mMainMethod = mPipeline->compile();
     401}
     402
     403
     404void LZ4GrepBaseGenerator::generateFullyDecompressionCountOnlyGrepPipeline(RE *regex) {
     405    StreamSet * const uncompressedByteStream = generateUncompressedByteStream();
     406    StreamSet * LineBreakStream;
     407    StreamSet * Matches;
    552408    std::tie(LineBreakStream, Matches) = grep(regex, uncompressedByteStream, nullptr);
    553 
    554     kernel::Kernel * matchCountK = mPxDriver.addKernelInstance<kernel::PopcountKernel>(iBuilder);
    555     mPxDriver.makeKernelCall(matchCountK, {Matches}, {});
    556     mPxDriver.generatePipelineIR();
    557 
    558     iBuilder->setKernel(matchCountK);
    559     Value * matchedLineCount = iBuilder->getAccumulator("countResult");
    560     matchedLineCount = iBuilder->CreateZExt(matchedLineCount, iBuilder->getInt64Ty());
    561 
    562     mPxDriver.deallocateBuffers();
    563 
    564     iBuilder->CreateRet(matchedLineCount);
    565 
    566     mPxDriver.finalizeObject();
     409    mPipeline->CreateKernelCall<PopcountKernel>(Matches, mPipeline->getOutputScalar("countResult"));
     410    mMainMethod = mPipeline->compile();
    567411}
    568412
    569413
    570414ScanMatchGrepMainFunctionType LZ4GrepBaseGenerator::getScanMatchGrepMainFunction() {
    571     return reinterpret_cast<ScanMatchGrepMainFunctionType>(mPxDriver.getMain());
     415    return reinterpret_cast<ScanMatchGrepMainFunctionType>(mMainMethod);
    572416}
    573417CountOnlyGrepMainFunctionType LZ4GrepBaseGenerator::getCountOnlyGrepMainFunction() {
    574     return reinterpret_cast<CountOnlyGrepMainFunctionType>(mPxDriver.getMain());
    575 }
    576 
    577 void LZ4GrepBaseGenerator::generateCountOnlyMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    578     Module * M = iBuilder->getModule();
    579     Type * const int64Ty = iBuilder->getInt64Ty();
    580     Type * const sizeTy = iBuilder->getSizeTy();
    581     Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
    582 //    Type * const voidTy = iBuilder->getVoidTy();
    583     Type * const inputType = iBuilder->getInt8PtrTy();
    584 
    585     Function * const main = cast<Function>(M->getOrInsertFunction("Main", int64Ty, inputType, sizeTy, sizeTy, boolTy, nullptr));
    586     main->setCallingConv(CallingConv::C);
    587     Function::arg_iterator args = main->arg_begin();
    588     mInputStream = &*(args++);
    589     mInputStream->setName("input");
    590 
    591     mHeaderSize = &*(args++);
    592     mHeaderSize->setName("mHeaderSize");
    593 
    594     mFileSize = &*(args++);
    595     mFileSize->setName("mFileSize");
    596 
    597     mHasBlockChecksum = &*(args++);
    598     mHasBlockChecksum->setName("mHasBlockChecksum");
    599     // TODO for now, we do not handle blockCheckSum
    600     mHasBlockChecksum = iBuilder->getInt1(false);
    601 
    602     iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
    603 }
    604 
    605 void LZ4GrepBaseGenerator::generateScanMatchMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    606     Module * M = iBuilder->getModule();
    607     Type * const sizeTy = iBuilder->getSizeTy();
    608     Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
    609     Type * const voidTy = iBuilder->getVoidTy();
    610     Type * const inputType = iBuilder->getInt8PtrTy();
    611     Type * const intAddrTy = iBuilder->getIntAddrTy();
    612 
    613     Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, intAddrTy, nullptr));
    614     main->setCallingConv(CallingConv::C);
    615     Function::arg_iterator args = main->arg_begin();
    616     mInputStream = &*(args++);
    617     mInputStream->setName("input");
    618 
    619     mHeaderSize = &*(args++);
    620     mHeaderSize->setName("mHeaderSize");
    621 
    622     mFileSize = &*(args++);
    623     mFileSize->setName("mFileSize");
    624 
    625     mHasBlockChecksum = &*(args++);
    626     mHasBlockChecksum->setName("mHasBlockChecksum");
    627 
    628     match_accumulator = &*(args++);
    629     match_accumulator->setName("match_accumulator");
    630 
    631     iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
    632 }
    633 
    634 std::vector<parabix::StreamSetBuffer *>
    635 LZ4GrepBaseGenerator::generateFakeStreams(const std::unique_ptr<kernel::KernelBuilder> &idb,
    636                                           parabix::StreamSetBuffer *refStream, std::vector<unsigned> numOfStreams) {
    637 
    638     if (!numOfStreams.size()) {
    639         return std::vector<StreamSetBuffer *>();
    640     }
    641     std::vector<StreamSetBuffer *> outputStreams;
    642     for (unsigned i = 0; i < numOfStreams.size(); i++) {
    643         outputStreams.push_back(mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfStreams[i]),
    644                                                                   this->getDefaultBufferBlocks(), 1));
    645     }
    646     Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<FakeStreamGeneratingKernel>(idb, refStream->getNumOfStreams(), numOfStreams);
    647     mPxDriver.makeKernelCall(fakeStreamGeneratorK, {refStream}, outputStreams);
     418    return reinterpret_cast<CountOnlyGrepMainFunctionType>(mMainMethod);
     419}
     420
     421StreamSets LZ4GrepBaseGenerator::generateFakeStreams(StreamSet * refStream, std::vector<unsigned> numOfStreams) {
     422    if (numOfStreams.empty()) {
     423        return StreamSets{};
     424    }
     425    StreamSets outputStreams;
     426    outputStreams.reserve(numOfStreams.size());
     427    for (const auto k : numOfStreams) {
     428        outputStreams.push_back(mPipeline->CreateStreamSet(k));
     429    }
     430    mPipeline->CreateKernelCall<FakeStreamGeneratingKernel>(refStream, outputStreams);
    648431    return outputStreams;
    649432}
     
    651434
    652435
    653 std::vector<parabix::StreamSetBuffer *>
    654 LZ4GrepBaseGenerator::decompressBitStreams(parabix::StreamSetBuffer *compressedByteStream,
    655                                            std::vector<parabix::StreamSetBuffer *> compressedBitStreams) {
     436StreamSets LZ4GrepBaseGenerator::decompressBitStreams(StreamSet *compressedByteStream, StreamSets compressedBitStreams) {
    656437    // Default implementation here will be slow
    657     std::vector<parabix::StreamSetBuffer *> retVec;
     438    StreamSets retVec;
    658439    for (unsigned i = 0; i < compressedBitStreams.size(); i++) {
    659         retVec.push_back(this->decompressBitStream(compressedByteStream, compressedBitStreams[i]));
     440        retVec.push_back(decompressBitStream(compressedByteStream, compressedBitStreams[i]));
    660441    }
    661442    return retVec;
  • icGREP/icgrep-devel/icgrep/lz4/grep/lz4_grep_base_generator.h

    r6165 r6184  
    66
    77#include <grep_interface.h>
    8 #include <kernels/streamset.h>
     8#include <kernels/relationship.h>
    99#include <cc/multiplex_CCs.h>
    1010#include <string>
     
    2222public:
    2323
    24     LZ4GrepBaseGenerator();
     24    enum FunctionType { CountOnly, Match };
    2525
    2626    void generateScanMatchGrepPipeline(re::RE* regex, bool enableMultiplexing, bool utf8CC);
     
    3333    CountOnlyGrepMainFunctionType getCountOnlyGrepMainFunction();
    3434
     35protected:
    3536
     37    LZ4GrepBaseGenerator(const FunctionType type);
    3638
    37 protected:
    38     virtual parabix::StreamSetBuffer* generateUncompressedByteStream() {
    39         parabix::StreamSetBuffer* bitStreams = this->generateUncompressedBitStreams();
    40         return this->p2s(bitStreams);
     39    virtual kernel::StreamSet* generateUncompressedByteStream() {
     40        kernel::StreamSet* bitStreams = generateUncompressedBitStreams();
     41        return p2s(bitStreams);
    4142    }
    42     virtual parabix::StreamSetBuffer* generateUncompressedBitStreams() = 0;
    43     virtual parabix::StreamSetBuffer* decompressBitStream(parabix::StreamSetBuffer* compressedByteStream, parabix::StreamSetBuffer* compressedBitStream) = 0;
    44     virtual std::vector<parabix::StreamSetBuffer*> decompressBitStreams(parabix::StreamSetBuffer* compressedByteStream, std::vector<parabix::StreamSetBuffer*> compressedBitStreams);
    4543
    46     std::vector<parabix::StreamSetBuffer*> generateFakeStreams(
    47             const std::unique_ptr<kernel::KernelBuilder> & iBuilder,
    48             parabix::StreamSetBuffer* refStream,
    49             std::vector<unsigned> numOfStreams
    50     );
     44    virtual kernel::StreamSet* generateUncompressedBitStreams() = 0;
     45    virtual kernel::StreamSet* decompressBitStream(kernel::StreamSet* compressedByteStream, kernel::StreamSet* compressedBitStream) = 0;
     46    virtual kernel::StreamSets decompressBitStreams(kernel::StreamSet* compressedByteStream, kernel::StreamSets compressedBitStreams);
     47
     48    kernel::StreamSets generateFakeStreams(kernel::StreamSet * refStream, std::vector<unsigned> numOfStreams);
     49
    5150    unsigned calculateTwistWidth(unsigned numOfStreams);
    5251
    5352private:
     53
     54    std::unique_ptr<kernel::PipelineBuilder> makeInternalPipeline(const FunctionType type);
     55
    5456    grep::GrepRecordBreakKind mGrepRecordBreak;
    55     void initREs(re::RE * REs);
     57
     58    void initREs(re::RE * re);
    5659
    5760
     
    6265    re::RE* u8NonFinalRe;
    6366    re::RE* u8FinalRe;
    64 
     67    kernel::Scalar * match_accumulator;
     68    void * mMainMethod;
    6569
    6670    std::vector<std::ostringstream> mResultStrs;
    6771
    68     void generateCountOnlyMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    69     void generateScanMatchMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    70 
    71 
    72     llvm::Value * match_accumulator;
    73 
    74 
    75     parabix::StreamSetBuffer * linefeedStreamFromUncompressedBits(parabix::StreamSetBuffer *uncompressedBasisBits);
     72    kernel::StreamSet * linefeedStreamFromUncompressedBits(kernel::StreamSet *uncompressedBasisBits);
    7673
    7774
     
    8481
    8582
    86     std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> grep(re::RE *RE, parabix::StreamSetBuffer *byteStream,
    87                                                                            parabix::StreamSetBuffer *uncompressedBasisBits, bool ccMultiplexing = false);
    88     std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> multiplexingGrep(
    89             re::RE *RE,
    90             parabix::StreamSetBuffer *compressedByteStream,
    91             parabix::StreamSetBuffer *compressedBitStream,
    92             bool utf8CC
    93     );
    94     std::unique_ptr<cc::MultiplexedAlphabet> mpx;
     83    std::pair<kernel::StreamSet *, kernel::StreamSet *> grep(
     84            re::RE * re, kernel::StreamSet *byteStream, kernel::StreamSet *uncompressedBasisBits, bool ccMultiplexing = false);
     85
     86    std::pair<kernel::StreamSet *, kernel::StreamSet *> multiplexingGrep(
     87            re::RE * re, kernel::StreamSet *compressedByteStream, kernel::StreamSet *compressedBitStream, bool utf8CC);
    9588
    9689};
  • icGREP/icgrep-devel/icgrep/lz4/grep/lz4_grep_bitstream_generator.cpp

    r6144 r6184  
    1 
    2 
    31#include "lz4_grep_bitstream_generator.h"
    42
     3using namespace kernel;
    54
    6 namespace re { class CC; }
    7 
    8 using namespace llvm;
    9 using namespace parabix;
    10 using namespace kernel;
    11 using namespace grep;
    12 
    13 parabix::StreamSetBuffer* LZ4GrepBitStreamGenerator::generateUncompressedBitStreams() {
    14     StreamSetBuffer *compressedByteStream = nullptr, *compressedBasisBits = nullptr;
    15     std::tie(compressedByteStream, compressedBasisBits) = this->loadByteStreamAndBitStream();
    16     return this->bitStreamDecompression(compressedByteStream, compressedBasisBits);
     5StreamSet* LZ4GrepBitStreamGenerator::generateUncompressedBitStreams() {
     6    StreamSet *compressedByteStream = nullptr, *compressedBasisBits = nullptr;
     7    std::tie(compressedByteStream, compressedBasisBits) = loadByteStreamAndBitStream();
     8    return bitStreamDecompression(compressedByteStream, compressedBasisBits);
    179}
    1810
    19 parabix::StreamSetBuffer *LZ4GrepBitStreamGenerator::decompressBitStream(parabix::StreamSetBuffer *compressedByteStream,
    20                                                                          parabix::StreamSetBuffer *compressedBitStream) {
    21     auto ret = this->convertCompressedBitsStreamWithBitStreamAioApproach(compressedByteStream, {compressedBitStream});
    22     return ret[0];
     11StreamSet *LZ4GrepBitStreamGenerator::decompressBitStream(StreamSet *compressedByteStream, StreamSet * compressedBitStream) {
     12    const auto ret = convertCompressedBitsStreamWithBitStreamAioApproach(compressedByteStream, {compressedBitStream});
     13    assert (ret.size() == 1);
     14    return ret.front();
    2315}
    2416
    25 std::vector<parabix::StreamSetBuffer *>
    26 LZ4GrepBitStreamGenerator::decompressBitStreams(parabix::StreamSetBuffer *compressedByteStream,
    27                                                 std::vector<parabix::StreamSetBuffer *> compressedBitStreams) {
    28     return this->convertCompressedBitsStreamWithBitStreamAioApproach(compressedByteStream, compressedBitStreams);
     17StreamSets LZ4GrepBitStreamGenerator::decompressBitStreams(StreamSet * compressedByteStream, StreamSets compressedBitStreams) {
     18    return convertCompressedBitsStreamWithBitStreamAioApproach(compressedByteStream, compressedBitStreams);
    2919}
  • icGREP/icgrep-devel/icgrep/lz4/grep/lz4_grep_bitstream_generator.h

    r6144 r6184  
    66#include "lz4_grep_base_generator.h"
    77
    8 class LZ4GrepBitStreamGenerator: public LZ4GrepBaseGenerator {
     8class LZ4GrepBitStreamGenerator final : public LZ4GrepBaseGenerator {
     9public:
     10    LZ4GrepBitStreamGenerator(const FunctionType type) : LZ4GrepBaseGenerator(type) { }
    911protected:
    10     virtual parabix::StreamSetBuffer* generateUncompressedBitStreams() override;
    11     virtual parabix::StreamSetBuffer* decompressBitStream(parabix::StreamSetBuffer* compressedByteStream, parabix::StreamSetBuffer* compressedBitStream) override;
    12     virtual std::vector<parabix::StreamSetBuffer*> decompressBitStreams(parabix::StreamSetBuffer* compressedByteStream, std::vector<parabix::StreamSetBuffer*> compressedBitStreams) override;
     12    kernel::StreamSet * generateUncompressedBitStreams() override;
     13    kernel::StreamSet * decompressBitStream(kernel::StreamSet * compressedByteStream, kernel::StreamSet * compressedBitStream) override;
     14    kernel::StreamSets decompressBitStreams(kernel::StreamSet * compressedByteStream, kernel::StreamSets compressedBitStreams) override;
    1315};
    1416
  • icGREP/icgrep-devel/icgrep/lz4/grep/lz4_grep_bytestream_generator.cpp

    r6165 r6184  
    99#include <kernels/lz4/decompression/lz4_bytestream_decompression.h>
    1010#include <kernels/kernel_builder.h>
    11 
     11#include <kernels/pipeline_builder.h>
    1212
    1313using namespace kernel;
    14 using namespace parabix;
    1514
    16 
    17 parabix::StreamSetBuffer* LZ4GrepByteStreamGenerator::generateUncompressedByteStream() {
    18     StreamSetBuffer* compressedByteStream = this->loadByteStream();
    19     parabix::StreamSetBuffer * uncompressedByteStream = this->byteStreamDecompression(compressedByteStream);
     15StreamSet *LZ4GrepByteStreamGenerator::generateUncompressedByteStream() {
     16    StreamSet* compressedByteStream = loadByteStream();
     17    StreamSet * uncompressedByteStream = byteStreamDecompression(compressedByteStream);
    2018    return uncompressedByteStream;
    2119}
    22 StreamSetBuffer *LZ4GrepByteStreamGenerator::generateUncompressedBitStreams() {
    23     parabix::StreamSetBuffer * uncompressedByteStream = this->generateUncompressedByteStream();
    24     return this->s2p(uncompressedByteStream);
     20StreamSet *LZ4GrepByteStreamGenerator::generateUncompressedBitStreams() {
     21    StreamSet * uncompressedByteStream = generateUncompressedByteStream();
     22    return s2p(uncompressedByteStream);
    2523}
    2624
    27 parabix::StreamSetBuffer *
    28 LZ4GrepByteStreamGenerator::decompressBitStream(parabix::StreamSetBuffer *compressedByteStream,
    29                                                 parabix::StreamSetBuffer *compressedBitStream) {
    30     return this->decompressBitStreams(compressedByteStream, {compressedBitStream})[0];
     25StreamSet * LZ4GrepByteStreamGenerator::decompressBitStream(StreamSet *compressedByteStream, StreamSet *compressedBitStream) {
     26    return decompressBitStreams(compressedByteStream, {compressedBitStream})[0];
    3127}
    3228
    33 std::vector<parabix::StreamSetBuffer *>
    34 LZ4GrepByteStreamGenerator::decompressBitStreams(parabix::StreamSetBuffer *compressedByteStream,
    35                                                  std::vector<parabix::StreamSetBuffer *> compressedBitStreams) {
    36     auto & b = mPxDriver.getBuilder();
     29StreamSets LZ4GrepByteStreamGenerator::decompressBitStreams(StreamSet * compressedByteStream, StreamSets compressedBitStreams) {
    3730
    3831    std::vector<unsigned> numOfStreams(compressedBitStreams.size());
    39     std::transform(compressedBitStreams.begin(), compressedBitStreams.end(), numOfStreams.begin(), [](StreamSetBuffer* b){return b->getNumOfStreams();});
    40     unsigned totalStreamNum = std::accumulate(numOfStreams.begin(), numOfStreams.end(), 0u);
     32    std::transform(compressedBitStreams.begin(), compressedBitStreams.end(), numOfStreams.begin(),
     33                   [](StreamSet* b){
     34                        return b->getNumElements();
     35                   });
    4136
    42     unsigned twistWidth = this->calculateTwistWidth(totalStreamNum);
    43     StreamSetBuffer* twistedStream = this->twist(b, compressedBitStreams, twistWidth);
     37    const auto totalStreamNum = std::accumulate(numOfStreams.begin(), numOfStreams.end(), 0u);
    4438
    45     LZ4BlockInfo blockInfo = this->getBlockInfo(compressedByteStream);
    46     StreamSetBuffer* uncompressedTwistedStream = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, twistWidth), this->getDefaultBufferBlocks(), 1);
    47     std::vector<StreamSetBuffer*> inputStreams = {
    48             compressedByteStream,
     39    const auto twistWidth = calculateTwistWidth(totalStreamNum);
     40    StreamSet * twistedStream = twist(compressedBitStreams, twistWidth);
    4941
    50             blockInfo.isCompress,
    51             blockInfo.blockStart,
    52             blockInfo.blockEnd,
     42    LZ4BlockInfo blockInfo = getBlockInfo(compressedByteStream);
    5343
    54             twistedStream
    55     };
    56     std::vector<StreamSetBuffer*> outputStreams = {
    57             uncompressedTwistedStream
    58     };
     44    StreamSet * const uncompressedTwistedStream = mPipeline->CreateStreamSet(1, twistWidth);
    5945
    60     if (twistWidth <= 4) {
    61         Kernel* lz4I4AioK = mPxDriver.addKernelInstance<LZ4TwistDecompressionKernel>(b, twistWidth);
    62         lz4I4AioK->setInitialArguments({mFileSize});
    63         mPxDriver.makeKernelCall(lz4I4AioK, inputStreams, outputStreams);
     46    if (twistWidth <= 4) {   
     47        mPipeline->CreateKernelCall<LZ4TwistDecompressionKernel>(mFileSize, compressedByteStream, blockInfo, twistedStream, uncompressedTwistedStream);
     48    } else {
     49        mPipeline->CreateKernelCall<LZ4ByteStreamDecompressionKernel>(mFileSize, compressedByteStream, blockInfo, twistedStream, uncompressedTwistedStream);
     50    }
    6451
    65     } else {
    66         Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ByteStreamDecompressionKernel>(b, true);
    67         lz4AioK->setInitialArguments({mFileSize});
    68         mPxDriver.makeKernelCall(lz4AioK, inputStreams, outputStreams);
    69     }
    70     return this->untwist(b, uncompressedTwistedStream, twistWidth, numOfStreams);
     52    return untwist(uncompressedTwistedStream, twistWidth, numOfStreams);
    7153}
    7254
    73 parabix::StreamSetBuffer* LZ4GrepByteStreamGenerator::twist(const std::unique_ptr<kernel::KernelBuilder> &b,
    74                                                             std::vector<StreamSetBuffer*> inputStreams,
    75                                                             unsigned twistWidth
    76 ) {
    77     std::vector<unsigned> numsOfStreams(inputStreams.size());
    78     std::transform(inputStreams.begin(), inputStreams.end(), numsOfStreams.begin(), [](StreamSetBuffer* b){return b->getNumOfStreams();});
    79     unsigned totalNumOfStreams = std::accumulate(numsOfStreams.begin(), numsOfStreams.end(), 0u);
    80     assert(totalNumOfStreams <= twistWidth);
     55StreamSet * LZ4GrepByteStreamGenerator::twist(const StreamSets & inputStreams, const unsigned twistWidth) {
    8156
    8257    if (twistWidth == 1) {
    8358        for (unsigned i = 0; i < inputStreams.size(); i++) {
    84             if (inputStreams[i]->getNumOfStreams() == 1) {
     59            if (inputStreams[i]->getNumElements() == 1) {
    8560                return inputStreams[i];
    8661            }
    8762        }
    88     } else if (twistWidth == 2 || twistWidth == 4) {
    89         StreamSetBuffer* twistedCharClasses = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, twistWidth),
    90                                                                                 this->getDefaultBufferBlocks(), 1);
    91         kernel::Kernel* twistK = mPxDriver.addKernelInstance<kernel::TwistMultipleByPDEPKernel>(b, numsOfStreams, twistWidth);
    92         mPxDriver.makeKernelCall(twistK, inputStreams, {twistedCharClasses});
    93         return twistedCharClasses;
     63        llvm_unreachable("did not find input stream");
     64    }
     65
     66    StreamSet * const twistedOutput = mPipeline->CreateStreamSet(1, twistWidth);
     67    if (twistWidth == 2 || twistWidth == 4) {
     68        mPipeline->CreateKernelCall<TwistMultipleByPDEPKernel>(inputStreams, twistedOutput);
    9469    } else if (twistWidth == 8) {
    95         StreamSetBuffer * const mtxByteStream = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, twistWidth),
    96                                                                                   this->getDefaultBufferBlocks());
    97         Kernel * p2sK = mPxDriver.addKernelInstance<P2SMultipleStreamsKernel>(b, cc::BitNumbering::BigEndian, numsOfStreams);
    98         mPxDriver.makeKernelCall(p2sK, inputStreams, {mtxByteStream});
    99         return mtxByteStream;
     70        mPipeline->CreateKernelCall<P2SMultipleStreamsKernel>(inputStreams, twistedOutput, cc::BitNumbering::BigEndian);
    10071    } else {
    101         llvm::report_fatal_error("Twist: Unsupported twistWidth " + std::to_string(twistWidth));;
     72        llvm::report_fatal_error("Twist: Unsupported twistWidth " + std::to_string(twistWidth));
    10273    }
     74    return twistedOutput;
    10375}
    10476
    105 std::vector<StreamSetBuffer*> LZ4GrepByteStreamGenerator::untwist(const std::unique_ptr<kernel::KernelBuilder> &b,
    106                                                               parabix::StreamSetBuffer *inputStream,
    107                                                               unsigned twistWidth,
    108                                                               std::vector<unsigned> numOfStreams
    109 ) {
    110     unsigned totalNumOfStreams = std::accumulate(numOfStreams.begin(), numOfStreams.end(), 0u);
    111     assert(totalNumOfStreams <= twistWidth);
     77StreamSets LZ4GrepByteStreamGenerator::untwist(StreamSet * inputStream, const unsigned twistWidth, const std::vector<unsigned> & numOfStreams) {
     78    StreamSets retBuffers;
    11279    if (twistWidth == 1) {
    11380        std::vector<unsigned> fakeStreamNums;
     
    11784            }
    11885        }
    119         auto fakeStreams = this->generateFakeStreams(b, inputStream, fakeStreamNums);
    120 
    121         std::vector<StreamSetBuffer*> retBuffers;
     86        auto fakeStreams = generateFakeStreams(inputStream, fakeStreamNums);
    12287        unsigned j = 0;
    12388        for (unsigned i = 0; i < numOfStreams.size(); i++) {
    12489            if (numOfStreams[i] == 0) {
    125                 retBuffers.push_back(fakeStreams[j]);
    126                 j++;
     90                retBuffers.push_back(fakeStreams[j++]);
    12791            } else {
    12892                retBuffers.push_back(inputStream);
    12993            }
    13094        }
    131         return retBuffers;
    132     } else{
    133         std::vector<StreamSetBuffer*> retBuffers;
     95    } else{       
    13496        for (unsigned i = 0; i < numOfStreams.size(); i++) {
    135             retBuffers.push_back(mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(numOfStreams[i]), this->getDefaultBufferBlocks(), 1));
     97            retBuffers.push_back(mPipeline->CreateStreamSet(numOfStreams[i]));
    13698        }
    137 
    138 
    13999        if (twistWidth == 2 || twistWidth == 4) {
    140             kernel::Kernel* untwistK = mPxDriver.addKernelInstance<kernel::UntwistMultipleByPEXTKernel>(b, numOfStreams, twistWidth);
    141             mPxDriver.makeKernelCall(untwistK, {inputStream}, retBuffers);
    142             return retBuffers;
     100            mPipeline->CreateKernelCall<UntwistMultipleByPEXTKernel>(inputStream, retBuffers);
    143101        } else if (twistWidth == 8) {
    144             Kernel * s2pk = mPxDriver.addKernelInstance<S2PMultipleStreamsKernel>(b, cc::BitNumbering::BigEndian, true, numOfStreams);
    145             mPxDriver.makeKernelCall(s2pk, {inputStream}, retBuffers);
    146             return retBuffers;
     102            mPipeline->CreateKernelCall<S2PMultipleStreamsKernel>(inputStream, retBuffers, cc::BitNumbering::BigEndian, true);
    147103        } else {
    148104            llvm::report_fatal_error("Twist: Unsupported twistWidth " + std::to_string(twistWidth));;
    149105        }
    150106    }
     107    return retBuffers;
    151108}
    152109
  • icGREP/icgrep-devel/icgrep/lz4/grep/lz4_grep_bytestream_generator.h

    r6165 r6184  
    55#include "lz4_grep_base_generator.h"
    66
    7 class LZ4GrepByteStreamGenerator: public LZ4GrepBaseGenerator {
     7class LZ4GrepByteStreamGenerator final : public LZ4GrepBaseGenerator {
     8public:
     9    LZ4GrepByteStreamGenerator(const FunctionType type) : LZ4GrepBaseGenerator(type) { }
    810protected:
    9     virtual parabix::StreamSetBuffer* generateUncompressedByteStream() override;
    10     virtual parabix::StreamSetBuffer* generateUncompressedBitStreams() override;
    11     virtual parabix::StreamSetBuffer* decompressBitStream(parabix::StreamSetBuffer* compressedByteStream, parabix::StreamSetBuffer* compressedBitStream) override;
    12     virtual std::vector<parabix::StreamSetBuffer*> decompressBitStreams(parabix::StreamSetBuffer* compressedByteStream, std::vector<parabix::StreamSetBuffer*> compressedBitStreams) override;
    13 
     11    kernel::StreamSet * generateUncompressedByteStream() override;
     12    kernel::StreamSet * generateUncompressedBitStreams() override;
     13    kernel::StreamSet * decompressBitStream(kernel::StreamSet* compressedByteStream, kernel::StreamSet* compressedBitStream) override;
     14    kernel::StreamSets decompressBitStreams(kernel::StreamSet* compressedByteStream, kernel::StreamSets compressedBitStreams) override;
    1415private:
    15 
    16     parabix::StreamSetBuffer* twist(const std::unique_ptr<kernel::KernelBuilder> &b,
    17                                     std::vector<parabix::StreamSetBuffer*> inputStreams,
    18                                     unsigned twistWidth);
    19     std::vector<parabix::StreamSetBuffer*> untwist(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, parabix::StreamSetBuffer* inputStream, unsigned twistWidth, std::vector<unsigned> numOfStreams);
     16    kernel::StreamSet* twist(const kernel::StreamSets &inputStreams, const unsigned twistWidth);
     17    kernel::StreamSets untwist(kernel::StreamSet * inputStream, const unsigned twistWidth, const std::vector<unsigned> & numOfStreams);
    2018};
    2119
  • icGREP/icgrep-devel/icgrep/lz4/grep/lz4_grep_swizzle_generator.cpp

    r6143 r6184  
    44#include <kernels/lz4/decompression/lz4_swizzled_decompression.h>
    55#include <kernels/kernel_builder.h>
     6#include <kernels/pipeline_builder.h>
    67
    7 using namespace parabix;
    88using namespace kernel;
    99
    10 StreamSetBuffer *LZ4GrepSwizzleGenerator::generateUncompressedBitStreams() {
    11     StreamSetBuffer *compressedByteStream = nullptr, *compressedBasisBits = nullptr;
    12     std::tie(compressedByteStream, compressedBasisBits) = this->loadByteStreamAndBitStream();
    13     return this->swizzledDecompression(compressedByteStream, compressedBasisBits);
     10StreamSet *LZ4GrepSwizzleGenerator::generateUncompressedBitStreams() {
     11    StreamSet *compressedByteStream = nullptr, *compressedBasisBits = nullptr;
     12    std::tie(compressedByteStream, compressedBasisBits) = loadByteStreamAndBitStream();
     13    return swizzledDecompression(compressedByteStream, compressedBasisBits);
    1414}
    1515
    16 StreamSetBuffer *LZ4GrepSwizzleGenerator::decompressBitStream(parabix::StreamSetBuffer *compressedByteStream,
    17                                                                        parabix::StreamSetBuffer *compressedBitStream) {
    18     return this->convertCompressedBitsStreamWithSwizzledAioApproach(compressedByteStream, compressedBitStream, "combined");
     16StreamSet *LZ4GrepSwizzleGenerator::decompressBitStream(StreamSet * compressedByteStream, StreamSet *compressedBitStream) {
     17    return convertCompressedBitsStreamWithSwizzledAioApproach(compressedByteStream, compressedBitStream);
    1918}
    2019
    2120
    22 StreamSetBuffer * LZ4GrepSwizzleGenerator::convertCompressedBitsStreamWithSwizzledAioApproach(
    23         StreamSetBuffer *compressedByteStream,
    24         StreamSetBuffer *compressedBitStream,
    25         std::string prefix
     21StreamSet * LZ4GrepSwizzleGenerator::convertCompressedBitsStreamWithSwizzledAioApproach(StreamSet *compressedByteStream, StreamSet *compressedBitStream
    2622) {
    27     auto mGrepDriver = &mPxDriver;
    28     auto & b = mGrepDriver->getBuilder();
    29 
    30     LZ4BlockInfo blockInfo = this->getBlockInfo(compressedByteStream);
     23    LZ4BlockInfo blockInfo = getBlockInfo(compressedByteStream);
    3124
    3225    // Produce unswizzled bit streams
    33     StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(4),
    34                                                                       this->getDefaultBufferBlocks(), 1);
    35     Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(b, 4, 1, 1, 64, "source");
    36     mPxDriver.makeKernelCall(unSwizzleK, {compressedBitStream}, {u16Swizzle0});
     26    StreamSet * u16Swizzle0 = mPipeline->CreateStreamSet(4);
    3727
    38     StreamSetBuffer * uncompressedSwizzled0 = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(4),
    39                                                                                 this->getDefaultBufferBlocks(), 1);
     28    mPipeline->CreateKernelCall<SwizzleGenerator>(StreamSets{compressedBitStream}, StreamSets{u16Swizzle0});
    4029
     30    StreamSet * uncompressedSwizzled0 = mPipeline->CreateStreamSet(4);
    4131
    42     Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4SwizzledDecompressionKernel>(b, 4, 1, 4);
    43     lz4AioK->setInitialArguments({mFileSize});
    44     mPxDriver.makeKernelCall(
    45             lz4AioK,
    46             {
    47                     compressedByteStream,
     32    mPipeline->CreateKernelCall<LZ4SwizzledDecompressionKernel>(
     33        mFileSize,
     34        // inputs
     35        compressedByteStream, blockInfo,
     36        StreamSets{ u16Swizzle0 },
     37        // outputs
     38        StreamSets{ uncompressedSwizzled0 } );
    4839
    49                     // Block Data
    50                     blockInfo.isCompress,
    51                     blockInfo.blockStart,
    52                     blockInfo.blockEnd,
    53 
    54                     u16Swizzle0,
    55             }, {
    56                     uncompressedSwizzled0,
    57             });
    58 
    59 
    60 
    61     StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(8, 1),
    62                                                                                        this->getDefaultBufferBlocks(), 1);
    63     Kernel * unSwizzleK2 = mPxDriver.addKernelInstance<SwizzleGenerator>(b, 4, 1, 1, 64, "dst");
    64     mPxDriver.makeKernelCall(unSwizzleK2, {uncompressedSwizzled0}, {decompressionBitStream});
    65 
     40    StreamSet * decompressionBitStream = mPipeline->CreateStreamSet(8 , 1);
     41    mPipeline->CreateKernelCall<SwizzleGenerator>(StreamSets{uncompressedSwizzled0}, StreamSets{decompressionBitStream});
    6642    return decompressionBitStream;
    6743
  • icGREP/icgrep-devel/icgrep/lz4/grep/lz4_grep_swizzle_generator.h

    r6143 r6184  
    66#include "lz4_grep_base_generator.h"
    77
    8 class LZ4GrepSwizzleGenerator: public LZ4GrepBaseGenerator {
     8class LZ4GrepSwizzleGenerator final : public LZ4GrepBaseGenerator {
     9public:
     10    LZ4GrepSwizzleGenerator(const FunctionType type) : LZ4GrepBaseGenerator(type) { }
    911protected:
    10     virtual parabix::StreamSetBuffer* generateUncompressedBitStreams() override;
    11     virtual parabix::StreamSetBuffer* decompressBitStream(parabix::StreamSetBuffer* compressedByteStream, parabix::StreamSetBuffer* compressedBitStream) override;
    12 
     12    kernel::StreamSet* generateUncompressedBitStreams() override;
     13    kernel::StreamSet* decompressBitStream(kernel::StreamSet* compressedByteStream, kernel::StreamSet* compressedBitStream) override;
    1314private:
    14     parabix::StreamSetBuffer * convertCompressedBitsStreamWithSwizzledAioApproach(
    15             parabix::StreamSetBuffer *compressedByteStream,
    16             parabix::StreamSetBuffer *compressedBitStream,
    17             std::string prefix
    18     );
     15    kernel::StreamSet * convertCompressedBitsStreamWithSwizzledAioApproach(kernel::StreamSet *compressedByteStream, kernel::StreamSet *compressedBitStream);
    1916};
    2017
  • icGREP/icgrep-devel/icgrep/lz4/lz4_base_generator.cpp

    r6173 r6184  
    1414#include <kernels/lz4/decompression/lz4_twist_decompression.h>
    1515#include <kernels/lz4/decompression/lz4_bitstream_decompression.h>
     16#include <kernels/pipeline_builder.h>
    1617
    1718using namespace llvm;
    18 using namespace parabix;
    1919using namespace kernel;
    2020
    21 LZ4BaseGenerator::LZ4BaseGenerator():mPxDriver("lz4"), mLz4BlockSize(4 * 1024 * 1024), mInitBlockInfo(false) {
     21LZ4BaseGenerator::LZ4BaseGenerator()
     22: mPxDriver("lz4")
     23, mLz4BlockSize(4 * 1024 * 1024) {
    2224
    2325}
    2426
    25 StreamSetBuffer* LZ4BaseGenerator::loadByteStream() {
    26     auto & b = mPxDriver.getBuilder();
    27     StreamSetBuffer* byteStream = mPxDriver.addBuffer<ExternalBuffer>(b, b->getStreamSetTy(1, 8));
    28     kernel::Kernel * sourceK = mPxDriver.addKernelInstance<MemorySourceKernel>(b);
    29     sourceK->setInitialArguments({mInputStream, mFileSize});
    30     mPxDriver.makeKernelCall(sourceK, {}, {byteStream});
     27StreamSet* LZ4BaseGenerator::loadByteStream() {
     28    StreamSet * const byteStream = mPipeline->CreateStreamSet(1, 8);
     29    mPipeline->CreateKernelCall<MemorySourceKernel>(mInputStream, mFileSize, byteStream);
    3130    return byteStream;
    3231}
    3332
    34 StreamSetBuffer* LZ4BaseGenerator::s2p(parabix::StreamSetBuffer* byteStream) {
    35     auto & b = mPxDriver.getBuilder();
    36     StreamSetBuffer* basisBits = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(8, 1),
    37                                                              this->getDefaultBufferBlocks());
    38     Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(b, cc::BitNumbering::BigEndian);
    39     mPxDriver.makeKernelCall(s2pk, {byteStream}, {basisBits});
     33StreamSet* LZ4BaseGenerator::s2p(StreamSet* byteStream) {
     34    StreamSet * const basisBits = mPipeline->CreateStreamSet(8, 1);
     35    mPipeline->CreateKernelCall<S2PKernel>(byteStream, basisBits, cc::BitNumbering::BigEndian);
    4036    return basisBits;
    4137}
    42 parabix::StreamSetBuffer* LZ4BaseGenerator::p2s(parabix::StreamSetBuffer* bitStream) {
    43     auto & b = mPxDriver.getBuilder();
    44     StreamSetBuffer* byteStream = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 8),
    45                                                                    this->getDefaultBufferBlocks());
    46     Kernel * p2sk = mPxDriver.addKernelInstance<P2SKernel>(b, cc::BitNumbering::BigEndian);
    47     mPxDriver.makeKernelCall(p2sk, {bitStream}, {byteStream});
     38
     39StreamSet* LZ4BaseGenerator::p2s(StreamSet* bitStream) {
     40    StreamSet * const byteStream = mPipeline->CreateStreamSet(1, 8);
     41    mPipeline->CreateKernelCall<P2SKernel>(bitStream, byteStream, cc::BitNumbering::BigEndian);
    4842    return byteStream;
    4943}
    5044
    51 std::pair<parabix::StreamSetBuffer*, parabix::StreamSetBuffer*>  LZ4BaseGenerator::loadByteStreamAndBitStream() {
    52     StreamSetBuffer* byteStream = this->loadByteStream();
    53     StreamSetBuffer* basisBits = s2p(byteStream);
     45std::pair<StreamSet*, StreamSet*>  LZ4BaseGenerator::loadByteStreamAndBitStream() {
     46    StreamSet * const byteStream = loadByteStream();
     47    StreamSet * const basisBits = s2p(byteStream);
    5448    return std::make_pair(byteStream, basisBits);
    5549}
    5650
    57 LZ4BlockInfo LZ4BaseGenerator::getBlockInfo(StreamSetBuffer* compressedByteStream) {
    58     if (mInitBlockInfo) {
     51LZ4BlockInfo LZ4BaseGenerator::getBlockInfo(StreamSet* compressedByteStream) {
     52    if (mBlockInfo.isCompress) {
    5953        return mBlockInfo;
    6054    }
    6155
    62     auto & b = mPxDriver.getBuilder();
    63     LZ4BlockInfo blockInfo;
    64     blockInfo.isCompress = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 8), this->getDefaultBufferBlocks(), 1);
    65     blockInfo.blockStart = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 64),
    66                                                              this->getDefaultBufferBlocks(), 1);
    67     blockInfo.blockEnd = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 64), this->getDefaultBufferBlocks(), 1);
     56    mBlockInfo.isCompress =  mPipeline->CreateStreamSet(1, 8);
     57    mBlockInfo.blockStart = mPipeline->CreateStreamSet(1, 64);
     58    mBlockInfo.blockEnd = mPipeline->CreateStreamSet(1, 64);
    6859
    69     Kernel * blockDecoderK = mPxDriver.addKernelInstance<LZ4BlockDecoderKernel>(b);
    70     blockDecoderK->setInitialArguments({b->CreateTrunc(mHasBlockChecksum, b->getInt1Ty()), mHeaderSize, mFileSize});
    71     mPxDriver.makeKernelCall(blockDecoderK, {compressedByteStream}, {blockInfo.isCompress, blockInfo.blockStart, blockInfo.blockEnd});
     60    mPipeline->CreateKernelCall<LZ4BlockDecoderKernel>(
     61                // arguments
     62                mHasBlockChecksum, mHeaderSize, mFileSize,
     63                // inputs
     64                compressedByteStream,
     65                // outputs
     66                mBlockInfo.isCompress,
     67                mBlockInfo.blockStart,
     68                mBlockInfo.blockEnd);
    7269
    73     mInitBlockInfo = true;
    74     mBlockInfo = blockInfo;
    75     return blockInfo;
     70    return mBlockInfo;
    7671}
    7772
    78 StreamSetBuffer * LZ4BaseGenerator::byteStreamDecompression(StreamSetBuffer* compressedByteStream) {
    79     auto & b = mPxDriver.getBuilder();
    80     LZ4BlockInfo blockInfo = this->getBlockInfo(compressedByteStream);
    81 
    82     StreamSetBuffer *const decompressionByteStream =
    83             mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(1, 8),
    84                                               this->getDefaultBufferBlocks(), 1);
    85     Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ByteStreamDecompressionKernel>(b);
    86     lz4AioK->setInitialArguments({mFileSize});
    87     mPxDriver.makeKernelCall(
    88             lz4AioK,
    89             {
    90                     compressedByteStream,
    91 
    92                     // Block Data
    93                     blockInfo.isCompress,
    94                     blockInfo.blockStart,
    95                     blockInfo.blockEnd
    96             }, {
    97                     decompressionByteStream
    98             });
    99 
     73StreamSet * LZ4BaseGenerator::byteStreamDecompression(StreamSet* compressedByteStream) {
     74    LZ4BlockInfo blockInfo = getBlockInfo(compressedByteStream);
     75    StreamSet * const decompressionByteStream = mPipeline->CreateStreamSet(1, 8);
     76    mPipeline->CreateKernelCall<LZ4ByteStreamDecompressionKernel>(mFileSize, compressedByteStream, blockInfo, nullptr, decompressionByteStream );
    10077    return decompressionByteStream;
    10178}
    10279
    103 StreamSetBuffer * LZ4BaseGenerator::swizzledDecompression(
    104         StreamSetBuffer* compressedByteStream,
    105         StreamSetBuffer* compressedBasisBits
    106 ) {
    107     auto & b = mPxDriver.getBuilder();
    108     LZ4BlockInfo blockInfo = this->getBlockInfo(compressedByteStream);
     80StreamSet * LZ4BaseGenerator::swizzledDecompression(StreamSet* compressedByteStream, StreamSet* compressedBasisBits) {
     81    LZ4BlockInfo blockInfo = getBlockInfo(compressedByteStream);
    10982
    11083    // Produce unswizzled bit streams
    111     StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(4),
    112                                                                       this->getDefaultBufferBlocks(), 1);
    113     StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(4),
    114                                                                       this->getDefaultBufferBlocks(), 1);
    115     Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(b, 8, 2, 1, 64, "source");
    116     mPxDriver.makeKernelCall(unSwizzleK, {compressedBasisBits}, {u16Swizzle0, u16Swizzle1});
     84    StreamSet * const u16Swizzle0 = mPipeline->CreateStreamSet(4);
     85    StreamSet * const u16Swizzle1 = mPipeline->CreateStreamSet(4);
     86    mPipeline->CreateKernelCall<SwizzleGenerator>(StreamSets{compressedBasisBits}, StreamSets{u16Swizzle0, u16Swizzle1});
    11787
     88    StreamSet * const uncompressedSwizzled0 = mPipeline->CreateStreamSet(4);
     89    StreamSet * const uncompressedSwizzled1 = mPipeline->CreateStreamSet(4);
    11890
     91    mPipeline->CreateKernelCall<LZ4SwizzledDecompressionKernel>(
     92        mFileSize,
     93        // inputs
     94        compressedByteStream, blockInfo,
     95        StreamSets{ u16Swizzle0, u16Swizzle1 },
     96        // outputs
     97        StreamSets{ uncompressedSwizzled0, uncompressedSwizzled1 } );
    11998
    120     StreamSetBuffer * uncompressedSwizzled0 = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(4),
    121                                                                                 this->getDefaultBufferBlocks(), 1);
    122     StreamSetBuffer * uncompressedSwizzled1 = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(4),
    123                                                                                 this->getDefaultBufferBlocks(), 1);
     99    StreamSet * const decompressionBitStream = mPipeline->CreateStreamSet(8);
    124100
    125 
    126     Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4SwizzledDecompressionKernel>(b, 4, 2, 4);
    127     lz4AioK->setInitialArguments({mFileSize});
    128     mPxDriver.makeKernelCall(
    129             lz4AioK,
    130             {
    131                     compressedByteStream,
    132 
    133                     blockInfo.isCompress,
    134                     blockInfo.blockStart,
    135                     blockInfo.blockEnd,
    136 
    137                     u16Swizzle0,
    138                     u16Swizzle1
    139             }, {
    140                     uncompressedSwizzled0,
    141                     uncompressedSwizzled1
    142             });
    143 
    144 
    145     StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(8, 1),
    146                                                                                        this->getDefaultBufferBlocks(), 1);
    147 
    148     Kernel * unSwizzleK2 = mPxDriver.addKernelInstance<SwizzleGenerator>(b, 8, 1, 2);
    149     mPxDriver.makeKernelCall(unSwizzleK2, {uncompressedSwizzled0, uncompressedSwizzled1}, {decompressionBitStream});
     101    mPipeline->CreateKernelCall<SwizzleGenerator>(StreamSets{uncompressedSwizzled0, uncompressedSwizzled1}, StreamSets{decompressionBitStream});
    150102
    151103    return decompressionBitStream;
    152104}
    153105
    154 StreamSetBuffer * LZ4BaseGenerator::bitStreamDecompression(
    155         parabix::StreamSetBuffer* compressedByteStream,
    156         parabix::StreamSetBuffer* compressedBasisBits
    157 ) {
    158     return this->convertCompressedBitsStreamWithBitStreamAioApproach(compressedByteStream, {compressedBasisBits})[0];
     106StreamSet * LZ4BaseGenerator::bitStreamDecompression(StreamSet* compressedByteStream, StreamSet * compressedBasisBits) {
     107    return convertCompressedBitsStreamWithBitStreamAioApproach(compressedByteStream, {compressedBasisBits})[0];
    159108}
    160109
    161 std::vector<StreamSetBuffer*> LZ4BaseGenerator::convertCompressedBitsStreamWithBitStreamAioApproach(
    162         parabix::StreamSetBuffer* compressedByteStream,
    163         std::vector<StreamSetBuffer*> compressedBitStreams
    164 ) {
    165     auto & b = mPxDriver.getBuilder();
    166 
    167     LZ4BlockInfo blockInfo = this->getBlockInfo(compressedByteStream);
    168 
    169     std::vector<StreamSetBuffer *> inputStreams = {
    170             compressedByteStream,
    171 
    172             blockInfo.isCompress,
    173             blockInfo.blockStart,
    174             blockInfo.blockEnd,
    175     };
    176 
    177     std::vector<StreamSetBuffer *> outputStream;
    178     std::vector<unsigned> numbersOfStreams;
    179 
    180     for (unsigned i = 0; i < compressedBitStreams.size(); i++) {
    181         unsigned numOfStreams = compressedBitStreams[i]->getNumOfStreams();
    182         numbersOfStreams.push_back(numOfStreams);
    183         inputStreams.push_back(compressedBitStreams[i]);
    184         outputStream.push_back(mPxDriver.addBuffer<StaticBuffer>(b, b->getStreamSetTy(numOfStreams, 1),
    185                                                                  this->getDefaultBufferBlocks(), 1));
     110StreamSets LZ4BaseGenerator::convertCompressedBitsStreamWithBitStreamAioApproach(StreamSet * compressedByteStream, StreamSets compressedBitStreams) {
     111    LZ4BlockInfo blockInfo = getBlockInfo(compressedByteStream);
     112    StreamSets outputStreams;
     113    outputStreams.reserve(compressedBitStreams.size());
     114    for (const auto & bitStream : compressedBitStreams) {
     115        outputStreams.push_back(mPipeline->CreateStreamSet(bitStream->getNumElements(), bitStream->getFieldWidth()));
    186116    }
    187 
    188     Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4BitStreamDecompressionKernel>(b, numbersOfStreams);
    189     lz4AioK->setInitialArguments({mFileSize});
    190     mPxDriver.makeKernelCall(lz4AioK, inputStreams, outputStream);
    191 
    192     return outputStream;
     117    mPipeline->CreateKernelCall<LZ4BitStreamDecompressionKernel>(mFileSize, compressedByteStream, blockInfo, compressedBitStreams, outputStreams);
     118    return outputStreams;
    193119}
    194120
     
    198124
    199125unsigned LZ4BaseGenerator::getDefaultBufferBlocks() {
    200     return this->getBlockSizeBufferBlocks() * 2; // buffer 2 LZ4 Block By Default
     126    return getBlockSizeBufferBlocks() * 2; // buffer 2 LZ4 Block By Default
    201127}
     128
     129LZ4BaseGenerator::~LZ4BaseGenerator() {
     130
     131}
  • icGREP/icgrep-devel/icgrep/lz4/lz4_base_generator.h

    r6150 r6184  
    1212
    1313#include <toolchain/toolchain.h>
    14 
    1514#include <toolchain/cpudriver.h>
    1615#include <string>
    1716
     17namespace kernel { class StreamSet; }
     18namespace kernel { class Scalar; }
     19
    1820struct LZ4BlockInfo {
    19     parabix::StreamSetBuffer* blockStart;
    20     parabix::StreamSetBuffer* blockEnd;
    21     parabix::StreamSetBuffer* isCompress;
     21    kernel::StreamSet * blockStart;
     22    kernel::StreamSet * blockEnd;
     23    kernel::StreamSet * isCompress;
     24
     25    LZ4BlockInfo() : blockStart(nullptr), blockEnd(nullptr), isCompress(nullptr) { }
    2226};
    2327
     
    2529public:
    2630    LZ4BaseGenerator();
    27     virtual ~LZ4BaseGenerator(){};
     31    virtual ~LZ4BaseGenerator();
    2832protected:
    2933    //// Member Function
    3034    // Input
    31     parabix::StreamSetBuffer* loadByteStream();
    32     std::pair<parabix::StreamSetBuffer*, parabix::StreamSetBuffer*> loadByteStreamAndBitStream();
     35    kernel::StreamSet * loadByteStream();
     36    std::pair<kernel::StreamSet*, kernel::StreamSet*> loadByteStreamAndBitStream();
    3337
    3438    // Stream Conversion
    35     parabix::StreamSetBuffer* s2p(parabix::StreamSetBuffer* byteStream);
    36     parabix::StreamSetBuffer* p2s(parabix::StreamSetBuffer* bitStream);
     39    kernel::StreamSet* s2p(kernel::StreamSet* byteStream);
     40    kernel::StreamSet* p2s(kernel::StreamSet* bitStream);
    3741
    3842
    3943            // LZ4 Decoder
    40     LZ4BlockInfo getBlockInfo(parabix::StreamSetBuffer* compressedByteStream);
    41     parabix::StreamSetBuffer * byteStreamDecompression(
    42             parabix::StreamSetBuffer* compressedByteStream
    43     );
    44     parabix::StreamSetBuffer * swizzledDecompression(
    45             parabix::StreamSetBuffer* compressedByteStream,
    46             parabix::StreamSetBuffer* compressedBasisBits
    47     );
    48     parabix::StreamSetBuffer * bitStreamDecompression(
    49             parabix::StreamSetBuffer* compressedByteStream,
    50             parabix::StreamSetBuffer* compressedBasisBits
    51     );
     44    LZ4BlockInfo getBlockInfo(kernel::StreamSet* compressedByteStream);
    5245
    53     std::vector<parabix::StreamSetBuffer*> convertCompressedBitsStreamWithBitStreamAioApproach(
    54             parabix::StreamSetBuffer* compressedByteStream,
    55             std::vector<parabix::StreamSetBuffer*> compressedBitStreams
    56     );
     46    kernel::StreamSet * byteStreamDecompression(kernel::StreamSet* compressedByteStream);
    5747
     48    kernel::StreamSet * swizzledDecompression(kernel::StreamSet* compressedByteStream, kernel::StreamSet* compressedBasisBits);
    5849
     50    kernel::StreamSet * bitStreamDecompression(kernel::StreamSet* compressedByteStream, kernel::StreamSet* compressedBasisBits);
     51
     52    kernel::StreamSets convertCompressedBitsStreamWithBitStreamAioApproach(kernel::StreamSet* compressedByteStream, kernel::StreamSets compressedBitStreams);
    5953
    6054    // BufferSize related Helper Function
     
    6559    //// Data Member
    6660    // Driver
    67     ParabixDriver mPxDriver;
     61    CPUDriver mPxDriver;
     62    std::unique_ptr<kernel::PipelineBuilder> mPipeline;
    6863
    6964    // Runtime Arguments
    70     llvm::Value * mInputStream;
    71     llvm::Value * mHeaderSize;
    72     llvm::Value * mFileSize;
    73     llvm::Value * mHasBlockChecksum;
     65    kernel::Scalar * mInputStream;
     66    kernel::Scalar * mHeaderSize;
     67    kernel::Scalar * mFileSize;
     68    kernel::Scalar * mHasBlockChecksum;
    7469
    75     unsigned mLz4BlockSize;
     70    const unsigned mLz4BlockSize;
    7671
    77     bool mInitBlockInfo;
    7872    LZ4BlockInfo mBlockInfo;
    7973};
  • icGREP/icgrep-devel/icgrep/lz4/lz4_decompression_generator.cpp

    r6137 r6184  
    66#include <boost/iostreams/device/mapped_file.hpp>
    77#include <kernels/kernel_builder.h>
    8 #include <kernels/p2s_kernel.h>
    98#include <kernels/stdout_kernel.h>
    10 
    11 
     9#include <kernels/pipeline_builder.h>
    1210#include <llvm/Support/raw_ostream.h>
    1311
     
    1513
    1614using namespace llvm;
    17 using namespace parabix;
    1815using namespace kernel;
    1916
    20 LZ4DecompressionGenerator::LZ4DecompressionGenerator():LZ4BaseGenerator() {
     17LZ4DecompressionGenerator::LZ4DecompressionGenerator()
     18: LZ4BaseGenerator() {
     19    mPipeline = std::move(makeInternalPipeline());
     20}
     21
     22inline std::unique_ptr<kernel::PipelineBuilder> LZ4DecompressionGenerator::makeInternalPipeline() {
     23    Bindings inputs;
     24
     25    auto & b = mPxDriver.getBuilder();
     26
     27    Type * const int8PtrTy = b->getInt8PtrTy();
     28    Type * const sizeTy = b->getSizeTy();
     29    Type * const boolTy = b->getIntNTy(sizeof(bool) * 8);
     30
     31    inputs.emplace_back(int8PtrTy, "input");
     32    inputs.emplace_back(sizeTy, "headerSize");
     33    inputs.emplace_back(sizeTy, "fileSize");
     34    inputs.emplace_back(boolTy, "hasBlockChecksum");
     35    inputs.emplace_back(int8PtrTy, "outputFile");
     36
     37    return mPxDriver.makePipeline(inputs, Bindings{});
    2138}
    2239
     
    3956    boost::iostreams::mapped_file_source mappedFile;
    4057    mappedFile.open(inputFileName, lz4Frame.getBlocksLength() + lz4Frame.getBlocksStart());
    41     char *fileBuffer = const_cast<char *>(mappedFile.data());
     58    char * fileBuffer = const_cast<char *>(mappedFile.data());
    4259
    43     this->generateDecompressionPipeline(outputFileName);
     60    auto main = generateDecompressionPipeline();
    4461
    45     auto main = this->getMainFunc();
    46     main(fileBuffer, lz4Frame.getBlocksStart(), lz4Frame.getBlocksStart() + lz4Frame.getBlocksLength(), lz4Frame.hasBlockChecksum());
     62    main(fileBuffer, lz4Frame.getBlocksStart(), lz4Frame.getBlocksStart() + lz4Frame.getBlocksLength(), lz4Frame.hasBlockChecksum(), outputFileName.c_str());
    4763    mappedFile.close();
    4864    return 0;
    4965}
    5066
    51 
    52 MainFunctionType LZ4DecompressionGenerator::getMainFunc() {
    53     return reinterpret_cast<MainFunctionType>(mPxDriver.getMain());
     67MainFunctionType LZ4DecompressionGenerator::generateDecompressionPipeline() {
     68    StreamSet * compressedByteStream = loadByteStream();
     69    StreamSet * uncompressedByteStream = byteStreamDecompression(compressedByteStream);
     70    Scalar * outputFileName = mPipeline->getInputScalar("outputFile");
     71    mPipeline->CreateKernelCall<FileSink>(outputFileName, uncompressedByteStream);
     72    return reinterpret_cast<MainFunctionType>(mPipeline->compile());
    5473}
    55 
    56 void LZ4DecompressionGenerator::generateDecompressionPipeline(const std::string &outputFile) {
    57     auto & b = mPxDriver.getBuilder();
    58     this->generateMainFunc(b);
    59 
    60 
    61 
    62     StreamSetBuffer* compressedByteStream = this->loadByteStream();
    63     StreamSetBuffer* uncompressedByteStream = this->byteStreamDecompression(compressedByteStream);
    64 
    65     Kernel * outK = mPxDriver.addKernelInstance<FileSink>(b, 8);
    66     outK->setInitialArguments({b->GetString(outputFile)});
    67     mPxDriver.makeKernelCall(outK, {uncompressedByteStream}, {});
    68 
    69     mPxDriver.generatePipelineIR();
    70     mPxDriver.deallocateBuffers();
    71 
    72     b->CreateRetVoid();
    73 
    74     mPxDriver.finalizeObject();
    75 }
    76 
    77 
    78 void LZ4DecompressionGenerator::generateMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    79     Module * M = iBuilder->getModule();
    80     Type * const sizeTy = iBuilder->getSizeTy();
    81     Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
    82     Type * const voidTy = iBuilder->getVoidTy();
    83     Type * const inputType = iBuilder->getInt8PtrTy();
    84 
    85     Function * const main = cast<Function>(M->getOrInsertFunction("Main", voidTy, inputType, sizeTy, sizeTy, boolTy, nullptr));
    86     main->setCallingConv(CallingConv::C);
    87     Function::arg_iterator args = main->arg_begin();
    88     mInputStream = &*(args++);
    89     mInputStream->setName("input");
    90 
    91     mHeaderSize = &*(args++);
    92     mHeaderSize->setName("mHeaderSize");
    93 
    94     mFileSize = &*(args++);
    95     mFileSize->setName("mFileSize");
    96 
    97     mHasBlockChecksum = &*(args++);
    98     mHasBlockChecksum->setName("mHasBlockChecksum");
    99     // TODO for now, we do not handle blockCheckSum
    100     mHasBlockChecksum = iBuilder->getInt1(false);
    101 
    102     iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
    103 }
  • icGREP/icgrep-devel/icgrep/lz4/lz4_decompression_generator.h

    r6137 r6184  
    2020
    2121
    22 typedef void (*MainFunctionType)(char * byte_data, size_t headerSize, size_t filesize, bool hasBlockChecksum);
     22typedef void (*MainFunctionType)(char * byte_data, size_t headerSize, size_t filesize, bool hasBlockChecksum, const char * outputFileName);
    2323
    2424
    2525
    26 class LZ4DecompressionGenerator: public LZ4BaseGenerator {
     26class LZ4DecompressionGenerator final : public LZ4BaseGenerator {
    2727
    2828public:
    2929    LZ4DecompressionGenerator();
    3030
    31     int decompress(std::string&& inputFileName, std::string&& outputFileName, bool overwriteOutput);
     31    int decompress(std::string && inputFileName, std::string&& outputFileName, bool overwriteOutput);
    3232
    33     MainFunctionType getMainFunc();
     33private:
    3434
    35     void generateDecompressionPipeline(const std::string &outputFile);
    36 protected:
    37     void generateMainFunc(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
     35    std::unique_ptr<kernel::PipelineBuilder> makeInternalPipeline();
     36
     37    MainFunctionType generateDecompressionPipeline();
    3838
    3939};
  • icGREP/icgrep-devel/icgrep/lz4/lz4_frame_decoder.cpp

    r6137 r6184  
    2222
    2323LZ4FrameDecoder::LZ4FrameDecoder(const std::string & filename) {
    24     this->init(filename);
     24    init(filename);
    2525}
    2626
    2727void LZ4FrameDecoder::init(const std::string &filename) {
    28     const size_t minFilesize = this->getMinFileSize();
     28    const size_t minFilesize = getMinFileSize();
    2929
    3030    std::ifstream f(filename, std::ios::binary | std::ios::ate);
     
    5151
    5252    mBlocksStart = 4 + mFDLength;       // MagicNb & FD
    53     long long blocksEnd = mFilesize - this->endMarkSize() - (mHasContentChecksum ? this->contentChecksumSize() : 0);      // EndMark & checksum
     53    long long blocksEnd = mFilesize - endMarkSize() - (mHasContentChecksum ? contentChecksumSize() : 0);      // EndMark & checksum
    5454    if (blocksEnd > 0 && mBlocksStart <= static_cast<size_t>(blocksEnd)) {
    5555        mBlocksLength = blocksEnd - mBlocksStart;
     
    5959
    6060bool LZ4FrameDecoder::decodeFrameDescriptor(std::ifstream & f) {
    61     const size_t minFilesize = this->getMinFileSize();
     61    const size_t minFilesize = getMinFileSize();
    6262
    6363    char flag, blockDescriptor, headerChecksum;
     
    7474
    7575    if (mFilesize < minFilesize +
    76             (mHasContentChecksum ? this->contentChecksumSize() : 0) +
     76            (mHasContentChecksum ? contentChecksumSize() : 0) +
    7777            (hasContentSize ? 8 : 0)
    7878       ) {
  • icGREP/icgrep-devel/icgrep/lz4/lz4_frame_decoder.h

    r6137 r6184  
    6161        return 4 +         // Magic number
    6262               3 +         // Frame descriptor (3-11 bytes)
    63                this->endMarkSize();          // End mark
     63               endMarkSize();          // End mark
    6464    }
    6565};
Note: See TracChangeset for help on using the changeset viewer.