Changeset 6066


Ignore:
Timestamp:
Jun 7, 2018, 11:59:41 AM (4 months ago)
Author:
xwa163
Message:

fix some warning in lz4 related kernels

Location:
icGREP/icgrep-devel/icgrep
Files:
11 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/bitstream_pdep_kernel.cpp

    r6055 r6066  
    5656        std::vector<PHINode*> bufferPhiArray(mNumberOfStream, NULL);
    5757        std::vector<Value*> bufferArray(mNumberOfStream, NULL);
    58         for (int iStreamIndex = 0; iStreamIndex < mNumberOfStream; iStreamIndex++) {
     58        for (unsigned iStreamIndex = 0; iStreamIndex < mNumberOfStream; iStreamIndex++) {
    5959            PHINode * const bufferPhi = b->CreatePHI(b->getIntNTy(pdepWidth), 2);
    6060            bufferPhi->addIncoming(Constant::getNullValue(b->getIntNTy(pdepWidth)), entry);
     
    101101
    102102            std::vector<PHINode * > updatedBufferArray(mNumberOfStream, NULL);
    103             for (int iStreamIndex = 0; iStreamIndex < mNumberOfStream; iStreamIndex++) {
     103            for (unsigned iStreamIndex = 0; iStreamIndex < mNumberOfStream; iStreamIndex++) {
    104104                Value* buffer = bufferArray[iStreamIndex];
    105105                PHINode * const updatedBuffer = b->CreatePHI(buffer->getType(), 2);
     
    114114            Value * const swizzleOffset = b->CreateURem(updatedSourceOffset, PDEP_WIDTH);
    115115
    116             for (int iStreamIndex = 0; iStreamIndex < mNumberOfStream; iStreamIndex++) {
     116            for (unsigned iStreamIndex = 0; iStreamIndex < mNumberOfStream; iStreamIndex++) {
    117117                Value * const swizzleBlock = b->CreateBlockAlignedLoad(b->getInputStreamBlockPtr("source", b->getSize(iStreamIndex), blockOffset));
    118118
     
    147147            // Apply PDEP to each element of the combined swizzle using the current PDEP mask
    148148            Value * const mask = b->CreateExtractElement(selectors, i);
    149             for (int iStreamIndex = 0; iStreamIndex < mNumberOfStream; iStreamIndex++) {
     149            for (unsigned iStreamIndex = 0; iStreamIndex < mNumberOfStream; iStreamIndex++) {
    150150                Value * source_field = bufferArray[iStreamIndex];
    151151
     
    161161        }
    162162
    163         for (int iStreamIndex = 0; iStreamIndex < mNumberOfStream; iStreamIndex++) {
     163        for (unsigned iStreamIndex = 0; iStreamIndex < mNumberOfStream; iStreamIndex++) {
    164164            // Store the result
    165165            Value * const outputStreamPtr = b->getOutputStreamBlockPtr("output", b->getSize(iStreamIndex), strideIndex);
     
    170170        sourceOffsetPhi->addIncoming(sourceOffset, finishedBlock);
    171171        bufferSizePhi->addIncoming(bufferSize, finishedBlock);
    172         for (int iStreamIndex = 0; iStreamIndex < mNumberOfStream; iStreamIndex++) {
     172        for (unsigned iStreamIndex = 0; iStreamIndex < mNumberOfStream; iStreamIndex++) {
    173173            bufferPhiArray[iStreamIndex]->addIncoming(bufferArray[iStreamIndex], finishedBlock);
    174174        }
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_bitstream_match_copy_kernel.cpp

    r6055 r6066  
    101101        ConstantInt * const INT64_1 = b->getInt64(1);
    102102
    103         Value * BITBLOCK_0 = b->CreateBitCast(ConstantInt::get(b->getIntNTy(b->getBitBlockWidth()), 0), b->getBitBlockType());
    104 
    105103        // ---- Type
    106104        Type* BITBLOCK_TYPE = b->getBitBlockType();
     
    154152        Value* currentInitM0 = b->CreateLoad(b->CreateGEP(m0MarkerBasePtr, dataBlockIndex));
    155153        vector<Value*> initSourceData = this->loadAllI64BitStreamValues(b, sourceStreamPtr, dataBlockIndex);
    156 
    157         Value* bitBlockIndex = b->CreateUDiv(dataBlockIndex, b->getSize(4));
    158         Value* extractIndex = b->CreateURem(dataBlockIndex, b->getSize(4));
    159154
    160155        BasicBlock* carryBitProcessBlock = b->CreateBasicBlock("CarryBitProcessBlock");
     
    325320        phiSourceDataAccessable->addIncoming(phiTargetMatchOffset, doMatchCopyBlock);
    326321        vector<PHINode*> phiPdepSourceData;
    327         for (int i = 0; i < mNumberOfStreams; i++) {
     322        for (unsigned i = 0; i < mNumberOfStreams; i++) {
    328323            PHINode* v = b->CreatePHI(b->getInt64Ty(), 2);
    329324            v->addIncoming(pdepSourceData[i], doMatchCopyBlock);
     
    334329        // ---- doubleSourceDataBody
    335330        b->SetInsertPoint(doubleSourceDataBody);
    336         for (int i = 0; i < mNumberOfStreams; i++) {
     331        for (unsigned i = 0; i < mNumberOfStreams; i++) {
    337332            PHINode* v = phiPdepSourceData[i];
    338333            Value* newValue = b->CreateOr(v, b->CreateShl(v, phiSourceDataAccessable));
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_bytestream_aio.cpp

    r6064 r6066  
    104104    LZ4ByteStreamAioKernel::generateProcessCompressedBlock(const std::unique_ptr<KernelBuilder> &b, llvm::Value *lz4BlockStart,
    105105                                                 llvm::Value *lz4BlockEnd) {
    106         BasicBlock* entryBlock = b->GetInsertBlock();
    107 
    108106        Value* isTerminal = b->CreateICmpEQ(lz4BlockEnd, b->getScalarField("fileSize"));
    109107        b->setTerminationSignal(isTerminal);
     
    397395                                                                            llvm::Value *currentTokenLocalPos
    398396    ) {
    399         Value* SIZE_1 = b->getSize(1);
    400397        Value* BYTE_F0 = b->getInt8(0xf0);
    401398        Value* shouldExtendLiteral = b->CreateICmpEQ(b->CreateAnd(tokenValue, BYTE_F0), BYTE_F0);
     
    460457                                                                          llvm::Value *blockPosBase
    461458    ) {
    462         Value* SIZE_1 = b->getSize(1);
    463459        Value* BYTE_0F = b->getInt8(0x0f);
    464460        Value* shouldExtendMatch = b->CreateICmpEQ(b->CreateAnd(tokenValue, BYTE_0F), BYTE_0F);
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_index_builder_new.cpp

    r6059 r6066  
    167167//        b->CallPrintInt("phiCursorValue", phiCursorValue);
    168168//        b->CallPrintInt("literalMasks", literalMasks);
    169         Value* matchOffsetMarkers = accelerationRet.second;
     169//        Value* matchOffsetMarkers = accelerationRet.second;
    170170
    171171        // The pending data block index (for compression space) will be the same as the block index of current acceleration block
     
    325325        );
    326326
    327         Value* extenderPtrBase = b->CreatePointerCast(b->getRawInputPointer("extender", SIZE_0), INT_ACCELERATION_TYPE->getPointerTo());
    328         Value* actualPtr = b->CreateGEP(
    329                 b->CreatePointerCast(b->getRawInputPointer("extender", SIZE_0), INT_ACCELERATION_TYPE->getPointerTo()),
    330                 b->CreateUDiv(maskedTokenPos, SIZE_ACCELERATION_WIDTH)
    331         );
     327//        Value* extenderPtrBase = b->CreatePointerCast(b->getRawInputPointer("extender", SIZE_0), INT_ACCELERATION_TYPE->getPointerTo());
     328//        Value* actualPtr = b->CreateGEP(
     329//                b->CreatePointerCast(b->getRawInputPointer("extender", SIZE_0), INT_ACCELERATION_TYPE->getPointerTo()),
     330//                b->CreateUDiv(maskedTokenPos, SIZE_ACCELERATION_WIDTH)
     331//        );
    332332        Value* initTokenMarker = b->CreateShl(INT_ACCELERATION_1, tokenPosRem);
    333333        BasicBlock* accelerationProcessBlock = b->CreateBasicBlock("accelerationProcessBlock");
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_parallel_bytestream_aio.cpp

    r6065 r6066  
    5454    Value* LZ4ParallelByteStreamAioKernel::generateSimdAcceleration(const std::unique_ptr<KernelBuilder> &b, llvm::Value *beginTokenPosVec,
    5555                                  llvm::Value *blockEndVec) {
     56        /*
    5657        // TODO incomplete
    5758
     
    106107
    107108
    108 
     109*/
    109110        return beginTokenPosVec;    //TODO
     111
    110112    }
    111113
     
    117119
    118120        // Constant
    119         ConstantInt* SIZE_0 = b->getSize(0);
    120         ConstantInt* SIZE_1 = b->getSize(1);
    121         ConstantInt* BYTE_FF = b->getInt8(0xff);
    122121        Value* BIT_BLOCK_0 = ConstantVector::getNullValue(b->getBitBlockType());
    123122        Value* BIT_BLOCK_1 = b->simd_fill(SIMD_WIDTH, b->getIntN(SIMD_WIDTH, 0x1));
     
    126125        Value* BIT_BLOCK_FF = b->simd_fill(SIMD_WIDTH, b->getIntN(SIMD_WIDTH, 0xff));
    127126        Value* BIT_BLOCK_FFFF = b->simd_fill(SIMD_WIDTH, b->getIntN(SIMD_WIDTH, 0xffff));
    128         Type* i4Ty = b->getIntNTy(4);
    129127        Type* INT_BIT_BLOCK_TY = b->getIntNTy(b->getBitBlockWidth());
    130128        Constant* INT_BIT_BLOCK_TY_0 = b->getIntN(b->getBitBlockWidth(), 0);
     
    688686    void LZ4ParallelByteStreamAioKernel::handleSimdMatchCopy(const std::unique_ptr<KernelBuilder> &b, llvm::Value* matchOffsetVec, llvm::Value* matchLengthVec, llvm::Value* outputPosVec) {
    689687        // TODO use memcpy first
    690         Value* l = b->CreateExtractElement(matchLengthVec, (uint64_t)0);
    691         Value* shouldPrint = b->CreateICmpNE(l, b->getSize(0));
     688//        Value* l = b->CreateExtractElement(matchLengthVec, (uint64_t)0);
     689//        Value* shouldPrint = b->CreateICmpNE(l, b->getSize(0));
    692690//        b->CallPrintIntCond("matchOffset", b->CreateExtractElement(matchOffsetVec, (uint64_t)0), shouldPrint);
    693691//        b->CallPrintIntCond("matchLength", l, shouldPrint);
    694         BasicBlock* entryBlock = b->GetInsertBlock();
    695692        Value* outputCapacity = b->getCapacity("outputStream");
    696693        Value* outputBasePtr = b->CreatePointerCast(b->getRawOutputPointer("outputStream", b->getSize(0)), b->getInt8PtrTy());
     
    743740
    744741    void LZ4ParallelByteStreamAioKernel::handleSimdLiteralCopy(const std::unique_ptr<KernelBuilder> &b, llvm::Value* literalStartVec, llvm::Value* literalLengthVec, llvm::Value* outputPosVec) {
    745         Value* l = b->CreateExtractElement(literalLengthVec, (uint64_t)0);
    746 
    747742        Value* outputCapacity = b->getCapacity("outputStream");
    748743        Value* outputPosRemVec = b->simd_and(outputPosVec, b->simd_fill(SIMD_WIDTH, b->simd_not(b->CreateNeg(outputCapacity))));
    749744        // TODO use memcpy first
    750 
    751         BasicBlock* entryBlock = b->GetInsertBlock();
    752 
    753745
    754746        Value* inputBasePtr = b->CreatePointerCast(b->getRawInputPointer("byteStream", b->getSize(0)), b->getInt8PtrTy());
     
    869861    llvm::Value* LZ4ParallelByteStreamAioKernel::simdFetchByteDataByGather(const std::unique_ptr<KernelBuilder> &b, llvm::Value* basePtr, llvm::Value* offsetVec, llvm::Value* mask) {
    870862        Value* BIT_BLOCK_FF = b->simd_fill(SIMD_WIDTH, b->getIntN(SIMD_WIDTH, 0xff));
    871         Function *gatherFunc = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_avx2_gather_d_q_256);
     863//        Function *gatherFunc = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_avx2_gather_d_q_256);
    872864        Function *gatherFunc2 = Intrinsic::getDeclaration(b->getModule(), Intrinsic::x86_avx2_gather_d_d);
    873865
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_swizzled_aio.cpp

    r6059 r6066  
    132132    LZ4SwizzledAioKernel::generateProcessCompressedBlock(const std::unique_ptr<KernelBuilder> &b, llvm::Value *lz4BlockStart,
    133133                                                           llvm::Value *lz4BlockEnd) {
    134         BasicBlock* entryBlock = b->GetInsertBlock();
    135 
    136134        Value* isTerminal = b->CreateICmpEQ(lz4BlockEnd, b->getScalarField("fileSize"));
    137135        b->setTerminationSignal(isTerminal);
     
    190188
    191189        Type* INT_ACCELERATION_TYPE = b->getIntNTy(ACCELERATION_WIDTH);
    192         Type* BITBLOCK_PTR_TYPE = b->getBitBlockType()->getPointerTo();
    193190
    194191        Value* INT_ACCELERATION_0 = b->getIntN(ACCELERATION_WIDTH, 0);
     
    802799        Value* SIZE_1 = b->getSize(1);
    803800
    804 
    805         Value* inputStreamSize = b->getCapacity("sourceStreamSet0");
     801//        Value* inputStreamSize = b->getCapacity("sourceStreamSet0");
    806802        Value* outputStreamSize = b->getCapacity("outputStreamSet0");
    807803
     
    809805
    810806
    811         BasicBlock* entryBlock = b->GetInsertBlock();
    812807        Value* outputPos = b->getScalarField("outputPos");
    813808
     
    817812
    818813        vector<Value*> outputStreamBasePtrs; // <4 * i64>*
    819         for (int i = 0; i < mStreamSize; i++) {
     814        for (unsigned i = 0; i < mStreamSize; i++) {
    820815            outputStreamBasePtrs.push_back(b->CreatePointerCast(b->getRawOutputPointer("outputStreamSet" + std::to_string(i), SIZE_0), BITBLOCK_PTR_TYPE));
    821816        }
     
    832827
    833828        Value* literalRem = b->CreateURem(literalStart, SIZE_64);
    834         Value* literalBlockIndex = b->CreateUDiv(b->CreateURem(literalStart, inputStreamSize), SIZE_64);
    835829
    836830        Value* outputPosRem = b->CreateURem(outputPos, SIZE_64);
     
    856850
    857851
    858         for (int i = 0; i < mStreamSize; i++) {
     852        for (unsigned i = 0; i < mStreamSize; i++) {
    859853            Value* inputValue = inputLiteralValues[i];
    860854
     
    907901
    908902        vector<Value*> sourceStreamBasePtrs, outputStreamBasePtrs; // <4 * i64>*
    909         for (int i = 0; i < mStreamSize; i++) {
     903        for (unsigned i = 0; i < mStreamSize; i++) {
    910904            sourceStreamBasePtrs.push_back(b->CreatePointerCast(b->getRawInputPointer("sourceStreamSet" + std::to_string(i), SIZE_0), BITBLOCK_PTR_TYPE, "aa" + std::to_string(i)));
    911905            outputStreamBasePtrs.push_back(b->CreatePointerCast(b->getRawOutputPointer("outputStreamSet" + std::to_string(i), SIZE_0), BITBLOCK_PTR_TYPE));
     
    943937        Value* clearMask = b->simd_fill(64, b->CreateSub(b->CreateShl(SIZE_1, outputPosRem), SIZE_1));
    944938
    945         for (int i = 0; i < mStreamSize; i++) {
     939        for (unsigned i = 0; i < mStreamSize; i++) {
    946940            Value* inputValue = b->CreateLoad(b->CreateGEP(sourceStreamBasePtrs[i], literalBlockIndex));
    947941            Value* outputValue = b->CreateLoad(b->CreateGEP(outputStreamBasePtrs[i], outputBlockIndex));
     
    973967
    974968        vector<Value*> outputStreamBasePtrs; // <4 * i64>*
    975         for (int i = 0; i < mStreamSize; i++) {
     969        for (unsigned i = 0; i < mStreamSize; i++) {
    976970            outputStreamBasePtrs.push_back(b->CreatePointerCast(b->getRawOutputPointer("outputStreamSet" + std::to_string(i), SIZE_0), BITBLOCK_PTR_TYPE));
    977971        }
     
    10091003        copySize = b->CreateUMin(copySize, phiMatchLength);
    10101004
    1011         for (int i = 0; i < mStreamSize; i++) {
     1005        for (unsigned i = 0; i < mStreamSize; i++) {
    10121006            Value* inputValue = b->CreateLoad(b->CreateGEP(outputStreamBasePtrs[i], outputFromPosBlockIndex));
    10131007            Value* outputValue = b->CreateLoad(b->CreateGEP(outputStreamBasePtrs[i], outputToPosBlockIndex));
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_swizzled_match_copy_kernel.cpp

    r6055 r6066  
    103103    Value* m0MarkerBasePtr = iBuilder->CreatePointerCast(iBuilder->getInputStreamBlockPtr("M0Marker", SIZE_0), I64_PTR_TY); // i64*
    104104    vector<Value*> sourceStreamBasePtrs, outputStreamBasePtrs; // <4 * i64>*
    105     for (int i = 0; i < mStreamSize; i++) {
     105    for (unsigned i = 0; i < mStreamSize; i++) {
    106106        sourceStreamBasePtrs.push_back(iBuilder->CreatePointerCast(iBuilder->getInputStreamBlockPtr("sourceStreamSet" + std::to_string(i), SIZE_0), BITBLOCK_PTR_TYPE));
    107107        outputStreamBasePtrs.push_back(iBuilder->CreatePointerCast(iBuilder->getOutputStreamBlockPtr("outputStreamSet" + std::to_string(i), SIZE_0), BITBLOCK_PTR_TYPE));
     
    137137    Value* currentInitM0 = iBuilder->CreateLoad(iBuilder->CreateGEP(m0MarkerBasePtr, dataBlockIndex));
    138138    vector<Value*> initSourceData;
    139     for (int i = 0; i < mStreamSize; i++) {
     139    for (unsigned i = 0; i < mStreamSize; i++) {
    140140        // Because of swizzled form, the sourceStream can be accessed linearly
    141141        initSourceData.push_back(iBuilder->CreateLoad(iBuilder->CreateGEP(sourceStreamBasePtrs[i], dataBlockIndex)));
     
    171171    Value* carryCopyFromBlockIndex = iBuilder->CreateUDiv(carryCopyFromPos, SIZE_64);
    172172    Value* carryCopyFromOffset = iBuilder->CreateURem(carryCopyFromPos, SIZE_64);
    173     for (int i = 0; i < mStreamSize; i++) {
     173    for (unsigned i = 0; i < mStreamSize; i++) {
    174174        Value* v = iBuilder->CreateLoad(iBuilder->CreateGEP(outputStreamBasePtrs[i], carryCopyFromBlockIndex));
    175175        v = iBuilder->CreateLShr(v, iBuilder->simd_fill(mPDEPWidth, carryCopyFromOffset));
     
    192192
    193193    vector<PHINode*> outputData;
    194     for (int i = 0; i < mStreamSize; i++) {
     194    for (unsigned i = 0; i < mStreamSize; i++) {
    195195        PHINode* outputValue = iBuilder->CreatePHI(iBuilder->getBitBlockType(), 3);
    196196        outputValue->addIncoming(initSourceData[i], processLoopBody);
     
    265265    vector<Value*> pdepSourceData;
    266266
    267     for (int i = 0; i < mStreamSize; i++) {
     267    for (unsigned i = 0; i < mStreamSize; i++) {
    268268        Value* fromPtr = iBuilder->CreateGEP(outputStreamBasePtrs[i], matchCopyFromBlockIndex);
    269269        Value* fromBlockValue = iBuilder->CreateLoad(fromPtr);
     
    306306    phiSourceDataAccessable->addIncoming(phiTargetMatchOffset, doMatchCopyBlock);
    307307    vector<PHINode*> phiPdepSourceData;
    308     for (int i = 0; i < mStreamSize; i++) {
     308    for (unsigned i = 0; i < mStreamSize; i++) {
    309309        PHINode* v = iBuilder->CreatePHI(iBuilder->getBitBlockType(), 2);
    310310        v->addIncoming(pdepSourceData[i], doMatchCopyBlock);
     
    315315    // ---- doubleSourceDataBody
    316316    iBuilder->SetInsertPoint(doubleSourceDataBody);
    317     for (int i = 0; i < mStreamSize; i++) {
     317    for (unsigned i = 0; i < mStreamSize; i++) {
    318318        PHINode* v = phiPdepSourceData[i];
    319319        Value* newValue = iBuilder->CreateOr(v, iBuilder->CreateShl(v, iBuilder->simd_fill(mPDEPWidth, phiSourceDataAccessable)));
     
    327327    iBuilder->SetInsertPoint(doubleSourceDataExit);
    328328    // At this point, we can guarantee we have enough data for pdep
    329     for (int i = 0; i < mStreamSize; i++) {
     329    for (unsigned i = 0; i < mStreamSize; i++) {
    330330        // Do Match Copy by PDEP
    331331        Value* allFromValue = phiPdepSourceData[i];
     
    347347    // ---- MatchCopyLoopExit
    348348    iBuilder->SetInsertPoint(matchCopyLoopExit);
    349     for (int i = 0; i < mStreamSize; i++) {
     349    for (unsigned i = 0; i < mStreamSize; i++) {
    350350        iBuilder->CreateStore(outputData[i], iBuilder->CreateGEP(outputStreamBasePtrs[i], dataBlockIndex));
    351351    }
  • icGREP/icgrep-devel/icgrep/kernels/streams_merge.cpp

    r6044 r6066  
    8282    unsigned outputIndex = 0;
    8383    for (unsigned i = 0; i < mStreamsNumOfSets.size(); i++) {
    84         int streamNum = mStreamsNumOfSets[i];
     84        unsigned streamNum = mStreamsNumOfSets[i];
    8585        for (unsigned j = 0; j < streamNum; j++) {
    8686            iBuilder->storeOutputStreamBlock(
     
    113113    unsigned inputIndex = 0;
    114114    for (unsigned i = 0; i < mStreamsNumOfSets.size(); i++) {
    115         int streamNum = mStreamsNumOfSets[i];
     115        unsigned streamNum = mStreamsNumOfSets[i];
    116116        for (unsigned j = 0; j < streamNum; j++) {
    117117            iBuilder->storeOutputStreamBlock(
  • icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp

    r6064 r6066  
    4848    this->generateMainFunc(iBuilder);
    4949
    50     StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
     50    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
    5151
    5252    // GeneratePipeline
     
    5454
    5555    //// Decode Block Information
    56     StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(), 1);
    57     StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
    58     StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
     56    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
     57    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
     58    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    5959
    6060    //// Generate Helper Markers Extenders, FX, XF
    61     StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(), 1);
    62     mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
     61    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
     62    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
    6363    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
    6464    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
     
    7171    //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
    7272    //TODO handle uncompressed part
    73     StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
    74     StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
    75     StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
    76 
    77     mCompressionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
    78     mM0Marker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
    79     mDepositMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
     73    StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
     74    StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
     75    StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
     76
     77    mCompressionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
     78    mM0Marker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
     79    mDepositMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
    8080
    8181    Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderNewKernel>(iBuilder);
     
    107107
    108108    // Deletion
    109     StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
    110     StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), this->getInputBufferBlocks());
     109    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
     110    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), this->getInputBufferBlocks(iBuilder));
    111111
    112112    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(iBuilder, 64, 8);
    113113    mPxDriver.makeKernelCall(delK, {mCompressedBasisBits, mCompressionMarker}, {deletedBits, deletionCounts});
    114114
    115     StreamSetBuffer * compressedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
     115    StreamSetBuffer * compressedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
    116116    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(iBuilder, 64, 8);
    117117    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedBits});
     
    142142    this->generateMainFunc(iBuilder);
    143143
    144     StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
     144    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
    145145
    146146    // GeneratePipeline
     
    171171    this->generateMainFunc(iBuilder);
    172172
    173     StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
     173    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
    174174
    175175    // GeneratePipeline
     
    180180
    181181    // Produce unswizzled bit streams
    182     StreamSetBuffer * extractedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
     182    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
    183183    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
    184184
     
    208208    this->generateMainFunc(iBuilder);
    209209
    210     StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
     210    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
    211211
    212212    // GeneratePipeline
     
    215215    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
    216216
    217     StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks());
     217    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks(iBuilder));
    218218    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
    219219    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
     
    241241    this->generateMainFunc(iBuilder);
    242242
    243     StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
     243    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
    244244
    245245    // GeneratePipeline
     
    249249    auto swizzle = this->generateSwizzleExtractData(iBuilder);
    250250
    251     StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    252     StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     251    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
     252    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
    253253
    254254    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
     
    256256
    257257    // Produce unswizzled bit streams
    258     StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
     258    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
    259259    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
    260260    mPxDriver.makeKernelCall(unSwizzleK, {depositedSwizzle0, depositedSwizzle1}, {depositedBits});
     
    281281    this->generateMainFunc(iBuilder);
    282282
    283     StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
     283    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
    284284
    285285    // GeneratePipeline
     
    288288    StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
    289289
    290     StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks());
     290    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks(iBuilder));
    291291    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
    292292    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
    293293
    294     StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
     294    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
    295295    Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(iBuilder, 8);
    296296    mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
     
    318318    this->generateMainFunc(iBuilder);
    319319
    320     StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
     320    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
    321321
    322322    // GeneratePipeline
     
    326326    auto swizzle = this->generateSwizzleExtractData(iBuilder);
    327327
    328     StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
    329     StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
     328    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
     329    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
    330330
    331331    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
    332332    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
    333333
    334     StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
    335     StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
     334    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
     335    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
    336336
    337337    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
     
    340340
    341341    // Produce unswizzled bit streams
    342     StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
     342    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
    343343    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
    344344    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {matchCopiedBits});
     
    397397void LZ4Generator::generateLoadByteStreamAndBitStream(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    398398    mCompressedByteStream = mPxDriver.addBuffer<ExternalBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8));
    399     mCompressedBasisBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getInputBufferBlocks());
     399    mCompressedBasisBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getInputBufferBlocks(iBuilder));
    400400
    401401    kernel::Kernel * sourceK = mPxDriver.addKernelInstance<MemorySourceKernel>(iBuilder);
     
    408408StreamSetBuffer * LZ4Generator::generateSwizzledAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    409409    //// Decode Block Information
    410     StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(), 1);
    411     StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
    412     StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
     410    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
     411    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
     412    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    413413
    414414    //// Generate Helper Markers Extenders, FX, XF
    415     StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(), 1);
    416     mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
     415    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
     416    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
    417417    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
    418418    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
     
    425425
    426426    // Produce unswizzled bit streams
    427     StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    428     StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     427    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
     428    StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
    429429    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 2, 1, 64, "source");
    430430    mPxDriver.makeKernelCall(unSwizzleK, {mCompressedBasisBits}, {u16Swizzle0, u16Swizzle1});
     
    432432
    433433
    434     StreamSetBuffer * decompressedSwizzled0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    435     StreamSetBuffer * decompressedSwizzled1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     434    StreamSetBuffer * decompressedSwizzled0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
     435    StreamSetBuffer * decompressedSwizzled1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
    436436
    437437
     
    457457
    458458
    459     StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks());
     459    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
    460460
    461461    Kernel * unSwizzleK2 = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
     
    467467parabix::StreamSetBuffer * LZ4Generator::generateParallelAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    468468    //// Decode Block Information
    469     StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(), 1);
    470     StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
    471     StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
     469    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
     470    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
     471    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    472472
    473473    //// Generate Helper Markers Extenders
    474     StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(), 1);
    475     mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
     474    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
     475    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
    476476    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
    477477    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
     
    482482
    483483
    484     StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(), 1);
     484    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder), 1);
    485485
    486486    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ParallelByteStreamAioKernel>(iBuilder);
     
    506506StreamSetBuffer * LZ4Generator::generateAIODecompression(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    507507    //// Decode Block Information
    508     StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(), 1);
    509     StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
    510     StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
     508    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
     509    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
     510    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    511511
    512512
     
    525525
    526526
    527     StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(), 1);
     527    StreamSetBuffer * const decompressionByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder), 1);
    528528
    529529    Kernel* lz4AioK = mPxDriver.addKernelInstance<LZ4ByteStreamAioKernel>(iBuilder);
     
    548548void LZ4Generator::generateExtractAndDepositMarkers(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    549549    //// Decode Block Information
    550     StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(), 1);
    551     StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
    552     StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
     550    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
     551    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
     552    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    553553
    554554    //// Generate Helper Markers Extenders, FX, XF
    555     StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(), 1);
    556     mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
     555    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
     556    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
    557557    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
    558558    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
     
    566566
    567567    //TODO handle uncompressed part
    568     StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
    569     StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
    570     StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
    571 
    572     mDeletionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
    573     mM0Marker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
    574     mDepositMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
     568    StreamSetBuffer * const UncompressedStartPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
     569    StreamSetBuffer * const UncompressedLength = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
     570    StreamSetBuffer * const UncompressedOutputPos = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
     571
     572    mDeletionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
     573    mM0Marker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
     574    mDepositMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks(iBuilder));
    575575
    576576    Kernel* Lz4IndexBuilderK = mPxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
     
    603603
    604604std::pair<StreamSetBuffer*, StreamSetBuffer*> LZ4Generator::generateSwizzleExtractData(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    605     StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    606     StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     605    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
     606    StreamSetBuffer * u16Swizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
    607607
    608608    Kernel * delK = mPxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(iBuilder, 8, 64);
     
    613613void LZ4Generator::generateCompressionMarker(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    614614    if (!mCompressionMarker) {
    615         mCompressionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
     615        mCompressionMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
    616616        Kernel * bitstreamNotK = mPxDriver.addKernelInstance<LZ4BitStreamNotKernel>(iBuilder);
    617617        mPxDriver.makeKernelCall(bitstreamNotK, {mDeletionMarker}, {mCompressionMarker});
     
    623623
    624624    // Deletion
    625     StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
    626     StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), this->getInputBufferBlocks());
     625    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
     626    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(), this->getInputBufferBlocks(iBuilder));
    627627
    628628    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(iBuilder, 64, 8);
    629629    mPxDriver.makeKernelCall(delK, {mCompressedBasisBits, mCompressionMarker}, {deletedBits, deletionCounts});
    630630
    631     StreamSetBuffer * compressedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
     631    StreamSetBuffer * compressedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
    632632    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(iBuilder, 64, 8);
    633633    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedBits});
     
    637637
    638638int LZ4Generator::get4MbBufferBlocks() {
    639     return 4 * 1024 * 1024 / codegen::BlockSize * 4; // TODO * 4 here in the end is a workaround for parallel aio pipeline
    640 }
    641 
    642 int LZ4Generator::getInputBufferBlocks() {
    643     return this->get4MbBufferBlocks() * 2;
    644 }
    645 int LZ4Generator::getDecompressedBufferBlocks() {
    646     return this->get4MbBufferBlocks() * 2;
     639    return 4 * 1024 * 1024 / codegen::BlockSize;
     640}
     641
     642int LZ4Generator::getInputBufferBlocks(const std::unique_ptr<kernel::KernelBuilder> & b) {
     643    return this->get4MbBufferBlocks() * 2 * b->getBitBlockWidth() / 64;
     644}
     645int LZ4Generator::getDecompressedBufferBlocks(const std::unique_ptr<kernel::KernelBuilder> & b) {
     646    return this->get4MbBufferBlocks() * 2 * b->getBitBlockWidth() / 64;
    647647}
    648648
  • icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.h

    r6064 r6066  
    5050
    5151    // BufferSize related Helper Function
    52     virtual int getInputBufferBlocks();
    53     virtual int getDecompressedBufferBlocks();
     52    virtual int getInputBufferBlocks(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
     53    virtual int getDecompressedBufferBlocks(const std::unique_ptr<kernel::KernelBuilder> & iBuilder);
    5454    int get4MbBufferBlocks();
    5555
  • icGREP/icgrep-devel/icgrep/lz4/LZ4GrepGenerator.cpp

    r6065 r6066  
    9999parabix::StreamSetBuffer * LZ4GrepGenerator::linefeedStreamFromDecompressedBits(parabix::StreamSetBuffer *decompressedBasisBits) {
    100100//    auto mGrepDriver = &mPxDriver;
    101     const unsigned baseBufferSize = this->getInputBufferBlocks();
    102101    auto & idb = mPxDriver.getBuilder();
     102    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
    103103    StreamSetBuffer * LineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    104104    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
     
    112112
    113113    //// Decode Block Information
    114     StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(), 1);
    115     StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
    116     StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(), 1);
     114    StreamSetBuffer * const BlockData_IsCompressed = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks(iBuilder), 1);
     115    StreamSetBuffer * const BlockData_BlockStart = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
     116    StreamSetBuffer * const BlockData_BlockEnd = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks(iBuilder), 1);
    117117
    118118    //// Generate Helper Markers Extenders, FX, XF
    119     StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(), 1);
    120     mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
     119    StreamSetBuffer * const Extenders = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder), 1);
     120    mMatchOffsetMarker = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks(iBuilder));
    121121    Kernel * extenderK = mPxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
    122122    mPxDriver.makeKernelCall(extenderK, {mCompressedBasisBits}, {Extenders});
     
    129129
    130130    // Produce unswizzled bit streams
    131     StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     131    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
    132132    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 4, 1, 1, 64, "source");
    133133    mPxDriver.makeKernelCall(unSwizzleK, {compressedBitStream}, {u16Swizzle0});
    134134
    135     StreamSetBuffer * decompressedSwizzled0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     135    StreamSetBuffer * decompressedSwizzled0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
    136136
    137137
     
    156156
    157157
    158     StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks());
     158    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
    159159    Kernel * unSwizzleK2 = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 4, 1, 1, 64, "dst");
    160160    mPxDriver.makeKernelCall(unSwizzleK2, {decompressedSwizzled0}, {decompressionBitStream});
     
    168168    auto & idb = mGrepDriver->getBuilder();
    169169
    170     StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     170    StreamSetBuffer * u16Swizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getInputBufferBlocks(idb), 1);
    171171    Kernel * delK = mPxDriver.addKernelInstance<SwizzledDeleteByPEXTkernel>(idb, 4, 64);
    172172    mPxDriver.makeKernelCall(delK, {mDeletionMarker, compressedBitStream}, {u16Swizzle0});
    173173
    174     StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     174    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getInputBufferBlocks(idb), 1);
    175175    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(idb, 4, 1);
    176176    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, u16Swizzle0}, {depositedSwizzle0});
    177177
    178     StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
     178    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(4), this->getDecompressedBufferBlocks(idb), 1);
    179179    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(idb, 4, 1, 4);
    180180    mPxDriver.makeKernelCall(swizzledMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedSwizzle0}, {matchCopiedSwizzle0});
    181181
    182182    // Produce unswizzled bit streams
    183     StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks());
     183    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb));
    184184    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(idb, 4, 1, 1);
    185185    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0}, {matchCopiedBits});
     
    198198    this->generateCompressionMarker(idb);
    199199
    200     StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks());
    201     StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(), this->getInputBufferBlocks());
     200    StreamSetBuffer * deletedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks(idb));
     201    StreamSetBuffer * deletionCounts = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(), this->getInputBufferBlocks(idb));
    202202
    203203    Kernel * delK = mPxDriver.addKernelInstance<PEXTFieldCompressKernel>(idb, 64, numberOfStream);
    204204    mPxDriver.makeKernelCall(delK, {compressedBitStream, mCompressionMarker}, {deletedBits, deletionCounts});
    205205
    206     StreamSetBuffer * compressedLineStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks());
     206    StreamSetBuffer * compressedLineStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getInputBufferBlocks(idb));
    207207    Kernel * streamCompressionK = mPxDriver.addKernelInstance<StreamCompressKernel>(idb, 64, numberOfStream);
    208208    mPxDriver.makeKernelCall(streamCompressionK, {deletedBits, deletionCounts}, {compressedLineStream});
    209209
    210210    // Deposit
    211     StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks());
     211    StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks(idb));
    212212    Kernel * bitStreamPDEPk = mPxDriver.addKernelInstance<BitStreamPDEPKernel>(idb, numberOfStream, prefix + "BitStreamPDEPKernel");
    213213    mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, compressedLineStream}, {depositedBits});
    214214
    215215    // Match Copy
    216     StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks());
     216    StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numberOfStream), this->getDecompressedBufferBlocks(idb));
    217217    Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(idb, numberOfStream, prefix + "BitStreamMatchCopyKernel");
    218218    mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
     
    223223parabix::StreamSetBuffer * LZ4GrepGenerator::linefeedStreamFromCompressedBits() {
    224224    auto mGrepDriver = &mPxDriver;
    225     const unsigned baseBufferSize = this->getInputBufferBlocks();
    226225    auto & idb = mGrepDriver->getBuilder();
     226    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
    227227
    228228    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     
    238238    auto & idb = mGrepDriver->getBuilder();
    239239    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
    240     const unsigned baseBufferSize = this->getInputBufferBlocks();
    241     bool CC_Multiplexing = true;
    242     int MaxCountFlag = 0;
     240    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
    243241
    244242
     
    272270    kernel::Kernel* streamCombineKernel = mPxDriver.addKernelInstance<StreamsCombineKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
    273271    mPxDriver.makeKernelCall(streamCombineKernel, {CompressedLineFeedStream, CharClasses}, {combinedStream});
    274     StreamSetBuffer * decompressedCombinedStream = nullptr;
    275 
    276     decompressedCombinedStream = this->convertCompressedBitsStreamWithAioApproach(combinedStream, 1 + numOfCharacterClasses, "combined");
    277 
    278 /*
     272    StreamSetBuffer * decompressedCombinedStream = this->convertCompressedBitsStreamWithAioApproach(combinedStream, 1 + numOfCharacterClasses, "combined");
     273
    279274    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), baseBufferSize);
    280275    StreamSetBuffer * decompressedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
     
    284279
    285280
    286     StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks());
     281    StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb));
    287282    Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<LZ4FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
    288283    mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
     
    306301    }
    307302
     303//    if (MaxCountFlag > 0) {
     304//        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
     305//        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
     306//        StreamSetBuffer * const AllMatches = Matches;
     307//        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     308//        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
     309//    }
     310
     311//    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
     312
     313};
     314std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::multiplexingGrepPipeline(std::vector<re::RE *> &REs, bool useAio) {
     315
     316    this->initREs(REs);
     317    auto mGrepDriver = &mPxDriver;
     318
     319    auto & idb = mGrepDriver->getBuilder();
     320    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
     321    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
     322    int MaxCountFlag = 0;
     323
     324    //  Regular Expression Processing and Analysis Phase
     325    const auto nREs = mREs.size();
     326
     327    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
     328
     329
     330    std::map<std::string, StreamSetBuffer *> propertyStream;
     331
     332    std::vector<std::string> externalStreamNames;
     333    std::set<re::Name *> UnicodeProperties;
     334
     335    const auto UnicodeSets = re::collectCCs(mREs[0], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
     336    StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     337
     338    mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
     339    mREs[0] = transformCCs(mpx.get(), mREs[0]);
     340    std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
     341    auto numOfCharacterClasses = mpx_basis.size();
     342    StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
     343
     344    kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis));
     345    mGrepDriver->makeKernelCall(ccK, {mCompressedBasisBits}, {CharClasses});
     346
     347    StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     348    kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
     349    mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
     350
     351    StreamSetBuffer * combinedStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses + 1), baseBufferSize);
     352    kernel::Kernel* streamCombineKernel = mPxDriver.addKernelInstance<StreamsCombineKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
     353    mPxDriver.makeKernelCall(streamCombineKernel, {CompressedLineFeedStream, CharClasses}, {combinedStream});
     354    StreamSetBuffer * decompressedCombinedStream = nullptr;
     355
     356    if (useAio) {
     357        decompressedCombinedStream = this->convertCompressedBitsStreamWithAioApproach(combinedStream, 1 + numOfCharacterClasses, "combined");
     358    } else {
     359        decompressedCombinedStream = this->convertCompressedBitsStream(combinedStream, 1 + numOfCharacterClasses, "combined");
     360    }
     361
     362    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), baseBufferSize);
     363    StreamSetBuffer * decompressedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
     364    kernel::Kernel* streamSplitKernel = mPxDriver.addKernelInstance<StreamsSplitKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
     365    mPxDriver.makeKernelCall(streamSplitKernel, {decompressedCombinedStream}, {LineBreakStream, decompressedCharClasses});
     366
     367    /*
     368    StreamSetBuffer * LineBreakStream = this->convertCompressedBitsStream(CompressedLineFeedStream, 1, "LineFeed");
     369    StreamSetBuffer * decompressedCharClasses = this->convertCompressedBitsStream(CharClasses, numOfCharacterClasses, "mpx");
     370     */
     371
     372    StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks(idb));
     373    Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<LZ4FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
     374    mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
     375
     376    kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[0], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()});
     377    mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {MatchResults});
     378    MatchResultsBufs[0] = MatchResults;
     379
     380    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
     381    if (mREs.size() > 1) {
     382        MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     383        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
     384        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
     385    }
     386    StreamSetBuffer * Matches = MergedResults;
     387    if (mMoveMatchesToEOL) {
     388        StreamSetBuffer * OriginalMatches = Matches;
     389        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
     390        Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     391        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
     392    }
     393
    308394    if (MaxCountFlag > 0) {
    309395        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
     
    315401
    316402    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
    317     */
    318 
    319403};
    320 std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::multiplexingGrepPipeline(std::vector<re::RE *> &REs, bool useAio) {
     404
     405std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::grepPipeline(
     406        std::vector<re::RE *> &REs, parabix::StreamSetBuffer *decompressedBasisBits) {
    321407
    322408    this->initREs(REs);
     
    325411    auto & idb = mGrepDriver->getBuilder();
    326412    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
    327     const unsigned baseBufferSize = this->getInputBufferBlocks();
    328     bool CC_Multiplexing = true;
     413    const unsigned baseBufferSize = this->getInputBufferBlocks(idb);
    329414    int MaxCountFlag = 0;
    330415
     
    334419    std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
    335420
     421    StreamSetBuffer * LineBreakStream = this->linefeedStreamFromDecompressedBits(decompressedBasisBits);
     422
    336423
    337424    std::map<std::string, StreamSetBuffer *> propertyStream;
    338425
    339     std::vector<std::string> externalStreamNames;
    340     std::set<re::Name *> UnicodeProperties;
    341 
    342     const auto UnicodeSets = re::collectCCs(mREs[0], &cc::Unicode, std::set<re::Name *>({re::makeZeroWidth("\\b{g}")}));
    343     StreamSetBuffer * const MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    344 
    345     mpx = make_unique<cc::MultiplexedAlphabet>("mpx", UnicodeSets);
    346     mREs[0] = transformCCs(mpx.get(), mREs[0]);
    347     std::vector<re::CC *> mpx_basis = mpx->getMultiplexedCCs();
    348     auto numOfCharacterClasses = mpx_basis.size();
    349     StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
    350 
    351     kernel::Kernel * ccK = mGrepDriver->addKernelInstance<kernel::CharClassesKernel>(idb, std::move(mpx_basis));
    352     mGrepDriver->makeKernelCall(ccK, {mCompressedBasisBits}, {CharClasses});
    353 
    354     StreamSetBuffer * CompressedLineFeedStream = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    355     kernel::Kernel * linefeedK = mPxDriver.addKernelInstance<kernel::LineFeedKernelBuilder>(idb, Binding{idb->getStreamSetTy(8), "basis", FixedRate(), Principal()});
    356     mPxDriver.makeKernelCall(linefeedK, {mCompressedBasisBits}, {CompressedLineFeedStream});
    357 
    358     StreamSetBuffer * combinedStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses + 1), baseBufferSize);
    359     kernel::Kernel* streamCombineKernel = mPxDriver.addKernelInstance<StreamsCombineKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
    360     mPxDriver.makeKernelCall(streamCombineKernel, {CompressedLineFeedStream, CharClasses}, {combinedStream});
    361     StreamSetBuffer * decompressedCombinedStream = nullptr;
    362 
    363     if (useAio) {
    364         decompressedCombinedStream = this->convertCompressedBitsStreamWithAioApproach(combinedStream, 1 + numOfCharacterClasses, "combined");
    365     } else {
    366         decompressedCombinedStream = this->convertCompressedBitsStream(combinedStream, 1 + numOfCharacterClasses, "combined");
    367     }
    368 
    369     StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1), baseBufferSize);
    370     StreamSetBuffer * decompressedCharClasses = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), baseBufferSize);
    371     kernel::Kernel* streamSplitKernel = mPxDriver.addKernelInstance<StreamsSplitKernel>(idb, std::vector<unsigned>({1, (unsigned)numOfCharacterClasses}));
    372     mPxDriver.makeKernelCall(streamSplitKernel, {decompressedCombinedStream}, {LineBreakStream, decompressedCharClasses});
    373 
    374     /*
    375     StreamSetBuffer * LineBreakStream = this->convertCompressedBitsStream(CompressedLineFeedStream, 1, "LineFeed");
    376     StreamSetBuffer * decompressedCharClasses = this->convertCompressedBitsStream(CharClasses, numOfCharacterClasses, "mpx");
    377      */
    378 
    379     StreamSetBuffer * fakeMatchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(8), this->getInputBufferBlocks());
    380     Kernel* fakeStreamGeneratorK = mPxDriver.addKernelInstance<LZ4FakeStreamGeneratingKernel>(idb, numOfCharacterClasses, 8);
    381     mPxDriver.makeKernelCall(fakeStreamGeneratorK, {decompressedCharClasses}, {fakeMatchCopiedBits});
    382 
    383     kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[0], externalStreamNames, std::vector<cc::Alphabet *>{mpx.get()});
    384     mGrepDriver->makeKernelCall(icgrepK, {fakeMatchCopiedBits, decompressedCharClasses}, {MatchResults});
    385     MatchResultsBufs[0] = MatchResults;
     426    for(unsigned i = 0; i < nREs; ++i) {
     427        std::vector<std::string> externalStreamNames;
     428        std::vector<StreamSetBuffer *> icgrepInputSets = {decompressedBasisBits};
     429
     430        std::set<re::Name *> UnicodeProperties;
     431
     432        StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     433        kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames);
     434        mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
     435        MatchResultsBufs[i] = MatchResults;
     436    }
    386437
    387438    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
     
    408459
    409460    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
    410 };
    411 
    412 std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> LZ4GrepGenerator::grepPipeline(
    413         std::vector<re::RE *> &REs, parabix::StreamSetBuffer *decompressedBasisBits) {
    414 
    415     this->initREs(REs);
    416     auto mGrepDriver = &mPxDriver;
    417 
    418     auto & idb = mGrepDriver->getBuilder();
    419     // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
    420     const unsigned baseBufferSize = this->getInputBufferBlocks();
    421     int MaxCountFlag = 0;
    422 
    423     //  Regular Expression Processing and Analysis Phase
    424     const auto nREs = mREs.size();
    425 
    426     std::vector<StreamSetBuffer *> MatchResultsBufs(nREs);
    427 
    428     StreamSetBuffer * LineBreakStream = LineBreakStream = this->linefeedStreamFromDecompressedBits(decompressedBasisBits);
    429 
    430 
    431     std::map<std::string, StreamSetBuffer *> propertyStream;
    432 
    433     for(unsigned i = 0; i < nREs; ++i) {
    434         std::vector<std::string> externalStreamNames;
    435         std::vector<StreamSetBuffer *> icgrepInputSets = {decompressedBasisBits};
    436 
    437         std::set<re::Name *> UnicodeProperties;
    438 
    439         StreamSetBuffer * MatchResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    440         kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance<kernel::ICGrepKernel>(idb, mREs[i], externalStreamNames);
    441         mGrepDriver->makeKernelCall(icgrepK, icgrepInputSets, {MatchResults});
    442         MatchResultsBufs[i] = MatchResults;
    443     }
    444 
    445     StreamSetBuffer * MergedResults = MatchResultsBufs[0];
    446     if (mREs.size() > 1) {
    447         MergedResults = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    448         kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance<kernel::StreamsMerge>(idb, 1, mREs.size());
    449         mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
    450     }
    451     StreamSetBuffer * Matches = MergedResults;
    452     if (mMoveMatchesToEOL) {
    453         StreamSetBuffer * OriginalMatches = Matches;
    454         kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance<kernel::MatchedLinesKernel>(idb);
    455         Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    456         mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
    457     }
    458 
    459     if (MaxCountFlag > 0) {
    460         kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
    461         untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
    462         StreamSetBuffer * const AllMatches = Matches;
    463         Matches = mGrepDriver->addBuffer<StaticBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
    464         mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
    465     }
    466 
    467     return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
    468461
    469462}
     
    482475    this->generateScanMatchMainFunc(iBuilder);
    483476
    484     StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks());
     477    StreamSetBuffer * const DecompressedByteStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getDecompressedBufferBlocks(iBuilder));
    485478
    486479    // GeneratePipeline
     
    490483    auto swizzle = this->generateSwizzleExtractData(iBuilder);
    491484
    492     StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
    493     StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
     485    StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
     486    StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(iBuilder), 1);
    494487
    495488    Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
    496489    mPxDriver.makeKernelCall(multiplePdepK, {mDepositMarker, swizzle.first, swizzle.second}, {depositedSwizzle0, depositedSwizzle1});
    497490
    498     StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    499     StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     491    StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
     492    StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
    500493
    501494    Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
     
    503496
    504497    // Produce unswizzled bit streams
    505     StreamSetBuffer * extractedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
     498    StreamSetBuffer * extractedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
    506499    Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
    507500    mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {extractedbits});
     
    621614
    622615
    623     StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks());
     616    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
    624617    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, /*aligned = */ true, "a");
    625618//    Kernel * s2pk = mPxDriver.addKernelInstance<S2PByPextKernel>(iBuilder, "a");
     
    662655
    663656
    664     StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks());
     657    StreamSetBuffer * const decompressionBitStream = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), this->getDecompressedBufferBlocks(iBuilder));
    665658    Kernel * s2pk = mPxDriver.addKernelInstance<S2PKernel>(iBuilder, /*aligned = */ true, "a");
    666659//    Kernel * s2pk = mPxDriver.addKernelInstance<S2PByPextKernel>(iBuilder, "a");
     
    710703    } else {
    711704        StreamSetBuffer * const extractedBits = this->generateBitStreamExtractData(iBuilder);
    712         StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks());
     705        StreamSetBuffer * depositedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getDecompressedBufferBlocks(iBuilder));
    713706        Kernel * bitStreamPDEPk = enableGather ? mPxDriver.addKernelInstance<BitStreamGatherPDEPKernel>(iBuilder, 8) : mPxDriver.addKernelInstance<BitStreamPDEPKernel>(iBuilder, 8);
    714707        mPxDriver.makeKernelCall(bitStreamPDEPk, {mDepositMarker, extractedBits}, {depositedBits});
    715708
    716         StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
     709        StreamSetBuffer * matchCopiedBits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
    717710        Kernel * bitStreamMatchCopyK = mPxDriver.addKernelInstance<LZ4BitStreamMatchCopyKernel>(iBuilder, 8);
    718711        mPxDriver.makeKernelCall(bitStreamMatchCopyK, {mMatchOffsetMarker, mM0Marker, mCompressedByteStream, depositedBits}, {matchCopiedBits});
     
    755748        auto swizzle = this->generateSwizzleExtractData(iBuilder);
    756749
    757         StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    758         StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     750        StreamSetBuffer * depositedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
     751        StreamSetBuffer * depositedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
    759752
    760753        Kernel * multiplePdepK = mPxDriver.addKernelInstance<SwizzledMultiplePDEPkernel>(iBuilder, 4, 2);
     
    771764*/
    772765
    773         StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    774         StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     766        StreamSetBuffer * matchCopiedSwizzle0 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
     767        StreamSetBuffer * matchCopiedSwizzle1 = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(iBuilder), 1);
    775768
    776769        Kernel * swizzledMatchCopyK = mPxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
     
    778771
    779772        // Produce unswizzled bit streams
    780         StreamSetBuffer * matchCopiedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks());
     773        StreamSetBuffer * matchCopiedbits = mPxDriver.addBuffer<StaticBuffer>(iBuilder, iBuilder->getStreamSetTy(8), this->getInputBufferBlocks(iBuilder));
    781774        Kernel * unSwizzleK = mPxDriver.addKernelInstance<SwizzleGenerator>(iBuilder, 8, 1, 2);
    782775        mPxDriver.makeKernelCall(unSwizzleK, {matchCopiedSwizzle0, matchCopiedSwizzle1}, {matchCopiedbits});
     
    813806    Type * const sizeTy = iBuilder->getSizeTy();
    814807    Type * const boolTy = iBuilder->getIntNTy(sizeof(bool) * 8);
    815     Type * const voidTy = iBuilder->getVoidTy();
     808//    Type * const voidTy = iBuilder->getVoidTy();
    816809    Type * const inputType = iBuilder->getInt8PtrTy();
    817810
Note: See TracChangeset for help on using the changeset viewer.