Changeset 5941 for icGREP


Ignore:
Timestamp:
Apr 5, 2018, 4:03:37 AM (14 months ago)
Author:
xwa163
Message:
  1. Add attributes to disable some features of multiblock kernel
  2. Fix bug for lz4d new approach in large data, pass all test cases
  3. Disable lz4d related test cases for old approach
Location:
icGREP/icgrep-devel
Files:
15 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/QA/lz4d_ext_dep/main.py

    r5921 r5941  
    1111
    1212test_options = [
    13     ('extract_only', '-extract-only', {'extract_only' : True}),
    14     ('extract_and_deposit', '-extract-and-deposit-only', {'extract_and_deposit_only': True}),
    15     ('normal', '', {}),
     13    # ('extract_only', '-extract-only', {'extract_only' : True}),
     14    # ('extract_and_deposit', '-extract-and-deposit-only', {'extract_and_deposit_only': True}),
     15    # ('normal', '', {}),
    1616    ('extract_only_new_approach', '-new-approach -extract-only', {'extract_only' : True}),
    1717    ('extract_and_deposit_new_approach', '-new-approach -extract-and-deposit-only', {'extract_and_deposit_only': True}),
     
    2222def run_test(test_file, lz4_option, python_lz4_option, test_file_full_path, output_file_full_path, python_output_file_full_path):
    2323    global failure_count
    24     lz4d_cmd = "%s %s --thread-num=1 -segment-size=8 -f %s %s" % (
     24    lz4d_cmd = "%s %s --thread-num=1 -f %s %s" % (
    2525    lz4d_program_under_test, lz4_option, test_file_full_path, output_file_full_path, )
    2626
  • icGREP/icgrep-devel/icgrep/grep/grep_engine.cpp

    r5937 r5941  
    488488}
    489489
    490 //
    491 // The EmitMatches engine uses an EmitMatchesAccumulator object to concatenate together
    492 // matched lines.
    493 
    494 class EmitMatch : public MatchAccumulator {
    495     friend class EmitMatchesEngine;
    496 public:
    497     EmitMatch(std::string linePrefix, std::ostringstream & strm) : mLinePrefix(linePrefix), mLineCount(0), mTerminated(true), mResultStr(strm) {}
    498     void accumulate_match(const size_t lineNum, char * line_start, char * line_end) override;
    499     void finalize_match(char * buffer_end) override;
    500 protected:
    501     std::string mLinePrefix;
    502     size_t mLineCount;
    503     bool mTerminated;
    504     std::ostringstream & mResultStr;
    505 };
    506490
    507491//
  • icGREP/icgrep-devel/icgrep/grep/grep_engine.h

    r5913 r5941  
    7777};
    7878
     79
     80//
     81// The EmitMatches engine uses an EmitMatchesAccumulator object to concatenate together
     82// matched lines.
     83
     84class EmitMatch : public MatchAccumulator {
     85    friend class EmitMatchesEngine;
     86public:
     87    EmitMatch(std::string linePrefix, std::ostringstream & strm) : mLinePrefix(linePrefix), mLineCount(0), mTerminated(true), mResultStr(strm) {}
     88    void accumulate_match(const size_t lineNum, char * line_start, char * line_end) override;
     89    void finalize_match(char * buffer_end) override;
     90protected:
     91    std::string mLinePrefix;
     92    size_t mLineCount;
     93    bool mTerminated;
     94    std::ostringstream & mResultStr;
     95};
     96
    7997class EmitMatchesEngine : public GrepEngine {
    8098public:
  • icGREP/icgrep-devel/icgrep/kernels/attributes.h

    r5921 r5941  
    9696        // is enough data to execute a stride rather than the upper bound.)
    9797
     98        DisableTemporaryBuffer,
     99
     100        // Workaround attribute, force disable temporary buffer
     101
     102        DisableSufficientChecking,
     103
     104        // Workaround attribute, force disable sufficient data or sufficient space checking in pipeline, always assume that
     105        // the data or space is sufficient
     106
     107        DisableAvailableItemCountAdjustment,
     108
     109        // Workaround attribute, keep original availableItemCount in multiblock kernel (do not replace it by linear available item count)
     110
    98111        /** OUTPUT STREAM ATTRIBUTES **/
    99112
     
    141154        // Whether the input streamset is in swizzled form
    142155
    143         ConstantStrideLengthOne,
    144 
    145         // TODO: Workaround here, the Pack Size is always one
    146 
    147 
    148156//        Here is a revised definition of SegmentedReverse:
    149157
     
    271279    friend Attribute Principal();
    272280    friend Attribute AlwaysConsume();
     281    friend Attribute DisableTemporaryBuffer();
     282    friend Attribute DisableSufficientChecking();
     283    friend Attribute DisableAvailableItemCountAdjustment();
    273284    friend Attribute RoundUpTo(const unsigned);
    274285    friend Attribute LookAhead(const unsigned);
     
    279290    friend Attribute ConditionalRegionEnd();
    280291    friend Attribute Swizzled();
    281     friend Attribute ConstantStrideLengthOne();
    282292    friend Attribute CanTerminateEarly();
    283293    friend Attribute MustExplicitlyTerminate();
     
    346356}
    347357
     358inline Attribute DisableTemporaryBuffer() {
     359    return Attribute(Attribute::KindId::DisableTemporaryBuffer, 0);
     360}
     361
     362inline Attribute DisableAvailableItemCountAdjustment() {
     363    return Attribute(Attribute::KindId::DisableAvailableItemCountAdjustment, 0);
     364}
     365
     366inline Attribute DisableSufficientChecking() {
     367    return Attribute(Attribute::KindId::DisableSufficientChecking, 0);
     368}
     369
    348370inline Attribute Principal() {
    349371    return Attribute(Attribute::KindId::Principal, 0);
     
    386408}
    387409
    388 inline Attribute ConstantStrideLengthOne() {
    389     return Attribute(Attribute::KindId::ConstantStrideLengthOne, 0);
    390 }
    391 
    392 
    393 
    394410}
    395411
  • icGREP/icgrep-devel/icgrep/kernels/interface.h

    r5921 r5941  
    7474    }
    7575
    76     bool isConstantStrideLengthOne() const {
    77         return hasAttribute(AttributeId::ConstantStrideLengthOne);
     76    bool isDisableTemporaryBuffer() const {
     77        return hasAttribute(AttributeId::DisableTemporaryBuffer);
     78    }
     79
     80    bool isDisableSufficientChecking() const {
     81        return hasAttribute(AttributeId::DisableSufficientChecking);
     82    }
     83
     84    bool isDisableAvailableItemCountAdjustment() const {
     85        return hasAttribute(AttributeId::DisableAvailableItemCountAdjustment);
    7886    }
    7987
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5883 r5941  
    618618 ** ------------------------------------------------------------------------------------------------------------- */
    619619inline bool LLVM_READNONE MultiBlockKernel::requiresTemporaryInputBuffer(const Binding & binding, const ProcessingRate & rate) const {
     620    if (binding.isDisableTemporaryBuffer()) {
     621        return false;
     622    }
    620623    if (requiresBufferedFinalStride(binding)) {
    621624        return true;
     
    631634 ** ------------------------------------------------------------------------------------------------------------- */
    632635inline bool LLVM_READNONE MultiBlockKernel::requiresTemporaryOutputBuffer(const Binding & binding, const ProcessingRate & rate) const {
     636    if (binding.isDisableTemporaryBuffer()) {
     637        return false;
     638    }
    633639    if (requiresBufferedFinalStride(binding)) {
    634640        return true;
     
    10681074    for (unsigned i = 0; i < inputSetCount; i++) {
    10691075        const Binding & input = mStreamSetInputs[i];
     1076        if (input.isDisableAvailableItemCountAdjustment()) {
     1077            continue;
     1078        }
     1079
    10701080        if (input.getRate().isFixed() && input.nonDeferred()) {
    10711081            Value * const processable = b->CreateMul(numOfStrides, inputStrideSize[i]);
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_block_decoder_new.cpp

    r5923 r5941  
    3232    {
    3333        Binding{iBuilder->getInt1Ty(), "hasBlockChecksum"},
    34         Binding{iBuilder->getSizeTy(), "headerSize"}
     34        Binding{iBuilder->getSizeTy(), "headerSize"},
     35        Binding{iBuilder->getSizeTy(), "fileSize"}
    3536    },
    3637    {},
     
    8283    Value* processedItemCount = iBuilder->getProcessedItemCount("byteStream");
    8384
    84     Value* mIsFinalBlock = iBuilder->CreateICmpEQ(availableItemCount, INT64_0);
     85    Value* totalItemCount = iBuilder->CreateAdd(availableItemCount, processedItemCount);
     86    Value* mIsFinalBlock = iBuilder->CreateICmpEQ(totalItemCount, iBuilder->getScalarField("fileSize"));
    8587    iBuilder->setTerminationSignal(mIsFinalBlock);
    86 
    87     Value* totalItemCount = iBuilder->CreateAdd(availableItemCount, processedItemCount);
    8888
    8989    Value* totalItemCount2 = iBuilder->CreateAdd(iBuilder->getAvailableItemCount("extender"), iBuilder->getProcessedItemCount("extender"));
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_index_builder.cpp

    r5939 r5941  
    1515using namespace kernel;
    1616using namespace std;
     17
    1718namespace kernel{
    1819    LZ4IndexBuilderKernel::LZ4IndexBuilderKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder)
     
    2122                               {
    2223                                       Binding{iBuilder->getStreamSetTy(1, 8), "byteStream", BoundedRate(0, 1)},
    23                                        Binding{iBuilder->getStreamSetTy(1, 1), "extender", RateEqualTo("byteStream")},
    24                                        Binding{iBuilder->getStreamSetTy(1, 1), "CC_0xFX", RateEqualTo("byteStream")},
    25                                        Binding{iBuilder->getStreamSetTy(1, 1), "CC_0xXF", RateEqualTo("byteStream")},
     24                                       Binding{iBuilder->getStreamSetTy(1, 1), "extender", RateEqualTo("byteStream"), {DisableTemporaryBuffer(), DisableAvailableItemCountAdjustment(), DisableSufficientChecking()}},
     25//                                       Binding{iBuilder->getStreamSetTy(1, 1), "CC_0xFX", RateEqualTo("byteStream")},
     26//                                       Binding{iBuilder->getStreamSetTy(1, 1), "CC_0xXF", RateEqualTo("byteStream")},
    2627
    2728                                       // block data
    2829                                       Binding{iBuilder->getStreamSetTy(1, 1), "isCompressed", BoundedRate(0, 1),
    29                                                ConstantStrideLengthOne()},
     30                                               AlwaysConsume()},
    3031                                       Binding{iBuilder->getStreamSetTy(1, 64), "blockStart", BoundedRate(0, 1),
    31                                                ConstantStrideLengthOne()},
     32                                               AlwaysConsume()},
    3233                                       Binding{iBuilder->getStreamSetTy(1, 64), "blockEnd", BoundedRate(0, 1),
    33                                                ConstantStrideLengthOne()}
     34                                               AlwaysConsume()}
    3435
    3536                               },
     
    4445                                               BoundedRate(0, 1)},
    4546
    46                                        Binding{iBuilder->getStreamSetTy(1, 1), "e1Marker", BoundedRate(0, 1)},
    47                                        Binding{iBuilder->getStreamSetTy(1, 64), "m0Start", BoundedRate(0, 1)},
    48                                        Binding{iBuilder->getStreamSetTy(1, 64), "m0End", BoundedRate(0, 1)},
    49                                        Binding{iBuilder->getStreamSetTy(1, 64), "matchOffset", BoundedRate(0, 1)}
     47                                       Binding{iBuilder->getStreamSetTy(1, 1), "e1Marker", BoundedRate(0, 1), {DisableTemporaryBuffer(), DisableSufficientChecking()}},
     48                                       Binding{iBuilder->getStreamSetTy(1, 64), "m0Start", BoundedRate(0, 1), DisableSufficientChecking()}, //TODO disable temporary buffer for all output streams
     49                                       Binding{iBuilder->getStreamSetTy(1, 64), "m0End", BoundedRate(0, 1), DisableSufficientChecking()},
     50                                       Binding{iBuilder->getStreamSetTy(1, 64), "matchOffset", BoundedRate(0, 1), DisableSufficientChecking()},
     51                                       Binding{iBuilder->getStreamSetTy(1, 1), "M0Marker", BoundedRate(0, 1), {DisableTemporaryBuffer()}}
    5052                               },
    5153            //Arguments
    52                                {},
     54                               {
     55                                       Binding{iBuilder->getSizeTy(), "fileSize"}
     56                               },
    5357                               {},
    5458            //Internal states:
     
    5761                                       Binding{iBuilder->getInt64Ty(), "m0OutputPos"}
    5862                               }) {
    59 //        addAttribute(MustExplicitlyTerminate());
     63        this->setStride(4 * 1024 * 1024);
     64        addAttribute(MustExplicitlyTerminate());
    6065    }
    6166
    6267    void LZ4IndexBuilderKernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value *const numOfStrides) {
    63 //        iBuilder->CallPrintInt("entry", iBuilder->getSize(0));
    64 //        iBuilder->CallPrintInt("aaa", iBuilder->getProducedItemCount("e1Marker"));
    65 
    6668
    6769
     
    7476
    7577        Value* totalNumber = iBuilder->CreateAdd(iBuilder->getAvailableItemCount("blockEnd"), iBuilder->getProcessedItemCount("blockEnd"));
    76 
     78        Value* totalExtender = iBuilder->CreateAdd(iBuilder->getAvailableItemCount("extender"), iBuilder->getProcessedItemCount("extender"));
    7779//        iBuilder->CallPrintInt("blockDataIndex", blockDataIndex);
    78 //        iBuilder->CallPrintInt("totalNumber", totalNumber);
    79 //        iBuilder->setTerminationSignal(iBuilder->CreateICmpEQ(availableBlockEnd, iBuilder->getSize(0)));
     80
    8081
    8182        iBuilder->CreateCondBr(iBuilder->CreateICmpULT(blockDataIndex, totalNumber), blockEndConBlock, exitBlock);
     
    8384        iBuilder->SetInsertPoint(blockEndConBlock);
    8485        Value* blockEnd = this->generateLoadInt64NumberInput(iBuilder, "blockEnd", blockDataIndex);
    85         iBuilder->CallPrintInt("blockEnd", blockEnd);
    86 
    87         Value* totalExtender = iBuilder->CreateAdd(iBuilder->getAvailableItemCount("extender"), iBuilder->getProcessedItemCount("extender"));
    88 //        iBuilder->CallPrintInt("totalExtender", totalExtender);
    89 
    90 //        iBuilder->CallPrintInt("processByteStream", iBuilder->getProcessedItemCount("byteStream"));
    91 //        iBuilder->CallPrintInt("availableByteStream", iBuilder->getAvailableItemCount("byteStream"));
    92 
    93 
    94 //        iBuilder->CallPrintInt("consumedExtender", iBuilder->getConsumedItemCount("extender"));
    95 //        iBuilder->CallPrintInt("processExtender", iBuilder->getProcessedItemCount("extender"));
    96 //        iBuilder->CallPrintInt("availableExtender", iBuilder->getAvailableItemCount("extender"));
    97 //        iBuilder->CallPrintInt("blockDataIndex", blockDataIndex);
    98 
    9986
    10087        Value* blockStart = this->generateLoadInt64NumberInput(iBuilder, "blockStart", blockDataIndex);
     
    10592//        iBuilder->CallPrintInt("----blockEnd", blockEnd);
    10693
    107         iBuilder->CreateCondBr(iBuilder->CreateICmpULE(blockEnd, totalExtender), processBlock, exitBlock);
     94//        iBuilder->CreateCondBr(iBuilder->CreateICmpULE(blockEnd, totalExtender), processBlock, exitBlock);
     95        iBuilder->CreateBr(processBlock);
    10896
    10997        iBuilder->SetInsertPoint(processBlock);
    11098
    111 
    11299        //TODO handle uncompressed block
     100
    113101        this->generateProcessCompressedBlock(iBuilder, blockStart, blockEnd);
    114 
    115 
    116102
    117103        Value* newBlockDataIndex = iBuilder->CreateAdd(blockDataIndex, iBuilder->getInt64(1));
     
    123109
    124110        iBuilder->setProcessedItemCount("byteStream", blockEnd);
     111
     112
    125113//        iBuilder->setProcessedItemCount("extender", blockEnd);
    126114//        iBuilder->setProcessedItemCount("CC_0xFX", blockEnd);
     
    182170        // TODO Clear Output Buffer at the beginning instead of marking 0
    183171        this->markCircularOutputBitstream(iBuilder, "e1Marker", iBuilder->getProducedItemCount("e1Marker"), iBuilder->CreateAdd(phiCursorPosAfterLiteral, iBuilder->getSize(1)), false);
     172//        iBuilder->CallPrintInt("markStart", iBuilder->CreateAdd(phiCursorPosAfterLiteral, iBuilder->getSize(1)));
     173//        iBuilder->CallPrintInt("phiCursorPosAfterLiteral", phiCursorPosAfterLiteral);
    184174        this->markCircularOutputBitstream(iBuilder, "e1Marker", iBuilder->CreateAdd(phiCursorPosAfterLiteral, iBuilder->getSize(1)), offsetPos, true);
    185175        this->increaseScalarField(iBuilder, "m0OutputPos", literalLength); //TODO m0OutputPos may be removed from scalar fields
     
    263253                iBuilder->CreateShl(iBuilder->CreateZExt(this->generateLoadSourceInputByte(iBuilder, iBuilder->CreateAdd(offsetPos, iBuilder->getSize(1))), iBuilder->getSizeTy()), iBuilder->getSize(8))
    264254        );
    265 //        iBuilder->CallPrintInt("matchOffset", matchOffset);
    266255        this->generateStoreNumberOutput(iBuilder, "m0Start", iBuilder->getInt64Ty()->getPointerTo(), outputPos);
    267 //    iBuilder->CallPrintInt("m0Start", outputPos);
    268256        this->generateStoreNumberOutput(iBuilder, "m0End", iBuilder->getInt64Ty()->getPointerTo(), outputEndPos);
    269 //    iBuilder->CallPrintInt("m0End", outputEndPos);
    270257        this->generateStoreNumberOutput(iBuilder, "matchOffset", iBuilder->getInt64Ty()->getPointerTo(), matchOffset);
    271 //    iBuilder->CallPrintInt("matchOffset", matchOffset);
    272258        this->increaseScalarField(iBuilder, "m0OutputPos", matchLength);
     259        this->markCircularOutputBitstream(iBuilder, "M0Marker", outputPos, outputEndPos, true, false);
     260
    273261        return iBuilder->CreateAdd(phiCursorPosAfterMatch, INT64_ONE);
    274262    }
     
    276264
    277265    void LZ4IndexBuilderKernel::generateProcessCompressedBlock(const std::unique_ptr<KernelBuilder> &iBuilder, Value* blockStart, Value* blockEnd) {
    278 
     266        // Constant
    279267        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
    280         //TODO use memset to clear output buffer
     268
     269        Value* m0OutputBlockPtr = iBuilder->getOutputStreamBlockPtr("M0Marker", iBuilder->getSize(0));
     270        iBuilder->CreateMemSet(m0OutputBlockPtr, iBuilder->getInt8(0), 4 * 1024 * 1024 / 8, true);
     271
     272
     273        Value* isTerminal = iBuilder->CreateICmpEQ(blockEnd, iBuilder->getScalarField("fileSize"));
     274
     275        iBuilder->setTerminationSignal(isTerminal);
     276
     277        //TODO use memset to clear output buffer for extract marker
     278
    281279        BasicBlock* exitBlock = iBuilder->CreateBasicBlock("processCompressedExitBlock");
    282280
     
    297295        //TODO add acceleration here
    298296        Value* token = this->generateLoadSourceInputByte(iBuilder, phiCursorValue);
     297
    299298//        iBuilder->CallPrintInt("tokenPos", phiCursorValue);
    300299//        iBuilder->CallPrintInt("token", token);
     
    336335        this->generateStoreNumberOutput(iBuilder, "m0End", iBuilder->getInt64Ty()->getPointerTo(), finalM0OutputPos);
    337336        this->generateStoreNumberOutput(iBuilder, "matchOffset", iBuilder->getInt64Ty()->getPointerTo(), iBuilder->getInt64(0));
     337        iBuilder->setProducedItemCount("M0Marker", finalM0OutputPos);
     338        // finalM0OutputPos should always be 4MB * n except for the final block
    338339
    339340        iBuilder->CreateBr(processCon);
     
    355356        Constant* INT64_BIT_BLOCK_WIDTH = iBuilder->getInt64(bitBlockWidth);
    356357        Constant* SIZE_ZERO = iBuilder->getSize(0);
     358        Type* bitBlockType = iBuilder->getBitBlockType();
    357359        Type* bitBlockWidthIntTy = iBuilder->getIntNTy(bitBlockWidth);
    358360
     
    377379        iBuilder->SetInsertPoint(advanceBodyBlock);
    378380
    379         Value* currentPosBitBlockIndex = iBuilder->CreateSub(iBuilder->CreateUDiv(phiCurrentPos, INT64_BIT_BLOCK_WIDTH), baseInputBlockIndex);
     381
     382        Value* currentBlockGlobalPos = iBuilder->CreateUDiv(phiCurrentPos, INT64_BIT_BLOCK_WIDTH);
     383        Value* currentPosBitBlockIndex = iBuilder->CreateSub(currentBlockGlobalPos, baseInputBlockIndex);
    380384
    381385        Value* currentPosBitBlockOffset = iBuilder->CreateURem(phiCurrentPos, INT64_BIT_BLOCK_WIDTH);
    382386
    383387        Value* ptr = iBuilder->getInputStreamBlockPtr(inputName, SIZE_ZERO, currentPosBitBlockIndex);
     388        Value* rawPtr = iBuilder->CreatePointerCast(iBuilder->getRawInputPointer(inputName, SIZE_ZERO), bitBlockType->getPointerTo());
     389        Value* ptr2 = iBuilder->CreateGEP(rawPtr, iBuilder->CreateURem(currentBlockGlobalPos, iBuilder->getSize(this->getAnyStreamSetBuffer(inputName)->getBufferBlocks())));
     390        ptr = ptr2; //TODO workaround here
     391
    384392
    385393        Value* currentBitValue = iBuilder->CreateBitCast(iBuilder->CreateLoad(ptr), bitBlockWidthIntTy);
     
    410418
    411419    Value * LZ4IndexBuilderKernel::generateLoadInt64NumberInput(const unique_ptr<KernelBuilder> &iBuilder, string inputBufferName, Value *globalOffset) {
    412         // Stride Size here is Constant 1 instead of BitBlockWidth
     420        Constant* SIZE_STRIDE_SIZE = iBuilder->getSize(this->getStride());
    413421        Constant* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
    414422        Constant* SIZE_ZERO = iBuilder->getSize(0);
     
    416424//        Value* baseInputBlockIndex = iBuilder->CreateUDiv(iBuilder->getProcessedItemCount(inputBufferName), SIZE_BIT_BLOCK_WIDTH);
    417425
    418         Value* offset = iBuilder->CreateSub(globalOffset, iBuilder->getProcessedItemCount(inputBufferName));
     426        //TODO possible bug here, maybe we need to use iBuilder->getStride()
     427        Value* offset = iBuilder->CreateSub(globalOffset, iBuilder->CreateMul(iBuilder->CreateUDiv(iBuilder->getProcessedItemCount(inputBufferName), SIZE_STRIDE_SIZE), SIZE_STRIDE_SIZE));
    419428
    420429        Value* targetBlockIndex = iBuilder->CreateUDiv(offset, SIZE_BIT_BLOCK_WIDTH);
     
    451460    // Assume we have enough output buffer
    452461    llvm::BasicBlock *LZ4IndexBuilderKernel::markCircularOutputBitstream(const std::unique_ptr<KernelBuilder> &iBuilder,
    453                                                                     const std::string &bitstreamName,
    454                                                                     llvm::Value *start, llvm::Value *end, bool isOne,
    455                                                                     bool setProduced) {
     462                                                                         const std::string &bitstreamName,
     463                                                                         llvm::Value *start, llvm::Value *end, bool isOne,
     464                                                                         bool setProduced) {
    456465        const unsigned int bitBlockWidth = iBuilder->getBitBlockWidth();
    457466        Value* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(bitBlockWidth);
     
    529538
    530539        Value *targetPtr = iBuilder->getOutputStreamBlockPtr(bitstreamName, SIZE_ZERO, curBlockLocalIndex);
     540        Value *rawInputPointer = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(bitstreamName, SIZE_ZERO), iBuilder->getBitBlockType()->getPointerTo());
     541        Value * ptr = iBuilder->CreateGEP(rawInputPointer, iBuilder->CreateURem(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), iBuilder->getSize(this->getAnyStreamSetBuffer(bitstreamName)->getBufferBlocks())));
     542//        iBuilder->CallPrintInt("targetPtr", targetPtr);
     543//        iBuilder->CallPrintInt("targetPtr2", ptr);
     544        targetPtr = ptr; //TODO workaround here
     545
     546
     547        //TODO fixed circular here
     548
    531549        Value *oldValue = iBuilder->CreateLoad(targetPtr);
    532550        oldValue = iBuilder->CreateBitCast(oldValue, INT_BIT_BLOCK_TY);
     
    557575
    558576    void LZ4IndexBuilderKernel::generateStoreNumberOutput(const unique_ptr<KernelBuilder> &iBuilder,
    559                                                              const string &outputBufferName, Type *pointerType,
    560                                                              Value *value) {
     577                                                          const string &outputBufferName, Type *pointerType,
     578                                                          Value *value) {
     579
    561580        Value* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
    562581        Value* SIZE_ZERO = iBuilder->getSize(0);
     
    564583
    565584        Value* previousProduced = previousProducedMap.find(outputBufferName)->second;
     585//        iBuilder->CallPrintInt("previousProduced", previousProduced);
    566586
    567587        Value* blockIndexBase = iBuilder->CreateUDiv(previousProduced, SIZE_BIT_BLOCK_WIDTH);
     
    575595        Value* ptr = iBuilder->getOutputStreamBlockPtr(outputBufferName, SIZE_ZERO, iBuilder->CreateSub(blockIndex, blockIndexBase));
    576596        ptr = iBuilder->CreatePointerCast(ptr, pointerType);
     597        ptr = iBuilder->CreateGEP(ptr, blockOffset);
     598
     599        Value* tmpOffset = iBuilder->CreateURem(outputOffset, iBuilder->getSize(this->getAnyStreamSetBuffer(outputBufferName)->getBufferBlocks() * iBuilder->getBitBlockWidth()));
     600        Value* outputRawPtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(outputBufferName, SIZE_ZERO), pointerType);
     601        Value* ptr2 = iBuilder->CreateGEP(outputRawPtr, tmpOffset);
     602        ptr = ptr2;
     603//        iBuilder->CallPrintInt("ptr", ptr);
     604//        iBuilder->CallPrintInt("ptr2", ptr2);
     605
    577606        // GEP here is safe
    578         iBuilder->CreateStore(value, iBuilder->CreateGEP(ptr, blockOffset));
     607        iBuilder->CreateStore(value, ptr);
     608
     609        if (outputBufferName == "m0End") {
     610//            iBuilder->CallPrintInt("output:m0End", value);
     611        }
    579612
    580613        iBuilder->setProducedItemCount(outputBufferName, iBuilder->CreateAdd(outputOffset, SIZE_ONE));
     
    583616
    584617    void LZ4IndexBuilderKernel::resetPreviousProducedMap(const std::unique_ptr<KernelBuilder> &iBuilder,
    585                                                             std::vector<std::string> outputList) {
     618                                                         std::vector<std::string> outputList) {
    586619        previousProducedMap.clear();
    587620        for (auto iter = outputList.begin(); iter != outputList.end(); ++iter) {
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_multiple_pdep_kernel.cpp

    r5926 r5941  
    4545
    4646        Value * itemsToDo = mAvailableItemCount[0];
    47 
    4847        Value * sourceItemsAvail = mAvailableItemCount[1]; //TODO need to be calculated from numOfStrides
    4948
     
    180179                                  )
    181180        );
     181
    182182    }
    183183
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_numbers_to_bitstream_kernel.cpp

    r5939 r5941  
    1717using namespace kernel;
    1818using namespace std;
     19
    1920
    2021namespace kernel {
     
    6263
    6364        Value *itemsToDo = mAvailableItemCount[0];
    64 //        iBuilder->CallPrintInt("itemsToDo", itemsToDo);
    65         Value *isFinalBlock = iBuilder->CreateICmpEQ(itemsToDo, iBuilder->getSize(0));
    66         iBuilder->setTerminationSignal(isFinalBlock);
     65//        Value *isFinalBlock = iBuilder->CreateICmpEQ(itemsToDo, iBuilder->getSize(0));
     66        Value *isFinalBlock = mIsFinal;
     67//        iBuilder->setTerminationSignal(isFinalBlock);
    6768
    6869        Value *itemProcessed = iBuilder->getProcessedItemCount(START_NUM_STREAM_NAME);
     
    7778
    7879        Value *availableOutputBlocks = iBuilder->CreateUMin(remainSpace, numOfStrides);
     80        availableOutputBlocks = remainSpace; //TODO workaround here
    7981
    8082//        Value *inputStartBasePtr = iBuilder->getInputStreamBlockPtr(START_NUM_STREAM_NAME, SIZE_ZERO);
     
    8284//        Value *inputEndBasePtr = iBuilder->getInputStreamBlockPtr(END_NUM_STREAM_NAME, SIZE_ZERO);
    8385//        inputEndBasePtr = iBuilder->CreatePointerCast(inputEndBasePtr, iBuilder->getInt64Ty()->getPointerTo());
    84 //        Value *outputBasePtr = iBuilder->getOutputStreamBlockPtr(OUTPUT_BIT_STREAM_NAME, SIZE_ZERO);
     86        Value *outputBasePtr = iBuilder->getOutputStreamBlockPtr(OUTPUT_BIT_STREAM_NAME, SIZE_ZERO);
    8587        Value *initCarryBit = iBuilder->getScalarField("carryBit");
    8688
     
    175177                iBuilder->CreateSelect(
    176178                        enterNewOutputBlock,
    177                         phiCurrentItemIndex,
     179                        iBuilder->CreateSelect(iBuilder->CreateICmpEQ(currentEndLocalBlockOffset, SIZE_ZERO), iBuilder->CreateAdd(phiCurrentItemIndex, SIZE_ONE), phiCurrentItemIndex),
    178180                        iBuilder->CreateAdd(phiCurrentItemIndex, SIZE_ONE)
    179181                ),
     
    253255        Value *newProcessedItemCount = iBuilder->CreateAdd(iBuilder->getProcessedItemCount(START_NUM_STREAM_NAME),
    254256                                                           iBuilder->CreateSub(phiCurrentItemIndex,
    255                                                                                initCurrentItemIndex));
     257                                                                               initCurrentItemIndex)); //TODO bug here
    256258
    257259
     
    315317                               {
    316318                                       Binding{iBuilder->getStreamSetTy(1, 64), START_NUM_STREAM_NAME,
    317                                                BoundedRate(0, 1), AlwaysConsume()},
    318                                        Binding{iBuilder->getStreamSetTy(1, 64), END_NUM_STREAM_NAME, BoundedRate(0, 1),
    319                                                AlwaysConsume()}
     319                                               BoundedRate(0, 1)/*, AlwaysConsume()*/},
     320                                       Binding{iBuilder->getStreamSetTy(1, 64), END_NUM_STREAM_NAME, BoundedRate(0, 1)/*,
     321                                               AlwaysConsume()*/}
    320322                               },
    321323            //Outputs
     
    338340//        addAttribute(CanTerminateEarly());
    339341//        setNoTerminateAttribute(true);
    340         addAttribute(MustExplicitlyTerminate());
     342//        addAttribute(MustExplicitlyTerminate());
    341343    }
    342344}
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_swizzled_match_copy_kernel.cpp

    r5924 r5941  
    1212using namespace kernel;
    1313using namespace std;
     14
     15Value* LZ4SwizzledMatchCopyKernel::loadInt64NumberInput(const unique_ptr<KernelBuilder> &iBuilder, string bufferName, Value* offset) {
     16    // GEP here is safe
     17    Value* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
     18    Value* inputLocalBlockIndex = iBuilder->CreateUDiv(offset, SIZE_BIT_BLOCK_WIDTH);
     19    Value* inputLocalBlockOffset = iBuilder->CreateURem(offset, SIZE_BIT_BLOCK_WIDTH);
     20
     21    Value* blockBasePtr = iBuilder->getInputStreamBlockPtr(bufferName, iBuilder->getSize(0), inputLocalBlockIndex);
     22    blockBasePtr = iBuilder->CreatePointerCast(blockBasePtr, iBuilder->getInt64Ty()->getPointerTo());
     23    // GEP here is safe
     24    return iBuilder->CreateLoad(iBuilder->CreateGEP(blockBasePtr, inputLocalBlockOffset));
     25}
    1426
    1527void LZ4SwizzledMatchCopyKernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value * const numOfStrides)  {
     
    2032    Constant *SIZE_PDEP_WIDTH = iBuilder->getSize(mPDEPWidth);
    2133
     34    BasicBlock* exitBlock = iBuilder->CreateBasicBlock("exitBlock");
     35
    2236    Value *itemsToDo = mAvailableItemCount[3];
    23 
    24 
     37    Value *processedItemCount = mInitialProcessedItemCount[3];
     38    Value *totalItemCount = iBuilder->CreateAdd(itemsToDo, processedItemCount);
     39    Value *isFinalBlock = iBuilder->CreateICmpULT(itemsToDo, iBuilder->getSize(4 * 1024 * 1024));
     40    this->mIsFinalBlock = isFinalBlock;
     41    iBuilder->setTerminationSignal(isFinalBlock);
    2542
    2643    Value *previousProducedItemCount = iBuilder->getProducedItemCount("outputStreamSet0");
     
    2946    Value *outputBufferBlocks = iBuilder->getSize(
    3047            this->getAnyStreamSetBuffer("outputStreamSet0")->getBufferBlocks());
    31     Value *outputRawBeginPtr = iBuilder->CreatePointerCast(
    32             iBuilder->getRawOutputPointer("outputStreamSet0", SIZE_ZERO),
    33             iBuilder->getBitBlockType()->getPointerTo()); // TODO it is possible the pointer cast here is not necessary
    34     Value *outputCurrentPtr = iBuilder->getOutputStreamBlockPtr("outputStreamSet0", SIZE_ZERO);
    35     Value *producedOffset = iBuilder->CreatePtrDiff(outputCurrentPtr, outputRawBeginPtr);
    36     producedOffset = iBuilder->CreateUDiv(producedOffset, iBuilder->getSize(mStreamCount));
    37 
    38     Value *remainSpace = iBuilder->CreateSub(outputBufferBlocks, producedOffset);
    39     Value *matchCopyWindowBlock = iBuilder->getSize(256 * 256 / codegen::BlockSize);
    40     Value *remainWindowBlock = iBuilder->CreateSelect(
    41             iBuilder->CreateICmpUGE(producedOffset, matchCopyWindowBlock),
    42             iBuilder->getSize(0),
    43             iBuilder->CreateSub(matchCopyWindowBlock, producedOffset)
    44     );
    45     Value *writableBlocks = iBuilder->CreateSub(remainSpace,
    46                                                 remainWindowBlock); //TODO handle beginning, if producedItemCount / bitblockWidth < windowBlock, there is no need for the substraction here
    47 
    48     Value *outputBlocks = iBuilder->CreateUMin(writableBlocks, numOfStrides);
    49 //    outputBlocks = iBuilder->CreateUMin(outputBlocks, this->getMaximumMatchCopyBlock(iBuilder));
    50 
    51 
    52     Value *isFinalBlock =
    53             iBuilder->CreateOr(
    54                     iBuilder->CreateICmpULT(itemsToDo, iBuilder->CreateMul(outputBlocks, SIZE_BIT_BLOCK_WIDTH)),
    55                     iBuilder->CreateICmpEQ(itemsToDo, iBuilder->getSize(0))
    56             );
    57 
    58     this->mIsFinalBlock = isFinalBlock;
    59     iBuilder->setTerminationSignal(isFinalBlock);
    60 
     48
     49    Value *outputBlocks = iBuilder->getSize(4 * 1024 * 1024 / iBuilder->getBitBlockWidth()); // Always be 4MB
     50
     51
     52    BasicBlock* processBlock = iBuilder->CreateBasicBlock("processBlock");
     53    Value* isInputEnough = iBuilder->CreateOr(isFinalBlock, iBuilder->CreateICmpUGE(itemsToDo, iBuilder->getSize(4 * 1024 * 1024)));
     54
     55    iBuilder->CreateCondBr(isInputEnough, processBlock, exitBlock);
     56
     57    iBuilder->SetInsertPoint(processBlock);
    6158    // Output Copy
    6259    this->generateOutputCopy(iBuilder, outputBlocks);
    63 
    64 
    6560
    6661    Value *newProducedItemCount = iBuilder->getProducedItemCount("outputStreamSet0");
     
    7166
    7267    // Match Copy
    73     BasicBlock *exitBlock = iBuilder->CreateBasicBlock("exit_block");
     68    BasicBlock *processExitBlock = iBuilder->CreateBasicBlock("exit_block");
    7469
    7570    Value *initM0StartProcessIndex = iBuilder->getProcessedItemCount("m0Start");
     
    115110
    116111    Value *hasMoreMatchInfo = iBuilder->CreateICmpULT(phiProcessIndex, totalM0StartItemsCount);
    117     iBuilder->CreateCondBr(hasMoreMatchInfo, loadNextMatchInfoBodyBlock, exitBlock);
     112    iBuilder->CreateCondBr(hasMoreMatchInfo, loadNextMatchInfoBodyBlock, processExitBlock);
    118113
    119114    iBuilder->SetInsertPoint(loadNextMatchInfoBodyBlock);
    120115
    121     Value *m0StartBasePtr = iBuilder->CreatePointerCast(iBuilder->getInputStreamBlockPtr("m0Start", SIZE_ZERO), iBuilder->getInt64Ty()->getPointerTo());
    122     Value *m0EndBasePtr = iBuilder->CreatePointerCast(iBuilder->getInputStreamBlockPtr("m0End", SIZE_ZERO), iBuilder->getInt64Ty()->getPointerTo());
    123     Value *matchOffsetBasePtr = iBuilder->CreatePointerCast(iBuilder->getInputStreamBlockPtr("matchOffset", SIZE_ZERO), iBuilder->getInt64Ty()->getPointerTo());
    124 
    125116
    126117    Value *m0StartBaseOffset = iBuilder->CreateURem(initM0StartProcessIndex, SIZE_BIT_BLOCK_WIDTH);
    127 //    iBuilder->CallPrintInt("rawPtr", iBuilder->getRawInputPointer("m0Start", SIZE_ZERO));
    128 //    iBuilder->CallPrintInt("ptr", m0StartBasePtr);
    129 //    iBuilder->CallPrintInt("initM0StartProcessIndex", initM0StartProcessIndex);
    130118    Value *m0StartLoadOffset = iBuilder->CreateAdd(m0StartBaseOffset,
    131119                                                   iBuilder->CreateSub(phiProcessIndex, initM0StartProcessIndex));
    132120
    133     Value *newM0Start = iBuilder->CreateLoad(iBuilder->CreateGEP(m0StartBasePtr, m0StartLoadOffset));
    134     Value *newM0End = iBuilder->CreateLoad(iBuilder->CreateGEP(m0EndBasePtr, m0StartLoadOffset));
    135     Value *newMatchOffset = iBuilder->CreateLoad(iBuilder->CreateGEP(matchOffsetBasePtr, m0StartLoadOffset));
     121
     122    Value *newM0Start = this->loadInt64NumberInput(iBuilder, "m0Start", m0StartLoadOffset);
     123    Value *newM0End = this->loadInt64NumberInput(iBuilder, "m0End", m0StartLoadOffset);
     124    Value *newMatchOffset = this->loadInt64NumberInput(iBuilder, "matchOffset", m0StartLoadOffset);
    136125
    137126    Value *depositStart = newM0Start;
    138 //    iBuilder->CallPrintInt("depositStart", depositStart);
    139 //    iBuilder->CallPrintInt("newMatchLength", newMatchLength);
    140127
    141128    Value *depositEnd = iBuilder->CreateAdd(newM0End, iBuilder->getInt64(1));
     
    153140    Value *hasNotReachEnd = iBuilder->CreateICmpULT(phiMatchPos, newProducedItemCount);
    154141//    iBuilder->CallPrintInt("newProducedItemCount", newProducedItemCount);
    155     iBuilder->CreateCondBr(hasNotReachEnd, matchCopyBodyBlock, exitBlock);
     142    iBuilder->CreateCondBr(hasNotReachEnd, matchCopyBodyBlock, processExitBlock);
    156143
    157144    iBuilder->SetInsertPoint(matchCopyBodyBlock);
     
    186173    Value* fullMask = iBuilder->simd_fill(mPDEPWidth, singleMask);
    187174
    188 //    iBuilder->CallPrintInt("phiMatchPos", phiMatchPos);
    189 //    iBuilder->CallPrintInt("currentCopySize", currentCopySize);
    190 //    iBuilder->CallPrintInt("aaa", iBuilder->CreateShl(SIZE_ONE, iBuilder->CreateAdd(matchCopyFromBlockOffset, currentCopySize)));
    191 //    iBuilder->CallPrintRegister("fullMask", fullMask);
    192 
    193175    for (int i = 0; i < mStreamSize; i++) {
    194176        Value* rawOutputBasePtr = iBuilder->getRawOutputPointer("outputStreamSet" + std::to_string(i), SIZE_ZERO);
     
    230212    iBuilder->CreateBr(matchCopyLoopCon);
    231213
    232     iBuilder->SetInsertPoint(exitBlock);
    233 //    iBuilder->CallPrintInt("test", SIZE_ZERO);
     214    iBuilder->SetInsertPoint(processExitBlock);
    234215    iBuilder->setScalarField("pendingMatchOffset", phiMatchOffset);
    235216    iBuilder->setScalarField("pendingMatchLength", phiMatchLength);
    236217    iBuilder->setScalarField("pendingMatchPos", phiMatchPos);
    237 //    iBuilder->CallPrintInt("pendingMatchLength", phiMatchLength);
    238218    iBuilder->setProcessedItemCount("m0Start", phiProcessIndex);
    239219    iBuilder->setProcessedItemCount("m0End", phiProcessIndex);
    240220    iBuilder->setProcessedItemCount("matchOffset", phiProcessIndex);
     221
     222    iBuilder->CreateBr(exitBlock);
     223    iBuilder->SetInsertPoint(exitBlock);
    241224}
    242225
     
    250233
    251234    Value *itemsToDo = mAvailableItemCount[3];
    252 //    iBuilder->CallPrintInt("swizzledMatchCopy:itemsToDo", itemsToDo);
    253235    Value *copySize = iBuilder->CreateMul(outputBlocks, SIZE_BIT_BLOCK_WIDTH);
    254 //    iBuilder->CallPrintInt("swizzledMatchCopy:copySize", copySize);
    255236    Value* actualCopySize = iBuilder->CreateUMin(itemsToDo, copySize);
    256237    Value* copyByte = iBuilder->CreateUDivCeil(iBuilder->CreateMul(copySize, iBuilder->getSize(mStreamCount)), iBuilder->getSize(8)); // i8
     
    313294        // Inputs
    314295                           {
    315                                    //TODO add swizzled attribute
    316296                                   Binding{iBuilder->getStreamSetTy(1, 64), "m0Start", BoundedRate(0, 1), AlwaysConsume()},
    317297                                   Binding{iBuilder->getStreamSetTy(1, 64), "m0End", BoundedRate(0, 1), AlwaysConsume()},
     
    322302                           {},
    323303        // Arguments
    324                            {},
     304                           {
     305                                   Binding{iBuilder->getSizeTy(), "fileSize"} //TODO remove
     306                           },
    325307                           {},
    326308                           {
     
    337319    assert((mSwizzleFactor == (iBuilder->getBitBlockWidth() / PDEP_width)) && "swizzle factor must equal bitBlockWidth / PDEP_width");
    338320    assert((mPDEPWidth == 64 || mPDEPWidth == 32) && "PDEP width must be 32 or 64");
    339 
     321    this->setStride(4 * 1024 * 1024);
    340322    addAttribute(MustExplicitlyTerminate());
    341323
    342 
    343     mStreamSetInputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "sourceStreamSet0", BoundedRate(0, 1), {AlwaysConsume(), Swizzled()}});
    344     mStreamSetOutputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "outputStreamSet0", BoundedRate(0, 1)});
     324    mStreamSetInputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "sourceStreamSet0", BoundedRate(0, 1), {Swizzled(), DisableTemporaryBuffer()}});
     325    mStreamSetOutputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "outputStreamSet0", BoundedRate(0, 1), DisableTemporaryBuffer()});
    345326
    346327    for (int i = 1; i < streamSize; i++) {
    347         mStreamSetInputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "sourceStreamSet" + std::to_string(i), RateEqualTo("sourceStreamSet0"), {AlwaysConsume(), Swizzled()}});
    348         mStreamSetOutputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "outputStreamSet" + std::to_string(i), RateEqualTo("outputStreamSet0")});
     328        mStreamSetInputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "sourceStreamSet" + std::to_string(i), RateEqualTo("sourceStreamSet0"), {Swizzled(), DisableTemporaryBuffer()}});
     329        mStreamSetOutputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "outputStreamSet" + std::to_string(i), RateEqualTo("outputStreamSet0"), DisableTemporaryBuffer()});
    349330    }
    350331}
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_swizzled_match_copy_kernel.h

    r5906 r5941  
    3030        llvm::Value* getMaximumMatchCopyBlock(const std::unique_ptr<KernelBuilder> &iBuilder);
    3131        llvm::Value* mIsFinalBlock;
     32        llvm::Value* loadInt64NumberInput(const std::unique_ptr<KernelBuilder> &iBuilder, std::string bufferName, llvm::Value* offset);
    3233    };
    3334}
  • icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp

    r5926 r5941  
    158158
    159159    Kernel * swizzledMatchCopyK = pxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
     160    swizzledMatchCopyK->setInitialArguments({fileSize});
    160161    pxDriver.makeKernelCall(swizzledMatchCopyK, {M0_Start, M0_End, Match_Offset, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
    161162
  • icGREP/icgrep-devel/icgrep/lz4/LZ4GeneratorNew.cpp

    r5923 r5941  
    4848
    4949int LZ4GeneratorNew::getInputBufferBlocks() {
    50     return this->get4MbBufferBlocks();
     50    return this->get4MbBufferBlocks() * 2;
    5151}
    5252int LZ4GeneratorNew::getDecompressedBufferBlocks() {
    53     return this->get4MbBufferBlocks();
     53    return this->get4MbBufferBlocks() * 2;
    5454}
    5555
     
    5757void LZ4GeneratorNew::generateExtractAndDepositMarkers(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    5858    //// Decode Block Information
    59     StreamSetBuffer * const BlockData_IsCompressed = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->get4MbBufferBlocks());
    60     StreamSetBuffer * const BlockData_BlockStart = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->get4MbBufferBlocks());
    61     StreamSetBuffer * const BlockData_BlockEnd = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->get4MbBufferBlocks());
     59    StreamSetBuffer * const BlockData_IsCompressed = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->getInputBufferBlocks());
     60    StreamSetBuffer * const BlockData_BlockStart = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
     61    StreamSetBuffer * const BlockData_BlockEnd = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
    6262
    6363    //// Generate Helper Markers Extenders, FX, XF
    64     StreamSetBuffer * const Extenders = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->get4MbBufferBlocks());
    65     StreamSetBuffer * const CC_0xFX = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->get4MbBufferBlocks());
    66     StreamSetBuffer * const CC_0xXF = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->get4MbBufferBlocks());
     64    StreamSetBuffer * const Extenders = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
     65    StreamSetBuffer * const CC_0xFX = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
     66    StreamSetBuffer * const CC_0xXF = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
    6767
    6868
     
    9393
    9494    //TODO handle uncompressed part
    95     StreamSetBuffer * const UncompressedStartPos = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->get4MbBufferBlocks());
    96     StreamSetBuffer * const UncompressedLength = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->get4MbBufferBlocks());
    97     StreamSetBuffer * const UncompressedOutputPos = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->get4MbBufferBlocks());
     95    StreamSetBuffer * const UncompressedStartPos = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
     96    StreamSetBuffer * const UncompressedLength = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
     97    StreamSetBuffer * const UncompressedOutputPos = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
    9898
    9999    EMarker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), e1BufferSize);
     
    104104
    105105    Kernel * blockDecoderK = pxDriver.addKernelInstance<LZ4BlockDecoderNewKernel>(iBuilder);
    106     blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(hasBlockChecksum, iBuilder->getInt1Ty()), headerSize});
     106    blockDecoderK->setInitialArguments({iBuilder->CreateTrunc(hasBlockChecksum, iBuilder->getInt1Ty()), headerSize, fileSize});
    107107    pxDriver.makeKernelCall(blockDecoderK, {ByteStream, Extenders}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
    108108
     
    110110
    111111    Kernel* Lz4IndexBuilderK = pxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
     112    Lz4IndexBuilderK->setInitialArguments({fileSize});
    112113    pxDriver.makeKernelCall(
    113114            Lz4IndexBuilderK,
     
    115116                    ByteStream,
    116117                    Extenders,
    117                     CC_0xFX,
    118                     CC_0xXF,
     118//                    CC_0xFX,
     119//                    CC_0xXF,
    119120
    120121                    // Block Data
     
    131132                    M0_Start,
    132133                    M0_End,
    133                     Match_Offset
     134                    Match_Offset,
     135                    M0Marker
    134136            });
    135137
    136138
    137     Kernel * buildM0StartMarkerK = pxDriver.addKernelInstance<LZ4NumbersToBitstreamKernel>("buildM0Marker", iBuilder);
    138     pxDriver.makeKernelCall(buildM0StartMarkerK, {M0_Start, M0_End}, {M0Marker});
     139//    Kernel * buildM0StartMarkerK = pxDriver.addKernelInstance<LZ4NumbersToBitstreamKernel>("buildM0Marker", iBuilder);
     140//    pxDriver.makeKernelCall(buildM0StartMarkerK, {M0_Start, M0_End}, {M0Marker});
    139141
    140142
    141143    Kernel * generateDepositK = pxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
    142     pxDriver.makeKernelCall(generateDepositK, {M0Marker}, {DepositMarker}); // TODO deposit
     144    pxDriver.makeKernelCall(generateDepositK, {M0Marker}, {DepositMarker});
    143145
    144146}
  • icGREP/icgrep-devel/icgrep/toolchain/pipeline.cpp

    r5921 r5941  
    648648
    649649            const Binding & output = kernel->getStreamOutput(buffer);
     650
     651            if (output.isDisableSufficientChecking()) {
     652                continue;
     653            }
     654
    650655            const auto name = output.getName();
    651656            BasicBlock * const sufficient = b->CreateBasicBlock(name + "HasOutputSpace");
     
    724729        for (const StreamSetBuffer * buffer : I->second) {
    725730            const Binding & input = kernel->getStreamInput(buffer);
     731            if (input.isDisableSufficientChecking()) {
     732                continue;
     733            }
     734
    726735            const auto name = input.getName();
    727736            BasicBlock * const sufficient = b->CreateBasicBlock(name + "HasInputData");
     
    734743                const auto ub = kernel->getUpperBound(input.getRate()); assert (ub > 0);
    735744                strideLength = b->getSize(ceiling(ub * kernel->getStride()) - 1);
    736             }
    737 
    738             if (input.isConstantStrideLengthOne()) {
    739                 // TODO workaround here
    740                 strideLength = b->getSize(1);
    741745            }
    742746
     
    754758          //  b->CallPrintInt("< " + kernel->getName() + "_" + name + "_unprocessed", unprocessed);
    755759
    756             Value * const hasSufficientData = input.isConstantStrideLengthOne() ?
    757                                               b->CreateOr(b->CreateICmpUGE(unprocessed, strideLength), isFinal) :
    758                                               b->CreateOr(b->CreateICmpUGT(unprocessed, strideLength), isFinal);
    759 //            Value * const hasSufficientData = b->CreateOr(b->CreateICmpUGT(unprocessed, strideLength), isFinal);
    760 //            Value * const hasSufficientData = b->CreateOr(b->CreateICmpUGE(unprocessed, strideLength), isFinal);
     760            Value * const hasSufficientData = b->CreateOr(b->CreateICmpUGT(unprocessed, strideLength), isFinal);
    761761
    762762          //  b->CallPrintInt("* < " + kernel->getName() + "_" + name + "_sufficientData", hasSufficientData);
Note: See TracChangeset for help on using the changeset viewer.