Changeset 5974 for icGREP/icgrep-devel


Ignore:
Timestamp:
Apr 18, 2018, 3:30:28 AM (15 months ago)
Author:
xwa163
Message:
  1. Use i1 bit streams instead of i64 number streams for the M0-related streams and the Match Offset stream
  2. Improve the performance of lz4_index_builder
Location:
icGREP/icgrep-devel/icgrep
Files:
7 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_index_builder.cpp

    r5967 r5974  
    4545
    4646           Binding{iBuilder->getStreamSetTy(1, 1), "deletionMarker", BoundedRate(0, 1)},
    47            Binding{iBuilder->getStreamSetTy(1, 64), "m0Start", BoundedRate(0, 1)},
    48            Binding{iBuilder->getStreamSetTy(1, 64), "m0End", BoundedRate(0, 1)},
    49            Binding{iBuilder->getStreamSetTy(1, 64), "matchOffset", BoundedRate(0, 1)},
    50            Binding{iBuilder->getStreamSetTy(1, 1), "M0Marker", BoundedRate(0, 1)}
     47           Binding{iBuilder->getStreamSetTy(1, 1), "M0Marker", BoundedRate(0, 1)},
     48           Binding{iBuilder->getStreamSetTy(1, 1), "M0CountMarker", BoundedRate(0, 1)},
     49           Binding{iBuilder->getStreamSetTy(1, 1), "MatchOffsetMarker", RateEqualTo("byteStream")}
    5150    },
    5251    //Arguments
     
    5857    {
    5958           Binding{iBuilder->getSizeTy(), "blockDataIndex"},
    60            Binding{iBuilder->getInt64Ty(), "m0OutputPos"}
     59           Binding{iBuilder->getInt64Ty(), "m0OutputPos"},
     60           Binding{iBuilder->getInt64Ty(), "compressedSpaceClearPos"}
    6161    }) {
    6262        this->setStride(4 * 1024 * 1024);
     
    6565
    6666    void LZ4IndexBuilderKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> &iBuilder) {
     67//        iBuilder->CallPrintInt("IndexBuilder:entry", iBuilder->getSize(0));
    6768
    6869        BasicBlock* exitBlock = iBuilder->CreateBasicBlock("exitBlock");
     
    104105
    105106    Value* LZ4IndexBuilderKernel::processLiteral(const std::unique_ptr<KernelBuilder> &iBuilder, Value* token, Value* tokenPos, Value* blockEnd) {
     107//        iBuilder->CallPrintInt("blockEnd", blockEnd);
    106108        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
    107109
    108110        Value * extendedLiteralValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf0)), iBuilder->getInt8(0xf0));
    109111
     112        BasicBlock* extendLiteralLengthCon = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_con");
    110113        BasicBlock* extendLiteralLengthBody = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_body");
    111114        BasicBlock* extendLiteralLengthExit = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_exit");
    112115
    113         iBuilder->CreateCondBr(extendedLiteralValue, extendLiteralLengthBody, extendLiteralLengthExit);
     116        iBuilder->CreateCondBr(extendedLiteralValue, extendLiteralLengthCon, extendLiteralLengthExit);
     117
     118        iBuilder->SetInsertPoint(extendLiteralLengthCon);
     119
     120        iBuilder->CreateLikelyCondBr(iBuilder->CreateICmpNE(iBuilder->CreateLoad(
     121                iBuilder->getRawInputPointer("byteStream", iBuilder->CreateAdd(tokenPos, iBuilder->getInt64(1)))),
     122                                                            iBuilder->getInt8(0xff)), extendLiteralLengthExit,
     123                                     extendLiteralLengthBody);
     124
    114125
    115126        iBuilder->SetInsertPoint(extendLiteralLengthBody);
    116         Value* newCursorPos = this->advanceUntilNextZero(iBuilder, "extender", iBuilder->CreateAdd(tokenPos, iBuilder->getInt64(1)), blockEnd);
     127        Value* newCursorPos2 = this->advanceUntilNextZero(iBuilder, "extender", iBuilder->CreateAdd(tokenPos, iBuilder->getInt64(1)), blockEnd);
    117128        BasicBlock* advanceFinishBlock = iBuilder->GetInsertBlock();
    118129
     
    120131
    121132        iBuilder->SetInsertPoint(extendLiteralLengthExit);
    122 
    123         PHINode* phiCursorPosAfterLiteral = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
    124         phiCursorPosAfterLiteral->addIncoming(newCursorPos, advanceFinishBlock);
     133//        PHINode* newCursorPos = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
     134//        newCursorPos->addIncoming(a, extendLiteralLengthCon);
     135//        newCursorPos->addIncoming(newCursorPos2, advanceFinishBlock);
     136
     137        PHINode* phiCursorPosAfterLiteral = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 3);
     138        phiCursorPosAfterLiteral->addIncoming(iBuilder->CreateAdd(tokenPos, iBuilder->getInt64(1)), extendLiteralLengthCon);
     139        phiCursorPosAfterLiteral->addIncoming(newCursorPos2, advanceFinishBlock);
    125140        phiCursorPosAfterLiteral->addIncoming(tokenPos, entryBlock);
    126141
     
    229244                iBuilder->CreateShl(iBuilder->CreateZExt(this->generateLoadSourceInputByte(iBuilder, iBuilder->CreateAdd(offsetPos, iBuilder->getSize(1))), iBuilder->getSizeTy()), iBuilder->getSize(8))
    230245        );
    231         this->generateStoreNumberOutput(iBuilder, "m0Start", outputPos);
    232         this->generateStoreNumberOutput(iBuilder, "m0End", outputEndPos);
    233         this->generateStoreNumberOutput(iBuilder, "matchOffset", matchOffset);
     246        iBuilder->setProducedItemCount("M0CountMarker", iBuilder->CreateAdd(iBuilder->getProducedItemCount("M0CountMarker"), iBuilder->getSize(1)));
     247        this->markCircularOutputBitstream(iBuilder, "MatchOffsetMarker", offsetPos);
     248//        iBuilder->CallPrintInt("offsetPos", offsetPos);
     249//        iBuilder->CallPrintInt("matchOffset", matchOffset);
     250
     251
    234252        this->increaseScalarField(iBuilder, "m0OutputPos", matchLength);
    235253        this->setCircularOutputBitstream(iBuilder, "M0Marker", outputPos, outputEndPos);
     
    240258    void LZ4IndexBuilderKernel::generateProcessCompressedBlock(const std::unique_ptr<KernelBuilder> &iBuilder, Value* blockStart, Value* blockEnd) {
    241259        // Constant
    242 
    243         this->clearCircularOutputBitstream(iBuilder, "deletionMarker", blockStart, blockEnd);
     260        Value* clearPos = iBuilder->getScalarField("compressedSpaceClearPos");
     261        // Clearing only [blockStart, blockEnd) is not enough, since there are 4 bytes between blockEnd and nextBlockStart
     262        this->clearCircularOutputBitstream(iBuilder, "deletionMarker", clearPos, blockEnd);
     263        this->clearCircularOutputBitstream(iBuilder, "MatchOffsetMarker", clearPos, blockEnd);
     264        iBuilder->setScalarField("compressedSpaceClearPos", blockEnd);
    244265
    245266        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
     
    302323        // Store final M0 pos to make sure the bit stream will be long enough
    303324        Value* finalM0OutputPos = iBuilder->getScalarField("m0OutputPos");
    304         this->generateStoreNumberOutput(iBuilder, "m0Start", finalM0OutputPos);
    305         this->generateStoreNumberOutput(iBuilder, "m0End", finalM0OutputPos);
    306         this->generateStoreNumberOutput(iBuilder, "matchOffset", iBuilder->getInt64(0));
    307325        iBuilder->setProducedItemCount("M0Marker", finalM0OutputPos);
    308326        // finalM0OutputPos should always be 4MB * n except for the final block
     
    316334    Value * LZ4IndexBuilderKernel::advanceUntilNextZero(const unique_ptr<KernelBuilder> &iBuilder, string inputName, Value * startPos, Value * maxPos) {
    317335
    318         unsigned int bitBlockWidth = iBuilder->getBitBlockWidth();
    319         Constant* INT64_BIT_BLOCK_WIDTH = iBuilder->getInt64(bitBlockWidth);
    320         Type* bitBlockType = iBuilder->getBitBlockType();
    321         Type* bitBlockWidthIntTy = iBuilder->getIntNTy(bitBlockWidth);
     336        Constant* SIZE_64 = iBuilder->getSize(64);
    322337
    323338        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
     
    339354        iBuilder->SetInsertPoint(advanceBodyBlock);
    340355
    341         Value * currentBlockGlobalPos = iBuilder->CreateAnd(phiCurrentPos, ConstantExpr::getNeg(INT64_BIT_BLOCK_WIDTH));
    342         Value * currentPosBitBlockOffset = iBuilder->CreateURem(phiCurrentPos, INT64_BIT_BLOCK_WIDTH);
    343 
    344         Value * ptr = iBuilder->CreatePointerCast(iBuilder->getRawInputPointer(inputName, currentBlockGlobalPos), bitBlockType->getPointerTo());
    345 
    346         Value * currentBitValue = iBuilder->CreateBitCast(iBuilder->CreateLoad(ptr), bitBlockWidthIntTy);
    347         currentBitValue = iBuilder->CreateLShr(currentBitValue, iBuilder->CreateZExt(currentPosBitBlockOffset, bitBlockWidthIntTy));
     356        Value * currentBlockGlobalPos = iBuilder->CreateUDiv(phiCurrentPos, SIZE_64);
     357        Value * currentBlockLocalPos = iBuilder->CreateURem(currentBlockGlobalPos, iBuilder->getSize(this->getAnyStreamSetBuffer(inputName)->getBufferBlocks() * iBuilder->getBitBlockWidth() / 64));
     358        Value * currentPosBitBlockOffset = iBuilder->CreateURem(phiCurrentPos, SIZE_64);
     359
     360        Value * ptr = iBuilder->CreatePointerCast(iBuilder->getRawInputPointer(inputName, iBuilder->getSize(0)), iBuilder->getInt64Ty()->getPointerTo());
     361        Value * currentBitValue = iBuilder->CreateLoad(iBuilder->CreateGEP(ptr, currentBlockLocalPos));
     362
     363        currentBitValue = iBuilder->CreateLShr(currentBitValue, currentPosBitBlockOffset);
    348364        currentBitValue = iBuilder->CreateNot(currentBitValue);
    349365
    350366        Value * forwardZeroCount = iBuilder->CreateTrunc(iBuilder->CreateCountForwardZeroes(currentBitValue), iBuilder->getInt64Ty());
    351367        Value * newOffset = iBuilder->CreateAdd(currentPosBitBlockOffset, forwardZeroCount);
    352         newOffset = iBuilder->CreateUMin(newOffset, INT64_BIT_BLOCK_WIDTH);
     368        newOffset = iBuilder->CreateUMin(newOffset, iBuilder->getSize(64));
    353369
    354370        Value * actualAdvanceValue = iBuilder->CreateSub(newOffset, currentPosBitBlockOffset);
     
    360376        }
    361377
    362         phiIsFinish->addIncoming(iBuilder->CreateICmpNE(newOffset, INT64_BIT_BLOCK_WIDTH), iBuilder->GetInsertBlock());
     378        phiIsFinish->addIncoming(iBuilder->CreateICmpNE(newOffset, iBuilder->getSize(64)), iBuilder->GetInsertBlock());
    363379        phiCurrentPos->addIncoming(newPos, iBuilder->GetInsertBlock());
    364380        iBuilder->CreateBr(advanceConBlock);
     
    402418                                                             const std::string &bitstreamName,
    403419                                                             llvm::Value *start, llvm::Value *end) {
    404         //TODO currently we assume that start/end pos is not in the same byte
     420        //TODO currently we assume that start/end pos is not in the same byte because of the requirement of the LZ4 format
    405421        Value* SIZE_0 = iBuilder->getSize(0);
    406422        Value* SIZE_8 = iBuilder->getSize(8);
     
    477493        Type* INT8_PTR_TY = iBuilder->getInt8PtrTy();
    478494
    479         Value* outputBufferBytes = iBuilder->CreateUDiv(iBuilder->getSize(this->getAnyStreamSetBuffer(bitstreamName)->getBufferBlocks() * iBuilder->getBitBlockWidth()), SIZE_8);
     495        Value* outputBufferBytes = iBuilder->getSize(this->getAnyStreamSetBuffer(bitstreamName)->getBufferBlocks() * iBuilder->getBitBlockWidth() / 8);
    480496        Value* rawOutputPtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(bitstreamName, SIZE_0), INT8_PTR_TY);
    481497
     
    541557
    542558        Value* memsetSize = iBuilder->CreateSub(memsetEndByte, memsetStartByte);
    543         // TODO bug here when start end in the same byte
    544 //        iBuilder->CallPrintInt("memsetEndByte", memsetEndByte);
    545 //        iBuilder->CallPrintInt("memsetStartByte", memsetStartByte);
    546 //        iBuilder->CallPrintInt("memsetSize1_1", memsetSize);
    547 
    548559
    549560        memsetSize = iBuilder->CreateUMin(memsetSize, outputBufferBytes);
     
    555566        Value* memsetSize1 = iBuilder->CreateUMin(iBuilder->CreateSub(outputBufferBytes, memsetStartByteRem), memsetSize);
    556567        Value* memsetSize2 = iBuilder->CreateSub(memsetSize, memsetSize1);
    557 //        iBuilder->CallPrintInt("memset1Ptr", iBuilder->CreateGEP(rawOutputPtr, memsetStartByteRem));
    558 //        iBuilder->CallPrintInt("memsetSize1", memsetSize1);
    559 
    560 //        iBuilder->CallPrintInt("memset2Ptr", rawOutputPtr);
    561 //        iBuilder->CallPrintInt("memsetSize2", memsetSize2);
     568
    562569        iBuilder->CreateMemSet(iBuilder->CreateGEP(rawOutputPtr, memsetStartByteRem), iBuilder->getInt8(0xff), memsetSize1, true);
    563570        iBuilder->CreateMemSet(rawOutputPtr, iBuilder->getInt8(0xff), memsetSize2, true);
     
    567574    }
    568575
     576    void LZ4IndexBuilderKernel::markCircularOutputBitstream(const std::unique_ptr<KernelBuilder> &iBuilder, const string &bitstreamName, Value *pos) {
     577        Value* SIZE_0 = iBuilder->getSize(0);
     578        Value* SIZE_8 = iBuilder->getSize(8);
     579        Value* INT8_1 = iBuilder->getInt8(1);
     580        Type* bytePtrType = iBuilder->getInt8PtrTy();
     581
     582        Value* outputBufferBytes = iBuilder->getSize(this->getOutputStreamSetBuffer(bitstreamName)->getBufferBlocks() * iBuilder->getBitBlockWidth() / 8);
     583
     584        Value* bytePos = iBuilder->CreateUDiv(pos, SIZE_8);
     585        bytePos = iBuilder->CreateURem(bytePos, outputBufferBytes);
     586        Value* byteOffset = iBuilder->CreateTrunc(iBuilder->CreateURem(pos, SIZE_8), iBuilder->getInt8Ty());
     587
     588        Value* outputRawPtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(bitstreamName, SIZE_0), bytePtrType);
     589        Value* outputTargetPtr = iBuilder->CreateGEP(outputRawPtr, bytePos);
     590
     591        Value* targetValue = iBuilder->CreateLoad(outputTargetPtr);
     592        targetValue = iBuilder->CreateOr(targetValue, iBuilder->CreateShl(INT8_1, byteOffset));
     593        iBuilder->CreateStore(targetValue, outputTargetPtr);
     594
     595        Value* a = iBuilder->CreateURem(iBuilder->CreateUDiv(pos, iBuilder->getSize(iBuilder->getBitBlockWidth())), iBuilder->getSize(this->getOutputStreamSetBuffer(bitstreamName)->getBufferBlocks()));
     596        Value* p = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(bitstreamName, SIZE_0), iBuilder->getBitBlockType()->getPointerTo());
     597//        iBuilder->CallPrintInt("--pos", pos);
     598//        iBuilder->CallPrintRegister("aa", iBuilder->CreateLoad(iBuilder->CreateGEP(p, a)));
     599
     600    }
     601
    569602}
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_index_builder.h

    r5961 r5974  
    6363                                                               const std::string &bitstreamName,
    6464                                                               llvm::Value *start, llvm::Value *end);
     65
     66        void markCircularOutputBitstream(const std::unique_ptr<KernelBuilder> &iBuilder, const std::string &bitstreamName, llvm::Value *pos);
    6567    };
    6668}
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_swizzled_match_copy_kernel.cpp

    r5967 r5974  
    55#include "lz4_swizzled_match_copy_kernel.h"
    66#include <kernels/kernel_builder.h>
     7#include <kernels/streamset.h>
     8#include <toolchain/toolchain.h>
     9#include <vector>
     10#include <llvm/Support/raw_ostream.h>
    711
    812using namespace llvm;
    9 
     13using namespace std;
    1014namespace kernel {
    1115
     16Value *LZ4SwizzledMatchCopyKernel::advanceUntilNextBit(const std::unique_ptr<KernelBuilder> &iBuilder, string inputName, Value *startPos, bool isNextOne) {
     17    BasicBlock* entryBlock = iBuilder->GetInsertBlock();
     18
     19    Constant* SIZE_0 = iBuilder->getSize(0);
     20    Constant* SIZE_1 = iBuilder->getSize(1);
     21    Value* SIZE_64 = iBuilder->getSize(64); // maybe need to handle 32 bit machine
     22    Value* SIZE_INPUT_64_COUNT = iBuilder->getSize(this->getInputStreamSetBuffer(inputName)->getBufferBlocks() * iBuilder->getBitBlockWidth() / 64);
     23
     24    Value* initCurrentPos = startPos;
     25
     26    Value* offsetMarkerRawPtr = iBuilder->CreatePointerCast(iBuilder->getRawInputPointer(inputName, SIZE_0), iBuilder->getInt64Ty()->getPointerTo());
     27
     28    BasicBlock* findNextMatchOffsetConBlock = iBuilder->CreateBasicBlock("findNextMatchOffsetConBlock");
     29    BasicBlock* findNextMatchOffsetBodyBlock = iBuilder->CreateBasicBlock("findNextMatchOffsetBodyBlock");
     30
     31    iBuilder->CreateBr(findNextMatchOffsetConBlock);
     32    iBuilder->SetInsertPoint(findNextMatchOffsetConBlock);
     33    // Find position marker bit of next 1 bit
     34
     35    PHINode* phiCurrentPos = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
     36    phiCurrentPos->addIncoming(initCurrentPos, entryBlock);
     37
     38    Value* currentPosGlobalBlockIndex = iBuilder->CreateUDiv(phiCurrentPos, SIZE_64);
     39    Value* currentPosLocalBlockIndex = iBuilder->CreateURem(currentPosGlobalBlockIndex, SIZE_INPUT_64_COUNT);
     40    Value* currentPosBlockOffset = iBuilder->CreateURem(phiCurrentPos, SIZE_64);
     41    Value* currentValue = iBuilder->CreateLoad(iBuilder->CreateGEP(offsetMarkerRawPtr, currentPosLocalBlockIndex));
     42
     43    Value* countValue = iBuilder->CreateLShr(currentValue, currentPosBlockOffset);
     44    if (!isNextOne) {
     45        countValue = iBuilder->CreateNot(countValue);
     46    }
     47    Value* forwardZero = iBuilder->CreateCountForwardZeroes(countValue);
     48    Value* realForwardZero = iBuilder->CreateAdd(currentPosBlockOffset, forwardZero);
     49
     50    // If targetMarker == 0, move to next block, otherwise count forward zero
     51    phiCurrentPos->addIncoming(iBuilder->CreateMul(SIZE_64, iBuilder->CreateAdd(currentPosGlobalBlockIndex, SIZE_1)), iBuilder->GetInsertBlock());
     52    iBuilder->CreateCondBr(iBuilder->CreateICmpUGE(realForwardZero, SIZE_64), findNextMatchOffsetConBlock, findNextMatchOffsetBodyBlock);
     53
     54    iBuilder->SetInsertPoint(findNextMatchOffsetBodyBlock);
     55
     56    Value* newPosition = iBuilder->CreateAdd(iBuilder->CreateMul(currentPosGlobalBlockIndex, SIZE_64), realForwardZero);
     57
     58    return newPosition;
     59}
     60
     61Value* LZ4SwizzledMatchCopyKernel::loadNextMatchOffset(const unique_ptr<KernelBuilder> &iBuilder) {
     62    Value* initCurrentPos = iBuilder->CreateAdd(iBuilder->getScalarField("currentOffsetMarkerPos"), iBuilder->getSize(1));
     63    Value* newPosition = this->advanceUntilNextBit(iBuilder, "MatchOffsetMarker", initCurrentPos, true);
     64
     65    // Load Match Offset from newPosition
     66    iBuilder->setScalarField("currentOffsetMarkerPos", newPosition);
     67    iBuilder->setProcessedItemCount("MatchOffsetMarker", newPosition);
     68
     69    Value* matchOffsetPtr = iBuilder->getRawInputPointer("byteStream", newPosition);
     70    // For now, it is safe to cast the matchOffset pointer to an i16 pointer, since the input byte stream is always linearly available
     71    matchOffsetPtr = iBuilder->CreatePointerCast(matchOffsetPtr, iBuilder->getInt16Ty()->getPointerTo());
     72    Value* matchOffset = iBuilder->CreateZExt(iBuilder->CreateLoad(matchOffsetPtr), iBuilder->getSizeTy());
     73
     74    return matchOffset;
     75}
     76
     77pair<Value*, Value*> LZ4SwizzledMatchCopyKernel::loadNextM0StartEnd(const unique_ptr<KernelBuilder> &iBuilder) {
     78    Value* initCurrentPos = iBuilder->getScalarField("currentM0MarkerPos");
     79    Value* m0Start = this->advanceUntilNextBit(iBuilder, "M0Marker", initCurrentPos, true);
     80    Value* m0End = this->advanceUntilNextBit(iBuilder, "M0Marker", m0Start, false);
     81    iBuilder->setScalarField("currentM0MarkerPos", m0End);
     82    return std::make_pair(m0Start, m0End);
     83};
     84
     85
     86
     87
    1288void LZ4SwizzledMatchCopyKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    13 
    1489    ConstantInt * const SIZE_ZERO = iBuilder->getSize(0);
    1590    ConstantInt * const SIZE_ONE = iBuilder->getSize(1);
     
    2499
    25100    Value * const itemsToDo = iBuilder->CreateUMin(iBuilder->CreateSub(available, processed), SIZE_4_MEGS);
    26 
    27101    iBuilder->setTerminationSignal(iBuilder->CreateICmpULT(itemsToDo, SIZE_4_MEGS));
    28102
     
    35109
    36110    // Match Copy
    37     Value * const initM0StartProcessIndex = iBuilder->getProcessedItemCount("m0Start");
    38     Value * const totalM0StartItemsCount = iBuilder->getAvailableItemCount("m0Start");
     111    Value *initM0StartProcessIndex = iBuilder->getProcessedItemCount("M0CountMarker");
     112    Value *totalM0StartItemsCount = iBuilder->getAvailableItemCount("M0CountMarker");
    39113
    40114    Value * const initMatchOffset = iBuilder->getScalarField("pendingMatchOffset");
     
    70144    iBuilder->SetInsertPoint(loadNextMatchInfoBodyBlock);
    71145
    72     Value * const newM0Start = loadOffset(iBuilder, "m0Start", phiProcessIndex);
    73     Value * const newM0End = loadOffset(iBuilder, "m0End", phiProcessIndex);
    74     Value * const newMatchOffset = loadOffset(iBuilder, "matchOffset", phiProcessIndex);
     146    auto ret = this->loadNextM0StartEnd(iBuilder);
     147    Value *newM0Start = ret.first;
     148    Value *newM0End = ret.second;
     149    iBuilder->setProcessedItemCount("M0Marker", newM0End);
     150    Value *newMatchOffset = this->loadNextMatchOffset(iBuilder);
     151
     152
     153
    75154    Value * const newMatchLength = iBuilder->CreateAdd(iBuilder->CreateSub(newM0End, newM0Start), iBuilder->getInt64(1));
    76155
    77     phiProcessIndex->addIncoming(iBuilder->CreateAdd(phiProcessIndex, SIZE_ONE), loadNextMatchInfoBodyBlock);
    78 
    79     phiMatchPos->addIncoming(newM0Start, loadNextMatchInfoBodyBlock);
    80     phiMatchOffset->addIncoming(newMatchOffset, loadNextMatchInfoBodyBlock);
    81     phiMatchLength->addIncoming(newMatchLength, loadNextMatchInfoBodyBlock);
     156    phiProcessIndex->addIncoming(iBuilder->CreateAdd(phiProcessIndex, SIZE_ONE), iBuilder->GetInsertBlock());
     157
     158    phiMatchPos->addIncoming(newM0Start, iBuilder->GetInsertBlock());
     159    phiMatchOffset->addIncoming(newMatchOffset, iBuilder->GetInsertBlock());
     160    phiMatchLength->addIncoming(newMatchLength, iBuilder->GetInsertBlock());
    82161
    83162    iBuilder->CreateBr(matchCopyLoopCon);
     
    140219    iBuilder->setScalarField("pendingMatchLength", phiMatchLength);
    141220    iBuilder->setScalarField("pendingMatchPos", phiMatchPos);
    142     iBuilder->setProcessedItemCount("m0Start", phiProcessIndex);
    143     iBuilder->setProcessedItemCount("m0End", phiProcessIndex);
    144     iBuilder->setProcessedItemCount("matchOffset", phiProcessIndex);
     221    iBuilder->setProcessedItemCount("M0CountMarker", phiProcessIndex);
    145222    iBuilder->setProcessedItemCount("sourceStreamSet0", toProcessItemCount);
    146223}
     
    164241// Inputs
    165242{
    166        Binding{iBuilder->getStreamSetTy(1, 64), "m0Start", BoundedRate(0, 1), DisableSufficientChecking()},
    167        Binding{iBuilder->getStreamSetTy(1, 64), "m0End", BoundedRate(0, 1), DisableSufficientChecking()},
    168        Binding{iBuilder->getStreamSetTy(1, 64), "matchOffset", BoundedRate(0, 1), DisableSufficientChecking()},
     243                                   Binding{iBuilder->getStreamSetTy(1, 1), "MatchOffsetMarker", BoundedRate(0, 1), {DisableSufficientChecking()}},
     244                                   Binding{iBuilder->getStreamSetTy(1, 1), "M0Marker", BoundedRate(0, 1), {DisableSufficientChecking()}},
     245                                   Binding{iBuilder->getStreamSetTy(1, 1), "M0CountMarker", BoundedRate(0, 1), {DisableSufficientChecking()}},
     246                                   Binding{iBuilder->getStreamSetTy(1, 8), "byteStream", BoundedRate(0, 1)}
    169247},
    170248// Outputs
     
    172250// Arguments
    173251{
    174        Binding{iBuilder->getSizeTy(), "fileSize"} //TODO remove
    175252},
    176253{},
     
    180257       Binding{iBuilder->getSizeTy(), "pendingMatchOffset"},
    181258       Binding{iBuilder->getSizeTy(), "pendingMatchLength"},
     259       Binding(iBuilder->getSizeTy(), "currentOffsetMarkerPos"),
     260       Binding(iBuilder->getSizeTy(), "currentM0MarkerPos")
    182261})
    183262, mSwizzleFactor(swizzleFactor)
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_swizzled_match_copy_kernel.h

    r5967 r5974  
    2929        const unsigned mStreamSize;
    3030        const unsigned mStreamCount;
     31        llvm::Value* loadNextMatchOffset(const std::unique_ptr<KernelBuilder> &iBuilder);
     32        std::pair<llvm::Value*, llvm::Value*> loadNextM0StartEnd(const std::unique_ptr<KernelBuilder> &iBuilder);
     33        llvm::Value *advanceUntilNextBit(const std::unique_ptr<KernelBuilder> &iBuilder, std::string inputName,
     34                                          llvm::Value *startPos, bool isNextOne);
     35
    3136    };
    3237}
  • icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp

    r5957 r5974  
    150150
    151151    Kernel * swizzledMatchCopyK = pxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
    152     swizzledMatchCopyK->setInitialArguments({fileSize});
    153     pxDriver.makeKernelCall(swizzledMatchCopyK, {M0_Start, M0_End, Match_Offset, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
     152    pxDriver.makeKernelCall(swizzledMatchCopyK, {MatchOffsetMarker, M0Marker, M0CountMarker, ByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
    154153
    155154
     
    222221    //// Generate Helper Markers Extenders, FX, XF
    223222    StreamSetBuffer * const Extenders = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
     223    MatchOffsetMarker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
     224        // FX and XF streams will be added to IndexBuilderKernel in the future
     225//    StreamSetBuffer * const CC_0xFX = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
     226//    StreamSetBuffer * const CC_0xXF = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
    224227
    225228    Kernel * extenderK = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "extenders", std::vector<re::CC *>{re::makeCC(0xFF)}, 8);
     
    232235    pxDriver.makeKernelCall(blockDecoderK, {ByteStream}, {BlockData_IsCompressed, BlockData_BlockStart, BlockData_BlockEnd});
    233236
     237//    re::CC* xfCC = re::makeCC(0x0f);
     238//    re::CC* fxCC = re::makeCC(0xf0);
     239//    for (re::codepoint_t i = 1; i <= 0xf; i++) {
     240//        xfCC = re::makeCC(xfCC, re::makeCC(i * 0x10 + 0x0f));
     241//        fxCC = re::makeCC(fxCC, re::makeCC(0xf0 + i));
     242//    }
     243
     244//    Kernel * CC_0xFXKernel = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "CC_0xFX", std::vector<re::CC *>{fxCC}, 8);
     245//    pxDriver.makeKernelCall(CC_0xFXKernel, {BasisBits}, {CC_0xFX});
     246
     247//    Kernel * CC_0xXFKernel = pxDriver.addKernelInstance<ParabixCharacterClassKernelBuilder>(iBuilder, "CC_0xXF", std::vector<re::CC *>{xfCC}, 8);
     248//    pxDriver.makeKernelCall(CC_0xXFKernel, {BasisBits}, {CC_0xXF});
     249
    234250    //// Generate Extract/Deposit Markers, M0_Start, M0_End, MatchOffset
    235 
    236     M0_Start = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
    237     M0_End = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
    238251
    239252    //TODO handle uncompressed part
     
    243256
    244257    DeletionMarker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
    245     StreamSetBuffer * const M0Marker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
     258    M0Marker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
     259    M0CountMarker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
    246260    DepositMarker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
    247     Match_Offset = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->getInputBufferBlocks());
    248261
    249262    Kernel* Lz4IndexBuilderK = pxDriver.addKernelInstance<LZ4IndexBuilderKernel>(iBuilder);
     
    254267                    ByteStream,
    255268                    Extenders,
     269//                    CC_0xFX,
     270//                    CC_0xXF,
    256271
    257272                    // Block Data
     
    266281
    267282                    DeletionMarker,
    268                     M0_Start,
    269                     M0_End,
    270                     Match_Offset,
    271                     M0Marker
     283                    M0Marker,
     284                    M0CountMarker,
     285                    MatchOffsetMarker
    272286            });
    273287
  • icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.h

    r5957 r5974  
    6060    parabix::StreamSetBuffer * DeletionMarker; //TODO rename to ExtractMarker
    6161    parabix::StreamSetBuffer * DepositMarker;
    62     parabix::StreamSetBuffer * Match_Offset;
     62    parabix::StreamSetBuffer * MatchOffsetMarker;
    6363
    64     parabix::StreamSetBuffer * M0_Start;  // TODO M0_Start and M0_End should be changed to Deposit_Start and Deposit_End
    65     parabix::StreamSetBuffer * M0_End;
     64    // M0CountMarker will not contain any data; it is only used to pass producedItemCount and manage processedItemCount between different kernels
     65    parabix::StreamSetBuffer * M0CountMarker;
     66    parabix::StreamSetBuffer * M0Marker;
    6667};
    6768
  • icGREP/icgrep-devel/icgrep/lz4/LZ4GrepGenerator.cpp

    r5957 r5974  
    374374
    375375    Kernel * swizzledMatchCopyK = pxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
    376     swizzledMatchCopyK->setInitialArguments({fileSize});
    377     pxDriver.makeKernelCall(swizzledMatchCopyK, {M0_Start, M0_End, Match_Offset, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
     376    pxDriver.makeKernelCall(swizzledMatchCopyK, {MatchOffsetMarker, M0Marker, M0CountMarker, ByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
    378377
    379378
     
    441440
    442441    Kernel * swizzledMatchCopyK = pxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
    443     swizzledMatchCopyK->setInitialArguments({fileSize});
    444     pxDriver.makeKernelCall(swizzledMatchCopyK, {M0_Start, M0_End, Match_Offset, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
     442    pxDriver.makeKernelCall(swizzledMatchCopyK, {MatchOffsetMarker, M0Marker, M0CountMarker, ByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
    445443
    446444
Note: See TracChangeset for help on using the changeset viewer.