Ignore:
Timestamp:
Apr 11, 2018, 5:48:08 PM (17 months ago)
Author:
nmedfort
Message:

Updated LZ4SwizzledMatchCopy + minor changes

Location:
icGREP/icgrep-devel/icgrep
Files:
9 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/attributes.h

    r5948 r5967  
    9696        // is enough data to execute a stride rather than the upper bound.)
    9797
    98         DisableTemporaryBuffer,
    99 
    100         // Workaround attribute, force disable temporary buffer
    101 
    10298        DisableSufficientChecking,
    10399
    104100        // Workaround attribute, force disable sufficient data or sufficient space checking in pipeline, always assume that
    105101        // the data or space is sufficient
    106 
    107         DisableAvailableItemCountAdjustment,
    108 
    109         // Workaround attribute, keep original availableItemCount in multiblock kernel (do not replace it by linear available item count)
    110102
    111103        /** OUTPUT STREAM ATTRIBUTES **/
     
    283275    friend Attribute Principal();
    284276    friend Attribute AlwaysConsume();
    285     friend Attribute DisableTemporaryBuffer();
    286277    friend Attribute DisableSufficientChecking();
    287     friend Attribute DisableAvailableItemCountAdjustment();
    288278    friend Attribute RoundUpTo(const unsigned);
    289279    friend Attribute LookAhead(const unsigned);
     
    361351}
    362352
    363 inline Attribute DisableTemporaryBuffer() {
    364     return Attribute(Attribute::KindId::DisableTemporaryBuffer, 0);
    365 }
    366 
    367 inline Attribute DisableAvailableItemCountAdjustment() {
    368     return Attribute(Attribute::KindId::DisableAvailableItemCountAdjustment, 0);
    369 }
    370 
    371353inline Attribute DisableSufficientChecking() {
    372354    return Attribute(Attribute::KindId::DisableSufficientChecking, 0);
  • icGREP/icgrep-devel/icgrep/kernels/interface.h

    r5941 r5967  
    7474    }
    7575
    76     bool isDisableTemporaryBuffer() const {
    77         return hasAttribute(AttributeId::DisableTemporaryBuffer);
    78     }
    79 
    8076    bool isDisableSufficientChecking() const {
    8177        return hasAttribute(AttributeId::DisableSufficientChecking);
    82     }
    83 
    84     bool isDisableAvailableItemCountAdjustment() const {
    85         return hasAttribute(AttributeId::DisableAvailableItemCountAdjustment);
    8678    }
    8779
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5941 r5967  
    618618 ** ------------------------------------------------------------------------------------------------------------- */
    619619inline bool LLVM_READNONE MultiBlockKernel::requiresTemporaryInputBuffer(const Binding & binding, const ProcessingRate & rate) const {
    620     if (binding.isDisableTemporaryBuffer()) {
    621         return false;
    622     }
    623620    if (requiresBufferedFinalStride(binding)) {
    624621        return true;
     
    634631 ** ------------------------------------------------------------------------------------------------------------- */
    635632inline bool LLVM_READNONE MultiBlockKernel::requiresTemporaryOutputBuffer(const Binding & binding, const ProcessingRate & rate) const {
    636     if (binding.isDisableTemporaryBuffer()) {
    637         return false;
    638     }
    639633    if (requiresBufferedFinalStride(binding)) {
    640634        return true;
     
    10741068    for (unsigned i = 0; i < inputSetCount; i++) {
    10751069        const Binding & input = mStreamSetInputs[i];
    1076         if (input.isDisableAvailableItemCountAdjustment()) {
    1077             continue;
    1078         }
    1079 
    10801070        if (input.getRate().isFixed() && input.nonDeferred()) {
    10811071            Value * const processable = b->CreateMul(numOfStrides, inputStrideSize[i]);
  • icGREP/icgrep-devel/icgrep/kernels/kernel_builder.cpp

    r5957 r5967  
    8282            itemCount = CreateExactUDiv(itemCount, ConstantInt::get(itemCount->getType(), r.denominator()));
    8383        }
    84     } else if (LLVM_UNLIKELY(rate.isPopCount())) {
    85         Port port; unsigned index;
    86         std::tie(port, index) = mKernel->getStreamPort(rate.getReference());
    87 
    88 
    89 
    90 
    9184    } else {
    9285        itemCount = getScalarField(name + suffix);
     
    422415    Value * const addr = mKernel->getStreamSetInputAddress(name);
    423416    if (addr) {
    424         return CreateGEP(addr, {blockOffset, streamIndex});
     417        return CreateGEP(addr, {blockOffset ? blockOffset : getInt32(0), streamIndex});
    425418    } else {
    426419        const StreamSetBuffer * const buf = mKernel->getInputStreamSetBuffer(name);
    427420        Value * blockIndex = CreateLShr(getProcessedItemCount(name), std::log2(getBitBlockWidth()));
    428         blockIndex = CreateAdd(blockIndex, blockOffset);
     421        if (blockOffset) {
     422            assert (blockIndex->getType() == blockOffset->getType());
     423            blockIndex = CreateAdd(blockIndex, blockOffset);
     424        }
    429425        return buf->getStreamBlockPtr(this, getStreamHandle(name), getBaseAddress(name), streamIndex, blockIndex, true);
    430426    }
     
    434430    Value * const addr = mKernel->getStreamSetOutputAddress(name);
    435431    if (addr) {
    436         return CreateGEP(addr, {blockOffset, streamIndex});
     432        return CreateGEP(addr, {blockOffset ? blockOffset : getInt32(0), streamIndex});
    437433    } else {
    438434        const StreamSetBuffer * const buf = mKernel->getOutputStreamSetBuffer(name);
    439         Value * const blockIndex = CreateLShr(getProducedItemCount(name), std::log2(getBitBlockWidth()));
     435        Value * blockIndex = CreateLShr(getProducedItemCount(name), std::log2(getBitBlockWidth()));
     436        if (blockOffset) {
     437            assert (blockIndex->getType() == blockOffset->getType());
     438            blockIndex = CreateAdd(blockIndex, blockOffset);
     439        }
    440440        return buf->getStreamBlockPtr(this, getStreamHandle(name), getBaseAddress(name), streamIndex, blockIndex, false);
    441441    }
  • icGREP/icgrep-devel/icgrep/kernels/kernel_builder.h

    r5865 r5967  
    7272
    7373    llvm::Value * getInputStreamBlockPtr(const std::string & name, llvm::Value * streamIndex) {
    74         return getInputStreamBlockPtr(name, streamIndex, getInt32(0));
     74        return getInputStreamBlockPtr(name, streamIndex, nullptr);
    7575    }
    7676
     
    8686
    8787    llvm::Value * getOutputStreamBlockPtr(const std::string & name, llvm::Value * streamIndex) {
    88         return getOutputStreamBlockPtr(name, streamIndex, getInt32(0));
     88        return getOutputStreamBlockPtr(name, streamIndex, nullptr);
    8989    }
    9090
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_index_builder.cpp

    r5961 r5967  
    429429
    430430        Value* startShiftAmount = iBuilder->CreateSub(SIZE_8, startRemain);
     431        startShiftAmount = iBuilder->CreateZExtOrTrunc(startShiftAmount, startValue->getType());
    431432        startValue = iBuilder->CreateLShr(iBuilder->CreateShl(startValue, startShiftAmount), startShiftAmount);
    432433
     
    441442        Value* endPtr = iBuilder->CreateGEP(rawOutputPtr, iBuilder->CreateURem(endBytePos, outputBufferBytes));
    442443        Value* endValue = iBuilder->CreateLoad(endPtr);
     444        endRemain = iBuilder->CreateZExtOrTrunc(endRemain, endValue->getType());
    443445        endValue = iBuilder->CreateShl(iBuilder->CreateLShr(endValue, endRemain), endRemain);
    444446        iBuilder->CreateStore(endValue, endPtr);
     
    469471
    470472        Value* SIZE_0 = iBuilder->getSize(0);
     473        Value* SIZE_1 = iBuilder->getSize(1);
    471474        Value* SIZE_8 = iBuilder->getSize(8);
    472         Value* INT8_0 = iBuilder->getInt8(0);
    473         Value* INT8_1 = iBuilder->getInt8(1);
     475//        Value* INT8_0 = iBuilder->getInt8(0);
     476//        Value* INT8_1 = iBuilder->getInt8(1);
    474477        Type* INT8_PTR_TY = iBuilder->getInt8PtrTy();
    475478
     
    493496        Value* targetPtr = iBuilder->CreateGEP(rawOutputPtr, iBuilder->CreateURem(startBytePos, outputBufferBytes));
    494497        Value* targetValue = iBuilder->CreateLoad(targetPtr);
    495         targetValue = iBuilder->CreateOr(iBuilder->CreateSub(
    496                 iBuilder->CreateShl(INT8_1, endRemain),
    497                 iBuilder->CreateShl(INT8_1, startRemain)
    498         ), targetValue);
     498        Value* rangeMask = iBuilder->CreateSub(iBuilder->CreateShl(SIZE_1, endRemain), iBuilder->CreateShl(SIZE_1, startRemain));
     499        rangeMask = iBuilder->CreateZExtOrTrunc(rangeMask, targetValue->getType());
     500        targetValue = iBuilder->CreateOr(rangeMask, targetValue);
     501
    499502//        targetValue = iBuilder->CreateNot(iBuilder->CreateLShr(iBuilder->CreateShl(iBuilder->CreateNot(targetValue), startShiftAmount), startShiftAmount));
    500503//        targetValue = iBuilder->CreateShl(iBuilder->CreateLShr(targetValue, endRemain), endRemain);
     
    514517        Value* startPtr = iBuilder->CreateGEP(rawOutputPtr, iBuilder->CreateURem(startBytePos, outputBufferBytes));
    515518        Value* startValue = iBuilder->CreateLoad(startPtr);
    516         startValue = iBuilder->CreateNot(iBuilder->CreateLShr(iBuilder->CreateShl(iBuilder->CreateNot(startValue), startShiftAmount), startShiftAmount));
     519
     520        Value* startShiftAmount2 = iBuilder->CreateZExtOrTrunc(startShiftAmount, startValue->getType());
     521        startValue = iBuilder->CreateNot(iBuilder->CreateLShr(iBuilder->CreateShl(iBuilder->CreateNot(startValue), startShiftAmount2), startShiftAmount2));
    517522
    518523        iBuilder->CreateStore(startValue, startPtr);
     
    526531        Value* endPtr = iBuilder->CreateGEP(rawOutputPtr, iBuilder->CreateURem(endBytePos, outputBufferBytes));
    527532        Value* endValue = iBuilder->CreateLoad(endPtr);
    528         endValue = iBuilder->CreateNot(iBuilder->CreateShl(iBuilder->CreateLShr(iBuilder->CreateNot(endValue), endRemain), endRemain));
     533        Value* endRemain2 = iBuilder->CreateZExtOrTrunc(endRemain, endValue->getType());
     534        endValue = iBuilder->CreateNot(iBuilder->CreateShl(iBuilder->CreateLShr(iBuilder->CreateNot(endValue), endRemain2), endRemain2));
    529535        iBuilder->CreateStore(endValue, endPtr);
    530536        iBuilder->CreateBr(memsetBlock);
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_swizzled_match_copy_kernel.cpp

    r5966 r5967  
    55#include "lz4_swizzled_match_copy_kernel.h"
    66#include <kernels/kernel_builder.h>
    7 #include <kernels/streamset.h>
    8 #include <toolchain/toolchain.h>
    9 
    107
    118using namespace llvm;
    12 using namespace kernel;
    13 using namespace std;
    14 
    15 Value* LZ4SwizzledMatchCopyKernel::loadInt64NumberInput(const unique_ptr<KernelBuilder> &iBuilder, string bufferName, Value* offset) {
    16     // GEP here is safe
    17     Constant* SIZE_ZERO = iBuilder->getSize(0);
    18     Type* int64PtrType = iBuilder->getInt64Ty()->getPointerTo();
    19 
    20     Value* tmpOffset = iBuilder->CreateURem(offset, iBuilder->getSize(this->getAnyStreamSetBuffer(bufferName)->getBufferBlocks() * iBuilder->getBitBlockWidth()));
    21     Value* outputRawPtr = iBuilder->CreatePointerCast(iBuilder->getRawInputPointer(bufferName, SIZE_ZERO), int64PtrType);
    22     Value* ptr2 = iBuilder->CreateGEP(outputRawPtr, tmpOffset);
    23 
    24     return iBuilder->CreateLoad(ptr2);
    25 }
     9
     10namespace kernel {
    2611
    2712void LZ4SwizzledMatchCopyKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    28 //void LZ4SwizzledMatchCopyKernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value * const numOfStrides)  {
    29     // Const
    30     Constant *SIZE_ZERO = iBuilder->getSize(0);
    31     Constant *SIZE_ONE = iBuilder->getSize(1);
    32     Constant *SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
    33     Constant *SIZE_PDEP_WIDTH = iBuilder->getSize(mPDEPWidth);
    34 
    35     BasicBlock* exitBlock = iBuilder->CreateBasicBlock("exitBlock");
    36 
    37     Value *totalItemCount = iBuilder->getAvailableItemCount("sourceStreamSet0");
    38     Value *itemsToDo = iBuilder->CreateSub(totalItemCount, iBuilder->getProcessedItemCount("sourceStreamSet0"));
    39 
    40     Value *isFinalBlock = iBuilder->CreateICmpULT(itemsToDo, iBuilder->getSize(4 * 1024 * 1024));
    41     this->mIsFinalBlock = isFinalBlock;
    42     iBuilder->setTerminationSignal(isFinalBlock);
    43 
    44     Value *previousProducedItemCount = iBuilder->getProducedItemCount("outputStreamSet0");
    45 
    46     // Space Calculation
    47     Value *outputBufferBlocks = iBuilder->getSize(
    48             this->getAnyStreamSetBuffer("outputStreamSet0")->getBufferBlocks());
    49 
    50     Value *outputBlocks = iBuilder->getSize(4 * 1024 * 1024 / iBuilder->getBitBlockWidth()); // Always be 4MB
    51 
    52 
    53     BasicBlock* processBlock = iBuilder->CreateBasicBlock("processBlock");
    54     Value* isInputEnough = iBuilder->CreateOr(isFinalBlock, iBuilder->CreateICmpUGE(itemsToDo, iBuilder->getSize(4 * 1024 * 1024)));
    55 
    56     iBuilder->CreateCondBr(isInputEnough, processBlock, exitBlock);
    57 
    58     iBuilder->SetInsertPoint(processBlock);
     13
     14    ConstantInt * const SIZE_ZERO = iBuilder->getSize(0);
     15    ConstantInt * const SIZE_ONE = iBuilder->getSize(1);
     16    ConstantInt * const SIZE_PDEP_WIDTH = iBuilder->getSize(mPDEPWidth);
     17    ConstantInt * const SIZE_4_MEGS = iBuilder->getSize(4 * 1024 * 1024);
     18    ConstantInt * const SIZE_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
     19
     20    BasicBlock * const entryBlock = iBuilder->GetInsertBlock();
     21
     22    Value * const available = iBuilder->getAvailableItemCount("sourceStreamSet0");
     23    Value * const processed = iBuilder->getProcessedItemCount("sourceStreamSet0");
     24
     25    Value * const itemsToDo = iBuilder->CreateUMin(iBuilder->CreateSub(available, processed), SIZE_4_MEGS);
     26
     27    iBuilder->setTerminationSignal(iBuilder->CreateICmpULT(itemsToDo, SIZE_4_MEGS));
     28
     29    Value * previousProducedItemCount = iBuilder->getProducedItemCount("outputStreamSet0");
     30
    5931    // Output Copy
    60     this->generateOutputCopy(iBuilder, outputBlocks);
    61 
    62     Value *newProducedItemCount = iBuilder->getProducedItemCount("outputStreamSet0");
    63 
    64     BasicBlock *copyEndBlock = iBuilder->CreateBasicBlock("copyEnd");
    65     iBuilder->CreateBr(copyEndBlock);
    66     iBuilder->SetInsertPoint(copyEndBlock);
     32    generateOutputCopy(iBuilder);
     33
     34    Value * const toProcessItemCount = iBuilder->CreateAdd(processed, itemsToDo);
    6735
    6836    // Match Copy
    69     BasicBlock *processExitBlock = iBuilder->CreateBasicBlock("exit_block");
    70 
    71     Value *initM0StartProcessIndex = iBuilder->getProcessedItemCount("m0Start");
    72     Value *totalM0StartItemsCount = iBuilder->getAvailableItemCount("m0Start");
    73 
    74     Value *initMatchOffset = iBuilder->getScalarField("pendingMatchOffset");
    75     Value *initMatchLength = iBuilder->getScalarField("pendingMatchLength");
    76     Value *initMatchPos = iBuilder->getScalarField("pendingMatchPos");
    77 
    78     BasicBlock *matchCopyLoopCon = iBuilder->CreateBasicBlock("matchCopyLoopCon");
     37    Value * const initM0StartProcessIndex = iBuilder->getProcessedItemCount("m0Start");
     38    Value * const totalM0StartItemsCount = iBuilder->getAvailableItemCount("m0Start");
     39
     40    Value * const initMatchOffset = iBuilder->getScalarField("pendingMatchOffset");
     41    Value * const initMatchLength = iBuilder->getScalarField("pendingMatchLength");
     42    Value * const initMatchPos = iBuilder->getScalarField("pendingMatchPos");
     43
     44    BasicBlock * const matchCopyLoopCon = iBuilder->CreateBasicBlock("matchCopyLoopCon");
    7945    iBuilder->CreateBr(matchCopyLoopCon);
    8046
    8147    iBuilder->SetInsertPoint(matchCopyLoopCon);
    82 
    83 
    84     PHINode *phiProcessIndex = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3);
    85     phiProcessIndex->addIncoming(initM0StartProcessIndex, copyEndBlock);
    86 
    87     PHINode *phiMatchOffset = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3);
    88     phiMatchOffset->addIncoming(initMatchOffset, copyEndBlock);
    89 
    90     PHINode *phiMatchLength = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3);
    91     phiMatchLength->addIncoming(initMatchLength, copyEndBlock);
    92 
    93     PHINode *phiMatchPos = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3);
    94     phiMatchPos->addIncoming(initMatchPos, copyEndBlock);
    95 
    96     BasicBlock *loadNextMatchInfoConBlock = iBuilder->CreateBasicBlock("loadNewMatchInfoConBlock");
    97     BasicBlock *loadNextMatchInfoBodyBlock = iBuilder->CreateBasicBlock("loadNewMatchInfoBodyBlock");
    98 
    99     BasicBlock *matchCopyConBlock = iBuilder->CreateBasicBlock("matchCopyConBlock");
    100     BasicBlock *matchCopyBodyBlock = iBuilder->CreateBasicBlock("matchCopyBodyBlock");
    101 
    102 
    103     iBuilder->CreateCondBr(
    104             iBuilder->CreateICmpEQ(phiMatchLength, iBuilder->getSize(0)),
    105             loadNextMatchInfoConBlock,
    106             matchCopyConBlock
    107     );
    108 
     48    PHINode * const phiProcessIndex = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3);
     49    phiProcessIndex->addIncoming(initM0StartProcessIndex, entryBlock);
     50    PHINode * const phiMatchOffset = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3);
     51    phiMatchOffset->addIncoming(initMatchOffset, entryBlock);
     52    PHINode * const phiMatchLength = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3);
     53    phiMatchLength->addIncoming(initMatchLength, entryBlock);
     54    PHINode * const phiMatchPos = iBuilder->CreatePHI(iBuilder->getSizeTy(), 3);
     55    phiMatchPos->addIncoming(initMatchPos, entryBlock);
     56
     57    BasicBlock * const loadNextMatchInfoConBlock = iBuilder->CreateBasicBlock("loadNewMatchInfoConBlock");
     58    BasicBlock * const loadNextMatchInfoBodyBlock = iBuilder->CreateBasicBlock("loadNewMatchInfoBodyBlock");
     59
     60    BasicBlock * const matchCopyConBlock = iBuilder->CreateBasicBlock("matchCopyConBlock");
     61    BasicBlock * const matchCopyBodyBlock = iBuilder->CreateBasicBlock("matchCopyBodyBlock");
     62
     63    iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(phiMatchLength, SIZE_ZERO), loadNextMatchInfoConBlock, matchCopyConBlock);
    10964
    11065    iBuilder->SetInsertPoint(loadNextMatchInfoConBlock);
    111 
    112 
    113 
    114     Value *hasMoreMatchInfo = iBuilder->CreateICmpULT(phiProcessIndex, totalM0StartItemsCount);
     66    Value * const hasMoreMatchInfo = iBuilder->CreateICmpULT(phiProcessIndex, totalM0StartItemsCount);
     67    BasicBlock * const processExitBlock = iBuilder->CreateBasicBlock("exit_block");
    11568    iBuilder->CreateCondBr(hasMoreMatchInfo, loadNextMatchInfoBodyBlock, processExitBlock);
    11669
    11770    iBuilder->SetInsertPoint(loadNextMatchInfoBodyBlock);
    11871
    119     Value *newM0Start = this->loadInt64NumberInput(iBuilder, "m0Start", phiProcessIndex);
    120     Value *newM0End = this->loadInt64NumberInput(iBuilder, "m0End", phiProcessIndex);
    121     Value *newMatchOffset = this->loadInt64NumberInput(iBuilder, "matchOffset", phiProcessIndex);
    122 
    123     Value *depositStart = newM0Start;
    124 
    125     Value *depositEnd = iBuilder->CreateAdd(newM0End, iBuilder->getInt64(1));
    126     Value *newMatchLength = iBuilder->CreateSub(depositEnd, depositStart);
    127     phiProcessIndex->addIncoming(iBuilder->CreateAdd(phiProcessIndex, SIZE_ONE), iBuilder->GetInsertBlock());
    128 
    129     phiMatchPos->addIncoming(depositStart, iBuilder->GetInsertBlock());
    130     phiMatchOffset->addIncoming(newMatchOffset, iBuilder->GetInsertBlock());
    131     phiMatchLength->addIncoming(newMatchLength, iBuilder->GetInsertBlock());
     72    Value * const newM0Start = loadOffset(iBuilder, "m0Start", phiProcessIndex);
     73    Value * const newM0End = loadOffset(iBuilder, "m0End", phiProcessIndex);
     74    Value * const newMatchOffset = loadOffset(iBuilder, "matchOffset", phiProcessIndex);
     75    Value * const newMatchLength = iBuilder->CreateAdd(iBuilder->CreateSub(newM0End, newM0Start), iBuilder->getInt64(1));
     76
     77    phiProcessIndex->addIncoming(iBuilder->CreateAdd(phiProcessIndex, SIZE_ONE), loadNextMatchInfoBodyBlock);
     78
     79    phiMatchPos->addIncoming(newM0Start, loadNextMatchInfoBodyBlock);
     80    phiMatchOffset->addIncoming(newMatchOffset, loadNextMatchInfoBodyBlock);
     81    phiMatchLength->addIncoming(newMatchLength, loadNextMatchInfoBodyBlock);
    13282
    13383    iBuilder->CreateBr(matchCopyLoopCon);
    13484
    135 
    13685    iBuilder->SetInsertPoint(matchCopyConBlock);
    137     Value *hasNotReachEnd = iBuilder->CreateICmpULT(phiMatchPos, newProducedItemCount);
    138 //    iBuilder->CallPrintInt("newProducedItemCount", newProducedItemCount);
     86
     87    Value * const hasNotReachEnd = iBuilder->CreateICmpULT(phiMatchPos, toProcessItemCount);
    13988    iBuilder->CreateCondBr(hasNotReachEnd, matchCopyBodyBlock, processExitBlock);
    14089
    14190    iBuilder->SetInsertPoint(matchCopyBodyBlock);
    14291
    143 
    144     Value* matchCopyFromPos = iBuilder->CreateSub(phiMatchPos, phiMatchOffset);
    145     Value* outputBufferSize = iBuilder->CreateMul(outputBufferBlocks, SIZE_BIT_BLOCK_WIDTH);
    146     Value* matchCopyFromOffset = iBuilder->CreateURem(matchCopyFromPos, outputBufferSize);
    147     Value* matchCopyFromBlockIndex = iBuilder->CreateUDiv(matchCopyFromOffset, SIZE_PDEP_WIDTH);
    148     Value* matchCopyFromBlockOffset = iBuilder->CreateURem(matchCopyFromOffset, SIZE_PDEP_WIDTH);
    149 
    150 
    151     Value* matchCopyTargetOffset = iBuilder->CreateURem(phiMatchPos, outputBufferSize);
    152     Value* matchCopyTargetBlockIndex = iBuilder->CreateUDiv(matchCopyTargetOffset, SIZE_PDEP_WIDTH);
    153     Value* matchCopyTargetBlockOffset = iBuilder->CreateURem(matchCopyTargetOffset, SIZE_PDEP_WIDTH);
    154 
    155 
    156     Value* matchCopyFromRemain = iBuilder->CreateSub(SIZE_PDEP_WIDTH, matchCopyFromBlockOffset);
    157     Value* matchCopyTargetRemain = iBuilder->CreateSub(SIZE_PDEP_WIDTH, matchCopyTargetBlockOffset);
    158 
    159     Value* currentCopySize = iBuilder->CreateUMin(matchCopyFromRemain, matchCopyTargetRemain);
     92    Value * const matchCopyTargetPos = iBuilder->CreateSub(phiMatchPos, previousProducedItemCount);
     93    Value * const matchCopyTargetBlockIndex = iBuilder->CreateUDiv(matchCopyTargetPos, SIZE_BLOCK_WIDTH);
     94    Value * const matchCopyTargetStreamIndex = iBuilder->CreateUDiv(iBuilder->CreateURem(matchCopyTargetPos, SIZE_BLOCK_WIDTH), SIZE_PDEP_WIDTH); // should SIZE_PDEP_WIDTH be SIZE_STREAM_COUNT?
     95    Value * const matchCopyTargetBlockOffset = iBuilder->CreateURem(phiMatchPos, SIZE_PDEP_WIDTH);
     96
     97    Value * const matchCopyFromPos = iBuilder->CreateSub(matchCopyTargetPos, phiMatchOffset);
     98    Value * const matchCopyFromBlockIndex = iBuilder->CreateUDiv(matchCopyFromPos, SIZE_BLOCK_WIDTH);
     99    Value * const matchCopyFromStreamIndex = iBuilder->CreateUDiv(iBuilder->CreateURem(matchCopyFromPos, SIZE_BLOCK_WIDTH), SIZE_PDEP_WIDTH);
     100    Value * const matchCopyFromBlockOffset = iBuilder->CreateURem(matchCopyFromPos, SIZE_PDEP_WIDTH);
     101
     102    Value * currentCopySize = iBuilder->CreateSub(SIZE_PDEP_WIDTH, iBuilder->CreateUMax(matchCopyFromBlockOffset, matchCopyTargetBlockOffset));
    160103    currentCopySize = iBuilder->CreateUMin(currentCopySize, phiMatchOffset);
    161104    currentCopySize = iBuilder->CreateUMin(currentCopySize, phiMatchLength);
    162     currentCopySize = iBuilder->CreateUMin(currentCopySize, iBuilder->CreateSub(newProducedItemCount, phiMatchPos));
     105    currentCopySize = iBuilder->CreateUMin(currentCopySize, iBuilder->CreateSub(toProcessItemCount, phiMatchPos));
    163106    currentCopySize = iBuilder->CreateSelect(iBuilder->CreateICmpEQ(currentCopySize, SIZE_ZERO), SIZE_ONE, currentCopySize); //Workaround for the last byte
    164     Value* singleMask = iBuilder->CreateSub(
    165             iBuilder->CreateSelect( // When currentCopySize == SIZE_PDEP_WIDTH, shl will cause overflow
    166                     iBuilder->CreateICmpEQ(currentCopySize, SIZE_PDEP_WIDTH),
    167                     SIZE_ZERO,
    168                     iBuilder->CreateShl(SIZE_ONE, iBuilder->CreateAdd(matchCopyFromBlockOffset, currentCopySize))
    169             ),
    170             iBuilder->CreateShl(SIZE_ONE, matchCopyFromBlockOffset)
    171     );
    172     Value* fullMask = iBuilder->simd_fill(mPDEPWidth, singleMask);
    173 
    174     for (int i = 0; i < mStreamSize; i++) {
    175         Value* rawOutputBasePtr = iBuilder->getRawOutputPointer("outputStreamSet" + std::to_string(i), SIZE_ZERO);
    176         rawOutputBasePtr = iBuilder->CreatePointerCast(rawOutputBasePtr, iBuilder->getBitBlockType()->getPointerTo());
    177         Value* matchCopyFromBlockPtr = iBuilder->CreateGEP(rawOutputBasePtr, matchCopyFromBlockIndex);
    178 
    179         Value* fromBlockValue = iBuilder->CreateLoad(matchCopyFromBlockPtr);
    180 
    181         Value* copiedValue = iBuilder->simd_and(fromBlockValue, fullMask);
    182 
    183         Value* outputTargetBlockPtr = iBuilder->CreateGEP(rawOutputBasePtr, matchCopyTargetBlockIndex);
    184 
    185 //        iBuilder->CallPrintInt("outputTargetBlockPtr", outputTargetBlockPtr);
    186         Value* targetOriginalValue = iBuilder->CreateLoad(outputTargetBlockPtr);
    187 
    188         Value* finalValue = iBuilder->simd_or(
    189                 targetOriginalValue,
    190                 iBuilder->CreateShl(
    191                         iBuilder->CreateLShr(
    192                                 copiedValue,
    193                                 iBuilder->simd_fill(mPDEPWidth, matchCopyFromBlockOffset)
    194                         ),
    195                         iBuilder->simd_fill(mPDEPWidth, matchCopyTargetBlockOffset)
    196                 )
    197         );
    198 
    199 
    200 //        iBuilder->CallPrintRegister("targetOriginalValue", targetOriginalValue);
    201 //        iBuilder->CallPrintRegister("finalValue", finalValue);
    202 //        iBuilder->CallPrintInt("matchCopyTargetBlockOffset", matchCopyTargetBlockOffset);
    203 //        iBuilder->CallPrintInt("currentCopySize", currentCopySize);
     107
     108    Value * const shiftOffset = iBuilder->CreateAdd(matchCopyFromBlockOffset, currentCopySize);
     109    Value * highOffset = iBuilder->CreateShl(SIZE_ONE, shiftOffset);
     110    highOffset = iBuilder->CreateSelect(iBuilder->CreateICmpEQ(currentCopySize, SIZE_PDEP_WIDTH), SIZE_ZERO, highOffset); // When currentCopySize == SIZE_PDEP_WIDTH, shl will overflow
     111    Value * const lowOffset = iBuilder->CreateShl(SIZE_ONE, matchCopyFromBlockOffset);
     112    Value * const maskVector = iBuilder->simd_fill(mPDEPWidth, iBuilder->CreateSub(highOffset, lowOffset));
     113    Value * const fromBlockOffsetVector = iBuilder->simd_fill(mPDEPWidth, matchCopyFromBlockOffset);
     114    Value * const targetBlockOffsetVector = iBuilder->simd_fill(mPDEPWidth, matchCopyTargetBlockOffset);
     115
     116    for (unsigned i = 0; i < mStreamSize; i++) {
     117        Value * const matchCopyFromBlockPtr = iBuilder->getOutputStreamBlockPtr("outputStreamSet" + std::to_string(i), matchCopyFromStreamIndex, matchCopyFromBlockIndex);
     118        Value * const fromBlockValue = iBuilder->CreateBlockAlignedLoad(matchCopyFromBlockPtr);
     119
     120        Value * const outputTargetBlockPtr = iBuilder->getOutputStreamBlockPtr("outputStreamSet" + std::to_string(i), matchCopyTargetStreamIndex, matchCopyTargetBlockIndex);
     121        Value * const targetOriginalValue = iBuilder->CreateBlockAlignedLoad(outputTargetBlockPtr);
     122
     123        Value * copiedValue = iBuilder->simd_and(fromBlockValue, maskVector);
     124        copiedValue = iBuilder->CreateLShr(copiedValue, fromBlockOffsetVector);
     125        copiedValue = iBuilder->CreateShl(copiedValue, targetBlockOffsetVector);
     126        Value * const finalValue = iBuilder->CreateOr(targetOriginalValue, copiedValue);
     127
    204128        iBuilder->CreateStore(finalValue, outputTargetBlockPtr);
    205129    }
    206130
    207     phiProcessIndex->addIncoming(phiProcessIndex, iBuilder->GetInsertBlock());
    208     phiMatchOffset->addIncoming(phiMatchOffset, iBuilder->GetInsertBlock());
    209     phiMatchPos->addIncoming(iBuilder->CreateAdd(phiMatchPos, currentCopySize), iBuilder->GetInsertBlock());
    210     phiMatchLength->addIncoming(iBuilder->CreateSub(phiMatchLength, currentCopySize), iBuilder->GetInsertBlock());
     131    phiProcessIndex->addIncoming(phiProcessIndex, matchCopyBodyBlock);
     132    phiMatchOffset->addIncoming(phiMatchOffset, matchCopyBodyBlock);
     133    phiMatchPos->addIncoming(iBuilder->CreateAdd(phiMatchPos, currentCopySize), matchCopyBodyBlock);
     134    phiMatchLength->addIncoming(iBuilder->CreateSub(phiMatchLength, currentCopySize), matchCopyBodyBlock);
    211135
    212136    iBuilder->CreateBr(matchCopyLoopCon);
     
    219143    iBuilder->setProcessedItemCount("m0End", phiProcessIndex);
    220144    iBuilder->setProcessedItemCount("matchOffset", phiProcessIndex);
    221 
    222     iBuilder->CreateBr(exitBlock);
    223     iBuilder->SetInsertPoint(exitBlock);
    224 //    iBuilder->CallPrintInt("totalM0StartItemsCount", totalM0StartItemsCount);
    225 }
    226 
    227 void LZ4SwizzledMatchCopyKernel::generateOutputCopy(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value* outputBlocks) {
    228     Value *SIZE_ZERO = iBuilder->getSize(0);
    229     Value *SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
    230     Constant *INT64_BIT_BLOCK_WIDTH = iBuilder->getInt64(iBuilder->getBitBlockWidth());
    231     Type* bytePtrType = iBuilder->getInt8PtrTy();
    232 
    233     Value *previousProcessed = iBuilder->getProcessedItemCount("sourceStreamSet0");
    234 
    235     Value *itemsToDo = iBuilder->CreateSub(iBuilder->getAvailableItemCount("sourceStreamSet0"), iBuilder->getProcessedItemCount("sourceStreamSet0"));
    236     Value *copySize = iBuilder->CreateMul(outputBlocks, SIZE_BIT_BLOCK_WIDTH);
    237     Value* actualCopySize = iBuilder->CreateUMin(itemsToDo, copySize);
    238     Value* copyByte = iBuilder->CreateUDivCeil(iBuilder->CreateMul(copySize, iBuilder->getSize(mStreamCount)), iBuilder->getSize(8)); // i8
    239 
    240     Value* outputBufferSize = iBuilder->getSize(this->getAnyStreamSetBuffer("sourceStreamSet0")->getBufferBlocks() * iBuilder->getBitBlockWidth());
    241     Value* inputOffset = iBuilder->CreateMul(
    242             iBuilder->CreateAnd(iBuilder->CreateURem(previousProcessed, outputBufferSize), ConstantExpr::getNeg(INT64_BIT_BLOCK_WIDTH)), iBuilder->getInt64(mStreamCount)
    243     );
    244 
    245     for (int i = 0; i < mStreamSize; i++) {
    246 
    247         Value * inputBasePtr = iBuilder->CreatePointerCast(iBuilder->getRawInputPointer("sourceStreamSet" + std::to_string(i), inputOffset), iBuilder->getBitBlockType()->getPointerTo());
    248         Value * outputBasePtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer("outputStreamSet" + std::to_string(i), inputOffset), iBuilder->getBitBlockType()->getPointerTo());
    249 
    250         iBuilder->CreateMemCpy(
    251                 iBuilder->CreatePointerCast(outputBasePtr, bytePtrType),
    252                 iBuilder->CreatePointerCast(inputBasePtr, bytePtrType),
    253                 copyByte,
    254                 1 // Not align guaranteed in final block
    255         );
     145    iBuilder->setProcessedItemCount("sourceStreamSet0", toProcessItemCount);
     146}
     147
     148void LZ4SwizzledMatchCopyKernel::generateOutputCopy(const std::unique_ptr<KernelBuilder> & iBuilder) {
     149    Constant * SIZE_ZERO = iBuilder->getSize(0);
     150    Constant * COPY_BYTES = iBuilder->getSize(4 * 1024 * 1024 * mStreamCount / 8);
     151    for (unsigned i = 0; i < mStreamSize; i++) {
     152        Value * inputBasePtr = iBuilder->getInputStreamBlockPtr("sourceStreamSet" + std::to_string(i), SIZE_ZERO);
     153        Value * outputBasePtr = iBuilder->getOutputStreamBlockPtr("outputStreamSet" + std::to_string(i), SIZE_ZERO);
     154        iBuilder->CreateMemCpy(outputBasePtr, inputBasePtr, COPY_BYTES, 1); // Not align guaranteed in final block
    256155    }
    257     Value *newProcessed = iBuilder->CreateAdd(previousProcessed, actualCopySize);
    258     iBuilder->setProcessedItemCount("sourceStreamSet0", newProcessed);
    259 //    iBuilder->CallPrintInt("swizzledMatchCopy:newProcessed", newProcessed);
    260     iBuilder->setProducedItemCount("outputStreamSet0", newProcessed);
     156}
     157
     158Value* LZ4SwizzledMatchCopyKernel::loadOffset(const std::unique_ptr<KernelBuilder> & iBuilder, const std::string & bufferName, Value* offset) {
     159    return iBuilder->CreateLoad(iBuilder->getRawInputPointer(bufferName, offset));
    261160}
    262161
    263162LZ4SwizzledMatchCopyKernel::LZ4SwizzledMatchCopyKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder, unsigned streamCount/*=4*/, unsigned streamSize/*=2*/, unsigned swizzleFactor/*=4*/, unsigned PDEP_width/*64*/)
    264         : SegmentOrientedKernel("LZ4SwizzledMatchCopyKernel",
    265         // Inputs
    266                            {
    267                                    Binding{iBuilder->getStreamSetTy(1, 64), "m0Start", BoundedRate(0, 1), {DisableTemporaryBuffer(), DisableAvailableItemCountAdjustment(), DisableSufficientChecking()}},
    268                                    Binding{iBuilder->getStreamSetTy(1, 64), "m0End", BoundedRate(0, 1), {DisableTemporaryBuffer(), DisableAvailableItemCountAdjustment(), DisableSufficientChecking()}},
    269                                    Binding{iBuilder->getStreamSetTy(1, 64), "matchOffset", BoundedRate(0, 1), {DisableTemporaryBuffer(), DisableAvailableItemCountAdjustment(), DisableSufficientChecking()}},
    270 
    271                            },
    272         // Outputs
    273                            {},
    274         // Arguments
    275                            {
    276                                    Binding{iBuilder->getSizeTy(), "fileSize"} //TODO remove
    277                            },
    278                            {},
    279                            {
    280                                    Binding{iBuilder->getSizeTy(), "currentProcessIndex"},
    281                                    Binding{iBuilder->getSizeTy(), "pendingMatchPos"},
    282                                    Binding{iBuilder->getSizeTy(), "pendingMatchOffset"},
    283                                    Binding{iBuilder->getSizeTy(), "pendingMatchLength"},
    284                            })
    285         , mSwizzleFactor(swizzleFactor)
    286         , mPDEPWidth(PDEP_width)
    287         , mStreamSize(streamSize)
    288         , mStreamCount(streamCount) {
     163: SegmentOrientedKernel("LZ4SwizzledMatchCopyKernel",
     164// Inputs
     165{
     166       Binding{iBuilder->getStreamSetTy(1, 64), "m0Start", BoundedRate(0, 1), DisableSufficientChecking()},
     167       Binding{iBuilder->getStreamSetTy(1, 64), "m0End", BoundedRate(0, 1), DisableSufficientChecking()},
     168       Binding{iBuilder->getStreamSetTy(1, 64), "matchOffset", BoundedRate(0, 1), DisableSufficientChecking()},
     169},
     170// Outputs
     171{},
     172// Arguments
     173{
     174       Binding{iBuilder->getSizeTy(), "fileSize"} //TODO remove
     175},
     176{},
     177{
     178       Binding{iBuilder->getSizeTy(), "currentProcessIndex"},
     179       Binding{iBuilder->getSizeTy(), "pendingMatchPos"},
     180       Binding{iBuilder->getSizeTy(), "pendingMatchOffset"},
     181       Binding{iBuilder->getSizeTy(), "pendingMatchLength"},
     182})
     183, mSwizzleFactor(swizzleFactor)
     184, mPDEPWidth(PDEP_width)
     185, mStreamSize(streamSize)
     186, mStreamCount(streamCount) {
    289187
    290188    assert((mSwizzleFactor == (iBuilder->getBitBlockWidth() / PDEP_width)) && "swizzle factor must equal bitBlockWidth / PDEP_width");
    291189    assert((mPDEPWidth == 64 || mPDEPWidth == 32) && "PDEP width must be 32 or 64");
    292     this->setStride(4 * 1024 * 1024);
     190    setStride(4 * 1024 * 1024);
    293191    addAttribute(MustExplicitlyTerminate());
    294192
    295     mStreamSetInputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "sourceStreamSet0", BoundedRate(0, 1), {Swizzled(), DisableTemporaryBuffer()}});
    296     mStreamSetOutputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "outputStreamSet0", BoundedRate(0, 1), DisableTemporaryBuffer()});
    297 
    298     for (int i = 1; i < streamSize; i++) {
    299         mStreamSetInputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "sourceStreamSet" + std::to_string(i), RateEqualTo("sourceStreamSet0"), {Swizzled(), DisableTemporaryBuffer()}});
    300         mStreamSetOutputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "outputStreamSet" + std::to_string(i), RateEqualTo("outputStreamSet0"), DisableTemporaryBuffer()});
     193    mStreamSetInputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "sourceStreamSet0", BoundedRate(0, 1), Swizzled()});
     194    mStreamSetOutputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "outputStreamSet0", RateEqualTo("sourceStreamSet0")});
     195
     196    for (unsigned i = 1; i < streamSize; i++) {
     197        mStreamSetInputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "sourceStreamSet" + std::to_string(i), RateEqualTo("sourceStreamSet0"), Swizzled()});
     198        mStreamSetOutputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "outputStreamSet" + std::to_string(i), RateEqualTo("sourceStreamSet0")});
    301199    }
    302200}
     201
     202}
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_swizzled_match_copy_kernel.h

    r5966 r5967  
    1212
    1313namespace kernel {
    14     class LZ4SwizzledMatchCopyKernel final: public SegmentOrientedKernel {
     14    class LZ4SwizzledMatchCopyKernel: public SegmentOrientedKernel {
    1515    public:
    1616        LZ4SwizzledMatchCopyKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned streamCount, unsigned streamSize, unsigned swizzleFactor, unsigned PDEP_width = 64);
    1717    protected:
    18 //        void generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value * const numOfStrides) override;
     18
    1919        void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & b) override;
     20
     21        void generateOutputCopy(const std::unique_ptr<KernelBuilder> & iBuilder);
     22
     23        llvm::Value * loadOffset(const std::unique_ptr<KernelBuilder> &iBuilder, const std::string & bufferName, llvm::Value* offset);
     24
    2025    private:
    2126
     
    2429        const unsigned mStreamSize;
    2530        const unsigned mStreamCount;
    26 
    27 
    28 
    29         void generateOutputCopy(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value* outputBlocks);
    30 
    31         llvm::Value* mIsFinalBlock;
    32         llvm::Value* loadInt64NumberInput(const std::unique_ptr<KernelBuilder> &iBuilder, std::string bufferName, llvm::Value* offset);
    3331    };
    3432}
  • icGREP/icgrep-devel/icgrep/toolchain/pipeline.cpp

    r5948 r5967  
    694694        Constant * const segmentLength = b->getSize(strideLength * codegen::SegmentSize);
    695695
    696         if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts) && !isa<SourceBuffer>(buffer))) {
    697             b->CreateAssert(b->CreateICmpULE(segmentLength, b->getCapacity(name)),
    698                             kernel->getName() + ": " + name + " upper bound of segment length exceeds buffer capacity");
    699         }
     696//        if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts) && !isa<SourceBuffer>(buffer))) {
     697//            b->CreateAssert(b->CreateICmpULE(segmentLength, b->getCapacity(name)),
     698//                            kernel->getName() + ": " + name + " upper bound of segment length exceeds buffer capacity");
     699//        }
    700700
    701701//        Value * limit = nullptr;
Note: See TracChangeset for help on using the changeset viewer.