Changeset 5982 for icGREP/icgrep-devel


Ignore:
Timestamp:
Apr 19, 2018, 11:18:53 PM (15 months ago)
Author:
xwa163
Message:

Improve swizzled match copy kernel

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_swizzled_match_copy_kernel.cpp

    r5981 r5982  
    188188    Value * const matchCopyFromPos = iBuilder->CreateSub(phiMatchPos, phiMatchOffset);
    189189
     190
    190191    Value* matchCopyFromLocalBlockIndex = iBuilder->CreateURem(iBuilder->CreateUDiv(matchCopyFromPos, SIZE_BLOCK_WIDTH), outputBufferBlocks);
    191192    Value * const matchCopyFromStreamIndex = iBuilder->CreateURem(iBuilder->CreateUDiv(matchCopyFromPos, SIZE_PDEP_WIDTH), iBuilder->getSize(mStreamCount));
    192193    Value * const matchCopyFromBlockOffset = iBuilder->CreateURem(matchCopyFromPos, SIZE_PDEP_WIDTH);
     194
     195    Value* fromBlockRemain = iBuilder->CreateSub(SIZE_PDEP_WIDTH, matchCopyFromBlockOffset);
    193196
    194197    Value * currentCopySize = iBuilder->CreateSub(SIZE_PDEP_WIDTH, iBuilder->CreateUMax(matchCopyFromBlockOffset, matchCopyTargetBlockOffset));
     
    197200    currentCopySize = iBuilder->CreateSelect(iBuilder->CreateICmpEQ(currentCopySize, SIZE_ZERO), SIZE_ONE, currentCopySize); //Workaround for the last byte
    198201
    199     Value * const shiftOffset = iBuilder->CreateAdd(matchCopyFromBlockOffset, currentCopySize);
    200     Value * highOffset = iBuilder->CreateShl(SIZE_ONE, shiftOffset);
    201     highOffset = iBuilder->CreateSelect(iBuilder->CreateICmpEQ(currentCopySize, SIZE_PDEP_WIDTH), SIZE_ZERO, highOffset); // When currentCopySize == SIZE_PDEP_WIDTH, shl will overflow
    202     Value * const lowOffset = iBuilder->CreateShl(SIZE_ONE, matchCopyFromBlockOffset);
    203     Value * const maskVector = iBuilder->simd_fill(mPDEPWidth, iBuilder->CreateSub(highOffset, lowOffset));
     202    Value * newCurrentCopySize = iBuilder->CreateSub(SIZE_PDEP_WIDTH, matchCopyTargetBlockOffset);
     203    newCurrentCopySize = iBuilder->CreateUMin(newCurrentCopySize, phiMatchOffset);
     204    newCurrentCopySize = iBuilder->CreateUMin(newCurrentCopySize, phiMatchLength);
     205
    204206    Value * const fromBlockOffsetVector = iBuilder->simd_fill(mPDEPWidth, matchCopyFromBlockOffset);
    205     Value * const targetBlockOffsetVector = iBuilder->simd_fill(mPDEPWidth, matchCopyTargetBlockOffset);
     207    Value * const fromBlockRemainVector = iBuilder->simd_fill(mPDEPWidth, fromBlockRemain);
     208
     209    Value * const targetLeftShiftVector = iBuilder->simd_fill(mPDEPWidth, iBuilder->CreateSub(SIZE_PDEP_WIDTH, newCurrentCopySize));
     210    Value * const targetRightShiftVector = iBuilder->simd_fill(mPDEPWidth, iBuilder->CreateSub(SIZE_PDEP_WIDTH, iBuilder->CreateAdd(newCurrentCopySize, matchCopyTargetBlockOffset)));
    206211
    207212    for (unsigned i = 0; i < mStreamSize; i++) {
     
    210215        Value * const matchCopyFromBlockPtr = iBuilder->CreateGEP(basePtr, iBuilder->CreateAdd(iBuilder->CreateMul(matchCopyFromLocalBlockIndex, iBuilder->getSize(mStreamCount)), matchCopyFromStreamIndex));
    211216        Value * const fromBlockValue = iBuilder->CreateBlockAlignedLoad(matchCopyFromBlockPtr);
     217        Value * const fromNextBlockValue = iBuilder->CreateBlockAlignedLoad(iBuilder->CreateGEP(matchCopyFromBlockPtr, iBuilder->CreateSelect(iBuilder->CreateICmpULE(newCurrentCopySize, fromBlockRemain), SIZE_ZERO, SIZE_ONE)));
     218
     219        Value * allFromValue = iBuilder->CreateOr(
     220                iBuilder->CreateLShr(fromBlockValue, fromBlockOffsetVector),
     221                iBuilder->CreateShl(fromNextBlockValue, fromBlockRemainVector)
     222        );
     223        Value * allTargetValue = iBuilder->CreateLShr(iBuilder->CreateShl(allFromValue, targetLeftShiftVector), targetRightShiftVector);
    212224
    213225        Value * const outputTargetBlockPtr = iBuilder->CreateGEP(basePtr, iBuilder->CreateAdd(iBuilder->CreateMul(matchPosLocalBlockIndex, iBuilder->getSize(mStreamCount)), matchCopyTargetStreamIndex));
    214226        Value * const targetOriginalValue = iBuilder->CreateBlockAlignedLoad(outputTargetBlockPtr);
    215227
    216         Value * copiedValue = iBuilder->simd_and(fromBlockValue, maskVector);
    217         copiedValue = iBuilder->CreateLShr(copiedValue, fromBlockOffsetVector);
    218         copiedValue = iBuilder->CreateShl(copiedValue, targetBlockOffsetVector);
    219         Value * const finalValue = iBuilder->CreateOr(targetOriginalValue, copiedValue);
     228        Value * const finalValue = iBuilder->CreateOr(targetOriginalValue, allTargetValue);
    220229
    221230        iBuilder->CreateStore(finalValue, outputTargetBlockPtr);
Note: See TracChangeset for help on using the changeset viewer.