Changeset 6020


Ignore:
Timestamp:
May 7, 2018, 1:15:37 PM (2 weeks ago)
Author:
xwa163
Message:
  1. New version of lz4_swizzled_match_copy kernel with higher performance
  2. Adjust related pipeline code
  3. Remove legacy comments
Location:
icGREP/icgrep-devel/icgrep
Files:
19 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r5985 r6020  
    102102add_library(UCDlib UCD/CaseFolding.cpp utf8_encoder.cpp utf16_encoder.cpp UCD/ucd_compiler.cpp UCD/PropertyObjects.cpp UCD/resolve_properties.cpp)
    103103add_library(GrepEngine  ${GREP_CORE_SRC} grep/grep_engine.cpp kernels/cc_kernel.cpp kernels/cc_scan_kernel.cpp kernels/charclasses.cpp kernels/streams_merge.cpp kernels/until_n.cpp kernels/UCD_property_kernel.cpp kernels/grapheme_kernel.cpp)
    104 add_library(LZ4_Lib lz4FrameDecoder.cpp kernels/cc_kernel.cpp kernels/lz4/lz4_deposit_uncompressed.cpp kernels/lz4/lz4_generate_deposit_stream.cpp kernels/pdep_kernel.cpp kernels/lz4/lz4_match_copy_kernel.cpp lz4/LZ4Generator.cpp kernels/lz4/lz4_multiple_pdep_kernel.cpp kernels/lz4/lz4_swizzled_match_copy_kernel.cpp kernels/lz4/lz4_block_decoder.cpp kernels/lz4/lz4_index_builder.cpp lz4/LZ4GrepEngine.cpp)
     104add_library(LZ4_Lib lz4FrameDecoder.cpp kernels/cc_kernel.cpp kernels/lz4/lz4_deposit_uncompressed.cpp kernels/lz4/lz4_generate_deposit_stream.cpp kernels/pdep_kernel.cpp kernels/lz4/lz4_match_copy_kernel.cpp lz4/LZ4Generator.cpp kernels/lz4/lz4_multiple_pdep_kernel.cpp kernels/lz4/lz4_block_decoder.cpp kernels/lz4/lz4_index_builder.cpp lz4/LZ4GrepEngine.cpp kernels/lz4/lz4_swizzled_match_copy_kernel.cpp)
     105
    105106
    106107# force the compiler to compile the object cache to ensure that the versioning information is up to date
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_block_decoder.cpp

    r5984 r6020  
    1 //
    2 // Created by wxy325 on 2018/3/16.
    3 //
     1
    42
    53#include "lz4_block_decoder.h"
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_deposit_uncompressed.cpp

    r5939 r6020  
    1 //
    2 // Created by wxy325 on 2017/7/9.
    3 //
     1
    42
    53#include "lz4_deposit_uncompressed.h"
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_deposit_uncompressed.h

    r5864 r6020  
    1 //
    2 // Created by wxy325 on 2017/7/9.
    3 //
    41
    52#ifndef ICGREP_LZ4_DEPOSIT_UNCOMPRESSED_H
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_generate_deposit_stream.cpp

    r5864 r6020  
    1 //
    2 // Created by wxy325 on 2017/7/10.
    3 //
    41
    52#include <pablo/builder.hpp>
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_index_builder.cpp

    r5985 r6020  
    1 //
    2 // Created by wxy325 on 2018/3/16.
    3 //
    41
    52#include "lz4_index_builder.h"
     
    4340           Binding{iBuilder->getStreamSetTy(1, 1), "deletionMarker", BoundedRate(0, 1)},
    4441           Binding{iBuilder->getStreamSetTy(1, 1), "M0Marker", BoundedRate(0, 1)},
    45            Binding{iBuilder->getStreamSetTy(1, 1), "M0CountMarker", BoundedRate(0, 1)},
    4642           Binding{iBuilder->getStreamSetTy(1, 1), "MatchOffsetMarker", RateEqualTo("byteStream")}
    4743    },
     
    6258
    6359    void LZ4IndexBuilderKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> &iBuilder) {
    64 //        iBuilder->CallPrintInt("IndexBuilder:entry", iBuilder->getSize(0));
    65 
    6660        BasicBlock* exitBlock = iBuilder->CreateBasicBlock("exitBlock");
    6761        BasicBlock* blockEndConBlock = iBuilder->CreateBasicBlock("blockEndConBlock");
     
    170164        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
    171165
    172         Value* matchLengthStartPos = iBuilder->CreateAdd(offsetPos, INT64_ONE);
     166        Value* extendMatchStartPos = iBuilder->CreateAdd(offsetPos, INT64_ONE);
    173167        Value* extendedMatchValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)), iBuilder->getInt8(0xf));
    174168
     
    181175
    182176        //ExtendMatchBodyBlock
    183         Value* newCursorPos = this->advanceUntilNextZero(iBuilder, "extender", iBuilder->CreateAdd(matchLengthStartPos, INT64_ONE), blockEnd);
     177        Value* newCursorPos = this->advanceUntilNextZero(iBuilder, "extender", iBuilder->CreateAdd(extendMatchStartPos, INT64_ONE), blockEnd);
    184178        BasicBlock* advanceFinishBlock = iBuilder->GetInsertBlock();
    185179
    186         // ----May be in a different segment now
    187180        iBuilder->CreateBr(extendMatchExitBlock);
    188181
     
    191184        PHINode* phiCursorPosAfterMatch = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
    192185        phiCursorPosAfterMatch->addIncoming(newCursorPos, advanceFinishBlock);
    193         phiCursorPosAfterMatch->addIncoming(matchLengthStartPos, entryBlock);
    194 
    195         Value* oldMatchExtensionSize = iBuilder->CreateSub(phiCursorPosAfterMatch, matchLengthStartPos);
    196         extendedMatchValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)), iBuilder->getInt8(0xf));
    197         Value* matchExtensionSize = iBuilder->CreateSelect(
    198                 iBuilder->CreateICmpEQ(extendedMatchValue, iBuilder->getInt1(true)),
    199                 oldMatchExtensionSize,
    200                 iBuilder->getSize(0)
    201         );
     186        phiCursorPosAfterMatch->addIncoming(extendMatchStartPos, entryBlock);
     187
     188        Value* oldMatchExtensionSize = iBuilder->CreateSub(phiCursorPosAfterMatch, extendMatchStartPos);
     189//        extendedMatchValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)), iBuilder->getInt8(0xf));
     190        Value* matchExtensionSize = iBuilder->CreateSelect(extendedMatchValue, oldMatchExtensionSize, iBuilder->getSize(0));
    202191        Value* matchLengthBase = iBuilder->CreateZExt(iBuilder->CreateAnd(token, iBuilder->getInt8(0x0f)), iBuilder->getInt64Ty());
    203192        Value* matchLength = iBuilder->CreateAdd(matchLengthBase, iBuilder->getInt64(4));
     
    235224
    236225
    237         iBuilder->setProducedItemCount("M0CountMarker", iBuilder->CreateAdd(iBuilder->getProducedItemCount("M0CountMarker"), iBuilder->getSize(1)));
     226
    238227        this->markCircularOutputBitstream(iBuilder, "MatchOffsetMarker", offsetPos);
    239228        this->increaseScalarField(iBuilder, "m0OutputPos", matchLength);
     
    244233
    245234    void LZ4IndexBuilderKernel::generateProcessCompressedBlock(const std::unique_ptr<KernelBuilder> &iBuilder, Value* blockStart, Value* blockEnd) {
    246         // Constant
    247235        Value* clearPos = iBuilder->getScalarField("compressedSpaceClearPos");
    248236        // We can not only clear [blockStart, blockEnd), since there are 4 bytes between blockEnd and nextBlockStart
     
    372360
    373361    Value * LZ4IndexBuilderKernel::generateLoadInt64NumberInput(const unique_ptr<KernelBuilder> &iBuilder, string inputBufferName, Value * globalOffset) {
    374         Constant* SIZE_STRIDE_SIZE = iBuilder->getSize(getStride());
     362//        Constant* SIZE_STRIDE_SIZE = iBuilder->getSize(getStride());
     363        Constant* SIZE_STRIDE_SIZE = iBuilder->getSize(this->getInputStreamSetBuffer(inputBufferName)->getBufferBlocks() * iBuilder->getBitBlockWidth());
    375364        Value * processed = iBuilder->getProcessedItemCount(inputBufferName);
    376365        processed = iBuilder->CreateAnd(processed, ConstantExpr::getNeg(SIZE_STRIDE_SIZE));
     
    389378        fieldValue = iBuilder->CreateAdd(fieldValue, value);
    390379        iBuilder->setScalarField(fieldName, fieldValue);
    391     }
    392 
    393     void LZ4IndexBuilderKernel::generateStoreNumberOutput(const unique_ptr<KernelBuilder> &iBuilder,
    394                                                           const string & outputBufferName,
    395                                                           Value * value) {
    396 
    397         Value * outputOffset = iBuilder->getProducedItemCount(outputBufferName);
    398         Value * outputRawPtr = iBuilder->getRawOutputPointer(outputBufferName, outputOffset);
    399         iBuilder->CreateStore(value, outputRawPtr);
    400         iBuilder->setProducedItemCount(outputBufferName, iBuilder->CreateAdd(outputOffset, iBuilder->getSize(1)));
    401380    }
    402381
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_index_builder.h

    r5974 r6020  
    1 //
    2 // Created by wxy325 on 2018/3/16.
    3 //
    41
    52#ifndef ICGREP_LZ4_INDEX_BUILDER_H
     
    5249                     llvm::Value *blockEnd);
    5350
    54         void generateStoreNumberOutput(const std::unique_ptr<KernelBuilder> &iBuilder,
    55                                        const std::string &outputBufferName,
    56                                        llvm::Value *value);
    5751
    5852        void clearCircularOutputBitstream(const std::unique_ptr<KernelBuilder> &iBuilder,
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_multiple_pdep_kernel.cpp

    r5985 r6020  
    1 //
    2 // Created by wxy325 on 2018/2/9.
    3 //
     1
    42
    53#include "lz4_multiple_pdep_kernel.h"
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_multiple_pdep_kernel.h

    r5873 r6020  
    1 //
    2 // Created by wxy325 on 2018/2/9.
    3 //
    41
    52#ifndef ICGREP_LZ4_MULTIPLE_PDEP_KERNEL_H
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_swizzled_match_copy_kernel.cpp

    r5985 r6020  
    1 //
    2 // Created by wxy325 on 2018/3/9.
    3 //
     1
    42
    53#include "lz4_swizzled_match_copy_kernel.h"
     
    108#include <llvm/Support/raw_ostream.h>
    119
     10
    1211using namespace llvm;
    1312using namespace std;
     
    7271}
    7372
    74 pair<Value*, Value*> LZ4SwizzledMatchCopyKernel::loadNextM0StartEnd(const unique_ptr<KernelBuilder> &iBuilder) {
    75     Value* initCurrentPos = iBuilder->getScalarField("currentM0MarkerPos");
    76     Value* m0Start = this->advanceUntilNextBit(iBuilder, "M0Marker", initCurrentPos, true);
    77     Value* m0End = this->advanceUntilNextBit(iBuilder, "M0Marker", m0Start, false);
    78     return std::make_pair(m0Start, m0End);
    79 };
    80 
    8173
    8274void LZ4SwizzledMatchCopyKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
     75    // ---- Constant
    8376    ConstantInt * const SIZE_4_MEGS = iBuilder->getSize(4 * 1024 * 1024);
    84 
     77    ConstantInt * const SIZE_0 = iBuilder->getSize(0);
     78    ConstantInt * const SIZE_1 = iBuilder->getSize(1);
     79    ConstantInt * const SIZE_64 = iBuilder->getSize(64);
     80    ConstantInt * const INT64_0 = iBuilder->getInt64(0);
     81    ConstantInt * const INT64_1 = iBuilder->getInt64(1);
     82
     83    Value * BITBLOCK_0 = iBuilder->CreateBitCast(ConstantInt::get(iBuilder->getIntNTy(iBuilder->getBitBlockWidth()), 0), iBuilder->getBitBlockType());
     84
     85    // ---- Type
     86    Type* BITBLOCK_TYPE = iBuilder->getBitBlockType();
     87    Type* BITBLOCK_PTR_TYPE = BITBLOCK_TYPE->getPointerTo();
     88    Type* I64_TY = iBuilder->getInt64Ty();
     89    Type* I64_PTR_TY = I64_TY->getPointerTo();
     90
     91    Value * PDEP_func = Intrinsic::getDeclaration(iBuilder->getModule(), Intrinsic::x86_bmi_pdep_64); //TODO for now only consider 64 bits
     92
     93    // ---- EntryBlock
    8594    BasicBlock * const entryBlock = iBuilder->GetInsertBlock();
     95    BasicBlock * const exitBlock = iBuilder->CreateBasicBlock("exitBlock");
    8696
    8797    Value * const available = iBuilder->getAvailableItemCount("sourceStreamSet0");
     
    92102
    93103
    94     // Output Copy
    95     generateOutputCopy(iBuilder);
    96 
    97     Value * const toProcessItemCount = iBuilder->CreateAdd(processed, itemsToDo);
    98 
    99     // Match Copy
    100     Value *initM0StartProcessIndex = iBuilder->getProcessedItemCount("M0CountMarker");
    101     Value *totalM0StartItemsCount = iBuilder->getAvailableItemCount("M0CountMarker");
    102 
    103     BasicBlock * const matchCopyLoopCon = iBuilder->CreateBasicBlock("matchCopyLoopCon");
    104     BasicBlock * const processExitBlock = iBuilder->CreateBasicBlock("exit_block");
    105 
    106     BasicBlock * const loadNextMatchInfoBodyBlock = iBuilder->CreateBasicBlock("loadNewMatchInfoBodyBlock");
    107     BasicBlock * const matchCopyConBlock = iBuilder->CreateBasicBlock("matchCopyConBlock");
    108     BasicBlock * const matchCopyBodyBlock = iBuilder->CreateBasicBlock("matchCopyBodyBlock");
    109 
    110 
     104    Value* m0MarkerBasePtr = iBuilder->CreatePointerCast(iBuilder->getInputStreamBlockPtr("M0Marker", SIZE_0), I64_PTR_TY); // i64*
     105    vector<Value*> sourceStreamBasePtrs, outputStreamBasePtrs; // <4 * i64>*
     106    for (int i = 0; i < mStreamSize; i++) {
     107        sourceStreamBasePtrs.push_back(iBuilder->CreatePointerCast(iBuilder->getInputStreamBlockPtr("sourceStreamSet" + std::to_string(i), SIZE_0), BITBLOCK_PTR_TYPE));
     108        outputStreamBasePtrs.push_back(iBuilder->CreatePointerCast(iBuilder->getOutputStreamBlockPtr("outputStreamSet" + std::to_string(i), SIZE_0), BITBLOCK_PTR_TYPE));
     109    }
     110
     111
     112    BasicBlock * const processLoopCon = iBuilder->CreateBasicBlock("processLoopCon");
     113    BasicBlock * const processLoopBody = iBuilder->CreateBasicBlock("processLoopBody");
     114    BasicBlock * const processLoopExit = iBuilder->CreateBasicBlock("processLoopExit");
     115
     116    iBuilder->CreateBr(processLoopCon);
     117
     118    // ---- ProcessLoopCon
     119    // ProcessLoop will process one block of data each time (64bit m0, <4 * i64> input and output data)
     120    iBuilder->SetInsertPoint(processLoopCon);
     121
     122    // carryBit === 0x1 only when the most significant bit of the target M0 block is one, which means the first position of the next block needs to be deposited (match copy)
     123
     124    PHINode* phiCarryBit = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2);
     125    PHINode* phiCurrentPosition = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 2); // 0~4mb, and all M0 related
     126    PHINode* phiCarryMatchOffset = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
     127
     128    phiCarryBit->addIncoming(INT64_0, entryBlock);
     129    phiCurrentPosition->addIncoming(INT64_0, entryBlock);
     130    phiCarryMatchOffset->addIncoming(SIZE_0, entryBlock);
     131
     132    iBuilder->CreateLikelyCondBr(iBuilder->CreateICmpULT(phiCurrentPosition, itemsToDo), processLoopBody, processLoopExit);
     133
     134    // ---- ProcessLoopBody
     135    iBuilder->SetInsertPoint(processLoopBody);
     136
     137    Value* dataBlockIndex = iBuilder->CreateUDiv(phiCurrentPosition, SIZE_64);
     138    Value* currentInitM0 = iBuilder->CreateLoad(iBuilder->CreateGEP(m0MarkerBasePtr, dataBlockIndex));
     139    vector<Value*> initSourceData;
     140    for (int i = 0; i < mStreamSize; i++) {
     141        // Because of swizzled form, the sourceStream can be accessed linearly
     142        initSourceData.push_back(iBuilder->CreateLoad(iBuilder->CreateGEP(sourceStreamBasePtrs[i], dataBlockIndex)));
     143    }
     144
     145    BasicBlock* carryBitProcessBlock = iBuilder->CreateBasicBlock("CarryBitProcessBlock");
     146
     147    BasicBlock* matchCopyLoopCon = iBuilder->CreateBasicBlock("MatchCopyLoopCon");
     148    BasicBlock* matchCopyLoopBody = iBuilder->CreateBasicBlock("MatchCopyLoopBody");
     149    BasicBlock* matchCopyLoopExit = iBuilder->CreateBasicBlock("MatchCopyLoopExit");
     150
     151    //
     152    // The carry bit will need to be processed specially only when
     153    // the most significant bit of previous block is 1 (the carry bit is 0x1) and the
     154    // least significant bit of current block is 0
     155    // e.g.
     156    //   Assume the most significant bit is on the right side
     157    //
     158    //                    i64_1       i64_2
     159    //   M0         ... 0000 0011 | 0111 0000 ...  - Carry bit needs to be handled specially
     160    //   M0         ... 0000 0011 | 1011 0000 ...  - Carry bit will be handled in the loop of i64_2
     161    //   Carry Bit                  1000 0000 ...  - 0x1
     162
     163    Value* needProcessCarryBit = iBuilder->CreateAnd(phiCarryBit, iBuilder->CreateNot(iBuilder->CreateAnd(currentInitM0, iBuilder->getInt64(1))));
     164    needProcessCarryBit = iBuilder->CreateICmpNE(needProcessCarryBit, INT64_0);
     165
     166    iBuilder->CreateUnlikelyCondBr(needProcessCarryBit, carryBitProcessBlock, matchCopyLoopCon);
     167
     168    // ---- CarryBitProcessBlock
     169    iBuilder->SetInsertPoint(carryBitProcessBlock);
     170    vector<Value*> initSourceDataWithCarry;
     171    Value* carryCopyFromPos = iBuilder->CreateSub(phiCurrentPosition, phiCarryMatchOffset);
     172    Value* carryCopyFromBlockIndex = iBuilder->CreateUDiv(carryCopyFromPos, SIZE_64);
     173    Value* carryCopyFromOffset = iBuilder->CreateURem(carryCopyFromPos, SIZE_64);
     174    for (int i = 0; i < mStreamSize; i++) {
     175        Value* v = iBuilder->CreateLoad(iBuilder->CreateGEP(outputStreamBasePtrs[i], carryCopyFromBlockIndex));
     176        v = iBuilder->CreateLShr(v, iBuilder->simd_fill(mPDEPWidth, carryCopyFromOffset));
     177        v = iBuilder->CreateAnd(v, iBuilder->simd_fill(mPDEPWidth, INT64_1));
     178        initSourceDataWithCarry.push_back(iBuilder->CreateOr(v, initSourceData[i]));
     179    }
    111180    iBuilder->CreateBr(matchCopyLoopCon);
    112181
     182    // ---- MatchCopyLoopCon
     183    // MatchCopy Loop will handle one continuous data deposit each time
    113184    iBuilder->SetInsertPoint(matchCopyLoopCon);
    114     PHINode * const phiProcessIndex = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
    115     phiProcessIndex->addIncoming(initM0StartProcessIndex, entryBlock);
    116 
    117     Value * const hasMoreMatchInfo = iBuilder->CreateICmpULT(phiProcessIndex, totalM0StartItemsCount);
    118 
    119     iBuilder->CreateCondBr(hasMoreMatchInfo, loadNextMatchInfoBodyBlock, processExitBlock);
    120 
    121     iBuilder->SetInsertPoint(loadNextMatchInfoBodyBlock);
    122 
    123     auto ret = this->loadNextM0StartEnd(iBuilder);
    124     Value *newM0Start = ret.first;
    125     Value *newM0End = ret.second;
    126 
     185
     186    PHINode* phiLatestMatchOffset = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 3);
     187    phiLatestMatchOffset->addIncoming(phiCarryMatchOffset, processLoopBody);
     188    phiLatestMatchOffset->addIncoming(phiCarryMatchOffset, carryBitProcessBlock);
     189
     190    PHINode* phiRemainingM0Marker = iBuilder->CreatePHI(iBuilder->getInt64Ty(), 3);
     191    phiRemainingM0Marker->addIncoming(currentInitM0, processLoopBody);
     192    phiRemainingM0Marker->addIncoming(currentInitM0, carryBitProcessBlock);
     193
     194    vector<PHINode*> outputData;
     195    for (int i = 0; i < mStreamSize; i++) {
     196        PHINode* outputValue = iBuilder->CreatePHI(iBuilder->getBitBlockType(), 3);
     197        outputValue->addIncoming(initSourceData[i], processLoopBody);
     198        outputValue->addIncoming(initSourceDataWithCarry[i], carryBitProcessBlock);
     199        outputData.push_back(outputValue);
     200    }
     201//    iBuilder->CreateOr()
     202    iBuilder->CreateLikelyCondBr(iBuilder->CreateICmpNE(phiRemainingM0Marker, INT64_0), matchCopyLoopBody, matchCopyLoopExit);
     203
     204    // ---- MatchCopyLoopBody
     205    iBuilder->SetInsertPoint(matchCopyLoopBody);
     206
     207    // Match Offset
     208    // M0      0111 1000  - load new match offset
     209    // M0      1100 0011  - use carryMatchOffset
     210    Value* remainM0ForwardZero = iBuilder->CreateCountForwardZeroes(phiRemainingM0Marker);
     211
     212    BasicBlock* loadNextMatchOffsetBlock = iBuilder->CreateBasicBlock("loadNextMatchOffsetBlock");
     213    BasicBlock* doMatchCopyBlock = iBuilder->CreateBasicBlock("DoMatchCopyBlock");
     214
     215    iBuilder->CreateLikelyCondBr(
     216            iBuilder->CreateOr(iBuilder->CreateICmpEQ(phiLatestMatchOffset, INT64_0),
     217                               iBuilder->CreateICmpNE(remainM0ForwardZero, INT64_0)
     218            ),
     219            loadNextMatchOffsetBlock, doMatchCopyBlock
     220    );
     221
     222    // ---- loadNextMatchOffsetBlock
     223    iBuilder->SetInsertPoint(loadNextMatchOffsetBlock);
    127224    auto matchOffsetRet = this->loadNextMatchOffset(iBuilder);
    128     Value *newMatchOffset = matchOffsetRet.first;
     225    BasicBlock* loadNextMatchOffsetExitBlock = iBuilder->GetInsertBlock();
     226    Value* newMatchOffset = matchOffsetRet.first;
    129227    Value* newMatchOffsetPos = matchOffsetRet.second;
    130 
    131     Value * const newMatchLength = iBuilder->CreateAdd(iBuilder->CreateSub(newM0End, newM0Start), iBuilder->getInt64(1));
    132 
    133     iBuilder->CreateBr(matchCopyConBlock);
    134     iBuilder->SetInsertPoint(matchCopyConBlock);
    135 
    136     Value * const hasNotReachEnd = iBuilder->CreateICmpULT(newM0Start, toProcessItemCount);
    137     iBuilder->CreateLikelyCondBr(hasNotReachEnd, matchCopyBodyBlock, processExitBlock);
    138 
    139     iBuilder->SetInsertPoint(matchCopyBodyBlock);
    140 
    141228    iBuilder->setScalarField("currentOffsetMarkerPos", newMatchOffsetPos);
    142229    iBuilder->setProcessedItemCount("MatchOffsetMarker", newMatchOffsetPos);
    143     iBuilder->setScalarField("currentM0MarkerPos", newM0End);
    144     iBuilder->setProcessedItemCount("M0Marker", newM0End);
    145 
    146 
    147     BasicBlock* copyLoopCon = iBuilder->CreateBasicBlock("copyLoopCon");
    148     BasicBlock* copyLoopBody = iBuilder->CreateBasicBlock("copyLoopBody");
    149     iBuilder->CreateBr(copyLoopCon);
    150     iBuilder->SetInsertPoint(copyLoopCon);
    151     PHINode* phiMatchLength = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
    152     PHINode* phiMatchPos = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
    153 
    154     phiMatchLength->addIncoming(newMatchLength, matchCopyBodyBlock);
    155     phiMatchPos->addIncoming(newM0Start, matchCopyBodyBlock);
    156 
    157     phiProcessIndex->addIncoming(iBuilder->CreateAdd(phiProcessIndex, iBuilder->getSize(1)), iBuilder->GetInsertBlock());
    158 
    159 
    160     iBuilder->CreateLikelyCondBr(iBuilder->CreateICmpNE(phiMatchLength, iBuilder->getSize(0)), copyLoopBody, matchCopyLoopCon);
    161 
    162     iBuilder->SetInsertPoint(copyLoopBody);
    163     Value* copySize = this->doMatchCopy(iBuilder, phiMatchPos, newMatchOffset, phiMatchLength);
    164     phiMatchLength->addIncoming(iBuilder->CreateSub(phiMatchLength, copySize), iBuilder->GetInsertBlock());
    165     phiMatchPos->addIncoming(iBuilder->CreateAdd(phiMatchPos, copySize), iBuilder->GetInsertBlock());
    166     iBuilder->CreateBr(copyLoopCon);
    167 
    168     iBuilder->SetInsertPoint(processExitBlock);
    169     iBuilder->setProcessedItemCount("M0CountMarker", phiProcessIndex);
     230    iBuilder->CreateBr(doMatchCopyBlock);
     231
     232    // ---- doMatchCopyBlock
     233    iBuilder->SetInsertPoint(doMatchCopyBlock);
     234
     235    PHINode* phiTargetMatchOffset = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
     236    phiTargetMatchOffset->addIncoming(phiLatestMatchOffset, matchCopyLoopBody);
     237    phiTargetMatchOffset->addIncoming(newMatchOffset, loadNextMatchOffsetExitBlock);
     238
     239    //
     240    // M0            0011 0010
     241    // boundary      0000 1000
     242    // nextMask      0000 0111
     243    // deposit       0011 1000
     244    // newM0         0000 0010
     245
     246    Value* remainStart = iBuilder->CreateShl(INT64_1, remainM0ForwardZero);
     247
     248    Value* boundaryMarker = iBuilder->CreateAnd(iBuilder->CreateAdd(phiRemainingM0Marker, remainStart), iBuilder->CreateNot(phiRemainingM0Marker));
     249
     250    Value* nextMask = iBuilder->CreateSub(INT64_0, iBuilder->CreateShl(boundaryMarker, INT64_1));
     251    Value* depositMarker = iBuilder->CreateAnd(
     252            iBuilder->CreateOr(phiRemainingM0Marker, boundaryMarker),
     253            iBuilder->CreateNot(nextMask)
     254    );
     255    Value* newM0Marker = iBuilder->CreateAnd(phiRemainingM0Marker, nextMask);
     256    Value* depositMarkerPopcount = iBuilder->CreatePopcount(depositMarker);
     257
     258    Value* matchCopyFromStart = iBuilder->CreateSub(iBuilder->CreateAdd(phiCurrentPosition, remainM0ForwardZero), phiTargetMatchOffset);
     259    Value* matchCopyFromBlockIndex = iBuilder->CreateUDiv(matchCopyFromStart, SIZE_64);
     260
     261    Value* matchCopyFromOffset = iBuilder->CreateURem(matchCopyFromStart, SIZE_64);
     262    Value* matchCopyFromRemaining = iBuilder->CreateSub(SIZE_64, matchCopyFromOffset);
     263    Value* matchCopyFromNextBlockIndex = iBuilder->CreateAdd(matchCopyFromBlockIndex, iBuilder->CreateSelect(iBuilder->CreateICmpULE(depositMarkerPopcount, matchCopyFromRemaining), SIZE_0, SIZE_1));
     264
     265
     266    vector<Value*> pdepSourceData;
     267
     268    for (int i = 0; i < mStreamSize; i++) {
     269        Value* fromPtr = iBuilder->CreateGEP(outputStreamBasePtrs[i], matchCopyFromBlockIndex);
     270        Value* fromBlockValue = iBuilder->CreateLoad(fromPtr);
     271        // when dataBlockIndex == matchCopyFromBlockIndex, we need to use current output value as input
     272        fromBlockValue = iBuilder->CreateSelect(iBuilder->CreateICmpEQ(dataBlockIndex, matchCopyFromBlockIndex), outputData[i], fromBlockValue);
     273
     274        Value* fromNextPtr = iBuilder->CreateGEP(outputStreamBasePtrs[i], matchCopyFromNextBlockIndex);
     275        Value* fromNextBlockValue = iBuilder->CreateLoad(fromNextPtr);
     276        fromNextBlockValue = iBuilder->CreateSelect(iBuilder->CreateICmpEQ(dataBlockIndex, matchCopyFromNextBlockIndex), outputData[i], fromNextBlockValue);
     277
     278
     279        Value * allFromValue = iBuilder->CreateOr(
     280                iBuilder->CreateLShr(fromBlockValue, iBuilder->simd_fill(mPDEPWidth, matchCopyFromOffset)),
     281                iBuilder->CreateShl(fromNextBlockValue, iBuilder->simd_fill(mPDEPWidth, matchCopyFromRemaining))
     282        );
     283        pdepSourceData.push_back(allFromValue);
     284    }
     285
     286    BasicBlock* doubleSourceDataCon = iBuilder->CreateBasicBlock("doubleSourceDataCon");
     287    BasicBlock* doubleSourceDataBody = iBuilder->CreateBasicBlock("doubleSourceDataBody");
     288    BasicBlock* doubleSourceDataExit = iBuilder->CreateBasicBlock("doubleSourceDataExit");
     289
     290    iBuilder->CreateBr(doubleSourceDataCon);
     291
     292    //
     293    // When matchOffset < depositMarkerPopcount, we need to use log2 approach to double the source data
     294    // e.g.
     295    // Assume that match copy start position is 1, matchOffset is 1, match length is 5
     296    //     outputBuffer              a000 0000 0000 0000
     297    //     sourceDataBeforeDouble    a000 0000 0000 0000
     298    // At this point, only 1 bit of source data is accessible, so it will double the source data 3 times until we have
     299    // 1 * 2 ^ 3 = 8 bits accessible
     300    //     sourceDataAfterDouble     aaaa aaaa 0000 0000
     301    //     outputBuffer(after copy)  aaaa aa00 0000 0000
     302    //
     303
     304    // ---- doubleSourceDataCon
     305    iBuilder->SetInsertPoint(doubleSourceDataCon);
     306    PHINode* phiSourceDataAccessable = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
     307    phiSourceDataAccessable->addIncoming(phiTargetMatchOffset, doMatchCopyBlock);
     308    vector<PHINode*> phiPdepSourceData;
     309    for (int i = 0; i < mStreamSize; i++) {
     310        PHINode* v = iBuilder->CreatePHI(iBuilder->getBitBlockType(), 2);
     311        v->addIncoming(pdepSourceData[i], doMatchCopyBlock);
     312        phiPdepSourceData.push_back(v);
     313    }
     314    iBuilder->CreateUnlikelyCondBr(iBuilder->CreateICmpULT(phiSourceDataAccessable, depositMarkerPopcount), doubleSourceDataBody, doubleSourceDataExit);
     315
     316    // ---- doubleSourceDataBody
     317    iBuilder->SetInsertPoint(doubleSourceDataBody);
     318    for (int i = 0; i < mStreamSize; i++) {
     319        PHINode* v = phiPdepSourceData[i];
     320        Value* newValue = iBuilder->CreateOr(v, iBuilder->CreateShl(v, iBuilder->simd_fill(mPDEPWidth, phiSourceDataAccessable)));
     321        v->addIncoming(newValue, doubleSourceDataBody);
     322    }
     323    phiSourceDataAccessable->addIncoming(iBuilder->CreateShl(phiSourceDataAccessable, SIZE_1), doubleSourceDataBody);
     324
     325    iBuilder->CreateBr(doubleSourceDataCon);
     326
     327    // ---- doubleSourceDataExit
     328    iBuilder->SetInsertPoint(doubleSourceDataExit);
     329    // At this point, we can guarantee we have enough data for pdep
     330    for (int i = 0; i < mStreamSize; i++) {
     331        // Do Match Copy by PDEP
     332        Value* allFromValue = phiPdepSourceData[i];
     333        Value* newValue = BITBLOCK_0;
     334        for (uint64_t j = 0; j < 4; j++) { // For now, we assume bit block type is always <4 * i64>
     335            Value* source_field = iBuilder->CreateExtractElement(allFromValue, j);
     336            Value * PDEP_field = iBuilder->CreateCall(PDEP_func, {source_field, depositMarker});
     337            newValue = iBuilder->CreateInsertElement(newValue, PDEP_field, j);
     338        }
     339        PHINode* outputValue = outputData[i];
     340        Value* newOutputValue = iBuilder->CreateOr(outputValue, newValue);
     341        outputValue->addIncoming(newOutputValue, iBuilder->GetInsertBlock());
     342    }
     343    phiRemainingM0Marker->addIncoming(newM0Marker, iBuilder->GetInsertBlock());
     344    phiLatestMatchOffset->addIncoming(phiTargetMatchOffset, iBuilder->GetInsertBlock());
     345
     346    iBuilder->CreateBr(matchCopyLoopCon);
     347
     348    // ---- MatchCopyLoopExit
     349    iBuilder->SetInsertPoint(matchCopyLoopExit);
     350    for (int i = 0; i < mStreamSize; i++) {
     351        iBuilder->CreateStore(outputData[i], iBuilder->CreateGEP(outputStreamBasePtrs[i], dataBlockIndex));
     352    }
     353    Value* hasNewCarryBit = iBuilder->CreateAnd(currentInitM0, iBuilder->CreateShl(INT64_1, iBuilder->getInt64(63)));
     354    hasNewCarryBit = iBuilder->CreateICmpNE(hasNewCarryBit, INT64_0);
     355    Value* newCarryBit = iBuilder->CreateSelect(hasNewCarryBit, INT64_1, INT64_0);
     356    phiCarryBit->addIncoming(newCarryBit, iBuilder->GetInsertBlock());
     357
     358    phiCarryMatchOffset->addIncoming(iBuilder->CreateSelect(hasNewCarryBit, phiLatestMatchOffset, iBuilder->getSize(0)), iBuilder->GetInsertBlock());
     359
     360    phiCurrentPosition->addIncoming(iBuilder->CreateAdd(phiCurrentPosition, SIZE_64), iBuilder->GetInsertBlock());
     361
     362    iBuilder->CreateBr(processLoopCon);
     363
     364    // ---- ProcessLoopExit
     365    iBuilder->SetInsertPoint(processLoopExit);
     366    Value * const toProcessItemCount = iBuilder->CreateAdd(processed, itemsToDo);
    170367    iBuilder->setProcessedItemCount("M0Marker", toProcessItemCount);
    171     iBuilder->setProcessedItemCount("sourceStreamSet0", toProcessItemCount);
    172     iBuilder->setScalarField("currentM0MarkerPos", toProcessItemCount);
     368    iBuilder->CreateBr(exitBlock);
     369
     370    // ---- ExitBlock
     371    iBuilder->SetInsertPoint(exitBlock);
     372
    173373
    174374}
    175 
    // Emits, through iBuilder, the instructions for one step of a match copy on the
    // "outputStreamSet<i>" buffers: bits located matchOffset positions before
    // phiMatchPos are OR-ed into the output at phiMatchPos.
    //
    // phiMatchPos    - target position of the copy.
    // phiMatchOffset - distance back from phiMatchPos to the copy source.
    // phiMatchLength - upper bound on the size of this step.
    // Returns currentCopySize (never zero, see the workaround below).
    //
    // NOTE(review): the shifts are derived from newCurrentCopySize, while the value
    // returned to the caller is currentCopySize; the two are computed differently
    // (only the latter accounts for the source block offset) - confirm this is intended.
    176 llvm::Value* LZ4SwizzledMatchCopyKernel::doMatchCopy(const std::unique_ptr<KernelBuilder> & iBuilder, llvm::Value* phiMatchPos, llvm::Value* phiMatchOffset, llvm::Value* phiMatchLength) {
    177     ConstantInt * const SIZE_ZERO = iBuilder->getSize(0);
    178     ConstantInt * const SIZE_ONE = iBuilder->getSize(1);
    179     ConstantInt * const SIZE_PDEP_WIDTH = iBuilder->getSize(mPDEPWidth);
    180     ConstantInt * const SIZE_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
    181 
            // Block indices below are taken modulo the block count of "outputStreamSet0".
    182     ConstantInt * const outputBufferBlocks = iBuilder->getSize(this->getAnyStreamSetBuffer("outputStreamSet0")->getBufferBlocks());
    183 
            // Decompose the target position into:
            //   block index  = (pos / bit-block width) mod outputBufferBlocks
            //   stream index = (pos / PDEP width) mod mStreamCount
            //   bit offset   = pos mod PDEP width
    184     Value* matchPosLocalBlockIndex = iBuilder->CreateURem(iBuilder->CreateUDiv(phiMatchPos, SIZE_BLOCK_WIDTH), outputBufferBlocks);
    185     Value * const matchCopyTargetStreamIndex = iBuilder->CreateURem(iBuilder->CreateUDiv(phiMatchPos, SIZE_PDEP_WIDTH), iBuilder->getSize(mStreamCount));
    186     Value * const matchCopyTargetBlockOffset = iBuilder->CreateURem(phiMatchPos, SIZE_PDEP_WIDTH);
    187 
            // Source position of the copy: matchOffset positions before the target.
    188     Value * const matchCopyFromPos = iBuilder->CreateSub(phiMatchPos, phiMatchOffset);
    189 
    190 
            // Same decomposition for the source position.
    191     Value* matchCopyFromLocalBlockIndex = iBuilder->CreateURem(iBuilder->CreateUDiv(matchCopyFromPos, SIZE_BLOCK_WIDTH), outputBufferBlocks);
    192     Value * const matchCopyFromStreamIndex = iBuilder->CreateURem(iBuilder->CreateUDiv(matchCopyFromPos, SIZE_PDEP_WIDTH), iBuilder->getSize(mStreamCount));
    193     Value * const matchCopyFromBlockOffset = iBuilder->CreateURem(matchCopyFromPos, SIZE_PDEP_WIDTH);
    194 
            // Number of positions from the source offset to the end of its PDEP-width field.
    195     Value* fromBlockRemain = iBuilder->CreateSub(SIZE_PDEP_WIDTH, matchCopyFromBlockOffset);
    196 
            // Returned step size: PDEP width minus the larger of the two offsets, capped by
            // matchOffset and matchLength, and forced to 1 when that would be 0.
    197     Value * currentCopySize = iBuilder->CreateSub(SIZE_PDEP_WIDTH, iBuilder->CreateUMax(matchCopyFromBlockOffset, matchCopyTargetBlockOffset));
    198     currentCopySize = iBuilder->CreateUMin(currentCopySize, phiMatchOffset);
    199     currentCopySize = iBuilder->CreateUMin(currentCopySize, phiMatchLength);
    200     currentCopySize = iBuilder->CreateSelect(iBuilder->CreateICmpEQ(currentCopySize, SIZE_ZERO), SIZE_ONE, currentCopySize); //Workaround for the last byte
    201 
            // Size used for the actual bit manipulation: limited only by the target offset,
            // matchOffset and matchLength (the source offset is not considered here).
    202     Value * newCurrentCopySize = iBuilder->CreateSub(SIZE_PDEP_WIDTH, matchCopyTargetBlockOffset);
    203     newCurrentCopySize = iBuilder->CreateUMin(newCurrentCopySize, phiMatchOffset);
    204     newCurrentCopySize = iBuilder->CreateUMin(newCurrentCopySize, phiMatchLength);
    205 
            // Shift amounts as vectors; presumably simd_fill broadcasts the scalar into every
            // mPDEPWidth-bit field of a bit block - TODO confirm.
            // NOTE(review): when matchCopyFromBlockOffset is 0, fromBlockRemain equals
            // mPDEPWidth, i.e. a shift by the full field width - confirm this is well-defined.
    206     Value * const fromBlockOffsetVector = iBuilder->simd_fill(mPDEPWidth, matchCopyFromBlockOffset);
    207     Value * const fromBlockRemainVector = iBuilder->simd_fill(mPDEPWidth, fromBlockRemain);
    208 
            // Shifting left by (width - size) and then right by (width - size - targetOffset)
            // keeps the low `size` bits of each field and places them at the target offset.
    209     Value * const targetLeftShiftVector = iBuilder->simd_fill(mPDEPWidth, iBuilder->CreateSub(SIZE_PDEP_WIDTH, newCurrentCopySize));
    210     Value * const targetRightShiftVector = iBuilder->simd_fill(mPDEPWidth, iBuilder->CreateSub(SIZE_PDEP_WIDTH, iBuilder->CreateAdd(newCurrentCopySize, matchCopyTargetBlockOffset)));
    211 
            // Apply the same copy to each of the mStreamSize output stream sets.
    212     for (unsigned i = 0; i < mStreamSize; i++) {
    213         Value* basePtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer("outputStreamSet" + std::to_string(i), SIZE_ZERO), iBuilder->getBitBlockType()->getPointerTo());
    214 
                // Source bit block at index (block index * mStreamCount + stream index). The second
                // load reads the following bit block only when the copy extends past the end of
                // the source field; otherwise it re-reads the same block.
    215         Value * const matchCopyFromBlockPtr = iBuilder->CreateGEP(basePtr, iBuilder->CreateAdd(iBuilder->CreateMul(matchCopyFromLocalBlockIndex, iBuilder->getSize(mStreamCount)), matchCopyFromStreamIndex));
    216         Value * const fromBlockValue = iBuilder->CreateBlockAlignedLoad(matchCopyFromBlockPtr);
    217         Value * const fromNextBlockValue = iBuilder->CreateBlockAlignedLoad(iBuilder->CreateGEP(matchCopyFromBlockPtr, iBuilder->CreateSelect(iBuilder->CreateICmpULE(newCurrentCopySize, fromBlockRemain), SIZE_ZERO, SIZE_ONE)));
    218 
                // Align the source bits to position 0: low part from the first block, high part
                // from the second block.
    219         Value * allFromValue = iBuilder->CreateOr(
    220                 iBuilder->CreateLShr(fromBlockValue, fromBlockOffsetVector),
    221                 iBuilder->CreateShl(fromNextBlockValue, fromBlockRemainVector)
    222         );
                // Trim to the copy size and move the bits to the target offset.
    223         Value * allTargetValue = iBuilder->CreateLShr(iBuilder->CreateShl(allFromValue, targetLeftShiftVector), targetRightShiftVector);
    224 
                // Read-modify-write of the target block: the copied bits are OR-ed into the
                // existing contents.
    225         Value * const outputTargetBlockPtr = iBuilder->CreateGEP(basePtr, iBuilder->CreateAdd(iBuilder->CreateMul(matchPosLocalBlockIndex, iBuilder->getSize(mStreamCount)), matchCopyTargetStreamIndex));
    226         Value * const targetOriginalValue = iBuilder->CreateBlockAlignedLoad(outputTargetBlockPtr);
    227 
    228         Value * const finalValue = iBuilder->CreateOr(targetOriginalValue, allTargetValue);
    229 
    230         iBuilder->CreateStore(finalValue, outputTargetBlockPtr);
    231     }
    232     return currentCopySize;
    233 }
    234 
    // Emits a memcpy from each "sourceStreamSet<i>" to the matching "outputStreamSet<i>"
    // for all mStreamSize stream sets, starting at the block pointer for stream index 0.
    235 void LZ4SwizzledMatchCopyKernel::generateOutputCopy(const std::unique_ptr<KernelBuilder> & iBuilder) {
    236     Constant * SIZE_ZERO = iBuilder->getSize(0);
            // Fixed copy size of 4 * 1024 * 1024 * mStreamCount / 8 bytes per stream set;
            // presumably 4Mi one-bit items for each of the mStreamCount streams - TODO confirm.
    237     Constant * COPY_BYTES = iBuilder->getSize(4 * 1024 * 1024 * mStreamCount / 8);
    238     for (unsigned i = 0; i < mStreamSize; i++) {
    239         Value * inputBasePtr = iBuilder->getInputStreamBlockPtr("sourceStreamSet" + std::to_string(i), SIZE_ZERO);
    240         Value * outputBasePtr = iBuilder->getOutputStreamBlockPtr("outputStreamSet" + std::to_string(i), SIZE_ZERO);
    241         iBuilder->CreateMemCpy(outputBasePtr, inputBasePtr, COPY_BYTES, 1); // Alignment is not guaranteed in the final block
    242     }
    243 }
    244 
    245375
    246376LZ4SwizzledMatchCopyKernel::LZ4SwizzledMatchCopyKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder, unsigned streamCount/*=4*/, unsigned streamSize/*=2*/, unsigned swizzleFactor/*=4*/, unsigned PDEP_width/*64*/)
     
    250380                                   Binding{iBuilder->getStreamSetTy(1, 1), "MatchOffsetMarker", BoundedRate(0, 1)},
    251381                                   Binding{iBuilder->getStreamSetTy(1, 1), "M0Marker", BoundedRate(0, 1)},
    252                                    Binding{iBuilder->getStreamSetTy(1, 1), "M0CountMarker", BoundedRate(0, 1)},
    253                                    Binding{iBuilder->getStreamSetTy(1, 8), "byteStream", BoundedRate(0, 1)}
     382                                   Binding{iBuilder->getStreamSetTy(1, 8), "byteStream", RateEqualTo("MatchOffsetMarker")}
    254383},
    255384// Outputs
     
    260389{},
    261390{
    262        Binding{iBuilder->getSizeTy(), "currentProcessIndex"},
    263        Binding{iBuilder->getSizeTy(), "pendingMatchPos"},
    264        Binding{iBuilder->getSizeTy(), "pendingMatchOffset"},
    265        Binding{iBuilder->getSizeTy(), "pendingMatchLength"},
    266391       Binding(iBuilder->getSizeTy(), "currentOffsetMarkerPos"),
    267        Binding(iBuilder->getSizeTy(), "currentM0MarkerPos")
     392//       Binding(iBuilder->getSizeTy(), "currentOffsetMarkerPos"),
    268393})
    269394, mSwizzleFactor(swizzleFactor)
     
    277402    addAttribute(MustExplicitlyTerminate());
    278403
    279     mStreamSetInputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "sourceStreamSet0", BoundedRate(0, 1), Swizzled()});
    280     mStreamSetOutputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "outputStreamSet0", RateEqualTo("sourceStreamSet0")});
     404    mStreamSetInputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "sourceStreamSet0", RateEqualTo("M0Marker"), Swizzled()});
     405    mStreamSetOutputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "outputStreamSet0", RateEqualTo("M0Marker")});
    281406
    282407    for (unsigned i = 1; i < streamSize; i++) {
    283         mStreamSetInputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "sourceStreamSet" + std::to_string(i), RateEqualTo("sourceStreamSet0"), Swizzled()});
    284         mStreamSetOutputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "outputStreamSet" + std::to_string(i), RateEqualTo("sourceStreamSet0")});
     408        mStreamSetInputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "sourceStreamSet" + std::to_string(i), RateEqualTo("M0Marker"), Swizzled()});
     409        mStreamSetOutputs.push_back(Binding{iBuilder->getStreamSetTy(streamCount), "outputStreamSet" + std::to_string(i), RateEqualTo("M0Marker")});
    285410    }
    286411}
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_swizzled_match_copy_kernel.h

    r5981 r6020  
    1 //
    2 // Created by wxy325 on 2018/3/9.
    3 //
    41
    5 #ifndef ICGREP_LZ4_SWIZZLED_MATCH_COPY_KERNEL_H
    6 #define ICGREP_LZ4_SWIZZLED_MATCH_COPY_KERNEL_H
     2#ifndef ICGREP_LZ4_SWIZZLED_MATCH_COPY_KERNEL2_H
     3#define ICGREP_LZ4_SWIZZLED_MATCH_COPY_KERNEL2_H
     4
    75
    86#include "kernels/kernel.h"
     
    1715    protected:
    1816        void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & b) override;
    19         void generateOutputCopy(const std::unique_ptr<KernelBuilder> & iBuilder);
    2017
    2118    private:
     
    2623        const unsigned mStreamCount;
    2724        std::pair<llvm::Value*, llvm::Value*> loadNextMatchOffset(const std::unique_ptr<KernelBuilder> &iBuilder);
    28         std::pair<llvm::Value*, llvm::Value*> loadNextM0StartEnd(const std::unique_ptr<KernelBuilder> &iBuilder);
    2925        llvm::Value *advanceUntilNextBit(const std::unique_ptr<KernelBuilder> &iBuilder, std::string inputName,
    3026                                          llvm::Value *startPos, bool isNextOne);
    31 
    32         llvm::Value* doMatchCopy(const std::unique_ptr<KernelBuilder> & iBuilder, llvm::Value* matchPos, llvm::Value* matchOffset, llvm::Value* matchLength);
    33 
    3427    };
    3528}
    3629
    37 
    38 
    39 
    40 #endif //ICGREP_LZ4_SWIZZLED_MATCH_COPY_KERNEL_H
     30#endif //ICGREP_LZ4_SWIZZLED_MATCH_COPY_KERNEL2_H
  • icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp

    r6008 r6020  
    137137    auto swizzle = this->generateSwizzleExtractData(iBuilder);
    138138
    139     StreamSetBuffer * depositedSwizzle0 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    140     StreamSetBuffer * depositedSwizzle1 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     139    StreamSetBuffer * depositedSwizzle0 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
     140    StreamSetBuffer * depositedSwizzle1 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
    141141
    142142    Kernel * multiplePdepK = pxDriver.addKernelInstance<LZ4MultiplePDEPkernel>(iBuilder, 4, 2, 4);
     
    144144
    145145
    146     StreamSetBuffer * matchCopiedSwizzle0 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
    147     StreamSetBuffer * matchCopiedSwizzle1 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getInputBufferBlocks(), 1);
     146    StreamSetBuffer * matchCopiedSwizzle0 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
     147    StreamSetBuffer * matchCopiedSwizzle1 = pxDriver.addBuffer<CircularCopybackBuffer>(iBuilder, iBuilder->getStreamSetTy(4), this->getDecompressedBufferBlocks(), 1);
    148148
    149149    Kernel * swizzledMatchCopyK = pxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
    150     pxDriver.makeKernelCall(swizzledMatchCopyK, {MatchOffsetMarker, M0Marker, M0CountMarker, ByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
     150    pxDriver.makeKernelCall(swizzledMatchCopyK, {MatchOffsetMarker, M0Marker, ByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
    151151
    152152
     
    195195    hasBlockChecksum = &*(args++);
    196196    hasBlockChecksum->setName("hasBlockChecksum");
     197    // TODO: for now, we do not handle the block checksum
     198    hasBlockChecksum = iBuilder->getInt1(false);
    197199
    198200    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", main, 0));
     
    255257    DeletionMarker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
    256258    M0Marker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
    257     M0CountMarker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getInputBufferBlocks());
    258259    DepositMarker = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->getDecompressedBufferBlocks());
    259260
     
    280281                    DeletionMarker,
    281282                    M0Marker,
    282                     M0CountMarker,
    283283                    MatchOffsetMarker
    284284            });
  • icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.h

    r5974 r6020  
    6363
    6464    // M0CountMarker will not contain anything; it is only used to pass producedItemCount and manage processedItemCount between different kernels
    65     parabix::StreamSetBuffer * M0CountMarker;
    6665    parabix::StreamSetBuffer * M0Marker;
    6766};
  • icGREP/icgrep-devel/icgrep/lz4/LZ4GrepEngine.cpp

    r5955 r6020  
    1 //
    2 // Created by wxy325 on 2018/3/27.
    3 //
    41
    52#include "LZ4GrepEngine.h"
  • icGREP/icgrep-devel/icgrep/lz4/LZ4GrepEngine.h

    r5955 r6020  
    1 //
    2 // Created by wxy325 on 2018/3/27.
    3 //
    41
    52#ifndef ICGREP_LZ4GREPENGINE_H
  • icGREP/icgrep-devel/icgrep/lz4/LZ4GrepGenerator.cpp

    r5998 r6020  
    1 //
    2 // Created by wxy325 on 2018/3/15.
    3 //
    41
    52#include "LZ4GrepGenerator.h"
     
    373370
    374371    Kernel * swizzledMatchCopyK = pxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
    375     pxDriver.makeKernelCall(swizzledMatchCopyK, {MatchOffsetMarker, M0Marker, M0CountMarker, ByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
     372    pxDriver.makeKernelCall(swizzledMatchCopyK, {MatchOffsetMarker, M0Marker, ByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
    376373
    377374
     
    427424
    428425    Kernel * swizzledMatchCopyK = pxDriver.addKernelInstance<LZ4SwizzledMatchCopyKernel>(iBuilder, 4, 2, 4);
    429     pxDriver.makeKernelCall(swizzledMatchCopyK, {MatchOffsetMarker, M0Marker, M0CountMarker, ByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
    430 
     426    pxDriver.makeKernelCall(swizzledMatchCopyK, {MatchOffsetMarker, M0Marker, ByteStream, depositedSwizzle0, depositedSwizzle1}, {matchCopiedSwizzle0, matchCopiedSwizzle1});
    431427
    432428    // Produce unswizzled bit streams
  • icGREP/icgrep-devel/icgrep/lz4/LZ4GrepGenerator.h

    r5957 r6020  
    1 //
    2 // Created by wxy325 on 2018/3/15.
    3 //
    41
    52#ifndef ICGREP_LZ4GREPGENERATOR_H
  • icGREP/icgrep-devel/icgrep/lz4FrameDecoder.cpp

    r5706 r6020  
    1111
    1212namespace {
    13 const size_t minFilesize =
    14     4 +         // Magic number
    15     3 +         // Frame descriptor (3-11 bytes)
    16     4;          // End mark
     13
    1714
    1815// Little-endian.
     
    2017}
    2118
     // Default constructor: intentionally does nothing here. Presumably the object is
     // set up later through init(filename) - TODO confirm against callers.
     19LZ4FrameDecoder::LZ4FrameDecoder() {
     20
     21}
     22
    // Constructs the decoder and immediately delegates all work to init(filename).
    2223LZ4FrameDecoder::LZ4FrameDecoder(const std::string & filename) {
     24    this->init(filename);
     25}
     26
     27void LZ4FrameDecoder::init(const std::string &filename) {
     28    const size_t minFilesize = this->getMinFileSize();
     29
    2330    std::ifstream f(filename, std::ios::binary | std::ios::ate);
    2431    if (f.fail()) {
     
    4451
    4552    mBlocksStart = 4 + mFDLength;       // MagicNb & FD
    46     long long blocksEnd = mFilesize - 4 - (mHasContentChecksum ? 4 : 0);      // EndMark & checksum
     53    long long blocksEnd = mFilesize - this->endMarkSize() - (mHasContentChecksum ? this->contentChecksumSize() : 0);      // EndMark & checksum
    4754    if (blocksEnd > 0 && mBlocksStart <= static_cast<size_t>(blocksEnd)) {
    4855        mBlocksLength = blocksEnd - mBlocksStart;
     
    5259
    5360bool LZ4FrameDecoder::decodeFrameDescriptor(std::ifstream & f) {
     61    const size_t minFilesize = this->getMinFileSize();
     62
    5463    char flag, blockDescriptor, headerChecksum;
    5564    f.get(flag);
     
    6574
    6675    if (mFilesize < minFilesize +
    67             (mHasContentChecksum ? 4 : 0) +
     76            (mHasContentChecksum ? this->contentChecksumSize() : 0) +
    6877            (hasContentSize ? 8 : 0)
    6978       ) {
     
    8190    return true;
    8291}
     92
     // Size of the frame's end mark, in bytes (always 4 in this implementation). It is
     // subtracted from the file size when locating the end of the blocks and is part of
     // the minimum file size. Declared virtual in the header, so it can be overridden.
     93size_t LZ4FrameDecoder::endMarkSize() const {
     94    return 4;
     95}
     96
     // Size of the content checksum, in bytes (always 4 in this implementation). Callers
     // only count it when mHasContentChecksum is set. Declared virtual in the header, so
     // it can be overridden.
     97size_t LZ4FrameDecoder::contentChecksumSize() const {
     98    return 4;
     99}
     100
  • icGREP/icgrep-devel/icgrep/lz4FrameDecoder.h

    r5849 r6020  
    1818class LZ4FrameDecoder {
    1919public:
    20     LZ4FrameDecoder(const std::string & filename);
     20
     21    LZ4FrameDecoder(const std::string &filename);
     22    LZ4FrameDecoder();
     23
     24    void init(const std::string &filename);
    2125
    2226    size_t getBlocksStart() const {
     
    3943    }
    4044
     45protected:
     46    virtual size_t endMarkSize() const;
     47    virtual size_t contentChecksumSize() const;
     48
    4149private:
    4250    bool mValid = false;
     
    4856    bool mHasBlockChecksum;
    4957
    50     bool decodeFrameDescriptor(std::ifstream & f);
     58    bool decodeFrameDescriptor(std::ifstream &f);
     59
     // Smallest file size, in bytes, accepted by the decoder: magic number, the shortest
     // possible frame descriptor, and the end mark (whose size comes from the virtual
     // endMarkSize()).
     60    size_t getMinFileSize() {
     61        return 4 +         // Magic number
     62               3 +         // Frame descriptor (3-11 bytes)
     63               this->endMarkSize();          // End mark
     64    }
    5165};
    5266
Note: See TracChangeset for help on using the changeset viewer.