Changeset 5958 for icGREP/icgrep-devel


Ignore:
Timestamp:
Apr 9, 2018, 11:11:01 PM (15 months ago)
Author:
nmedfort
Message:

Made LZ4IndexBuilderKernel a segment-oriented kernel and cleaned up the code.

Location:
icGREP/icgrep-devel/icgrep/kernels/lz4
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_index_builder.cpp

    r5948 r5958  
    1717
    1818namespace kernel{
     19
    1920    LZ4IndexBuilderKernel::LZ4IndexBuilderKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder)
    20             : MultiBlockKernel("LZ4IndexBuilderKernel",
    21             // Inputs
    22                                {
    23                                        Binding{iBuilder->getStreamSetTy(1, 8), "byteStream", BoundedRate(0, 1)},
    24                                        Binding{iBuilder->getStreamSetTy(1, 1), "extender", RateEqualTo("byteStream"), {DisableTemporaryBuffer(), DisableAvailableItemCountAdjustment(), DisableSufficientChecking()}},
    25 //                                       Binding{iBuilder->getStreamSetTy(1, 1), "CC_0xFX", RateEqualTo("byteStream")},
    26 //                                       Binding{iBuilder->getStreamSetTy(1, 1), "CC_0xXF", RateEqualTo("byteStream")},
    27 
    28                                        // block data
    29                                        Binding{iBuilder->getStreamSetTy(1, 1), "isCompressed", BoundedRate(0, 1),
    30                                                AlwaysConsume()},
    31                                        Binding{iBuilder->getStreamSetTy(1, 64), "blockStart", BoundedRate(0, 1),
    32                                                AlwaysConsume()},
    33                                        Binding{iBuilder->getStreamSetTy(1, 64), "blockEnd", BoundedRate(0, 1),
    34                                                AlwaysConsume()}
    35 
    36                                },
    37             //Outputs
    38                                {
    39                                        // Uncompressed_data
    40                                        Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedStartPos",
    41                                                BoundedRate(0, 1)},
    42                                        Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedLength",
    43                                                BoundedRate(0, 1)},
    44                                        Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedOutputPos",
    45                                                BoundedRate(0, 1)},
    46 
    47                                        Binding{iBuilder->getStreamSetTy(1, 1), "deletionMarker", BoundedRate(0, 1), {DisableTemporaryBuffer(), DisableSufficientChecking()}},
    48                                        Binding{iBuilder->getStreamSetTy(1, 64), "m0Start", BoundedRate(0, 1), DisableSufficientChecking()}, //TODO disable temporary buffer for all output streams
    49                                        Binding{iBuilder->getStreamSetTy(1, 64), "m0End", BoundedRate(0, 1), DisableSufficientChecking()},
    50                                        Binding{iBuilder->getStreamSetTy(1, 64), "matchOffset", BoundedRate(0, 1), DisableSufficientChecking()},
    51                                        Binding{iBuilder->getStreamSetTy(1, 1), "M0Marker", BoundedRate(0, 1), {DisableTemporaryBuffer()}}
    52                                },
    53             //Arguments
    54                                {
    55                                        Binding{iBuilder->getSizeTy(), "fileSize"}
    56                                },
    57                                {},
    58             //Internal states:
    59                                {
    60                                        Binding{iBuilder->getSizeTy(), "blockDataIndex"},
    61                                        Binding{iBuilder->getInt64Ty(), "m0OutputPos"}
    62                                }) {
     21    : SegmentOrientedKernel("LZ4IndexBuilderKernel",
     22    // Inputs
     23    {
     24           Binding{iBuilder->getStreamSetTy(1, 8), "byteStream", BoundedRate(0, 1)},
     25           Binding{iBuilder->getStreamSetTy(1, 1), "extender", RateEqualTo("byteStream")},
     26
     27           // block data
     28           Binding{iBuilder->getStreamSetTy(1, 1), "isCompressed", BoundedRate(0, 1),
     29                   AlwaysConsume()},
     30           Binding{iBuilder->getStreamSetTy(1, 64), "blockStart", BoundedRate(0, 1),
     31                   AlwaysConsume()},
     32           Binding{iBuilder->getStreamSetTy(1, 64), "blockEnd", BoundedRate(0, 1),
     33                   AlwaysConsume()}
     34
     35    },
     36    //Outputs
     37    {
     38           // Uncompressed_data
     39           Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedStartPos",
     40                   BoundedRate(0, 1)},
     41           Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedLength",
     42                   BoundedRate(0, 1)},
     43           Binding{iBuilder->getStreamSetTy(1, 64), "uncompressedOutputPos",
     44                   BoundedRate(0, 1)},
     45
     46           Binding{iBuilder->getStreamSetTy(1, 1), "deletionMarker", BoundedRate(0, 1)},
     47           Binding{iBuilder->getStreamSetTy(1, 64), "m0Start", BoundedRate(0, 1)},
     48           Binding{iBuilder->getStreamSetTy(1, 64), "m0End", BoundedRate(0, 1)},
     49           Binding{iBuilder->getStreamSetTy(1, 64), "matchOffset", BoundedRate(0, 1)},
     50           Binding{iBuilder->getStreamSetTy(1, 1), "M0Marker", BoundedRate(0, 1)}
     51    },
     52    //Arguments
     53    {
     54           Binding{iBuilder->getSizeTy(), "fileSize"}
     55    },
     56    {},
     57    //Internal states:
     58    {
     59           Binding{iBuilder->getSizeTy(), "blockDataIndex"},
     60           Binding{iBuilder->getInt64Ty(), "m0OutputPos"}
     61    }) {
    6362        this->setStride(4 * 1024 * 1024);
    6463        addAttribute(MustExplicitlyTerminate());
    6564    }
    6665
    67     void LZ4IndexBuilderKernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value *const numOfStrides) {
     66    void LZ4IndexBuilderKernel::generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> &iBuilder) {
     67
    6868        BasicBlock* exitBlock = iBuilder->CreateBasicBlock("exitBlock");
    6969        BasicBlock* blockEndConBlock = iBuilder->CreateBasicBlock("blockEndConBlock");
    7070
    71         this->resetPreviousProducedMap(iBuilder, {"deletionMarker", "m0Start", "m0End", "matchOffset", "M0Marker"});
    72 
    73         Value* blockDataIndex = iBuilder->getScalarField("blockDataIndex");
    74 
    75         Value* totalNumber = iBuilder->CreateAdd(iBuilder->getAvailableItemCount("blockEnd"), iBuilder->getProcessedItemCount("blockEnd"));
    76         Value* totalExtender = iBuilder->CreateAdd(iBuilder->getAvailableItemCount("extender"), iBuilder->getProcessedItemCount("extender"));
    77 
    78         Value* blockEnd = this->generateLoadInt64NumberInput(iBuilder, "blockEnd", blockDataIndex);
     71        Value * blockDataIndex = iBuilder->getScalarField("blockDataIndex");
     72
     73        Value * totalNumber = iBuilder->CreateAdd(iBuilder->getAvailableItemCount("blockEnd"), iBuilder->getProcessedItemCount("blockEnd"));
     74        Value * totalExtender = iBuilder->CreateAdd(iBuilder->getAvailableItemCount("extender"), iBuilder->getProcessedItemCount("extender"));
     75
     76        Value * blockEnd = this->generateLoadInt64NumberInput(iBuilder, "blockEnd", blockDataIndex);
    7977
    8078        iBuilder->CreateCondBr(iBuilder->CreateICmpULT(blockDataIndex, totalNumber), blockEndConBlock, exitBlock);
    8179
    8280        iBuilder->SetInsertPoint(blockEndConBlock);
    83 
    84 
    85         Value* blockStart = this->generateLoadInt64NumberInput(iBuilder, "blockStart", blockDataIndex);
    86 
    87         BasicBlock* processBlock = iBuilder->CreateBasicBlock("processBlock");
    88 //        iBuilder->CallPrintInt("----totalExtender", totalExtender);
    89 //        iBuilder->CallPrintInt("----blockStart", blockStart);
    90 //        iBuilder->CallPrintInt("----blockEnd", blockEnd);
    91 
     81        Value * blockStart = this->generateLoadInt64NumberInput(iBuilder, "blockStart", blockDataIndex);
     82        BasicBlock * processBlock = iBuilder->CreateBasicBlock("processBlock");
    9283        iBuilder->CreateCondBr(iBuilder->CreateICmpULE(blockEnd, totalExtender), processBlock, exitBlock);
    93 //        iBuilder->CreateBr(processBlock);
    9484
    9585        iBuilder->SetInsertPoint(processBlock);
     
    9989        this->generateProcessCompressedBlock(iBuilder, blockStart, blockEnd);
    10090
    101         Value* newBlockDataIndex = iBuilder->CreateAdd(blockDataIndex, iBuilder->getInt64(1));
     91        Value * newBlockDataIndex = iBuilder->CreateAdd(blockDataIndex, iBuilder->getInt64(1));
    10292        iBuilder->setScalarField("blockDataIndex", newBlockDataIndex);
    10393        iBuilder->setProcessedItemCount("blockEnd", newBlockDataIndex);
     
    10595        iBuilder->setProcessedItemCount("isCompressed", newBlockDataIndex);
    10696
    107 
    10897        iBuilder->setProcessedItemCount("byteStream", blockEnd);
    109 
    110 
    111 //        iBuilder->setProcessedItemCount("extender", blockEnd);
    112 //        iBuilder->setProcessedItemCount("CC_0xFX", blockEnd);
    113 //        iBuilder->setProcessedItemCount("CC_0xXF", blockEnd);
    114 
    11598        iBuilder->CreateBr(exitBlock);
    11699
     
    121104        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
    122105
    123         Value* extendedLiteralValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf0)), iBuilder->getInt8(0xf0));
    124 //        iBuilder->CallPrintInt("token", token);
     106        Value * extendedLiteralValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf0)), iBuilder->getInt8(0xf0));
    125107
    126108        BasicBlock* extendLiteralLengthBody = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_extend_literal_length_body");
     
    141123        phiCursorPosAfterLiteral->addIncoming(tokenPos, entryBlock);
    142124
    143         Value* literalExtensionSize = iBuilder->CreateSub(phiCursorPosAfterLiteral, tokenPos);
    144 //        iBuilder->CallPrintInt("literalExtensionSize", literalExtensionSize);
    145         Value* finalLengthByte = this->generateLoadSourceInputByte(iBuilder, phiCursorPosAfterLiteral);
     125        Value * literalExtensionSize = iBuilder->CreateSub(phiCursorPosAfterLiteral, tokenPos);
     126        Value * finalLengthByte = this->generateLoadSourceInputByte(iBuilder, phiCursorPosAfterLiteral);
    146127        finalLengthByte = iBuilder->CreateZExt(finalLengthByte, iBuilder->getInt64Ty());
    147         Value* literalLengthExtendValue = iBuilder->CreateSelect(
     128        Value * literalLengthExtendValue = iBuilder->CreateSelect(
    148129                iBuilder->CreateICmpUGT(literalExtensionSize, iBuilder->getSize(0)),
    149130                iBuilder->CreateAdd(
     
    168149        // TODO Clear Output Buffer at the beginning instead of marking 0
    169150        this->markCircularOutputBitstream(iBuilder, "deletionMarker", iBuilder->getProducedItemCount("deletionMarker"), iBuilder->CreateAdd(phiCursorPosAfterLiteral, iBuilder->getSize(1)), true);
    170 //        iBuilder->CallPrintInt("markStart", iBuilder->CreateAdd(phiCursorPosAfterLiteral, iBuilder->getSize(1)));
    171 //        iBuilder->CallPrintInt("phiCursorPosAfterLiteral", phiCursorPosAfterLiteral);
    172151        this->markCircularOutputBitstream(iBuilder, "deletionMarker", iBuilder->CreateAdd(phiCursorPosAfterLiteral, iBuilder->getSize(1)), offsetPos, false);
    173152        this->increaseScalarField(iBuilder, "m0OutputPos", literalLength); //TODO m0OutputPos may be removed from scalar fields
     
    204183
    205184        Value* oldMatchExtensionSize = iBuilder->CreateSub(phiCursorPosAfterMatch, matchLengthStartPos);
    206 //        iBuilder->CallPrintInt("totalExtender", iBuilder->CreateAdd(iBuilder->getAvailableItemCount("extender"), iBuilder->getProcessedItemCount("extender")));
    207 //        iBuilder->CallPrintInt("aaa", oldMatchExtensionSize);
    208 
    209185        extendedMatchValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)), iBuilder->getInt8(0xf));
    210186        Value* matchExtensionSize = iBuilder->CreateSelect(
     
    251227                iBuilder->CreateShl(iBuilder->CreateZExt(this->generateLoadSourceInputByte(iBuilder, iBuilder->CreateAdd(offsetPos, iBuilder->getSize(1))), iBuilder->getSizeTy()), iBuilder->getSize(8))
    252228        );
    253         this->generateStoreNumberOutput(iBuilder, "m0Start", iBuilder->getInt64Ty()->getPointerTo(), outputPos);
    254         this->generateStoreNumberOutput(iBuilder, "m0End", iBuilder->getInt64Ty()->getPointerTo(), outputEndPos);
    255         this->generateStoreNumberOutput(iBuilder, "matchOffset", iBuilder->getInt64Ty()->getPointerTo(), matchOffset);
     229        this->generateStoreNumberOutput(iBuilder, "m0Start", outputPos);
     230        this->generateStoreNumberOutput(iBuilder, "m0End", outputEndPos);
     231        this->generateStoreNumberOutput(iBuilder, "matchOffset", matchOffset);
    256232        this->increaseScalarField(iBuilder, "m0OutputPos", matchLength);
    257233        this->markCircularOutputBitstream(iBuilder, "M0Marker", outputPos, outputEndPos, true, false);
     
    259235        return iBuilder->CreateAdd(phiCursorPosAfterMatch, INT64_ONE);
    260236    }
    261 
    262237
    263238    void LZ4IndexBuilderKernel::generateProcessCompressedBlock(const std::unique_ptr<KernelBuilder> &iBuilder, Value* blockStart, Value* blockEnd) {
     
    292267        //TODO add acceleration here
    293268        Value* token = this->generateLoadSourceInputByte(iBuilder, phiCursorValue);
    294 
    295 //        iBuilder->CallPrintInt("tokenPos", phiCursorValue);
    296 //        iBuilder->CallPrintInt("token", token);
    297 
    298269        // Process Literal
    299270        BasicBlock* processLiteralBlock = iBuilder->CreateBasicBlock("processLiteralBlock");
     
    302273
    303274        Value* offsetPos = this->processLiteral(iBuilder, token, phiCursorValue, blockEnd);
    304 //        iBuilder->CallPrintInt("offsetPos", offsetPos);
    305275        // Process Match
    306276        BasicBlock* handleM0BodyBlock = iBuilder->CreateBasicBlock("block_data_loop_handle_compressed_block_loop_handle_m0_body");
     
    316286        iBuilder->SetInsertPoint(handleM0BodyBlock);
    317287        Value* nextTokenPos = this->processMatch(iBuilder, offsetPos, token, blockEnd);
    318 //        iBuilder->CallPrintInt("nextTokenPos", nextTokenPos);
    319288        phiCursorValue->addIncoming(nextTokenPos, iBuilder->GetInsertBlock());
    320289
     
    328297        // Store final M0 pos to make sure the bit stream will be long enough
    329298        Value* finalM0OutputPos = iBuilder->getScalarField("m0OutputPos");
    330 //        iBuilder->CallPrintInt("finalM0OutputPos", finalM0OutputPos);
    331         this->generateStoreNumberOutput(iBuilder, "m0Start", iBuilder->getInt64Ty()->getPointerTo(), finalM0OutputPos);
    332         this->generateStoreNumberOutput(iBuilder, "m0End", iBuilder->getInt64Ty()->getPointerTo(), finalM0OutputPos);
    333         this->generateStoreNumberOutput(iBuilder, "matchOffset", iBuilder->getInt64Ty()->getPointerTo(), iBuilder->getInt64(0));
     299        this->generateStoreNumberOutput(iBuilder, "m0Start", finalM0OutputPos);
     300        this->generateStoreNumberOutput(iBuilder, "m0End", finalM0OutputPos);
     301        this->generateStoreNumberOutput(iBuilder, "matchOffset", iBuilder->getInt64(0));
    334302        iBuilder->setProducedItemCount("M0Marker", finalM0OutputPos);
    335303        // finalM0OutputPos should always be 4MB * n except for the final block
     
    341309    }
    342310
    343     Value *LZ4IndexBuilderKernel::advanceUntilNextZero(const unique_ptr<KernelBuilder> &iBuilder, string inputName, Value* startPos, Value* maxPos) {
    344         return advanceUntilNextValue(iBuilder, inputName, startPos, true, maxPos);
    345     }
    346 
    347     Value *LZ4IndexBuilderKernel::advanceUntilNextOne(const unique_ptr<KernelBuilder> &iBuilder, string inputName, Value* startPos, Value* maxPos) {
    348         return advanceUntilNextValue(iBuilder, inputName, startPos, false, maxPos);
    349     }
    350 
    351     Value *LZ4IndexBuilderKernel::advanceUntilNextValue(const unique_ptr<KernelBuilder> &iBuilder, string inputName, Value* startPos, bool isNextZero, Value* maxPos) {
     311    Value * LZ4IndexBuilderKernel::advanceUntilNextZero(const unique_ptr<KernelBuilder> &iBuilder, string inputName, Value * startPos, Value * maxPos) {
     312
    352313        unsigned int bitBlockWidth = iBuilder->getBitBlockWidth();
    353314        Constant* INT64_BIT_BLOCK_WIDTH = iBuilder->getInt64(bitBlockWidth);
    354         Constant* SIZE_ZERO = iBuilder->getSize(0);
    355315        Type* bitBlockType = iBuilder->getBitBlockType();
    356316        Type* bitBlockWidthIntTy = iBuilder->getIntNTy(bitBlockWidth);
    357 
    358         Value* baseInputBlockIndex = iBuilder->CreateUDiv(iBuilder->getProcessedItemCount(inputName), INT64_BIT_BLOCK_WIDTH);
    359317
    360318        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
     
    376334        iBuilder->SetInsertPoint(advanceBodyBlock);
    377335
    378 
    379         Value* currentBlockGlobalPos = iBuilder->CreateUDiv(phiCurrentPos, INT64_BIT_BLOCK_WIDTH);
    380         Value* currentPosBitBlockIndex = iBuilder->CreateSub(currentBlockGlobalPos, baseInputBlockIndex);
    381 
    382         Value* currentPosBitBlockOffset = iBuilder->CreateURem(phiCurrentPos, INT64_BIT_BLOCK_WIDTH);
    383 
    384         Value* ptr = iBuilder->getInputStreamBlockPtr(inputName, SIZE_ZERO, currentPosBitBlockIndex);
    385         Value* rawPtr = iBuilder->CreatePointerCast(iBuilder->getRawInputPointer(inputName, SIZE_ZERO), bitBlockType->getPointerTo());
    386         Value* ptr2 = iBuilder->CreateGEP(rawPtr, iBuilder->CreateURem(currentBlockGlobalPos, iBuilder->getSize(this->getAnyStreamSetBuffer(inputName)->getBufferBlocks())));
    387         ptr = ptr2; //TODO workaround here
    388 
    389 
    390         Value* currentBitValue = iBuilder->CreateBitCast(iBuilder->CreateLoad(ptr), bitBlockWidthIntTy);
    391 
     336        Value * currentBlockGlobalPos = iBuilder->CreateAnd(phiCurrentPos, ConstantExpr::getNeg(INT64_BIT_BLOCK_WIDTH));
     337        Value * currentPosBitBlockOffset = iBuilder->CreateURem(phiCurrentPos, INT64_BIT_BLOCK_WIDTH);
     338
     339        Value * ptr = iBuilder->CreatePointerCast(iBuilder->getRawInputPointer(inputName, currentBlockGlobalPos), bitBlockType->getPointerTo());
     340
     341        Value * currentBitValue = iBuilder->CreateBitCast(iBuilder->CreateLoad(ptr), bitBlockWidthIntTy);
    392342        currentBitValue = iBuilder->CreateLShr(currentBitValue, iBuilder->CreateZExt(currentPosBitBlockOffset, bitBlockWidthIntTy));
    393         if (isNextZero) {
    394             currentBitValue = iBuilder->CreateNot(currentBitValue);
    395         }
    396         Value* forwardZeroCount = iBuilder->CreateTrunc(iBuilder->CreateCountForwardZeroes(currentBitValue), iBuilder->getInt64Ty());
    397         Value* newOffset = iBuilder->CreateAdd(currentPosBitBlockOffset, forwardZeroCount);
     343        currentBitValue = iBuilder->CreateNot(currentBitValue);
     344
     345        Value * forwardZeroCount = iBuilder->CreateTrunc(iBuilder->CreateCountForwardZeroes(currentBitValue), iBuilder->getInt64Ty());
     346        Value * newOffset = iBuilder->CreateAdd(currentPosBitBlockOffset, forwardZeroCount);
    398347        newOffset = iBuilder->CreateUMin(newOffset, INT64_BIT_BLOCK_WIDTH);
    399348
    400         Value* actualAdvanceValue = iBuilder->CreateSub(newOffset, currentPosBitBlockOffset);
    401         Value* newPos = iBuilder->CreateAdd(phiCurrentPos, actualAdvanceValue);
     349        Value * actualAdvanceValue = iBuilder->CreateSub(newOffset, currentPosBitBlockOffset);
     350        Value * newPos = iBuilder->CreateAdd(phiCurrentPos, actualAdvanceValue);
    402351        if (maxPos) {
    403352            newPos = iBuilder->CreateUMin(maxPos, newPos);
     
    406355        }
    407356
    408         phiIsFinish->addIncoming(iBuilder->CreateNot(iBuilder->CreateICmpEQ(newOffset, INT64_BIT_BLOCK_WIDTH)), iBuilder->GetInsertBlock());
     357        phiIsFinish->addIncoming(iBuilder->CreateICmpNE(newOffset, INT64_BIT_BLOCK_WIDTH), iBuilder->GetInsertBlock());
    409358        phiCurrentPos->addIncoming(newPos, iBuilder->GetInsertBlock());
    410359        iBuilder->CreateBr(advanceConBlock);
     
    414363    }
    415364
    416     Value * LZ4IndexBuilderKernel::generateLoadInt64NumberInput(const unique_ptr<KernelBuilder> &iBuilder, string inputBufferName, Value *globalOffset) {
    417         Constant* SIZE_STRIDE_SIZE = iBuilder->getSize(this->getStride());
    418         Constant* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
    419         Constant* SIZE_ZERO = iBuilder->getSize(0);
    420 
    421 //        Value* baseInputBlockIndex = iBuilder->CreateUDiv(iBuilder->getProcessedItemCount(inputBufferName), SIZE_BIT_BLOCK_WIDTH);
    422 
    423         //TODO possible bug here, maybe we need to use iBuilder->getStride()
    424         Value* offset = iBuilder->CreateSub(globalOffset, iBuilder->CreateMul(iBuilder->CreateUDiv(iBuilder->getProcessedItemCount(inputBufferName), SIZE_STRIDE_SIZE), SIZE_STRIDE_SIZE));
    425 
    426         Value* targetBlockIndex = iBuilder->CreateUDiv(offset, SIZE_BIT_BLOCK_WIDTH);
    427         Value* localOffset = iBuilder->CreateURem(offset, SIZE_BIT_BLOCK_WIDTH);
    428 
    429         //[64 x <4 x i64>]*
    430         Value* ptr = iBuilder->getInputStreamBlockPtr(inputBufferName, SIZE_ZERO, targetBlockIndex);
    431         ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt64Ty()->getPointerTo());
    432         //GEP here is safe
    433         Value* valuePtr = iBuilder->CreateGEP(ptr, localOffset);
     365    Value * LZ4IndexBuilderKernel::generateLoadInt64NumberInput(const unique_ptr<KernelBuilder> &iBuilder, string inputBufferName, Value * globalOffset) {
     366        Constant* SIZE_STRIDE_SIZE = iBuilder->getSize(getStride());
     367        Value * processed = iBuilder->getProcessedItemCount(inputBufferName);
     368        processed = iBuilder->CreateAnd(processed, ConstantExpr::getNeg(SIZE_STRIDE_SIZE));
     369        Value * offset = iBuilder->CreateSub(globalOffset, processed);
     370        Value * valuePtr = iBuilder->getRawInputPointer(inputBufferName, offset);
    434371        return iBuilder->CreateLoad(valuePtr);
    435372    }
    436373
    437     Value *LZ4IndexBuilderKernel::generateLoadSourceInputByte(const std::unique_ptr<KernelBuilder> &iBuilder, Value *offset) {
    438         // The external buffer is always linear accessible, so the GEP here is safe
    439         Value *blockStartPtr = iBuilder->CreatePointerCast(
    440                 iBuilder->getRawInputPointer("byteStream", iBuilder->getInt32(0)),
    441                 iBuilder->getInt8PtrTy()
    442         );
    443         Value *ptr = iBuilder->CreateGEP(blockStartPtr, offset);
     374    Value *LZ4IndexBuilderKernel::generateLoadSourceInputByte(const std::unique_ptr<KernelBuilder> &iBuilder, Value * offset) {
     375        Value * ptr = iBuilder->getRawInputPointer("byteStream", offset);
    444376        return iBuilder->CreateLoad(ptr);
    445377    }
     
    463395        Value* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(bitBlockWidth);
    464396        Value* SIZE_ONE = iBuilder->getSize(1);
    465         Value* SIZE_ZERO = iBuilder->getSize(0);
    466397        Type * const INT_BIT_BLOCK_TY = iBuilder->getIntNTy(bitBlockWidth);
    467398        Type * const BIT_BLOCK_TY = iBuilder->getBitBlockType();
     
    469400        Constant* INT_BIT_BLOCK_ZERO = ConstantInt::get(INT_BIT_BLOCK_TY, 0);
    470401
    471         Value* previousProduced = this->previousProducedMap.find(bitstreamName)->second;
    472         Value* blockIndexBase = iBuilder->CreateUDiv(previousProduced, SIZE_BIT_BLOCK_WIDTH);
    473 
    474402        BasicBlock *entryBlock = iBuilder->GetInsertBlock();
    475403        BasicBlock *conBlock = iBuilder->CreateBasicBlock("mark_bit_one_con");
     
    477405        BasicBlock *exitBlock = iBuilder->CreateBasicBlock("mark_bit_one_exit");
    478406
    479         Value* startBlockLocalIndex = iBuilder->CreateSub(iBuilder->CreateUDiv(start, SIZE_BIT_BLOCK_WIDTH), blockIndexBase);
     407        Value * startBlockLocalIndex = iBuilder->CreateUDiv(start, SIZE_BIT_BLOCK_WIDTH);
    480408
    481409        iBuilder->CreateBr(conBlock);
     
    486414        PHINode *curBlockLocalIndex = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
    487415        curBlockLocalIndex->addIncoming(startBlockLocalIndex, entryBlock);
     416
     417
    488418        iBuilder->CreateCondBr(
    489                 iBuilder->CreateICmpULT(iBuilder->CreateMul(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), SIZE_BIT_BLOCK_WIDTH), end),
     419                iBuilder->CreateICmpULT(iBuilder->CreateMul(curBlockLocalIndex, SIZE_BIT_BLOCK_WIDTH), end),
    490420                bodyBlock,
    491421                exitBlock
     
    495425        iBuilder->SetInsertPoint(bodyBlock);
    496426
    497         Value *outputLowestBitValue = iBuilder->CreateSelect(
    498                 iBuilder->CreateICmpULE(
    499                         iBuilder->CreateMul(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), SIZE_BIT_BLOCK_WIDTH),
    500                         start
    501                 ),
    502                 iBuilder->CreateShl(INT_BIT_BLOCK_ONE, iBuilder->CreateZExt(iBuilder->CreateURem(start, SIZE_BIT_BLOCK_WIDTH), INT_BIT_BLOCK_TY)),
    503                 INT_BIT_BLOCK_ONE
    504         );
    505 
    506         Value *hasNotReachEnd = iBuilder->CreateICmpULE(
    507                 iBuilder->CreateMul(iBuilder->CreateAdd(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), SIZE_ONE), SIZE_BIT_BLOCK_WIDTH),
    508                 end
    509         );
    510         Value *producedItemsCount = iBuilder->CreateSelect(
    511                 hasNotReachEnd,
    512                 iBuilder->CreateMul(iBuilder->CreateAdd(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), SIZE_ONE), SIZE_BIT_BLOCK_WIDTH),
    513                 end
    514         );
    515 
    516 
    517         Value *outputHighestBitValue = iBuilder->CreateSelect(
    518                 hasNotReachEnd,
    519                 INT_BIT_BLOCK_ZERO,
    520                 iBuilder->CreateShl(
    521                         INT_BIT_BLOCK_ONE,
    522                         iBuilder->CreateZExt(iBuilder->CreateURem(end, SIZE_BIT_BLOCK_WIDTH), INT_BIT_BLOCK_TY)
    523                 )
    524         );
    525 
    526 
    527         Value *bitMask = iBuilder->CreateSub(
    528                 outputHighestBitValue,
    529                 outputLowestBitValue
    530         );
    531 
    532         if (!isOne) {
    533             bitMask = iBuilder->CreateNot(bitMask);
    534         }
    535 
    536         Value *targetPtr = iBuilder->getOutputStreamBlockPtr(bitstreamName, SIZE_ZERO, curBlockLocalIndex);
    537         Value *rawInputPointer = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(bitstreamName, SIZE_ZERO), iBuilder->getBitBlockType()->getPointerTo());
    538         Value * ptr = iBuilder->CreateGEP(rawInputPointer, iBuilder->CreateURem(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), iBuilder->getSize(this->getAnyStreamSetBuffer(bitstreamName)->getBufferBlocks())));
    539 //        iBuilder->CallPrintInt("targetPtr", targetPtr);
    540 //        iBuilder->CallPrintInt("targetPtr2", ptr);
    541         targetPtr = ptr; //TODO workaround here
    542 
    543 
    544         //TODO fixed circular here
    545 
    546         Value *oldValue = iBuilder->CreateLoad(targetPtr);
    547         oldValue = iBuilder->CreateBitCast(oldValue, INT_BIT_BLOCK_TY);
    548         Value *newValue = NULL;
     427        Value * const currentPosition = iBuilder->CreateMul(curBlockLocalIndex, SIZE_BIT_BLOCK_WIDTH);
     428        Value * lowestBitPosition = iBuilder->CreateURem(start, SIZE_BIT_BLOCK_WIDTH);
     429        lowestBitPosition = iBuilder->CreateZExt(lowestBitPosition, INT_BIT_BLOCK_TY);
     430        Value * outputLowestBitValue = iBuilder->CreateShl(INT_BIT_BLOCK_ONE, lowestBitPosition);
     431        Value * const hasNotReachedStart = iBuilder->CreateICmpULE(currentPosition, start);
     432        outputLowestBitValue = iBuilder->CreateSelect(hasNotReachedStart, outputLowestBitValue, INT_BIT_BLOCK_ONE);
     433
     434        Value * const nextPosition = iBuilder->CreateMul(iBuilder->CreateAdd(curBlockLocalIndex, SIZE_ONE), SIZE_BIT_BLOCK_WIDTH);
     435        Value * const hasNotReachEnd = iBuilder->CreateICmpULE(nextPosition, end);
     436        Value * producedItemsCount = iBuilder->CreateSelect(hasNotReachEnd, nextPosition, end);
     437        Value * highestBitPosition = iBuilder->CreateURem(end, SIZE_BIT_BLOCK_WIDTH);
     438        highestBitPosition = iBuilder->CreateZExt(highestBitPosition, INT_BIT_BLOCK_TY);
     439        Value * outputHighestBitValue = iBuilder->CreateShl(INT_BIT_BLOCK_ONE, highestBitPosition);
     440        outputHighestBitValue = iBuilder->CreateSelect(hasNotReachEnd, INT_BIT_BLOCK_ZERO, outputHighestBitValue);
     441        Value * bitMask = iBuilder->CreateSub(outputHighestBitValue, outputLowestBitValue);
     442        bitMask = iBuilder->CreateBitCast(bitMask, BIT_BLOCK_TY);
     443
     444        Value * targetPtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(bitstreamName, currentPosition), iBuilder->getBitBlockType()->getPointerTo());
     445        Value * oldValue = iBuilder->CreateBlockAlignedLoad(targetPtr);
     446        Value * newValue = nullptr;
    549447        if (isOne) {
    550448            newValue = iBuilder->CreateOr(oldValue, bitMask);
    551449        } else {
    552             newValue = iBuilder->CreateAnd(oldValue, bitMask);
     450            newValue = iBuilder->CreateAnd(oldValue, iBuilder->CreateNot(bitMask));
    553451        }
    554 
    555         iBuilder->CreateStore(
    556                 iBuilder->CreateBitCast(newValue, BIT_BLOCK_TY),
    557                 targetPtr
    558         );
     452        iBuilder->CreateStore(newValue, targetPtr);
     453
    559454        if (setProduced) {
    560455            iBuilder->setProducedItemCount(bitstreamName, producedItemsCount);
     
    570465
    571466
    572 
    573467    void LZ4IndexBuilderKernel::generateStoreNumberOutput(const unique_ptr<KernelBuilder> &iBuilder,
    574                                                           const string &outputBufferName, Type *pointerType,
    575                                                           Value *value) {
    576 
    577         Value* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
    578         Value* SIZE_ZERO = iBuilder->getSize(0);
    579         Value* SIZE_ONE = iBuilder->getSize(1);
    580 
    581         Value* previousProduced = previousProducedMap.find(outputBufferName)->second;
    582 //        iBuilder->CallPrintInt("previousProduced", previousProduced);
    583 
    584         Value* blockIndexBase = iBuilder->CreateUDiv(previousProduced, SIZE_BIT_BLOCK_WIDTH);
    585         Value* outputOffset = iBuilder->getProducedItemCount(outputBufferName);
    586         Value* blockIndex = iBuilder->CreateUDiv(outputOffset, SIZE_BIT_BLOCK_WIDTH);
    587 
    588         Value* blockOffset = iBuilder->CreateURem(outputOffset, SIZE_BIT_BLOCK_WIDTH);
    589 
    590         // i8, [8 x <4 x i64>]*
    591         // i64, [64 x <4 x i64>]*
    592         Value* ptr = iBuilder->getOutputStreamBlockPtr(outputBufferName, SIZE_ZERO, iBuilder->CreateSub(blockIndex, blockIndexBase));
    593         ptr = iBuilder->CreatePointerCast(ptr, pointerType);
    594         ptr = iBuilder->CreateGEP(ptr, blockOffset);
    595 
    596         Value* tmpOffset = iBuilder->CreateURem(outputOffset, iBuilder->getSize(this->getAnyStreamSetBuffer(outputBufferName)->getBufferBlocks() * iBuilder->getBitBlockWidth()));
    597         Value* outputRawPtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(outputBufferName, SIZE_ZERO), pointerType);
    598         Value* ptr2 = iBuilder->CreateGEP(outputRawPtr, tmpOffset);
    599         ptr = ptr2;
    600 //        iBuilder->CallPrintInt("ptr", ptr);
    601 //        iBuilder->CallPrintInt("ptr2", ptr2);
    602 
    603         // GEP here is safe
    604         iBuilder->CreateStore(value, ptr);
    605 
    606         if (outputBufferName == "m0End") {
    607 //            iBuilder->CallPrintInt("output:m0End", value);
    608         }
    609 
    610         iBuilder->setProducedItemCount(outputBufferName, iBuilder->CreateAdd(outputOffset, SIZE_ONE));
    611     }
    612 
    613 
    614     void LZ4IndexBuilderKernel::resetPreviousProducedMap(const std::unique_ptr<KernelBuilder> &iBuilder,
    615                                                          std::vector<std::string> outputList) {
    616         previousProducedMap.clear();
    617         for (auto iter = outputList.begin(); iter != outputList.end(); ++iter) {
    618             previousProducedMap.insert(std::make_pair(*iter, iBuilder->getProducedItemCount(*iter)));
    619         }
    620     }
     468                                                          const string & outputBufferName,
     469                                                          Value * value) {
     470
     471        Value * outputOffset = iBuilder->getProducedItemCount(outputBufferName);
     472        Value * outputRawPtr = iBuilder->getRawOutputPointer(outputBufferName, outputOffset);
     473        iBuilder->CreateStore(value, outputRawPtr);
     474        iBuilder->setProducedItemCount(outputBufferName, iBuilder->CreateAdd(outputOffset, iBuilder->getSize(1)));
     475    }
     476
    621477}
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_index_builder.h

    r5923 r5958  
    2121
    2222namespace kernel {
    23     class LZ4IndexBuilderKernel final : public MultiBlockKernel {
     23    class LZ4IndexBuilderKernel final : public SegmentOrientedKernel {
    2424    public:
    2525        LZ4IndexBuilderKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder);
    2626
    2727    protected:
    28         void generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> &iBuilder,
    29                                      llvm::Value *const numOfStrides) override;
     28        void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & b) override;
    3029
    3130    private:
     
    4140        llvm::Value *advanceUntilNextZero(const std::unique_ptr<KernelBuilder> &iBuilder, std::string inputName,
    4241                                          llvm::Value *startPos, llvm::Value *maxPos = nullptr);
    43 
    44         llvm::Value *advanceUntilNextOne(const std::unique_ptr<KernelBuilder> &iBuilder, std::string inputName,
    45                                          llvm::Value *startPos, llvm::Value *maxPos = nullptr);
    46 
    47         llvm::Value *advanceUntilNextValue(const std::unique_ptr<KernelBuilder> &iBuilder, std::string inputName,
    48                                            llvm::Value *startPos, bool isNextZero, llvm::Value *maxPos = nullptr);
    4942
    5043        void increaseScalarField(const std::unique_ptr<KernelBuilder> &iBuilder, const std::string &fieldName,
     
    6861
    6962        void generateStoreNumberOutput(const std::unique_ptr<KernelBuilder> &iBuilder,
    70                                        const std::string &outputBufferName, llvm::Type *pointerType,
     63                                       const std::string &outputBufferName,
    7164                                       llvm::Value *value);
    7265
    73         void resetPreviousProducedMap(const std::unique_ptr<KernelBuilder> &iBuilder, std::vector<std::string> outputList);
    74         std::map<std::string, llvm::Value*> previousProducedMap;
    7566    };
    7667}
Note: See TracChangeset for help on using the changeset viewer.