Ignore:
Timestamp:
Apr 5, 2018, 4:03:37 AM (14 months ago)
Author:
xwa163
Message:
  1. Add attributes to disable some features of the multiblock kernel
  2. Fix a bug in the new lz4d approach when handling large data; all test cases now pass
  3. Disable the lz4d-related test cases for the old approach
File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_index_builder.cpp

    r5939 r5941  
    1515using namespace kernel;
    1616using namespace std;
     17
    1718namespace kernel{
    1819    LZ4IndexBuilderKernel::LZ4IndexBuilderKernel(const std::unique_ptr<kernel::KernelBuilder> &iBuilder)
     
    2122                               {
    2223                                       Binding{iBuilder->getStreamSetTy(1, 8), "byteStream", BoundedRate(0, 1)},
    23                                        Binding{iBuilder->getStreamSetTy(1, 1), "extender", RateEqualTo("byteStream")},
    24                                        Binding{iBuilder->getStreamSetTy(1, 1), "CC_0xFX", RateEqualTo("byteStream")},
    25                                        Binding{iBuilder->getStreamSetTy(1, 1), "CC_0xXF", RateEqualTo("byteStream")},
     24                                       Binding{iBuilder->getStreamSetTy(1, 1), "extender", RateEqualTo("byteStream"), {DisableTemporaryBuffer(), DisableAvailableItemCountAdjustment(), DisableSufficientChecking()}},
     25//                                       Binding{iBuilder->getStreamSetTy(1, 1), "CC_0xFX", RateEqualTo("byteStream")},
     26//                                       Binding{iBuilder->getStreamSetTy(1, 1), "CC_0xXF", RateEqualTo("byteStream")},
    2627
    2728                                       // block data
    2829                                       Binding{iBuilder->getStreamSetTy(1, 1), "isCompressed", BoundedRate(0, 1),
    29                                                ConstantStrideLengthOne()},
     30                                               AlwaysConsume()},
    3031                                       Binding{iBuilder->getStreamSetTy(1, 64), "blockStart", BoundedRate(0, 1),
    31                                                ConstantStrideLengthOne()},
     32                                               AlwaysConsume()},
    3233                                       Binding{iBuilder->getStreamSetTy(1, 64), "blockEnd", BoundedRate(0, 1),
    33                                                ConstantStrideLengthOne()}
     34                                               AlwaysConsume()}
    3435
    3536                               },
     
    4445                                               BoundedRate(0, 1)},
    4546
    46                                        Binding{iBuilder->getStreamSetTy(1, 1), "e1Marker", BoundedRate(0, 1)},
    47                                        Binding{iBuilder->getStreamSetTy(1, 64), "m0Start", BoundedRate(0, 1)},
    48                                        Binding{iBuilder->getStreamSetTy(1, 64), "m0End", BoundedRate(0, 1)},
    49                                        Binding{iBuilder->getStreamSetTy(1, 64), "matchOffset", BoundedRate(0, 1)}
     47                                       Binding{iBuilder->getStreamSetTy(1, 1), "e1Marker", BoundedRate(0, 1), {DisableTemporaryBuffer(), DisableSufficientChecking()}},
     48                                       Binding{iBuilder->getStreamSetTy(1, 64), "m0Start", BoundedRate(0, 1), DisableSufficientChecking()}, //TODO disable temporary buffer for all output streams
     49                                       Binding{iBuilder->getStreamSetTy(1, 64), "m0End", BoundedRate(0, 1), DisableSufficientChecking()},
     50                                       Binding{iBuilder->getStreamSetTy(1, 64), "matchOffset", BoundedRate(0, 1), DisableSufficientChecking()},
     51                                       Binding{iBuilder->getStreamSetTy(1, 1), "M0Marker", BoundedRate(0, 1), {DisableTemporaryBuffer()}}
    5052                               },
    5153            //Arguments
    52                                {},
     54                               {
     55                                       Binding{iBuilder->getSizeTy(), "fileSize"}
     56                               },
    5357                               {},
    5458            //Internal states:
     
    5761                                       Binding{iBuilder->getInt64Ty(), "m0OutputPos"}
    5862                               }) {
    59 //        addAttribute(MustExplicitlyTerminate());
     63        this->setStride(4 * 1024 * 1024);
     64        addAttribute(MustExplicitlyTerminate());
    6065    }
    6166
    6267    void LZ4IndexBuilderKernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value *const numOfStrides) {
    63 //        iBuilder->CallPrintInt("entry", iBuilder->getSize(0));
    64 //        iBuilder->CallPrintInt("aaa", iBuilder->getProducedItemCount("e1Marker"));
    65 
    6668
    6769
     
    7476
    7577        Value* totalNumber = iBuilder->CreateAdd(iBuilder->getAvailableItemCount("blockEnd"), iBuilder->getProcessedItemCount("blockEnd"));
    76 
     78        Value* totalExtender = iBuilder->CreateAdd(iBuilder->getAvailableItemCount("extender"), iBuilder->getProcessedItemCount("extender"));
    7779//        iBuilder->CallPrintInt("blockDataIndex", blockDataIndex);
    78 //        iBuilder->CallPrintInt("totalNumber", totalNumber);
    79 //        iBuilder->setTerminationSignal(iBuilder->CreateICmpEQ(availableBlockEnd, iBuilder->getSize(0)));
     80
    8081
    8182        iBuilder->CreateCondBr(iBuilder->CreateICmpULT(blockDataIndex, totalNumber), blockEndConBlock, exitBlock);
     
    8384        iBuilder->SetInsertPoint(blockEndConBlock);
    8485        Value* blockEnd = this->generateLoadInt64NumberInput(iBuilder, "blockEnd", blockDataIndex);
    85         iBuilder->CallPrintInt("blockEnd", blockEnd);
    86 
    87         Value* totalExtender = iBuilder->CreateAdd(iBuilder->getAvailableItemCount("extender"), iBuilder->getProcessedItemCount("extender"));
    88 //        iBuilder->CallPrintInt("totalExtender", totalExtender);
    89 
    90 //        iBuilder->CallPrintInt("processByteStream", iBuilder->getProcessedItemCount("byteStream"));
    91 //        iBuilder->CallPrintInt("availableByteStream", iBuilder->getAvailableItemCount("byteStream"));
    92 
    93 
    94 //        iBuilder->CallPrintInt("consumedExtender", iBuilder->getConsumedItemCount("extender"));
    95 //        iBuilder->CallPrintInt("processExtender", iBuilder->getProcessedItemCount("extender"));
    96 //        iBuilder->CallPrintInt("availableExtender", iBuilder->getAvailableItemCount("extender"));
    97 //        iBuilder->CallPrintInt("blockDataIndex", blockDataIndex);
    98 
    9986
    10087        Value* blockStart = this->generateLoadInt64NumberInput(iBuilder, "blockStart", blockDataIndex);
     
    10592//        iBuilder->CallPrintInt("----blockEnd", blockEnd);
    10693
    107         iBuilder->CreateCondBr(iBuilder->CreateICmpULE(blockEnd, totalExtender), processBlock, exitBlock);
     94//        iBuilder->CreateCondBr(iBuilder->CreateICmpULE(blockEnd, totalExtender), processBlock, exitBlock);
     95        iBuilder->CreateBr(processBlock);
    10896
    10997        iBuilder->SetInsertPoint(processBlock);
    11098
    111 
    11299        //TODO handle uncompressed block
     100
    113101        this->generateProcessCompressedBlock(iBuilder, blockStart, blockEnd);
    114 
    115 
    116102
    117103        Value* newBlockDataIndex = iBuilder->CreateAdd(blockDataIndex, iBuilder->getInt64(1));
     
    123109
    124110        iBuilder->setProcessedItemCount("byteStream", blockEnd);
     111
     112
    125113//        iBuilder->setProcessedItemCount("extender", blockEnd);
    126114//        iBuilder->setProcessedItemCount("CC_0xFX", blockEnd);
     
    182170        // TODO Clear Output Buffer at the beginning instead of marking 0
    183171        this->markCircularOutputBitstream(iBuilder, "e1Marker", iBuilder->getProducedItemCount("e1Marker"), iBuilder->CreateAdd(phiCursorPosAfterLiteral, iBuilder->getSize(1)), false);
     172//        iBuilder->CallPrintInt("markStart", iBuilder->CreateAdd(phiCursorPosAfterLiteral, iBuilder->getSize(1)));
     173//        iBuilder->CallPrintInt("phiCursorPosAfterLiteral", phiCursorPosAfterLiteral);
    184174        this->markCircularOutputBitstream(iBuilder, "e1Marker", iBuilder->CreateAdd(phiCursorPosAfterLiteral, iBuilder->getSize(1)), offsetPos, true);
    185175        this->increaseScalarField(iBuilder, "m0OutputPos", literalLength); //TODO m0OutputPos may be removed from scalar fields
     
    263253                iBuilder->CreateShl(iBuilder->CreateZExt(this->generateLoadSourceInputByte(iBuilder, iBuilder->CreateAdd(offsetPos, iBuilder->getSize(1))), iBuilder->getSizeTy()), iBuilder->getSize(8))
    264254        );
    265 //        iBuilder->CallPrintInt("matchOffset", matchOffset);
    266255        this->generateStoreNumberOutput(iBuilder, "m0Start", iBuilder->getInt64Ty()->getPointerTo(), outputPos);
    267 //    iBuilder->CallPrintInt("m0Start", outputPos);
    268256        this->generateStoreNumberOutput(iBuilder, "m0End", iBuilder->getInt64Ty()->getPointerTo(), outputEndPos);
    269 //    iBuilder->CallPrintInt("m0End", outputEndPos);
    270257        this->generateStoreNumberOutput(iBuilder, "matchOffset", iBuilder->getInt64Ty()->getPointerTo(), matchOffset);
    271 //    iBuilder->CallPrintInt("matchOffset", matchOffset);
    272258        this->increaseScalarField(iBuilder, "m0OutputPos", matchLength);
     259        this->markCircularOutputBitstream(iBuilder, "M0Marker", outputPos, outputEndPos, true, false);
     260
    273261        return iBuilder->CreateAdd(phiCursorPosAfterMatch, INT64_ONE);
    274262    }
     
    276264
    277265    void LZ4IndexBuilderKernel::generateProcessCompressedBlock(const std::unique_ptr<KernelBuilder> &iBuilder, Value* blockStart, Value* blockEnd) {
    278 
     266        // Constant
    279267        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
    280         //TODO use memset to clear output buffer
     268
     269        Value* m0OutputBlockPtr = iBuilder->getOutputStreamBlockPtr("M0Marker", iBuilder->getSize(0));
     270        iBuilder->CreateMemSet(m0OutputBlockPtr, iBuilder->getInt8(0), 4 * 1024 * 1024 / 8, true);
     271
     272
     273        Value* isTerminal = iBuilder->CreateICmpEQ(blockEnd, iBuilder->getScalarField("fileSize"));
     274
     275        iBuilder->setTerminationSignal(isTerminal);
     276
     277        //TODO use memset to clear output buffer for extract marker
     278
    281279        BasicBlock* exitBlock = iBuilder->CreateBasicBlock("processCompressedExitBlock");
    282280
     
    297295        //TODO add acceleration here
    298296        Value* token = this->generateLoadSourceInputByte(iBuilder, phiCursorValue);
     297
    299298//        iBuilder->CallPrintInt("tokenPos", phiCursorValue);
    300299//        iBuilder->CallPrintInt("token", token);
     
    336335        this->generateStoreNumberOutput(iBuilder, "m0End", iBuilder->getInt64Ty()->getPointerTo(), finalM0OutputPos);
    337336        this->generateStoreNumberOutput(iBuilder, "matchOffset", iBuilder->getInt64Ty()->getPointerTo(), iBuilder->getInt64(0));
     337        iBuilder->setProducedItemCount("M0Marker", finalM0OutputPos);
     338        // finalM0OutputPos should always be 4MB * n except for the final block
    338339
    339340        iBuilder->CreateBr(processCon);
     
    355356        Constant* INT64_BIT_BLOCK_WIDTH = iBuilder->getInt64(bitBlockWidth);
    356357        Constant* SIZE_ZERO = iBuilder->getSize(0);
     358        Type* bitBlockType = iBuilder->getBitBlockType();
    357359        Type* bitBlockWidthIntTy = iBuilder->getIntNTy(bitBlockWidth);
    358360
     
    377379        iBuilder->SetInsertPoint(advanceBodyBlock);
    378380
    379         Value* currentPosBitBlockIndex = iBuilder->CreateSub(iBuilder->CreateUDiv(phiCurrentPos, INT64_BIT_BLOCK_WIDTH), baseInputBlockIndex);
     381
     382        Value* currentBlockGlobalPos = iBuilder->CreateUDiv(phiCurrentPos, INT64_BIT_BLOCK_WIDTH);
     383        Value* currentPosBitBlockIndex = iBuilder->CreateSub(currentBlockGlobalPos, baseInputBlockIndex);
    380384
    381385        Value* currentPosBitBlockOffset = iBuilder->CreateURem(phiCurrentPos, INT64_BIT_BLOCK_WIDTH);
    382386
    383387        Value* ptr = iBuilder->getInputStreamBlockPtr(inputName, SIZE_ZERO, currentPosBitBlockIndex);
     388        Value* rawPtr = iBuilder->CreatePointerCast(iBuilder->getRawInputPointer(inputName, SIZE_ZERO), bitBlockType->getPointerTo());
     389        Value* ptr2 = iBuilder->CreateGEP(rawPtr, iBuilder->CreateURem(currentBlockGlobalPos, iBuilder->getSize(this->getAnyStreamSetBuffer(inputName)->getBufferBlocks())));
     390        ptr = ptr2; //TODO workaround here
     391
    384392
    385393        Value* currentBitValue = iBuilder->CreateBitCast(iBuilder->CreateLoad(ptr), bitBlockWidthIntTy);
     
    410418
    411419    Value * LZ4IndexBuilderKernel::generateLoadInt64NumberInput(const unique_ptr<KernelBuilder> &iBuilder, string inputBufferName, Value *globalOffset) {
    412         // Stride Size here is Constant 1 instead of BitBlockWidth
     420        Constant* SIZE_STRIDE_SIZE = iBuilder->getSize(this->getStride());
    413421        Constant* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
    414422        Constant* SIZE_ZERO = iBuilder->getSize(0);
     
    416424//        Value* baseInputBlockIndex = iBuilder->CreateUDiv(iBuilder->getProcessedItemCount(inputBufferName), SIZE_BIT_BLOCK_WIDTH);
    417425
    418         Value* offset = iBuilder->CreateSub(globalOffset, iBuilder->getProcessedItemCount(inputBufferName));
     426        //TODO possible bug here, maybe we need to use iBuilder->getStride()
     427        Value* offset = iBuilder->CreateSub(globalOffset, iBuilder->CreateMul(iBuilder->CreateUDiv(iBuilder->getProcessedItemCount(inputBufferName), SIZE_STRIDE_SIZE), SIZE_STRIDE_SIZE));
    419428
    420429        Value* targetBlockIndex = iBuilder->CreateUDiv(offset, SIZE_BIT_BLOCK_WIDTH);
     
    451460    // Assume we have enough output buffer
    452461    llvm::BasicBlock *LZ4IndexBuilderKernel::markCircularOutputBitstream(const std::unique_ptr<KernelBuilder> &iBuilder,
    453                                                                     const std::string &bitstreamName,
    454                                                                     llvm::Value *start, llvm::Value *end, bool isOne,
    455                                                                     bool setProduced) {
     462                                                                         const std::string &bitstreamName,
     463                                                                         llvm::Value *start, llvm::Value *end, bool isOne,
     464                                                                         bool setProduced) {
    456465        const unsigned int bitBlockWidth = iBuilder->getBitBlockWidth();
    457466        Value* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(bitBlockWidth);
     
    529538
    530539        Value *targetPtr = iBuilder->getOutputStreamBlockPtr(bitstreamName, SIZE_ZERO, curBlockLocalIndex);
     540        Value *rawInputPointer = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(bitstreamName, SIZE_ZERO), iBuilder->getBitBlockType()->getPointerTo());
     541        Value * ptr = iBuilder->CreateGEP(rawInputPointer, iBuilder->CreateURem(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), iBuilder->getSize(this->getAnyStreamSetBuffer(bitstreamName)->getBufferBlocks())));
     542//        iBuilder->CallPrintInt("targetPtr", targetPtr);
     543//        iBuilder->CallPrintInt("targetPtr2", ptr);
     544        targetPtr = ptr; //TODO workaround here
     545
     546
     547        //TODO fixed circular here
     548
    531549        Value *oldValue = iBuilder->CreateLoad(targetPtr);
    532550        oldValue = iBuilder->CreateBitCast(oldValue, INT_BIT_BLOCK_TY);
     
    557575
    558576    void LZ4IndexBuilderKernel::generateStoreNumberOutput(const unique_ptr<KernelBuilder> &iBuilder,
    559                                                              const string &outputBufferName, Type *pointerType,
    560                                                              Value *value) {
     577                                                          const string &outputBufferName, Type *pointerType,
     578                                                          Value *value) {
     579
    561580        Value* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
    562581        Value* SIZE_ZERO = iBuilder->getSize(0);
     
    564583
    565584        Value* previousProduced = previousProducedMap.find(outputBufferName)->second;
     585//        iBuilder->CallPrintInt("previousProduced", previousProduced);
    566586
    567587        Value* blockIndexBase = iBuilder->CreateUDiv(previousProduced, SIZE_BIT_BLOCK_WIDTH);
     
    575595        Value* ptr = iBuilder->getOutputStreamBlockPtr(outputBufferName, SIZE_ZERO, iBuilder->CreateSub(blockIndex, blockIndexBase));
    576596        ptr = iBuilder->CreatePointerCast(ptr, pointerType);
     597        ptr = iBuilder->CreateGEP(ptr, blockOffset);
     598
     599        Value* tmpOffset = iBuilder->CreateURem(outputOffset, iBuilder->getSize(this->getAnyStreamSetBuffer(outputBufferName)->getBufferBlocks() * iBuilder->getBitBlockWidth()));
     600        Value* outputRawPtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(outputBufferName, SIZE_ZERO), pointerType);
     601        Value* ptr2 = iBuilder->CreateGEP(outputRawPtr, tmpOffset);
     602        ptr = ptr2;
     603//        iBuilder->CallPrintInt("ptr", ptr);
     604//        iBuilder->CallPrintInt("ptr2", ptr2);
     605
    577606        // GEP here is safe
    578         iBuilder->CreateStore(value, iBuilder->CreateGEP(ptr, blockOffset));
     607        iBuilder->CreateStore(value, ptr);
     608
     609        if (outputBufferName == "m0End") {
     610//            iBuilder->CallPrintInt("output:m0End", value);
     611        }
    579612
    580613        iBuilder->setProducedItemCount(outputBufferName, iBuilder->CreateAdd(outputOffset, SIZE_ONE));
     
    583616
    584617    void LZ4IndexBuilderKernel::resetPreviousProducedMap(const std::unique_ptr<KernelBuilder> &iBuilder,
    585                                                             std::vector<std::string> outputList) {
     618                                                         std::vector<std::string> outputList) {
    586619        previousProducedMap.clear();
    587620        for (auto iter = outputList.begin(); iter != outputList.end(); ++iter) {
Note: See TracChangeset for help on using the changeset viewer.