Changeset 5923 for icGREP


Ignore:
Timestamp:
Mar 21, 2018, 1:22:57 AM (12 months ago)
Author:
xwa163
Message:

Fix some GEP instructions in lz4_index_builder and lz4_block_decoder_new

Location:
icGREP/icgrep-devel/icgrep
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_block_decoder_new.cpp

    r5921 r5923  
    4848}
    4949
     50void LZ4BlockDecoderNewKernel::resetPreviousProducedMap(const std::unique_ptr<KernelBuilder> &iBuilder,
     51                                                        std::vector<std::string> outputList) {
     52    previousProducedMap.clear();
     53    for (auto iter = outputList.begin(); iter != outputList.end(); ++iter) {
     54        previousProducedMap.insert(std::make_pair(*iter, iBuilder->getProducedItemCount(*iter)));
     55    }
     56}
     57
    5058void LZ4BlockDecoderNewKernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> &iBuilder, Value * const numOfStrides) {
    5159    // Constant
     
    5765    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
    5866    BasicBlock * exitBlock = iBuilder->CreateBasicBlock("exit");
     67
     68    this->resetPreviousProducedMap(iBuilder, {"isCompressed", "blockStart", "blockEnd"});
    5969
    6070    // Skip Header
     
    185195
    186196    Value* LZ4BlockDecoderNewKernel::generateLoadInput(const std::unique_ptr<KernelBuilder> & iBuilder, llvm::Value* offset) {
     197        // The external buffer is always linearly accessible, so the GEP here is safe
    187198        Value * inputBufferBasePtr = iBuilder->getRawInputPointer("byteStream", iBuilder->getSize(0));
    188199        Value* targetPtr = iBuilder->CreateGEP(inputBufferBasePtr, offset);
     
    192203    void LZ4BlockDecoderNewKernel::appendOutput(const std::unique_ptr<KernelBuilder> & iBuilder, Value* isCompressed, Value* blockStart, Value* blockEnd) {
    193204        // Constant
    194         this->generateStoreCircularOutput(iBuilder, "isCompressed", iBuilder->getInt8Ty()->getPointerTo(), isCompressed);
    195         this->generateStoreCircularOutput(iBuilder, "blockStart", iBuilder->getInt64Ty()->getPointerTo(), blockStart);
    196         this->generateStoreCircularOutput(iBuilder, "blockEnd", iBuilder->getInt64Ty()->getPointerTo(), blockEnd);
    197     }
    198 
    199     void LZ4BlockDecoderNewKernel::generateStoreCircularOutput(const unique_ptr<KernelBuilder> &iBuilder, const string& outputBufferName, Type* pointerType, Value* value) {
    200         Value* offset = iBuilder->getProducedItemCount(outputBufferName);
    201 
    202         size_t inputSize = this->getOutputBufferSize(iBuilder, outputBufferName);
    203         Value* offsetMask = iBuilder->getSize(inputSize - 1);
    204         Value* maskedOffset = iBuilder->CreateAnd(offsetMask, offset);
    205 
    206         Value* outputBufferPtr = iBuilder->getRawOutputPointer(outputBufferName, iBuilder->getSize(0));
    207 
    208         outputBufferPtr = iBuilder->CreatePointerCast(outputBufferPtr, pointerType);
    209         iBuilder->CreateStore(value, iBuilder->CreateGEP(outputBufferPtr, maskedOffset));
    210 
    211         offset = iBuilder->CreateAdd(offset, iBuilder->getSize(1));
    212         iBuilder->setProducedItemCount(outputBufferName, offset);
     205        this->generateStoreNumberOutput(iBuilder, "isCompressed", iBuilder->getInt8Ty()->getPointerTo(), isCompressed);
     206        this->generateStoreNumberOutput(iBuilder, "blockStart", iBuilder->getInt64Ty()->getPointerTo(), blockStart);
     207        this->generateStoreNumberOutput(iBuilder, "blockEnd", iBuilder->getInt64Ty()->getPointerTo(), blockEnd);
     208    }
     209
     210    void LZ4BlockDecoderNewKernel::generateStoreNumberOutput(const unique_ptr<KernelBuilder> &iBuilder,
     211                                                             const string &outputBufferName, Type *pointerType,
     212                                                             Value *value) {
     213        Value* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
     214        Value* SIZE_ZERO = iBuilder->getSize(0);
     215        Value* SIZE_ONE = iBuilder->getSize(1);
     216
     217        Value* previousProduced = previousProducedMap.find(outputBufferName)->second;
     218
     219        Value* blockIndexBase = iBuilder->CreateUDiv(previousProduced, SIZE_BIT_BLOCK_WIDTH);
     220        Value* outputOffset = iBuilder->getProducedItemCount(outputBufferName);
     221        Value* blockIndex = iBuilder->CreateUDiv(outputOffset, SIZE_BIT_BLOCK_WIDTH);
     222
     223        Value* blockOffset = iBuilder->CreateURem(outputOffset, SIZE_BIT_BLOCK_WIDTH);
     224
     225        // i8, [8 x <4 x i64>]*
     226        // i64, [64 x <4 x i64>]*
     227        Value* ptr = iBuilder->getOutputStreamBlockPtr(outputBufferName, SIZE_ZERO, iBuilder->CreateSub(blockIndex, blockIndexBase));
     228        ptr = iBuilder->CreatePointerCast(ptr, pointerType);
     229        // GEP here is safe
     230        iBuilder->CreateStore(value, iBuilder->CreateGEP(ptr, blockOffset));
     231
     232        iBuilder->setProducedItemCount(outputBufferName, iBuilder->CreateAdd(outputOffset, SIZE_ONE));
    213233    }
    214234
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_block_decoder_new.h

    r5921 r5923  
    1 //
    2 // Created by wxy325 on 2018/3/16.
    3 //
    41
    52#ifndef ICGREP_LZ4_BLOCK_DECODER_NEW_H
     
    85
    96#include "kernels/kernel.h"
     7#include <map>
     8#include <vector>
     9#include <string>
    1010
    1111namespace llvm {
     
    3333    void appendOutput(const std::unique_ptr<KernelBuilder> & iBuilder, llvm::Value *isCompressed, llvm::Value *blockStart, llvm::Value *blockEnd);
    3434
    35     void generateStoreCircularOutput(const std::unique_ptr<KernelBuilder> &iBuilder, const std::string& outputBufferName,
    36                                      llvm::Type *pointerType, llvm::Value *value);
     35    void generateStoreNumberOutput(const std::unique_ptr<KernelBuilder> &iBuilder, const std::string &outputBufferName,
     36                                   llvm::Type *pointerType, llvm::Value *value);
    3737    size_t getOutputBufferSize(const std::unique_ptr<KernelBuilder> &iBuilder, const std::string& bufferName);
     38
     39    std::map<std::string, llvm::Value*> previousProducedMap;
     40
     41    void resetPreviousProducedMap(const std::unique_ptr<KernelBuilder> &iBuilder, std::vector<std::string> outputList);
    3842};
    3943
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_index_builder.cpp

    r5921 r5923  
    6464//        iBuilder->CallPrintInt("aaa", iBuilder->getProducedItemCount("e1Marker"));
    6565
    66         // Clear Output Buffer
    67         previousE1Produced = iBuilder->getProducedItemCount("e1Marker");
     66
    6867
    6968        BasicBlock* exitBlock = iBuilder->CreateBasicBlock("exitBlock");
    7069        BasicBlock* blockEndConBlock = iBuilder->CreateBasicBlock("blockEndConBlock");
     70
     71        this->resetPreviousProducedMap(iBuilder, {"e1Marker", "m0Start", "m0End", "matchOffset"});
     72
    7173        Value* blockDataIndex = iBuilder->getScalarField("blockDataIndex");
    7274
     
    8082
    8183        iBuilder->SetInsertPoint(blockEndConBlock);
    82         Value* blockEnd = this->generateLoadCircularInput(iBuilder, "blockEnd", blockDataIndex, iBuilder->getInt64Ty()->getPointerTo());
    83 
     84        Value* blockEnd = this->generateLoadInt64NumberInput(iBuilder, "blockEnd", blockDataIndex);
     85        iBuilder->CallPrintInt("blockEnd", blockEnd);
    8486
    8587        Value* totalExtender = iBuilder->CreateAdd(iBuilder->getAvailableItemCount("extender"), iBuilder->getProcessedItemCount("extender"));
     
    9496//        iBuilder->CallPrintInt("availableExtender", iBuilder->getAvailableItemCount("extender"));
    9597//        iBuilder->CallPrintInt("blockDataIndex", blockDataIndex);
    96 //        iBuilder->CallPrintInt("blockEnd", blockEnd);
    97 
    98         Value* blockStart = this->generateLoadCircularInput(iBuilder, "blockStart", blockDataIndex, iBuilder->getInt64Ty()->getPointerTo());
     98
     99
     100        Value* blockStart = this->generateLoadInt64NumberInput(iBuilder, "blockStart", blockDataIndex);
    99101
    100102        BasicBlock* processBlock = iBuilder->CreateBasicBlock("processBlock");
     
    177179                        literalLength),
    178180                iBuilder->getSize(1));
    179 //        iBuilder->CallPrintInt("offsetPos", offsetPos);
     181
     182        // TODO Clear Output Buffer at the beginning instead of marking 0
    180183        this->markCircularOutputBitstream(iBuilder, "e1Marker", iBuilder->getProducedItemCount("e1Marker"), iBuilder->CreateAdd(phiCursorPosAfterLiteral, iBuilder->getSize(1)), false);
    181184        this->markCircularOutputBitstream(iBuilder, "e1Marker", iBuilder->CreateAdd(phiCursorPosAfterLiteral, iBuilder->getSize(1)), offsetPos, true);
     
    261264        );
    262265//        iBuilder->CallPrintInt("matchOffset", matchOffset);
    263         this->generateStoreCircularOutput(iBuilder, "m0Start", iBuilder->getInt64Ty()->getPointerTo(), outputPos);
     266        this->generateStoreNumberOutput(iBuilder, "m0Start", iBuilder->getInt64Ty()->getPointerTo(), outputPos);
    264267//    iBuilder->CallPrintInt("m0Start", outputPos);
    265         this->generateStoreCircularOutput(iBuilder, "m0End", iBuilder->getInt64Ty()->getPointerTo(), outputEndPos);
     268        this->generateStoreNumberOutput(iBuilder, "m0End", iBuilder->getInt64Ty()->getPointerTo(), outputEndPos);
    266269//    iBuilder->CallPrintInt("m0End", outputEndPos);
    267         this->generateStoreCircularOutput(iBuilder, "matchOffset", iBuilder->getInt64Ty()->getPointerTo(), matchOffset);
     270        this->generateStoreNumberOutput(iBuilder, "matchOffset", iBuilder->getInt64Ty()->getPointerTo(), matchOffset);
    268271//    iBuilder->CallPrintInt("matchOffset", matchOffset);
    269272        this->increaseScalarField(iBuilder, "m0OutputPos", matchLength);
     
    332335        Value* finalM0OutputPos = iBuilder->getScalarField("m0OutputPos");
    333336//        iBuilder->CallPrintInt("finalM0OutputPos", finalM0OutputPos);
    334         this->generateStoreCircularOutput(iBuilder, "m0Start", iBuilder->getInt64Ty()->getPointerTo(), finalM0OutputPos);
    335         this->generateStoreCircularOutput(iBuilder, "m0End", iBuilder->getInt64Ty()->getPointerTo(), finalM0OutputPos);
    336         this->generateStoreCircularOutput(iBuilder, "matchOffset", iBuilder->getInt64Ty()->getPointerTo(), iBuilder->getInt64(0));
     337        this->generateStoreNumberOutput(iBuilder, "m0Start", iBuilder->getInt64Ty()->getPointerTo(), finalM0OutputPos);
     338        this->generateStoreNumberOutput(iBuilder, "m0End", iBuilder->getInt64Ty()->getPointerTo(), finalM0OutputPos);
     339        this->generateStoreNumberOutput(iBuilder, "matchOffset", iBuilder->getInt64Ty()->getPointerTo(), iBuilder->getInt64(0));
    337340
    338341        iBuilder->CreateBr(processCon);
     
    357360        Type* bitBlockWidthIntTy = iBuilder->getIntNTy(bitBlockWidth);
    358361
    359 
    360         Value* baseOffset = iBuilder->getProcessedItemCount(inputName);
    361         baseOffset = iBuilder->CreateSub(baseOffset, iBuilder->CreateURem(baseOffset, INT64_BIT_BLOCK_WIDTH));
     362        Value* baseInputBlockIndex = iBuilder->CreateUDiv(iBuilder->getProcessedItemCount(inputName), INT64_BIT_BLOCK_WIDTH);
    362363
    363364        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
    364 
    365 
    366         Value* inputBasePtr = iBuilder->CreatePointerCast(iBuilder->getInputStreamBlockPtr(inputName, SIZE_ZERO), bitBlockType->getPointerTo());
    367365
    368366        BasicBlock* advanceConBlock = iBuilder->CreateBasicBlock("advanceConBlock");
     
    382380        iBuilder->SetInsertPoint(advanceBodyBlock);
    383381
    384 
    385 //        iBuilder->CallPrintInt("phiCurrentPos", phiCurrentPos);
    386 //        iBuilder->CallPrintInt("baseOffset", baseOffset);
    387         Value* currentPosBitBlockIndex = iBuilder->CreateUDiv(iBuilder->CreateSub(phiCurrentPos, baseOffset), INT64_BIT_BLOCK_WIDTH);
    388 //        iBuilder->CallPrintInt("currentPosBitBlockIndex", currentPosBitBlockIndex);
     382        Value* currentPosBitBlockIndex = iBuilder->CreateSub(iBuilder->CreateUDiv(phiCurrentPos, INT64_BIT_BLOCK_WIDTH), baseInputBlockIndex);
     383
    389384        Value* currentPosBitBlockOffset = iBuilder->CreateURem(phiCurrentPos, INT64_BIT_BLOCK_WIDTH);
    390385
    391         Value* ptr = iBuilder->CreateGEP(inputBasePtr, iBuilder->CreateTruncOrBitCast(currentPosBitBlockIndex, iBuilder->getSizeTy()));
    392 //        iBuilder->CallPrintInt("ptr", ptr);
    393 //        iBuilder->CallPrintInt("blockBasePtr", iBuilder->getInputStreamBlockPtr(inputName, SIZE_ZERO));
     386        Value* ptr = iBuilder->getInputStreamBlockPtr(inputName, SIZE_ZERO, currentPosBitBlockIndex);
     387
    394388        Value* currentBitValue = iBuilder->CreateBitCast(iBuilder->CreateLoad(ptr), bitBlockWidthIntTy);
    395 //        iBuilder->CallPrintRegister("ptrValue", currentBitValue);
    396389
    397390        currentBitValue = iBuilder->CreateLShr(currentBitValue, iBuilder->CreateZExt(currentPosBitBlockOffset, bitBlockWidthIntTy));
     
    419412    }
    420413
    421     Value *
    422     LZ4IndexBuilderKernel::generateLoadCircularInput(const unique_ptr<KernelBuilder> &iBuilder, string inputBufferName,
    423                                                 Value *offset, Type *pointerType) {
    424         size_t inputSize = this->getInputStreamSetBuffer(inputBufferName)->getBufferBlocks() * iBuilder->getStride();
    425         Value *offsetMask = iBuilder->getSize(inputSize - 1);
    426         Value *maskedOffset = iBuilder->CreateAnd(offsetMask, offset);
    427 
    428         Value *inputBufferPtr = iBuilder->getRawInputPointer(inputBufferName, iBuilder->getSize(0));
    429 
    430         inputBufferPtr = iBuilder->CreatePointerCast(inputBufferPtr, pointerType);
    431         return iBuilder->CreateLoad(iBuilder->CreateGEP(inputBufferPtr, maskedOffset));
     414    Value * LZ4IndexBuilderKernel::generateLoadInt64NumberInput(const unique_ptr<KernelBuilder> &iBuilder, string inputBufferName, Value *globalOffset) {
     415        Constant* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
     416        Constant* SIZE_ZERO = iBuilder->getSize(0);
     417
     418//        Value* baseInputBlockIndex = iBuilder->CreateUDiv(iBuilder->getProcessedItemCount(inputBufferName), SIZE_BIT_BLOCK_WIDTH);
     419
     420        Value* offset = iBuilder->CreateSub(globalOffset, iBuilder->getProcessedItemCount(inputBufferName));
     421
     422        Value* targetBlockIndex = iBuilder->CreateUDiv(offset, SIZE_BIT_BLOCK_WIDTH);
     423        Value* localOffset = iBuilder->CreateURem(offset, SIZE_BIT_BLOCK_WIDTH);
     424
     425        //[64 x <4 x i64>]*
     426        Value* ptr = iBuilder->getInputStreamBlockPtr(inputBufferName, SIZE_ZERO, targetBlockIndex);
     427        ptr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt64Ty()->getPointerTo());
     428        //GEP here is safe
     429        Value* valuePtr = iBuilder->CreateGEP(ptr, localOffset);
     430        return iBuilder->CreateLoad(valuePtr);
    432431    }
    433432
    434433    Value *LZ4IndexBuilderKernel::generateLoadSourceInputByte(const std::unique_ptr<KernelBuilder> &iBuilder, Value *offset) {
     434        // The external buffer is always linearly accessible, so the GEP here is safe
    435435        Value *blockStartPtr = iBuilder->CreatePointerCast(
    436436                iBuilder->getRawInputPointer("byteStream", iBuilder->getInt32(0)),
     
    438438        );
    439439        Value *ptr = iBuilder->CreateGEP(blockStartPtr, offset);
    440 
    441440        return iBuilder->CreateLoad(ptr);
    442441    }
     
    457456                                                                    llvm::Value *start, llvm::Value *end, bool isOne,
    458457                                                                    bool setProduced) {
    459         Value* originalEnd = end;
    460         Value* baseOffset = iBuilder->CreateSub(previousE1Produced, iBuilder->CreateURem(previousE1Produced, iBuilder->getInt64(iBuilder->getBitBlockWidth())));;
    461 //        iBuilder->CallPrintInt("baseOffset", baseOffset);
    462 //        iBuilder->CallPrintInt("start", start);
    463 //        iBuilder->CallPrintInt("end", end);
    464         start = iBuilder->CreateSub(start, baseOffset);
    465         end = iBuilder->CreateSub(end, baseOffset);
    466         //TODO possible bug here
     458        const unsigned int bitBlockWidth = iBuilder->getBitBlockWidth();
     459        Value* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(bitBlockWidth);
     460        Value* SIZE_ONE = iBuilder->getSize(1);
     461        Value* SIZE_ZERO = iBuilder->getSize(0);
     462        Type * const INT_BIT_BLOCK_TY = iBuilder->getIntNTy(bitBlockWidth);
     463        Type * const BIT_BLOCK_TY = iBuilder->getBitBlockType();
     464        Constant* INT_BIT_BLOCK_ONE = ConstantInt::get(INT_BIT_BLOCK_TY, 1);
     465        Constant* INT_BIT_BLOCK_ZERO = ConstantInt::get(INT_BIT_BLOCK_TY, 0);
     466
     467        Value* previousProduced = this->previousProducedMap.find(bitstreamName)->second;
     468        Value* blockIndexBase = iBuilder->CreateUDiv(previousProduced, SIZE_BIT_BLOCK_WIDTH);
     469
    467470        BasicBlock *entryBlock = iBuilder->GetInsertBlock();
    468 
    469 
    470 
    471         Value *outputBasePtr = iBuilder->getOutputStreamBlockPtr(bitstreamName, iBuilder->getSize(0));
    472 //        iBuilder->CallPrintInt("outputBasePtr", outputBasePtr);
    473 //        iBuilder->CallPrintInt("a", iBuilder->getRawOutputPointer(bitstreamName, iBuilder->getSize(0)));
    474 
    475         outputBasePtr = iBuilder->CreatePointerCast(outputBasePtr, iBuilder->getInt64Ty()->getPointerTo());
    476 
    477 //        size_t outputBufferSize = this->getOutputBufferSize(iBuilder, bitstreamName);
    478 //        Value *outputMask = iBuilder->getSize(outputBufferSize / 64 - 1);
    479 
    480471        BasicBlock *conBlock = iBuilder->CreateBasicBlock("mark_bit_one_con");
    481472        BasicBlock *bodyBlock = iBuilder->CreateBasicBlock("mark_bit_one_body");
    482473        BasicBlock *exitBlock = iBuilder->CreateBasicBlock("mark_bit_one_exit");
    483474
    484         Value *startOffset = iBuilder->CreateLShr(start, iBuilder->getSize(std::log2(64)), "startOffset");
     475        Value* startBlockLocalIndex = iBuilder->CreateSub(iBuilder->CreateUDiv(start, SIZE_BIT_BLOCK_WIDTH), blockIndexBase);
    485476
    486477        iBuilder->CreateBr(conBlock);
     
    489480        iBuilder->SetInsertPoint(conBlock);
    490481
    491 
    492         PHINode *curOffset = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
    493         curOffset->addIncoming(startOffset, entryBlock);
    494 //        iBuilder->CallPrintInt("curOffset", curOffset);
    495 //        iBuilder->CallPrintInt("end", end);
    496 
     482        PHINode *curBlockLocalIndex = iBuilder->CreatePHI(iBuilder->getSizeTy(), 2);
     483        curBlockLocalIndex->addIncoming(startBlockLocalIndex, entryBlock);
    497484        iBuilder->CreateCondBr(
    498                 iBuilder->CreateICmpULT(iBuilder->CreateShl(curOffset, std::log2(64)), end),
     485                iBuilder->CreateICmpULT(iBuilder->CreateMul(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), SIZE_BIT_BLOCK_WIDTH), end),
    499486                bodyBlock,
    500487                exitBlock
     
    503490        // Body
    504491        iBuilder->SetInsertPoint(bodyBlock);
    505         Value *maskedOffset = curOffset;
    506492
    507493        Value *outputLowestBitValue = iBuilder->CreateSelect(
    508494                iBuilder->CreateICmpULE(
    509                         iBuilder->CreateShl(curOffset, std::log2(64)),
     495                        iBuilder->CreateMul(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), SIZE_BIT_BLOCK_WIDTH),
    510496                        start
    511497                ),
    512                 iBuilder->CreateShl(iBuilder->getSize(1), iBuilder->CreateAnd(start, iBuilder->getSize(64 - 1))),
    513                 iBuilder->getSize(1)
     498                iBuilder->CreateShl(INT_BIT_BLOCK_ONE, iBuilder->CreateZExt(iBuilder->CreateURem(start, SIZE_BIT_BLOCK_WIDTH), INT_BIT_BLOCK_TY)),
     499                INT_BIT_BLOCK_ONE
    514500        );
    515501
    516502        Value *hasNotReachEnd = iBuilder->CreateICmpULE(
    517                 iBuilder->CreateShl(iBuilder->CreateAdd(curOffset, iBuilder->getSize(1)), std::log2(64)),
     503                iBuilder->CreateMul(iBuilder->CreateAdd(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), SIZE_ONE), SIZE_BIT_BLOCK_WIDTH),
    518504                end
    519505        );
    520506        Value *producedItemsCount = iBuilder->CreateSelect(
    521507                hasNotReachEnd,
    522                 iBuilder->CreateShl(iBuilder->CreateAdd(curOffset, iBuilder->getSize(1)), std::log2(64)),
     508                iBuilder->CreateMul(iBuilder->CreateAdd(iBuilder->CreateAdd(curBlockLocalIndex, blockIndexBase), SIZE_ONE), SIZE_BIT_BLOCK_WIDTH),
    523509                end
    524510        );
    525         producedItemsCount = iBuilder->CreateAdd(producedItemsCount, baseOffset);
     511
    526512
    527513        Value *outputHighestBitValue = iBuilder->CreateSelect(
    528514                hasNotReachEnd,
    529                 iBuilder->getSize(0),
     515                INT_BIT_BLOCK_ZERO,
    530516                iBuilder->CreateShl(
    531                         iBuilder->getSize(1),
    532                         iBuilder->CreateAnd(end, iBuilder->getSize(64 - 1))
     517                        INT_BIT_BLOCK_ONE,
     518                        iBuilder->CreateZExt(iBuilder->CreateURem(end, SIZE_BIT_BLOCK_WIDTH), INT_BIT_BLOCK_TY)
    533519                )
    534520        );
     
    544530        }
    545531
    546         Value *targetPtr = iBuilder->CreateGEP(outputBasePtr, maskedOffset);
    547 //        iBuilder->CallPrintInt("maskedOffset", maskedOffset);
     532        Value *targetPtr = iBuilder->getOutputStreamBlockPtr(bitstreamName, SIZE_ZERO, curBlockLocalIndex);
    548533        Value *oldValue = iBuilder->CreateLoad(targetPtr);
     534        oldValue = iBuilder->CreateBitCast(oldValue, INT_BIT_BLOCK_TY);
    549535        Value *newValue = NULL;
    550536        if (isOne) {
     
    553539            newValue = iBuilder->CreateAnd(oldValue, bitMask);
    554540        }
     541
    555542        iBuilder->CreateStore(
    556                 newValue,
     543                iBuilder->CreateBitCast(newValue, BIT_BLOCK_TY),
    557544                targetPtr
    558545        );
    559 //        iBuilder->CallPrintInt("targetPtr", targetPtr);
    560546        if (setProduced) {
    561547            iBuilder->setProducedItemCount(bitstreamName, producedItemsCount);
    562548        }
    563549
    564         curOffset->addIncoming(iBuilder->CreateAdd(curOffset, iBuilder->getSize(1)), bodyBlock);
     550        curBlockLocalIndex->addIncoming(iBuilder->CreateAdd(curBlockLocalIndex, SIZE_ONE), bodyBlock);
    565551        iBuilder->CreateBr(conBlock);
    566552
     
    571557
    572558
    573     void
    574     LZ4IndexBuilderKernel::generateStoreCircularOutput(const unique_ptr<KernelBuilder> &iBuilder, string outputBufferName,
    575                                                   Type *pointerType, Value *value) {
    576         //TODO possible bug here
    577         Value *offset = iBuilder->getProducedItemCount(outputBufferName);
    578 
    579         size_t inputSize = this->getOutputBufferSize(iBuilder, outputBufferName);
    580         Value *offsetMask = iBuilder->getSize(inputSize - 1);
    581         Value *maskedOffset = iBuilder->CreateAnd(offsetMask, offset);
    582 
    583         Value *outputBufferPtr = iBuilder->getRawOutputPointer(outputBufferName, iBuilder->getSize(0));
    584 
    585         outputBufferPtr = iBuilder->CreatePointerCast(outputBufferPtr, pointerType);
    586         iBuilder->CreateStore(value, iBuilder->CreateGEP(outputBufferPtr, maskedOffset));
    587 
    588         offset = iBuilder->CreateAdd(offset, iBuilder->getSize(1));
    589         iBuilder->setProducedItemCount(outputBufferName, offset);
     559
     560    void LZ4IndexBuilderKernel::generateStoreNumberOutput(const unique_ptr<KernelBuilder> &iBuilder,
     561                                                             const string &outputBufferName, Type *pointerType,
     562                                                             Value *value) {
     563        Value* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
     564        Value* SIZE_ZERO = iBuilder->getSize(0);
     565        Value* SIZE_ONE = iBuilder->getSize(1);
     566
     567        Value* previousProduced = previousProducedMap.find(outputBufferName)->second;
     568
     569        Value* blockIndexBase = iBuilder->CreateUDiv(previousProduced, SIZE_BIT_BLOCK_WIDTH);
     570        Value* outputOffset = iBuilder->getProducedItemCount(outputBufferName);
     571        Value* blockIndex = iBuilder->CreateUDiv(outputOffset, SIZE_BIT_BLOCK_WIDTH);
     572
     573        Value* blockOffset = iBuilder->CreateURem(outputOffset, SIZE_BIT_BLOCK_WIDTH);
     574
     575        // i8, [8 x <4 x i64>]*
     576        // i64, [64 x <4 x i64>]*
     577        Value* ptr = iBuilder->getOutputStreamBlockPtr(outputBufferName, SIZE_ZERO, iBuilder->CreateSub(blockIndex, blockIndexBase));
     578        ptr = iBuilder->CreatePointerCast(ptr, pointerType);
     579        // GEP here is safe
     580        iBuilder->CreateStore(value, iBuilder->CreateGEP(ptr, blockOffset));
     581
     582        iBuilder->setProducedItemCount(outputBufferName, iBuilder->CreateAdd(outputOffset, SIZE_ONE));
     583    }
     584
     585
     586    void LZ4IndexBuilderKernel::resetPreviousProducedMap(const std::unique_ptr<KernelBuilder> &iBuilder,
     587                                                            std::vector<std::string> outputList) {
     588        previousProducedMap.clear();
     589        for (auto iter = outputList.begin(); iter != outputList.end(); ++iter) {
     590            previousProducedMap.insert(std::make_pair(*iter, iBuilder->getProducedItemCount(*iter)));
     591        }
    590592    }
    591593}
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_index_builder.h

    r5921 r5923  
    88#include "kernels/kernel.h"
    99#include <string>
     10#include <map>
     11#include <vector>
    1012
    1113namespace llvm {
     
    2830
    2931    private:
    30         llvm::Value * generateLoadCircularInput(const std::unique_ptr<KernelBuilder> &iBuilder, std::string inputBufferName,
    31                                                          llvm::Value *offset, llvm::Type *pointerType);
    32         void generateProcessCompressedBlock(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value* blockStart, llvm::Value* blockEnd);
     32        llvm::Value *
     33        generateLoadInt64NumberInput(const std::unique_ptr<KernelBuilder> &iBuilder, std::string inputBufferName,
     34                                     llvm::Value *globalOffset);
     35
     36        void generateProcessCompressedBlock(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value *blockStart,
     37                                            llvm::Value *blockEnd);
    3338
    3439        llvm::Value *generateLoadSourceInputByte(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value *offset);
    3540
    36         llvm::Value *advanceUntilNextZero(const std::unique_ptr<KernelBuilder> &iBuilder, std::string inputName, llvm::Value* startPos, llvm::Value* maxPos = nullptr);
    37         llvm::Value *advanceUntilNextOne(const std::unique_ptr<KernelBuilder> &iBuilder, std::string inputName, llvm::Value* startPos, llvm::Value* maxPos = nullptr);
    38         llvm::Value *advanceUntilNextValue(const std::unique_ptr<KernelBuilder> &iBuilder, std::string inputName, llvm::Value* startPos, bool isNextZero, llvm::Value* maxPos = nullptr);
    39         void increaseScalarField(const std::unique_ptr<KernelBuilder> &iBuilder, const std::string &fieldName, llvm::Value *value);
    40         llvm::Value* processLiteral(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value* token, llvm::Value* tokenPos, llvm::Value* blockEnd);
    41         llvm::Value* processMatch(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value* offsetPos, llvm::Value* token, llvm::Value* blockEnd);
     41        llvm::Value *advanceUntilNextZero(const std::unique_ptr<KernelBuilder> &iBuilder, std::string inputName,
     42                                          llvm::Value *startPos, llvm::Value *maxPos = nullptr);
     43
     44        llvm::Value *advanceUntilNextOne(const std::unique_ptr<KernelBuilder> &iBuilder, std::string inputName,
     45                                         llvm::Value *startPos, llvm::Value *maxPos = nullptr);
     46
     47        llvm::Value *advanceUntilNextValue(const std::unique_ptr<KernelBuilder> &iBuilder, std::string inputName,
     48                                           llvm::Value *startPos, bool isNextZero, llvm::Value *maxPos = nullptr);
     49
     50        void increaseScalarField(const std::unique_ptr<KernelBuilder> &iBuilder, const std::string &fieldName,
     51                                 llvm::Value *value);
     52
     53        llvm::Value *
     54        processLiteral(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value *token, llvm::Value *tokenPos,
     55                       llvm::Value *blockEnd);
     56
     57        llvm::Value *
     58        processMatch(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value *offsetPos, llvm::Value *token,
     59                     llvm::Value *blockEnd);
    4260
    4361
    4462        size_t getOutputBufferSize(const std::unique_ptr<KernelBuilder> &iBuilder, std::string bufferName);
     63
    4564        llvm::BasicBlock *markCircularOutputBitstream(const std::unique_ptr<KernelBuilder> &iBuilder,
    46                                                                              const std::string &bitstreamName,
    47                                                                              llvm::Value *start, llvm::Value *end, bool isOne,
    48                                                                              bool setProduced = true);
    49         void generateStoreCircularOutput(const std::unique_ptr<KernelBuilder> &iBuilder, std::string outputBufferName, llvm::Type* pointerType, llvm::Value* value);
     65                                                      const std::string &bitstreamName,
     66                                                      llvm::Value *start, llvm::Value *end, bool isOne,
     67                                                      bool setProduced = true);
    5068
    51         llvm::Value* previousE1Produced ;
     69        void generateStoreNumberOutput(const std::unique_ptr<KernelBuilder> &iBuilder,
     70                                       const std::string &outputBufferName, llvm::Type *pointerType,
     71                                       llvm::Value *value);
     72
     73        void resetPreviousProducedMap(const std::unique_ptr<KernelBuilder> &iBuilder, std::vector<std::string> outputList);
     74        std::map<std::string, llvm::Value*> previousProducedMap;
    5275    };
    5376}
  • icGREP/icgrep-devel/icgrep/lz4/LZ4GeneratorNew.cpp

    r5921 r5923  
    5757void LZ4GeneratorNew::generateExtractAndDepositMarkers(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    5858    //// Decode Block Information
    59     StreamSetBuffer * const BlockData_IsCompressed = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), this->get4MbBufferBlocks());
     59    StreamSetBuffer * const BlockData_IsCompressed = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), this->get4MbBufferBlocks());
    6060    StreamSetBuffer * const BlockData_BlockStart = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->get4MbBufferBlocks());
    6161    StreamSetBuffer * const BlockData_BlockEnd = pxDriver.addBuffer<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 64), this->get4MbBufferBlocks());
Note: See TracChangeset for help on using the changeset viewer.