Changeset 6042


Ignore:
Timestamp:
May 16, 2018, 11:52:10 AM (5 days ago)
Author:
xwa163
Message:

Improve the performance of output producing logic in LZ4IndexBuilderKernel

Location:
icGREP/icgrep-devel/icgrep/kernels/lz4
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_index_builder.cpp

    r6039 r6042  
    5454           Binding{b->getSizeTy(), "blockDataIndex"},
    5555           Binding{b->getInt64Ty(), "m0OutputPos"},
    56            Binding{b->getInt64Ty(), "compressedSpaceClearPos"},
    57 
    58            // For M0 output
     56
     57           // For MatchOffset Output
     58           Binding{b->getIntNTy(64), "pendingMatchOffsetMarkerBits"},
     59           Binding{b->getInt64Ty(), "pendingMarchOffsetMarkerIndex"},
     60
     61           // For deletionMarker output
     62           Binding{b->getIntNTy(64), "pendingDeletionMarkerStartBits"},
     63           Binding{b->getIntNTy(64), "pendingDeletionMarkerEndBits"},
     64           Binding{b->getIntNTy(64), "pendingDeletionMarkerCarryBit"},
     65           Binding{b->getInt64Ty(), "pendingDeletionMarkerIndex"},
     66
     67           // For M0 Output
    5968           Binding{b->getIntNTy(64), "pendingM0StartBits"},
    6069           Binding{b->getIntNTy(64), "pendingM0EndBits"},
     
    94103        this->generateProcessCompressedBlock(b, blockStart, blockEnd);
    95104        this->storePendingM0(b);
     105        this->storePendingDeletionMarker(b);
     106        this->storePendingMatchOffsetMarker(b);
    96107        Value * newBlockDataIndex = b->CreateAdd(blockDataIndex, b->getInt64(1));
    97108        b->setScalarField("blockDataIndex", newBlockDataIndex);
    98109        b->setProcessedItemCount("isCompressed", newBlockDataIndex);
    99 //        b->setProcessedItemCount("blockEnd", newBlockDataIndex);
    100 //        b->setProcessedItemCount("blockStart", newBlockDataIndex);
    101110
    102111        b->setProcessedItemCount("byteStream", blockEnd);
     
    107116
    108117    Value* LZ4IndexBuilderKernel::processLiteral(const std::unique_ptr<KernelBuilder> &b, Value* token, Value* tokenPos, Value* blockEnd) {
    109 //        b->CallPrintInt("blockEnd", blockEnd);
    110118        BasicBlock* entryBlock = b->GetInsertBlock();
    111119
     
    163171                b->getSize(1));
    164172
    165         this->setCircularOutputBitstream(b, "deletionMarker", b->getProducedItemCount("deletionMarker"), b->CreateAdd(phiCursorPosAfterLiteral, b->getSize(1)));
     173        this->appendDeletionMarkerOutput(b, b->getProducedItemCount("deletionMarker"), b->CreateAdd(phiCursorPosAfterLiteral, b->getSize(1)));
    166174
    167175        b->setProducedItemCount("deletionMarker", offsetPos);
     
    198206
    199207        Value* oldMatchExtensionSize = iBuilder->CreateSub(phiCursorPosAfterMatch, extendMatchStartPos);
    200 //        extendedMatchValue = iBuilder->CreateICmpEQ(iBuilder->CreateAnd(token, iBuilder->getInt8(0xf)), iBuilder->getInt8(0xf));
    201208        Value* matchExtensionSize = iBuilder->CreateSelect(extendedMatchValue, oldMatchExtensionSize, iBuilder->getSize(0));
    202209        Value* matchLengthBase = iBuilder->CreateZExt(iBuilder->CreateAnd(token, iBuilder->getInt8(0x0f)), iBuilder->getInt64Ty());
     
    236243
    237244
    238         this->markCircularOutputBitstream(iBuilder, "MatchOffsetMarker", offsetPos);
     245        this->appendMatchOffsetMarkerOutput(iBuilder, offsetPos);
    239246        this->increaseScalarField(iBuilder, "m0OutputPos", matchLength);
    240 //        this->setCircularOutputBitstream(iBuilder, "M0Marker", outputPos, outputEndPos);
    241247        this->appendM0Output(iBuilder, outputPos, outputEndPos);
    242248
     
    245251
    246252    void LZ4IndexBuilderKernel::generateProcessCompressedBlock(const std::unique_ptr<KernelBuilder> &iBuilder, Value* blockStart, Value* blockEnd) {
    247         Value* clearPos = iBuilder->getScalarField("compressedSpaceClearPos");
    248         // We can not only clear [blockStart, blockEnd), since there are 4 bytes between blockEnd and nextBlockStart
    249         this->clearCircularOutputBitstream(iBuilder, "deletionMarker", clearPos, blockEnd);
    250         this->clearCircularOutputBitstream(iBuilder, "MatchOffsetMarker", clearPos, blockEnd);
    251         iBuilder->setScalarField("compressedSpaceClearPos", blockEnd);
    252 
    253253        BasicBlock* entryBlock = iBuilder->GetInsertBlock();
    254 
    255 //        Value* m0OutputBlockPtr = iBuilder->getOutputStreamBlockPtr("M0Marker", iBuilder->getSize(0));
    256 //        iBuilder->CreateMemSet(m0OutputBlockPtr, iBuilder->getInt8(0), 4 * 1024 * 1024 / 8, true);
    257254
    258255
     
    388385        fieldValue = iBuilder->CreateAdd(fieldValue, value);
    389386        iBuilder->setScalarField(fieldName, fieldValue);
    390     }
    391 
    392 
    // Emits IR that zeroes the bits in [start, end) of the output stream named
    // `bitstreamName`, addressing the stream's raw buffer as a circular array of bytes.
    //   iBuilder      - kernel builder used to emit the instructions
    //   bitstreamName - name of the stream set whose buffer is cleared
    //   start, end    - bit positions delimiting the range (see the TODO below for the
    //                   assumption made about them)
    393     void LZ4IndexBuilderKernel::clearCircularOutputBitstream(const std::unique_ptr<KernelBuilder> &iBuilder,
    394                                                              const std::string &bitstreamName,
    395                                                              llvm::Value *start, llvm::Value *end) {
    396         //TODO currently we assume that start/end pos is not in the same byte because of the requirement of the LZ4 format
    397         Value* SIZE_0 = iBuilder->getSize(0);
    398         Value* SIZE_8 = iBuilder->getSize(8);
    399         Value* INT8_0 = iBuilder->getInt8(0);
    400         Type* INT8_PTR_TY = iBuilder->getInt8PtrTy();
    401 
                // Buffer capacity in bytes: (buffer blocks * bit block width) / 8.
    402         Value* outputBufferBytes = iBuilder->CreateUDiv(iBuilder->getSize(this->getAnyStreamSetBuffer(bitstreamName)->getBufferBlocks() * iBuilder->getBitBlockWidth()), SIZE_8);
                // The raw output pointer is reinterpreted as i8* so the buffer can be indexed by byte.
    403         Value* rawOutputPtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(bitstreamName, SIZE_0), INT8_PTR_TY);
    404 
                // Split both positions into a byte index and a bit offset inside that byte.
    405         Value* startRemain = iBuilder->CreateURem(start, SIZE_8);
    406         Value* startBytePos = iBuilder->CreateUDiv(start, SIZE_8);
    407         Value* endRemain = iBuilder->CreateURem(end, SIZE_8);
    408         Value* endBytePos = iBuilder->CreateUDiv(end, SIZE_8);
    409 
    410         BasicBlock* startByteCpyBlock = iBuilder->CreateBasicBlock("startByteCpyBlock");
    411         BasicBlock* endByteCpyConBlock = iBuilder->CreateBasicBlock("endByteCpyConBlock");
    412         BasicBlock* endByteCpyBlock = iBuilder->CreateBasicBlock("endByteCpyBlock");
    413         BasicBlock* memsetBlock = iBuilder->CreateBasicBlock("memsetBlock");
    414 
                // The first byte needs a partial clear only when start is not byte-aligned.
    415         iBuilder->CreateCondBr(iBuilder->CreateICmpNE(startRemain, SIZE_0), startByteCpyBlock, endByteCpyConBlock);
    416 
    417         // Clear highest {startShiftAmount} bits
    418         iBuilder->SetInsertPoint(startByteCpyBlock);
    419         Value* startPtr = iBuilder->CreateGEP(rawOutputPtr, iBuilder->CreateURem(startBytePos, outputBufferBytes));
    420         Value* startValue = iBuilder->CreateLoad(startPtr);
    421 
    422         Value* startShiftAmount = iBuilder->CreateSub(SIZE_8, startRemain);
    423         startShiftAmount = iBuilder->CreateZExtOrTrunc(startShiftAmount, startValue->getType());
                // Shifting left and then logically right by (8 - startRemain) keeps the low
                // startRemain bits of the byte and zeroes the bits above them.
    424         startValue = iBuilder->CreateLShr(iBuilder->CreateShl(startValue, startShiftAmount), startShiftAmount);
    425 
    426         iBuilder->CreateStore(startValue, startPtr);
    427         iBuilder->CreateBr(endByteCpyConBlock);
    428 
    429         iBuilder->SetInsertPoint(endByteCpyConBlock);
                // NOTE(review): this tests endBytePos, whereas the start-side check above tests
                // startRemain; it looks like endRemain may have been intended - confirm.
    430         iBuilder->CreateCondBr(iBuilder->CreateICmpNE(endBytePos, SIZE_0), endByteCpyBlock, memsetBlock);
    431 
    432         // Clear lowest {endRemain} bits
    433         iBuilder->SetInsertPoint(endByteCpyBlock);
    434         Value* endPtr = iBuilder->CreateGEP(rawOutputPtr, iBuilder->CreateURem(endBytePos, outputBufferBytes));
    435         Value* endValue = iBuilder->CreateLoad(endPtr);
    436         endRemain = iBuilder->CreateZExtOrTrunc(endRemain, endValue->getType());
                // Shifting right and then left by endRemain zeroes the low endRemain bits.
    437         endValue = iBuilder->CreateShl(iBuilder->CreateLShr(endValue, endRemain), endRemain);
    438         iBuilder->CreateStore(endValue, endPtr);
    439         iBuilder->CreateBr(memsetBlock);
    440 
                // Whole bytes between the two partial bytes are cleared with memset.
    441         iBuilder->SetInsertPoint(memsetBlock);
    442         Value* memsetStartByte = iBuilder->CreateUDivCeil(start, SIZE_8);
    443         Value* memsetEndByte = endBytePos;
    444 
    445         Value* memsetSize = iBuilder->CreateSub(memsetEndByte, memsetStartByte);
    446 
                // Never clear more than one full buffer.
    447         memsetSize = iBuilder->CreateUMin(memsetSize, outputBufferBytes);
    448         // We always assume that  (memsetEndByte - memsetStartByte) < outputBufferBytes
    449 
    450         Value* memsetStartByteRem = iBuilder->CreateURem(memsetStartByte, outputBufferBytes);
    451 
                // Because the buffer is circular the run is split in two: memsetSize1 bytes up to
                // the end of the buffer, then the remaining memsetSize2 bytes from its beginning.
    452         Value* memsetSize1 = iBuilder->CreateUMin(iBuilder->CreateSub(outputBufferBytes, memsetStartByteRem), memsetSize);
    453         Value* memsetSize2 = iBuilder->CreateSub(memsetSize, memsetSize1);
    454 
    455         iBuilder->CreateMemSet(iBuilder->CreateGEP(rawOutputPtr, memsetStartByteRem), INT8_0, memsetSize1, true);
    456         iBuilder->CreateMemSet(rawOutputPtr, INT8_0, memsetSize2, true);
    457     }
    458 
    // Emits IR that sets the bits in [start, end) of the output stream named
    // `bitstreamName` to 1, addressing the stream's raw buffer as a circular array of bytes.
    //   iBuilder      - kernel builder used to emit the instructions
    //   bitstreamName - name of the stream set whose buffer is written
    //   start, end    - bit positions delimiting the range
    // Two paths are generated: one for a range that lies inside a single byte and one
    // for a range spanning several bytes; both end at exitBlock, which is left as the
    // insert point.
    459     void LZ4IndexBuilderKernel::setCircularOutputBitstream(const std::unique_ptr<KernelBuilder> &iBuilder,
    460                                                              const std::string &bitstreamName,
    461                                                              llvm::Value *start, llvm::Value *end) {
    462         BasicBlock* exitBlock = iBuilder->CreateBasicBlock("exitBlock");
    463 
    464         Value* SIZE_0 = iBuilder->getSize(0);
    465         Value* SIZE_1 = iBuilder->getSize(1);
    466         Value* SIZE_8 = iBuilder->getSize(8);
    467 //        Value* INT8_0 = iBuilder->getInt8(0);
    468 //        Value* INT8_1 = iBuilder->getInt8(1);
    469         Type* INT8_PTR_TY = iBuilder->getInt8PtrTy();
    470 
                // Buffer capacity in bytes: (buffer blocks * bit block width) / 8.
    471         Value* outputBufferBytes = iBuilder->getSize(this->getAnyStreamSetBuffer(bitstreamName)->getBufferBlocks() * iBuilder->getBitBlockWidth() / 8);
                // The raw output pointer is reinterpreted as i8* so the buffer can be indexed by byte.
    472         Value* rawOutputPtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(bitstreamName, SIZE_0), INT8_PTR_TY);
    473 
                // Split both positions into a byte index and a bit offset inside that byte.
    474         Value* startRemain = iBuilder->CreateURem(start, SIZE_8);
    475         Value* startBytePos = iBuilder->CreateUDiv(start, SIZE_8);
    476         Value* endRemain = iBuilder->CreateURem(end, SIZE_8);
    477         Value* endBytePos = iBuilder->CreateUDiv(end, SIZE_8);
    478         Value* startShiftAmount = iBuilder->CreateSub(SIZE_8, startRemain);
    479 
    480         BasicBlock* shortSetBlock = iBuilder->CreateBasicBlock("shortSetBlock");
    481         BasicBlock* longSetBlock = iBuilder->CreateBasicBlock("longSetBlock");
    482 
    483 //        iBuilder->CreateBr(startByteCpyBlock);
    484         iBuilder->CreateCondBr(iBuilder->CreateICmpEQ(startBytePos, endBytePos), shortSetBlock, longSetBlock);
    485 
    486         // When startPos and endPos are in the same byte
    487         iBuilder->SetInsertPoint(shortSetBlock);
    488         Value* targetPtr = iBuilder->CreateGEP(rawOutputPtr, iBuilder->CreateURem(startBytePos, outputBufferBytes));
    489         Value* targetValue = iBuilder->CreateLoad(targetPtr);
                // (1 << endRemain) - (1 << startRemain) has ones exactly at bit offsets
                // startRemain .. endRemain - 1; it is OR-ed into the loaded byte.
    490         Value* rangeMask = iBuilder->CreateSub(iBuilder->CreateShl(SIZE_1, endRemain), iBuilder->CreateShl(SIZE_1, startRemain));
    491         rangeMask = iBuilder->CreateZExtOrTrunc(rangeMask, targetValue->getType());
    492         targetValue = iBuilder->CreateOr(rangeMask, targetValue);
    493 
    494 //        targetValue = iBuilder->CreateNot(iBuilder->CreateLShr(iBuilder->CreateShl(iBuilder->CreateNot(targetValue), startShiftAmount), startShiftAmount));
    495 //        targetValue = iBuilder->CreateShl(iBuilder->CreateLShr(targetValue, endRemain), endRemain);
    496         iBuilder->CreateStore(targetValue, targetPtr);
    497         iBuilder->CreateBr(exitBlock);
    498 
                // Range spans more than one byte: partial first byte, partial last byte, then
                // whole bytes in between.
    499         iBuilder->SetInsertPoint(longSetBlock);
    500 
    501         BasicBlock* startByteCpyBlock = iBuilder->CreateBasicBlock("startByteCpyBlock");
    502         BasicBlock* endByteCpyConBlock = iBuilder->CreateBasicBlock("endByteCpyConBlock");
    503         BasicBlock* endByteCpyBlock = iBuilder->CreateBasicBlock("endByteCpyBlock");
    504         BasicBlock* memsetBlock = iBuilder->CreateBasicBlock("memsetBlock");
    505 
                // The first byte needs a partial update only when start is not byte-aligned.
    506         iBuilder->CreateCondBr(iBuilder->CreateICmpNE(startRemain, SIZE_0), startByteCpyBlock, endByteCpyConBlock);
    507         // Set highest {startShiftAmount} bits
    508         iBuilder->SetInsertPoint(startByteCpyBlock);
    509         Value* startPtr = iBuilder->CreateGEP(rawOutputPtr, iBuilder->CreateURem(startBytePos, outputBufferBytes));
    510         Value* startValue = iBuilder->CreateLoad(startPtr);
    511 
    512         Value* startShiftAmount2 = iBuilder->CreateZExtOrTrunc(startShiftAmount, startValue->getType());
                // Invert, zero the high bits with a shl/lshr pair, invert again: the low
                // startRemain bits are preserved and every bit above them becomes 1.
    513         startValue = iBuilder->CreateNot(iBuilder->CreateLShr(iBuilder->CreateShl(iBuilder->CreateNot(startValue), startShiftAmount2), startShiftAmount2));
    514 
    515         iBuilder->CreateStore(startValue, startPtr);
    516         iBuilder->CreateBr(endByteCpyConBlock);
    517 
    518         iBuilder->SetInsertPoint(endByteCpyConBlock);
                // NOTE(review): this tests endBytePos, whereas the start-side check above tests
                // startRemain; it looks like endRemain may have been intended - confirm.
    519         iBuilder->CreateCondBr(iBuilder->CreateICmpNE(endBytePos, SIZE_0), endByteCpyBlock, memsetBlock);
    520 
    521         // Set lowest {endRemain} bits
    522         iBuilder->SetInsertPoint(endByteCpyBlock);
    523         Value* endPtr = iBuilder->CreateGEP(rawOutputPtr, iBuilder->CreateURem(endBytePos, outputBufferBytes));
    524         Value* endValue = iBuilder->CreateLoad(endPtr);
    525         Value* endRemain2 = iBuilder->CreateZExtOrTrunc(endRemain, endValue->getType());
                // Invert, zero the low bits with a lshr/shl pair, invert again: the low
                // endRemain bits become 1 and the bits above them are preserved.
    526         endValue = iBuilder->CreateNot(iBuilder->CreateShl(iBuilder->CreateLShr(iBuilder->CreateNot(endValue), endRemain2), endRemain2));
    527         iBuilder->CreateStore(endValue, endPtr);
    528         iBuilder->CreateBr(memsetBlock);
    529 
                // Whole bytes between the two partial bytes are filled with 0xff.
    530         iBuilder->SetInsertPoint(memsetBlock);
    531         Value* memsetStartByte = iBuilder->CreateUDivCeil(start, SIZE_8);
    532         Value* memsetEndByte = endBytePos;
    533 
    534         Value* memsetSize = iBuilder->CreateSub(memsetEndByte, memsetStartByte);
    535 
                // Never fill more than one full buffer.
    536         memsetSize = iBuilder->CreateUMin(memsetSize, outputBufferBytes);
    537 
    538         // We always assume that  (memsetEndByte - memsetStartByte) < outputBufferBytes
    539 
    540         Value* memsetStartByteRem = iBuilder->CreateURem(memsetStartByte, outputBufferBytes);
    541 
                // Because the buffer is circular the run is split in two: memsetSize1 bytes up to
                // the end of the buffer, then the remaining memsetSize2 bytes from its beginning.
    542         Value* memsetSize1 = iBuilder->CreateUMin(iBuilder->CreateSub(outputBufferBytes, memsetStartByteRem), memsetSize);
    543         Value* memsetSize2 = iBuilder->CreateSub(memsetSize, memsetSize1);
    544 
    545         iBuilder->CreateMemSet(iBuilder->CreateGEP(rawOutputPtr, memsetStartByteRem), iBuilder->getInt8(0xff), memsetSize1, true);
    546         iBuilder->CreateMemSet(rawOutputPtr, iBuilder->getInt8(0xff), memsetSize2, true);
    547         iBuilder->CreateBr(exitBlock);
    548 
    549         iBuilder->SetInsertPoint(exitBlock);
    550     }
    551 
    // Emits IR that sets the single bit at position `pos` in the output stream named
    // `bitstreamName`, addressing the stream's raw buffer as a circular array of bytes.
    //   iBuilder      - kernel builder used to emit the instructions
    //   bitstreamName - name of the output stream set to write
    //   pos           - bit position to mark
    552     void LZ4IndexBuilderKernel::markCircularOutputBitstream(const std::unique_ptr<KernelBuilder> &iBuilder, const string &bitstreamName, Value *pos) {
    553         Value* SIZE_0 = iBuilder->getSize(0);
    554         Value* SIZE_8 = iBuilder->getSize(8);
    555         Value* INT8_1 = iBuilder->getInt8(1);
    556         Type* bytePtrType = iBuilder->getInt8PtrTy();
    557 
                // Buffer capacity in bytes: (buffer blocks * bit block width) / 8.
    558         Value* outputBufferBytes = iBuilder->getSize(this->getOutputStreamSetBuffer(bitstreamName)->getBufferBlocks() * iBuilder->getBitBlockWidth() / 8);
    559 
                // Byte index wrapped to the buffer size, and the bit offset inside that byte.
    560         Value* bytePos = iBuilder->CreateUDiv(pos, SIZE_8);
    561         bytePos = iBuilder->CreateURem(bytePos, outputBufferBytes);
    562         Value* byteOffset = iBuilder->CreateTrunc(iBuilder->CreateURem(pos, SIZE_8), iBuilder->getInt8Ty());
    563 
    564         Value* outputRawPtr = iBuilder->CreatePointerCast(iBuilder->getRawOutputPointer(bitstreamName, SIZE_0), bytePtrType);
    565         Value* outputTargetPtr = iBuilder->CreateGEP(outputRawPtr, bytePos);
    566 
                // Read-modify-write: OR (1 << byteOffset) into the target byte.
    567         Value* targetValue = iBuilder->CreateLoad(outputTargetPtr);
    568         targetValue = iBuilder->CreateOr(targetValue, iBuilder->CreateShl(INT8_1, byteOffset));
    569         iBuilder->CreateStore(targetValue, outputTargetPtr);
    570387    }
    571388
     
    655472    }
    656473
     // Emits IR that records the deletion-marker range [start, end) in the pending
     // 64-bit word state kept in the "pendingDeletionMarker*" scalar fields, storing
     // every word that lies before the word containing `end` to the "deletionMarker"
     // output through storeDeletionMarker.
     //   b          - kernel builder used to emit the instructions
     //   start, end - bit positions delimiting the range
     // A range is encoded as one bit in the start word and one bit in the end word; a
     // stored word is computed as endBits - startBits - carry. For a single pair inside
     // one word, (1 << e) - (1 << s) has ones at bits s .. e-1; when the subtraction
     // borrows, the borrow is passed on to the next word through the carry bit.
     474    void LZ4IndexBuilderKernel::appendDeletionMarkerOutput(const std::unique_ptr<KernelBuilder> &b,
     475                                                           llvm::Value *start, llvm::Value *end) {
     476        // ---- Entry
     477        // Constant
     478
                // Width in bits of one pending word. The *_256 names below are historical:
                // every one of these values is built from fw (64).
     479        int fw = 64;
     480        BasicBlock* entryBlock = b->GetInsertBlock();
     481        Value* SIZE_1 = b->getSize(1);
     482        Value* SIZE_256 = b->getSize(fw);
     483        Value* INT256_0 = b->getIntN(fw, 0);
     484        Value* INT256_1 = b->getIntN(fw, 1);
     485
                // Split both positions into a word index and a bit offset inside that word.
     486        Value* startBlockIndex = b->CreateUDiv(start, SIZE_256);
     487        Value* startOffset = b->CreateZExt(b->CreateURem(start, SIZE_256), b->getIntNTy(fw));
     488        Value* endBlockIndex = b->CreateUDiv(end, SIZE_256);
     489        Value* endOffset = b->CreateZExt(b->CreateURem(end, SIZE_256), b->getIntNTy(fw));
     490
     491
     492        BasicBlock* appendDeletionMarkerCon = b->CreateBasicBlock("appendDeletionMarkerCon");
     493        BasicBlock* appendDeletionMarkerBody = b->CreateBasicBlock("appendDeletionMarkerBody");
     494        BasicBlock* appendDeletionMarkerExit = b->CreateBasicBlock("appendDeletionMarkerExit");
     495
                // Pending state carried over from earlier calls.
     496        Value* pendingDeletionMarkerIndex = b->getScalarField("pendingDeletionMarkerIndex");
     497        Value* pendingDeletionMarkerStartBits = b->getScalarField("pendingDeletionMarkerStartBits");
     498        Value* pendingDeletionMarkerEndBits = b->getScalarField("pendingDeletionMarkerEndBits");
     499        Value* pendingDeletionMarkerCarryBit = b->getScalarField("pendingDeletionMarkerCarryBit");
     500
     501        b->CreateBr(appendDeletionMarkerCon);
     502
     503        // ---- appendDeletionMarkerCon
                // Loop header: the PHI nodes hold the current word index, its start/end bits
                // and the incoming carry.
     504        b->SetInsertPoint(appendDeletionMarkerCon);
     505        PHINode* phiCurrentIndex = b->CreatePHI(b->getSizeTy(), 2);
     506        phiCurrentIndex->addIncoming(pendingDeletionMarkerIndex, entryBlock);
     507        PHINode* phiStartBits = b->CreatePHI(b->getIntNTy(fw), 2);
     508        phiStartBits->addIncoming(pendingDeletionMarkerStartBits, entryBlock);
     509        PHINode* phiEndBits = b->CreatePHI(b->getIntNTy(fw), 2);
     510        phiEndBits->addIncoming(pendingDeletionMarkerEndBits, entryBlock);
     511        PHINode* phiCarryBit = b->CreatePHI(b->getIntNTy(fw), 2);
     512        phiCarryBit->addIncoming(pendingDeletionMarkerCarryBit, entryBlock);
     513
     514
                // Keep storing words while the current word lies before the word containing `end`.
     515        b->CreateUnlikelyCondBr(b->CreateICmpULT(phiCurrentIndex, endBlockIndex), appendDeletionMarkerBody, appendDeletionMarkerExit);
     516        // ---- appendDeletionMarkerBody
     517        b->SetInsertPoint(appendDeletionMarkerBody);
                // The start bit is added only while visiting the word that contains `start`.
     518        Value* actualStartBits = b->CreateSelect(b->CreateICmpEQ(phiCurrentIndex, startBlockIndex), b->CreateOr(phiStartBits, b->CreateShl(INT256_1, startOffset)), phiStartBits);
     519        Value* outputValue = b->CreateSub(b->CreateSub(phiEndBits, actualStartBits), phiCarryBit);
                // Carry out (as 0 or 1) when startBits + carry exceeds endBits.
     520        Value* newCarryBit = b->CreateZExt(b->CreateICmpUGT(b->CreateAdd(actualStartBits, phiCarryBit), phiEndBits), b->getIntNTy(fw));
     521
     522        this->storeDeletionMarker(b, phiCurrentIndex, outputValue);
     523
                // Advance to the next word, which begins with no start/end bits.
     524        phiCurrentIndex->addIncoming(b->CreateAdd(phiCurrentIndex, SIZE_1), b->GetInsertBlock());
     525        phiStartBits->addIncoming(INT256_0, b->GetInsertBlock());
     526        phiEndBits->addIncoming(INT256_0, b->GetInsertBlock());
     527        phiCarryBit->addIncoming(newCarryBit, b->GetInsertBlock());
     528
     529        b->CreateBr(appendDeletionMarkerCon);
     530
     531        // ---- appendDeletionMarkerExit
     532        b->SetInsertPoint(appendDeletionMarkerExit);
                // Fold this call's start bit (if it falls in the current word) and end bit into
                // the pending word, then write the state back to the scalar fields. Nothing is
                // stored to the output stream for this word here.
     533        Value* finalStartBits = b->CreateSelect(b->CreateICmpEQ(phiCurrentIndex, startBlockIndex), b->CreateOr(phiStartBits, b->CreateShl(INT256_1, startOffset)), phiStartBits);
     534        Value* finalEndBits = b->CreateOr(phiEndBits, b->CreateShl(INT256_1, endOffset));
     535        b->setScalarField("pendingDeletionMarkerIndex", phiCurrentIndex);
     536        b->setScalarField("pendingDeletionMarkerStartBits", finalStartBits);
     537        b->setScalarField("pendingDeletionMarkerEndBits", finalEndBits);
     538        b->setScalarField("pendingDeletionMarkerCarryBit", phiCarryBit);
     539    }
     540
     // Emits IR that stores one 64-bit word into the "deletionMarker" output buffer.
     //   b          - kernel builder used to emit the instructions
     //   blockIndex - index of the 64-bit word; it is reduced modulo the number of
     //                64-bit words in the buffer before being used
     //   value      - word to store
     541    void
     542    LZ4IndexBuilderKernel::storeDeletionMarker(const std::unique_ptr<KernelBuilder> &b, llvm::Value *blockIndex,
     543                                               llvm::Value *value) {
     544        int fw = 64;
                // Number of fw-bit words in the "deletionMarker" buffer (despite the m0 prefix
                // in the variable name).
     545        Value* m0BufferBlocks = b->getSize(this->getOutputStreamSetBuffer("deletionMarker")->getBufferBlocks() * b->getBitBlockWidth() / fw);
     546        Value* indexRem = b->CreateURem(blockIndex, m0BufferBlocks);
     547
                // The raw output pointer is reinterpreted as a pointer to fw-bit integers so
                // it can be indexed by word.
     548        Value* outputBasePtr = b->CreatePointerCast(b->getRawOutputPointer("deletionMarker", b->getSize(0)), b->getIntNTy(fw)->getPointerTo());
     549        b->CreateStore(value, b->CreateGEP(outputBasePtr, indexRem));
     550    }
     551
     // Emits IR that stores the pending deletion-marker word: it computes
     // endBits - startBits - carryBit from the "pendingDeletionMarker*" scalar fields
     // and stores the result at "pendingDeletionMarkerIndex" via storeDeletionMarker.
     // The scalar fields themselves are not modified here.
     //   b - kernel builder used to emit the instructions
     552    void LZ4IndexBuilderKernel::storePendingDeletionMarker(const std::unique_ptr<KernelBuilder> &b) {
     553        Value* outputValue = b->CreateSub(
     554                b->CreateSub(
     555                        b->getScalarField("pendingDeletionMarkerEndBits"),
     556                        b->getScalarField("pendingDeletionMarkerStartBits")
     557                ),
     558                b->getScalarField("pendingDeletionMarkerCarryBit")
     559        );
     560        this->storeDeletionMarker(b, b->getScalarField("pendingDeletionMarkerIndex"), outputValue);
     561    }
     562
     // Emits IR that marks bit `position` in the pending 64-bit MatchOffsetMarker word,
     // first storing every pending word that lies before the word containing `position`
     // through storeMatchOffsetMarker.
     //   b        - kernel builder used to emit the instructions
     //   position - bit position to mark
     // The pending word and its index live in the scalar fields
     // "pendingMatchOffsetMarkerBits" and "pendingMarchOffsetMarkerIndex". The "March"
     // spelling is part of the field name and has to match the name under which the
     // scalar is declared.
     563    void LZ4IndexBuilderKernel::appendMatchOffsetMarkerOutput(const std::unique_ptr<KernelBuilder> &b,
     564                                                              llvm::Value *position) {
     565        // ---- Entry
     566        // Constant
                // Width in bits of one pending word. The *_256 names below are historical:
                // every one of these values is built from fw (64).
     567        int fw = 64;
     568        BasicBlock* entryBlock = b->GetInsertBlock();
     569        Value* SIZE_1 = b->getSize(1);
     570        Value* SIZE_256 = b->getSize(fw);
     571        Value* INT256_0 = b->getIntN(fw, 0);
     572        Value* INT256_1 = b->getIntN(fw, 1);
     573
                // Word index and bit offset of `position`.
     574        Value* endBlockIndex = b->CreateUDiv(position, SIZE_256);
     575        Value* endOffset = b->CreateZExt(b->CreateURem(position, SIZE_256), b->getIntNTy(fw));
     576
     577        BasicBlock* appendMatchOffsetMarkerCon = b->CreateBasicBlock("appendMatchOffsetMarkerCon");
     578        BasicBlock* appendMatchOffsetMarkerBody = b->CreateBasicBlock("appendMatchOffsetMarkerBody");
     579        BasicBlock* appendMatchOffsetMarkerExit = b->CreateBasicBlock("appendMatchOffsetMarkerExit");
     580
                // Pending state carried over from earlier calls.
     581        Value* pendingMatchOffsetMarkerIndex = b->getScalarField("pendingMarchOffsetMarkerIndex");
     582        Value* pendingMatchOffsetMarkerEndBits = b->getScalarField("pendingMatchOffsetMarkerBits");
     583
     584        b->CreateBr(appendMatchOffsetMarkerCon);
     585
     586        // ---- appendMatchOffsetMarkerCon
                // Loop header: the PHI nodes hold the current word index and its accumulated bits.
     587        b->SetInsertPoint(appendMatchOffsetMarkerCon);
     588        PHINode* phiCurrentIndex = b->CreatePHI(b->getSizeTy(), 2);
     589        phiCurrentIndex->addIncoming(pendingMatchOffsetMarkerIndex, entryBlock);
     590        PHINode* phiEndBits = b->CreatePHI(b->getIntNTy(fw), 2);
     591        phiEndBits->addIncoming(pendingMatchOffsetMarkerEndBits, entryBlock);
     592
                // Keep storing words while the current word lies before the word containing `position`.
     593        b->CreateUnlikelyCondBr(b->CreateICmpULT(phiCurrentIndex, endBlockIndex), appendMatchOffsetMarkerBody, appendMatchOffsetMarkerExit);
     594        // ---- appendMatchOffsetMarkerBody
     595        b->SetInsertPoint(appendMatchOffsetMarkerBody);
                // Store the current word, then advance to the next one, which starts empty.
     596        this->storeMatchOffsetMarker(b, phiCurrentIndex, phiEndBits);
     597        phiCurrentIndex->addIncoming(b->CreateAdd(phiCurrentIndex, SIZE_1), b->GetInsertBlock());
     598        phiEndBits->addIncoming(INT256_0, b->GetInsertBlock());
     599
     600        b->CreateBr(appendMatchOffsetMarkerCon);
     601
     602        // ---- appendMatchOffsetMarkerExit
     603        b->SetInsertPoint(appendMatchOffsetMarkerExit);
                // OR the new marker bit into the pending word and write the state back to the
                // scalar fields. Nothing is stored to the output stream for this word here.
     604        Value* finalEndBits = b->CreateOr(phiEndBits, b->CreateShl(INT256_1, endOffset));
     605        b->setScalarField("pendingMarchOffsetMarkerIndex", phiCurrentIndex);
     606        b->setScalarField("pendingMatchOffsetMarkerBits", finalEndBits);
     607    }
     608
     // Emits IR that stores one 64-bit word into the "MatchOffsetMarker" output buffer.
     //   b          - kernel builder used to emit the instructions
     //   blockIndex - index of the 64-bit word; it is reduced modulo the number of
     //                64-bit words in the buffer before being used
     //   value      - word to store
     609    void LZ4IndexBuilderKernel::storeMatchOffsetMarker(const std::unique_ptr<KernelBuilder> &b,
     610                                                       llvm::Value *blockIndex, llvm::Value *value) {
     611        int fw = 64;
                // Number of fw-bit words in the "MatchOffsetMarker" buffer (despite the m0
                // prefix in the variable name).
     612        Value* m0BufferBlocks = b->getSize(this->getOutputStreamSetBuffer("MatchOffsetMarker")->getBufferBlocks() * b->getBitBlockWidth() / fw);
     613        Value* indexRem = b->CreateURem(blockIndex, m0BufferBlocks);
     614
                // The raw output pointer is reinterpreted as a pointer to fw-bit integers so
                // it can be indexed by word.
     615        Value* outputBasePtr = b->CreatePointerCast(b->getRawOutputPointer("MatchOffsetMarker", b->getSize(0)), b->getIntNTy(fw)->getPointerTo());
     616        b->CreateStore(value, b->CreateGEP(outputBasePtr, indexRem));
     617    }
     618
     // Emits IR that stores the pending MatchOffsetMarker word: the value of
     // "pendingMatchOffsetMarkerBits" is stored at the word index held in
     // "pendingMarchOffsetMarkerIndex" via storeMatchOffsetMarker. The scalar fields
     // themselves are not modified here.
     //   b - kernel builder used to emit the instructions
     619    void LZ4IndexBuilderKernel::storePendingMatchOffsetMarker(const std::unique_ptr<KernelBuilder> &b) {
     620        this->storeMatchOffsetMarker(
     621                b,
     622                b->getScalarField("pendingMarchOffsetMarkerIndex"),
     623                b->getScalarField("pendingMatchOffsetMarkerBits")
     624        );
     625    }
    657626}
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_index_builder.h

    r6022 r6042  
    4949                     llvm::Value *blockEnd);
    5050
     51        // MatchOffset Marker Output
     52        void appendMatchOffsetMarkerOutput(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value *position);
     53        void storeMatchOffsetMarker(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value* blockIndex, llvm::Value* value);
     54        void storePendingMatchOffsetMarker(const std::unique_ptr<KernelBuilder> &iBuilder);
    5155
    52         void clearCircularOutputBitstream(const std::unique_ptr<KernelBuilder> &iBuilder,
    53                                                                  const std::string &bitstreamName,
    54                                                                  llvm::Value *start, llvm::Value *end);
     56        // Deletion Marker Output
     57        void appendDeletionMarkerOutput(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value *start, llvm::Value *end);
     58        void storeDeletionMarker(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value* blockIndex, llvm::Value* value);
     59        void storePendingDeletionMarker(const std::unique_ptr<KernelBuilder> &iBuilder);
    5560
    56         void setCircularOutputBitstream(const std::unique_ptr<KernelBuilder> &iBuilder,
    57                                                                const std::string &bitstreamName,
    58                                                                llvm::Value *start, llvm::Value *end);
    59 
    60         void markCircularOutputBitstream(const std::unique_ptr<KernelBuilder> &iBuilder, const std::string &bitstreamName, llvm::Value *pos);
    61 
    62 
     61        // M0 Output
    6362        void appendM0Output(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value *start, llvm::Value *end);
    6463        void storeM0(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value* blockIndex, llvm::Value* value);
    6564        void storePendingM0(const std::unique_ptr<KernelBuilder> &iBuilder);
    66 
    6765    };
    6866}
Note: See TracChangeset for help on using the changeset viewer.