Ignore:
Timestamp:
Mar 22, 2018, 2:49:54 AM (19 months ago)
Author:
xwa163
Message:

Fix lz4 related GEP instructions and TODO

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_numbers_to_bitstream_kernel.cpp

    r5895 r5926  
    2020namespace kernel {
    2121
     22    Value* LZ4NumbersToBitstreamKernel::loadInt64NumberInput(const unique_ptr<KernelBuilder> &iBuilder, string bufferName, Value* offset) {
     23        // GEP here is safe
     24        Value* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
     25        Value* inputLocalBlockIndex = iBuilder->CreateUDiv(offset, SIZE_BIT_BLOCK_WIDTH);
     26        Value* inputLocalBlockOffset = iBuilder->CreateURem(offset, SIZE_BIT_BLOCK_WIDTH);
     27
     28        Value* blockBasePtr = iBuilder->getInputStreamBlockPtr(bufferName, iBuilder->getSize(0), inputLocalBlockIndex);
     29        blockBasePtr = iBuilder->CreatePointerCast(blockBasePtr, iBuilder->getInt64Ty()->getPointerTo());
     30        // GEP here is safe
     31        return iBuilder->CreateLoad(iBuilder->CreateGEP(blockBasePtr, inputLocalBlockOffset));
     32    }
     33
    2234    void LZ4NumbersToBitstreamKernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> &iBuilder,
    2335                                                               llvm::Value *const numOfStrides) {
    24 
    25 //        iBuilder->CallPrintInt("======Entry", iBuilder->getSize(0));
    26 //        iBuilder->CallPrintInt("mIsFinal", mIsFinal);
    27 //        iBuilder->CallPrintInt("numOfStrides", numOfStrides);
    28 
    2936        // Const
    3037        Constant *SIZE_ZERO = iBuilder->getSize(0);
     
    3239        Constant *INT64_ZERO = iBuilder->getInt64(0);
    3340        Constant *INT64_ONE = iBuilder->getInt64(1);
    34         Constant *BIT_BLOCK_ZERO = llvm::ConstantVector::get(
    35                 {INT64_ZERO, INT64_ZERO, INT64_ZERO, INT64_ZERO}); // TODO Assumed bit block type is always <4 * i64>
     41
    3642        unsigned int BIT_BLOCK_WIDTH = iBuilder->getBitBlockWidth();
     43        Type * const INT_BIT_BLOCK_TY = iBuilder->getIntNTy(BIT_BLOCK_WIDTH);
    3744        Constant *SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(BIT_BLOCK_WIDTH);
     45        Constant* INT_BIT_BLOCK_ZERO = ConstantInt::get(INT_BIT_BLOCK_TY, 0);
     46        Value* BIT_BLOCK_ZERO = iBuilder->CreateBitCast(INT_BIT_BLOCK_ZERO, iBuilder->getBitBlockType());
    3847
    3948
     
    6271                                                                  SIZE_BIT_BLOCK_WIDTH); // always produce full block except for final block
    6372
    64 
    65 //        Value *initCurrentItemIndex = iBuilder->CreateSelect(
    66 //                isFinalBlock,
    67 //                SIZE_ZERO,
    68 //                iBuilder->CreateURem(itemProcessed, SIZE_BIT_BLOCK_WIDTH)
    69 //        );
    70 
    7173        Value *initCurrentItemIndex = iBuilder->CreateURem(itemProcessed, SIZE_BIT_BLOCK_WIDTH);
    7274
    7375        Value *initOutputIndex = SIZE_ZERO;
    7476
    75 //        Value *availableOutputBlocks = iBuilder->CreateSelect(mIsFinal, iBuilder->getSize(32), numOfStrides); //TODO workaround here
    76 //        Value *availableOutputBlocks = numOfStrides;
    77 //        Value *availableOutputBlocks = remainSpace;
     77
    7878        Value *availableOutputBlocks = iBuilder->CreateUMin(remainSpace, numOfStrides);
    7979
    80         // TODO handle input pointer
    81         Value *inputStartBasePtr = iBuilder->getInputStreamBlockPtr(START_NUM_STREAM_NAME, SIZE_ZERO);
    82         inputStartBasePtr = iBuilder->CreatePointerCast(inputStartBasePtr, iBuilder->getInt64Ty()->getPointerTo());
    83         Value *inputEndBasePtr = iBuilder->getInputStreamBlockPtr(END_NUM_STREAM_NAME, SIZE_ZERO);
    84         inputEndBasePtr = iBuilder->CreatePointerCast(inputEndBasePtr, iBuilder->getInt64Ty()->getPointerTo());
     80//        Value *inputStartBasePtr = iBuilder->getInputStreamBlockPtr(START_NUM_STREAM_NAME, SIZE_ZERO);
     81//        inputStartBasePtr = iBuilder->CreatePointerCast(inputStartBasePtr, iBuilder->getInt64Ty()->getPointerTo());
     82//        Value *inputEndBasePtr = iBuilder->getInputStreamBlockPtr(END_NUM_STREAM_NAME, SIZE_ZERO);
     83//        inputEndBasePtr = iBuilder->CreatePointerCast(inputEndBasePtr, iBuilder->getInt64Ty()->getPointerTo());
    8584        Value *outputBasePtr = iBuilder->getOutputStreamBlockPtr(OUTPUT_BIT_STREAM_NAME, SIZE_ZERO);
    8685        Value *initCarryBit = iBuilder->getScalarField("carryBit");
     
    116115        phiCarryBit->addIncoming(initCarryBit, entryBlock);
    117116
    118 
    119         // TODO It is possible that in final block, not all items have been processed, while the output buffer is not enough. This situation need to be verified later
    120         // phiCurrentItemIndex < itemsToDo && currentOutputIndex < availableOutputBlocks
    121 //        iBuilder->CallPrintInt("phiCurrentItemIndex", phiCurrentItemIndex);
    122 //        iBuilder->CallPrintInt("aaa", iBuilder->CreateAdd(itemsToDo, initCurrentItemIndex));
    123117        iBuilder->CreateCondBr(
    124118                iBuilder->CreateAnd(
    125119                        iBuilder->CreateICmpULT(phiCurrentItemIndex, iBuilder->CreateAdd(itemsToDo,
    126                                                                                          initCurrentItemIndex)), //TODO should not be itemsToDo here, may be itemsToDo + initCurrentItemIndex
     120                                                                                         initCurrentItemIndex)),
    127121                        iBuilder->CreateICmpULT(phiCurrentOutputIndex, availableOutputBlocks)
    128122                ),
     
    135129
    136130        Value *currentOutputGlobalIndex = iBuilder->CreateAdd(phiCurrentOutputIndex, oldProducedOutputBlockIndex);
    137 
    138131        // StartBits
    139         Value *currentStartPos = iBuilder->CreateLoad(iBuilder->CreateGEP(inputStartBasePtr, phiCurrentItemIndex));
     132        Value *currentStartPos = this->loadInt64NumberInput(iBuilder, START_NUM_STREAM_NAME, phiCurrentItemIndex);
    140133        Value *currentStartGlobalBlockIndex = iBuilder->CreateUDiv(currentStartPos, SIZE_BIT_BLOCK_WIDTH);
    141 //        Value *currentStartLocalBlockIndex = iBuilder->CreateSub(currentStartGlobalBlockIndex,
    142 //                                                                 oldProducedOutputBlockIndex);
    143 //        iBuilder->CallPrintInt("currentStartLocalBlockIndex", currentStartLocalBlockIndex); //TODO overflow here
    144 
    145134
    146135        Value *currentStartLocalBlockOffset = iBuilder->CreateURem(currentStartPos,
     
    151140                                                            iBuilder->CreateICmpEQ(currentStartGlobalBlockIndex,
    152141                                                                                   currentOutputGlobalIndex));
    153 //        iBuilder->CallPrintRegister("phiCurrentBlockStartData", phiCurrentBlockStartData);
    154 //        iBuilder->CallPrintRegister("newBlockStartData", newBlockStartData);
    155 //        iBuilder->CallPrintInt("currentStartPos", currentStartPos);
    156 //        iBuilder->CallPrintInt("----", SIZE_ZERO);
    157 
    158142
    159143        // EndBits
    160         Value *currentEndPos = iBuilder->CreateLoad(iBuilder->CreateGEP(inputEndBasePtr, phiCurrentItemIndex));
     144        Value *currentEndPos = this->loadInt64NumberInput(iBuilder, END_NUM_STREAM_NAME, phiCurrentItemIndex);
    161145        Value *currentEndGlobalBlockIndex = iBuilder->CreateUDiv(currentEndPos, SIZE_BIT_BLOCK_WIDTH);
    162 //        Value *currentEndLocalBlockIndex = iBuilder->CreateSub(currentEndGlobalBlockIndex, oldProducedOutputBlockIndex);
    163146
    164147        Value *currentEndLocalBlockOffset = iBuilder->CreateURem(currentEndPos,
     
    169152                                                          iBuilder->CreateICmpEQ(currentEndGlobalBlockIndex,
    170153                                                                                 currentOutputGlobalIndex));
    171 //            iBuilder->CallPrintInt("%%%currentEndPos", currentEndPos);
    172 //            iBuilder->CallPrintRegister("%%%newBlockEndData", newBlockEndData);
    173 //        iBuilder->CallPrintInt("currentEndPos", currentEndPos);
    174154
    175155        Value *enterNewOutputBlock = iBuilder->CreateOr(
     
    185165        // Avoid branch mis-prediction by always storing output block
    186166        Value *outputData = iBuilder->simd_sub(BIT_BLOCK_WIDTH, newBlockEndData, newBlockStartWithCarry);
    187 //        iBuilder->CallPrintInt("----store", iBuilder->getSize(0));
    188 //        iBuilder->CallPrintInt("carry", phiCarryBit);
    189 //        iBuilder->CallPrintRegister("newBlockEndData", newBlockEndData);
    190 //        iBuilder->CallPrintRegister("newBlockStartWithCarry", newBlockStartWithCarry);
    191 //        iBuilder->CallPrintInt("----outputPtr", iBuilder->CreateGEP(outputBasePtr, phiCurrentOutputIndex));
    192 //        iBuilder->CallPrintRegister("outputData", outputData);
    193         iBuilder->CreateBlockAlignedStore(outputData, iBuilder->CreateGEP(outputBasePtr, phiCurrentOutputIndex));
     167
     168        iBuilder->CreateBlockAlignedStore(outputData, iBuilder->getOutputStreamBlockPtr(OUTPUT_BIT_STREAM_NAME, SIZE_ZERO, phiCurrentOutputIndex));
    194169
    195170        // Handle PHINodes
     
    263238                iBuilder->simd_add(BIT_BLOCK_WIDTH, phiCurrentBlockStartData, carryBitIntVec)
    264239        );
    265 //        iBuilder->CallPrintRegister("%%%phiCurrentBlockEndData", phiCurrentBlockEndData);
    266 //            iBuilder->CallPrintInt("----outputPtrFinal", iBuilder->CreateGEP(outputBasePtr, phiCurrentOutputIndex));
    267240
    268241        BasicBlock *storeFinalBlock = iBuilder->CreateBasicBlock("storeFinalBlock");
     
    273246
    274247//        iBuilder->CallPrintRegister("finalOutputData", finalOutputData);
    275         iBuilder->CreateBlockAlignedStore(finalOutputData, iBuilder->CreateGEP(outputBasePtr,
    276                                                                    phiCurrentOutputIndex)); //Possible overflow here if this store always happen
     248        iBuilder->CreateBlockAlignedStore(finalOutputData, iBuilder->getOutputStreamBlockPtr(OUTPUT_BIT_STREAM_NAME, SIZE_ZERO, phiCurrentOutputIndex)); //Possible overflow here if this store always happen
    277249        iBuilder->CreateBr(storeFinalBlockEnd);
    278250        iBuilder->SetInsertPoint(storeFinalBlockEnd);
     
    287259        iBuilder->setProcessedItemCount(END_NUM_STREAM_NAME, newProcessedItemCount);
    288260
    289         Value *lastEndPos = iBuilder->CreateLoad(
    290                 iBuilder->CreateGEP(inputEndBasePtr, iBuilder->CreateSub(phiCurrentItemIndex, SIZE_ONE)));
    291 //        iBuilder->CallPrintInt("lastEndPos", lastEndPos);
     261        Value *lastEndPos = this->loadInt64NumberInput(iBuilder, END_NUM_STREAM_NAME, iBuilder->CreateSub(phiCurrentItemIndex, SIZE_ONE));
    292262
    293263        iBuilder->setProducedItemCount(OUTPUT_BIT_STREAM_NAME,
     
    312282    /*
    313283     * iBuilder: kernel builder
    314      * intVec: BitBlockType, <4 * i64>
    315      * pos: size_t, 0 - 256, position of bit 1
     284     * intVec: BitBlockType
     285     * pos: size_t, 0 to bitBlockWidth - 1, index of the bit to set to 1
    316286     * isSet: i1; when isSet is true the bit at pos is set to 1, otherwise this function does nothing
    317287     * */
    318288    Value *LZ4NumbersToBitstreamKernel::setIntVectorBitOne(const std::unique_ptr<KernelBuilder> &iBuilder,
    319289                                                            llvm::Value *intVec, llvm::Value *pos, llvm::Value *isSet) {
    320         Value *SIZE_64 = iBuilder->getSize(64); //TODO assume bit block type will always be <4 * i64>
    321         Value *blockIndex = iBuilder->CreateUDiv(pos, SIZE_64);
    322         Value *blockOffset = iBuilder->CreateURem(pos, SIZE_64);
    323 
    324         Value *oldValue = iBuilder->CreateExtractElement(intVec, blockIndex);
    325         // Use select to avoid branch misprediction
    326         Value *bitOneValue = iBuilder->CreateShl(
    327                 iBuilder->CreateSelect(isSet, iBuilder->getInt64(1), iBuilder->getInt64(0)),
    328                 blockOffset
    329         );
    330         Value *newValue = iBuilder->CreateOr(oldValue, bitOneValue);
    331         return iBuilder->CreateInsertElement(intVec, newValue, blockIndex);
     290        Type* BIT_BLOCK_TYPE = iBuilder->getBitBlockType();
     291        Type* BIT_BLOCK_WIDTH_INT_TYPE = iBuilder->getIntNTy(iBuilder->getBitBlockWidth());
     292
     293        Value* sourceInt = iBuilder->CreateBitCast(intVec, BIT_BLOCK_WIDTH_INT_TYPE);
     294        Value *oneBit = iBuilder->CreateShl(
     295                iBuilder->CreateSelect(isSet, ConstantInt::get(BIT_BLOCK_WIDTH_INT_TYPE, 1),
     296                                       ConstantInt::get(BIT_BLOCK_WIDTH_INT_TYPE, 0)),
     297                iBuilder->CreateZExt(pos, BIT_BLOCK_WIDTH_INT_TYPE)
     298        );
     299        return iBuilder->CreateBitCast(iBuilder->CreateOr(sourceInt, oneBit), BIT_BLOCK_TYPE);
    332300    }
    333301
Note: See TracChangeset for help on using the changeset viewer.