Ignore:
Timestamp:
Mar 22, 2018, 2:49:54 AM (18 months ago)
Author:
xwa163
Message:

Fix lz4 related GEP instructions and TODO

Location:
icGREP/icgrep-devel/icgrep
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/deletion.cpp

    r5853 r5926  
    111111    for (unsigned i = 0; i < mSwizzleSetCount; i++) {
    112112        Value * pendingData = iBuilder->getScalarField("pendingSwizzleData" + std::to_string(i));
    113         Value * outputStreamPtr = iBuilder->getOutputStreamBlockPtr("outputSwizzle" + std::to_string(i), iBuilder->getInt32(0));
    114         iBuilder->CreateBlockAlignedStore(pendingData, iBuilder->CreateGEP(outputStreamPtr, outputIndex));
     113        Value * outputStreamPtr = iBuilder->getOutputStreamBlockPtr("outputSwizzle" + std::to_string(i), iBuilder->getInt32(0), outputIndex);
     114        iBuilder->CreateBlockAlignedStore(pendingData, outputStreamPtr);
    115115    }
    116116    iBuilder->setProducedItemCount("outputSwizzle0", iBuilder->CreateAdd(pendingOffset, outputProduced));
     
    185185    // There is a separate vector of pending data for each swizzle group.
    186186    std::vector<Value *> pendingData;
    187     std::vector<Value *> outputStreamPtr;
    188187
    189188    for (unsigned i = 0; i < mSwizzleSetCount; i++) {
    190189        pendingData.push_back(iBuilder->getScalarField("pendingSwizzleData" + std::to_string(i)));
    191         outputStreamPtr.push_back(iBuilder->getOutputStreamBlockPtr("outputSwizzle" + std::to_string(i), iBuilder->getInt32(0)));
    192190    }
    193191
     
    210208                pendingOffset)));
    211209            //iBuilder->CallPrintRegister("ComBineDGROUP", combinedGroup);
    212             // To avoid an unpredictable branch, always store the combined group, whether full or not.             
    213             iBuilder->CreateBlockAlignedStore(combinedGroup, iBuilder->CreateGEP(outputStreamPtr[j], outputIndex));
     210            // To avoid an unpredictable branch, always store the combined group, whether full or not.
     211            iBuilder->CreateBlockAlignedStore(combinedGroup, iBuilder->getOutputStreamBlockPtr("outputSwizzle" + std::to_string(j), outputIndex));
    214212           
    215213            // Any items in excess of the space available in the current pending group overflow for the next group.
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_index_builder.cpp

    r5923 r5926  
    413413
    414414    Value * LZ4IndexBuilderKernel::generateLoadInt64NumberInput(const unique_ptr<KernelBuilder> &iBuilder, string inputBufferName, Value *globalOffset) {
     415        // Stride Size here is Constant 1 instead of BitBlockWidth
    415416        Constant* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
    416417        Constant* SIZE_ZERO = iBuilder->getSize(0);
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_multiple_pdep_kernel.cpp

    r5885 r5926  
    4848        Value * sourceItemsAvail = mAvailableItemCount[1]; //TODO need to be calculated from numOfStrides
    4949
    50         Value * PDEPStrmPtr = kb->getInputStreamBlockPtr("PDEPmarkerStream", kb->getInt32(0)); // mStreamBufferPtr[0];
    51 
    52 
    53         std::vector<Value*> inputSwizzlesPtrs = std::vector<Value*>(mStreamSize, NULL);
    54         std::vector<Value*> outputStreamPtrs = std::vector<Value*>(mStreamSize, NULL);
    55         for (int i = 0; i < mStreamSize; i++) {
    56             inputSwizzlesPtrs[i] = kb->getInputStreamBlockPtr("sourceStreamSet" + std::to_string(i), kb->getInt32(0));
    57 //            kb->CallPrintInt("@@inputSwizzlesPtrs_" + std::to_string(i), inputSwizzlesPtrs[i]);
    58             // Get pointer to start of the output StreamSetBlock we're currently writing to
    59             outputStreamPtrs[i] = kb->getOutputStreamBlockPtr("outputStreamSet" + std::to_string(i), kb->getInt32(0));
    60         }
    61 
    6250        Constant * blockWidth = kb->getSize(kb->getBitBlockWidth());
    6351        Value * blocksToDo = kb->CreateSelect(mIsFinal, kb->CreateUDivCeil(itemsToDo, blockWidth), kb->CreateUDiv(itemsToDo, blockWidth));
     
    9381        Value * updatedProcessedSourceBits = updatedProcessedSourceBitsPhi;
    9482        Value * updatedSourceItems = sourceItemsRemaining;
    95         Value * PDEP_ms_blk = kb->CreateBlockAlignedLoad(kb->CreateGEP(PDEPStrmPtr, blockOffsetPhi));
     83        Value * PDEP_ms_blk = kb->CreateBlockAlignedLoad(kb->getInputStreamBlockPtr("PDEPmarkerStream", kb->getInt32(0), blockOffsetPhi));
    9684
    9785        const auto PDEP_masks = get_PDEP_masks(kb, PDEP_ms_blk, mPDEPWidth);
     
    124112
    125113                // Load current and next BitBlocks/swizzles
    126                 Value * current_swizzle_ptr = kb->CreateGEP(inputSwizzlesPtrs[iStreamIndex], kb->CreateAdd(kb->CreateMul(current_blk_idx, kb->getSize(mSwizzleFactor)), current_swizzle_idx));
     114                Value* current_swizzle_ptr = kb->getInputStreamBlockPtr("sourceStreamSet" + std::to_string(iStreamIndex), current_swizzle_idx, current_blk_idx);
     115
    127116                Value * current_swizzle = kb->CreateBlockAlignedLoad(current_swizzle_ptr);//Constant::getNullValue(cast<PointerType>(current_swizzle_ptr->getType())->getElementType());
    128117
    129                 Value * next_swizzle_ptr = kb->CreateGEP(inputSwizzlesPtrs[iStreamIndex], kb->CreateAdd(kb->CreateMul(next_blk_idx, kb->getSize(mSwizzleFactor)), next_swizzle_idx));
     118
     119                Value* next_swizzle_ptr = kb->getInputStreamBlockPtr("sourceStreamSet" + std::to_string(iStreamIndex), next_swizzle_idx, next_blk_idx);
    130120                Value * next_swizzle = kb->CreateBlockAlignedLoad(next_swizzle_ptr);//Constant::getNullValue(cast<PointerType>(current_swizzle_ptr->getType())->getElementType());
    131121
     
    161151
    162152                // Store the result
    163                 auto outputPos = kb->CreateGEP(outputStreamPtrs[iStreamIndex], kb->CreateAdd(kb->CreateMul(blockOffsetPhi, kb->getSize(mSwizzleFactor)), kb->getSize(i)));
    164 //                if (iStreamIndex == 0) {
    165 //                    kb->CallPrintInt("dataPtr_" + std::to_string(iStreamIndex) + "_" + std::to_string(i), outputPos);
    166 //                    kb->CallPrintRegister("data_" + std::to_string(iStreamIndex) + "_" + std::to_string(i), result_swizzle);
    167 //                }
     153                Value* outputPos = kb->getOutputStreamBlockPtr("outputStreamSet" + std::to_string(iStreamIndex), kb->getSize(i), blockOffsetPhi);
    168154
    169155                kb->CreateBlockAlignedStore(result_swizzle, outputPos);
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_numbers_to_bitstream_kernel.cpp

    r5895 r5926  
    2020namespace kernel {
    2121
     22    Value* LZ4NumbersToBitstreamKernel::loadInt64NumberInput(const unique_ptr<KernelBuilder> &iBuilder, string bufferName, Value* offset) {
     23        // GEP here is safe
     24        Value* SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(iBuilder->getBitBlockWidth());
     25        Value* inputLocalBlockIndex = iBuilder->CreateUDiv(offset, SIZE_BIT_BLOCK_WIDTH);
     26        Value* inputLocalBlockOffset = iBuilder->CreateURem(offset, SIZE_BIT_BLOCK_WIDTH);
     27
     28        Value* blockBasePtr = iBuilder->getInputStreamBlockPtr(bufferName, iBuilder->getSize(0), inputLocalBlockIndex);
     29        blockBasePtr = iBuilder->CreatePointerCast(blockBasePtr, iBuilder->getInt64Ty()->getPointerTo());
     30        // GEP here is safe
     31        return iBuilder->CreateLoad(iBuilder->CreateGEP(blockBasePtr, inputLocalBlockOffset));
     32    }
     33
    2234    void LZ4NumbersToBitstreamKernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> &iBuilder,
    2335                                                               llvm::Value *const numOfStrides) {
    24 
    25 //        iBuilder->CallPrintInt("======Entry", iBuilder->getSize(0));
    26 //        iBuilder->CallPrintInt("mIsFinal", mIsFinal);
    27 //        iBuilder->CallPrintInt("numOfStrides", numOfStrides);
    28 
    2936        // Const
    3037        Constant *SIZE_ZERO = iBuilder->getSize(0);
     
    3239        Constant *INT64_ZERO = iBuilder->getInt64(0);
    3340        Constant *INT64_ONE = iBuilder->getInt64(1);
    34         Constant *BIT_BLOCK_ZERO = llvm::ConstantVector::get(
    35                 {INT64_ZERO, INT64_ZERO, INT64_ZERO, INT64_ZERO}); // TODO Assumed bit block type is always <4 * i64>
     41
    3642        unsigned int BIT_BLOCK_WIDTH = iBuilder->getBitBlockWidth();
     43        Type * const INT_BIT_BLOCK_TY = iBuilder->getIntNTy(BIT_BLOCK_WIDTH);
    3744        Constant *SIZE_BIT_BLOCK_WIDTH = iBuilder->getSize(BIT_BLOCK_WIDTH);
     45        Constant* INT_BIT_BLOCK_ZERO = ConstantInt::get(INT_BIT_BLOCK_TY, 0);
     46        Value* BIT_BLOCK_ZERO = iBuilder->CreateBitCast(INT_BIT_BLOCK_ZERO, iBuilder->getBitBlockType());
    3847
    3948
     
    6271                                                                  SIZE_BIT_BLOCK_WIDTH); // always produce full block except for final block
    6372
    64 
    65 //        Value *initCurrentItemIndex = iBuilder->CreateSelect(
    66 //                isFinalBlock,
    67 //                SIZE_ZERO,
    68 //                iBuilder->CreateURem(itemProcessed, SIZE_BIT_BLOCK_WIDTH)
    69 //        );
    70 
    7173        Value *initCurrentItemIndex = iBuilder->CreateURem(itemProcessed, SIZE_BIT_BLOCK_WIDTH);
    7274
    7375        Value *initOutputIndex = SIZE_ZERO;
    7476
    75 //        Value *availableOutputBlocks = iBuilder->CreateSelect(mIsFinal, iBuilder->getSize(32), numOfStrides); //TODO workaround here
    76 //        Value *availableOutputBlocks = numOfStrides;
    77 //        Value *availableOutputBlocks = remainSpace;
     77
    7878        Value *availableOutputBlocks = iBuilder->CreateUMin(remainSpace, numOfStrides);
    7979
    80         // TODO handle input pointer
    81         Value *inputStartBasePtr = iBuilder->getInputStreamBlockPtr(START_NUM_STREAM_NAME, SIZE_ZERO);
    82         inputStartBasePtr = iBuilder->CreatePointerCast(inputStartBasePtr, iBuilder->getInt64Ty()->getPointerTo());
    83         Value *inputEndBasePtr = iBuilder->getInputStreamBlockPtr(END_NUM_STREAM_NAME, SIZE_ZERO);
    84         inputEndBasePtr = iBuilder->CreatePointerCast(inputEndBasePtr, iBuilder->getInt64Ty()->getPointerTo());
     80//        Value *inputStartBasePtr = iBuilder->getInputStreamBlockPtr(START_NUM_STREAM_NAME, SIZE_ZERO);
     81//        inputStartBasePtr = iBuilder->CreatePointerCast(inputStartBasePtr, iBuilder->getInt64Ty()->getPointerTo());
     82//        Value *inputEndBasePtr = iBuilder->getInputStreamBlockPtr(END_NUM_STREAM_NAME, SIZE_ZERO);
     83//        inputEndBasePtr = iBuilder->CreatePointerCast(inputEndBasePtr, iBuilder->getInt64Ty()->getPointerTo());
    8584        Value *outputBasePtr = iBuilder->getOutputStreamBlockPtr(OUTPUT_BIT_STREAM_NAME, SIZE_ZERO);
    8685        Value *initCarryBit = iBuilder->getScalarField("carryBit");
     
    116115        phiCarryBit->addIncoming(initCarryBit, entryBlock);
    117116
    118 
    119         // TODO It is possible that in final block, not all items have been processed, while the output buffer is not enough. This situation need to be verified later
    120         // phiCurrentItemIndex < itemsToDo && currentOutputIndex < availableOutputBlocks
    121 //        iBuilder->CallPrintInt("phiCurrentItemIndex", phiCurrentItemIndex);
    122 //        iBuilder->CallPrintInt("aaa", iBuilder->CreateAdd(itemsToDo, initCurrentItemIndex));
    123117        iBuilder->CreateCondBr(
    124118                iBuilder->CreateAnd(
    125119                        iBuilder->CreateICmpULT(phiCurrentItemIndex, iBuilder->CreateAdd(itemsToDo,
    126                                                                                          initCurrentItemIndex)), //TODO should not be itemsToDo here, may be itemsToDo + initCurrentItemIndex
     120                                                                                         initCurrentItemIndex)),
    127121                        iBuilder->CreateICmpULT(phiCurrentOutputIndex, availableOutputBlocks)
    128122                ),
     
    135129
    136130        Value *currentOutputGlobalIndex = iBuilder->CreateAdd(phiCurrentOutputIndex, oldProducedOutputBlockIndex);
    137 
    138131        // StartBits
    139         Value *currentStartPos = iBuilder->CreateLoad(iBuilder->CreateGEP(inputStartBasePtr, phiCurrentItemIndex));
     132        Value *currentStartPos = this->loadInt64NumberInput(iBuilder, START_NUM_STREAM_NAME, phiCurrentItemIndex);
    140133        Value *currentStartGlobalBlockIndex = iBuilder->CreateUDiv(currentStartPos, SIZE_BIT_BLOCK_WIDTH);
    141 //        Value *currentStartLocalBlockIndex = iBuilder->CreateSub(currentStartGlobalBlockIndex,
    142 //                                                                 oldProducedOutputBlockIndex);
    143 //        iBuilder->CallPrintInt("currentStartLocalBlockIndex", currentStartLocalBlockIndex); //TODO overflow here
    144 
    145134
    146135        Value *currentStartLocalBlockOffset = iBuilder->CreateURem(currentStartPos,
     
    151140                                                            iBuilder->CreateICmpEQ(currentStartGlobalBlockIndex,
    152141                                                                                   currentOutputGlobalIndex));
    153 //        iBuilder->CallPrintRegister("phiCurrentBlockStartData", phiCurrentBlockStartData);
    154 //        iBuilder->CallPrintRegister("newBlockStartData", newBlockStartData);
    155 //        iBuilder->CallPrintInt("currentStartPos", currentStartPos);
    156 //        iBuilder->CallPrintInt("----", SIZE_ZERO);
    157 
    158142
    159143        // EndBits
    160         Value *currentEndPos = iBuilder->CreateLoad(iBuilder->CreateGEP(inputEndBasePtr, phiCurrentItemIndex));
     144        Value *currentEndPos = this->loadInt64NumberInput(iBuilder, END_NUM_STREAM_NAME, phiCurrentItemIndex);
    161145        Value *currentEndGlobalBlockIndex = iBuilder->CreateUDiv(currentEndPos, SIZE_BIT_BLOCK_WIDTH);
    162 //        Value *currentEndLocalBlockIndex = iBuilder->CreateSub(currentEndGlobalBlockIndex, oldProducedOutputBlockIndex);
    163146
    164147        Value *currentEndLocalBlockOffset = iBuilder->CreateURem(currentEndPos,
     
    169152                                                          iBuilder->CreateICmpEQ(currentEndGlobalBlockIndex,
    170153                                                                                 currentOutputGlobalIndex));
    171 //            iBuilder->CallPrintInt("%%%currentEndPos", currentEndPos);
    172 //            iBuilder->CallPrintRegister("%%%newBlockEndData", newBlockEndData);
    173 //        iBuilder->CallPrintInt("currentEndPos", currentEndPos);
    174154
    175155        Value *enterNewOutputBlock = iBuilder->CreateOr(
     
    185165        // Avoid branch mis-prediction by always storing output block
    186166        Value *outputData = iBuilder->simd_sub(BIT_BLOCK_WIDTH, newBlockEndData, newBlockStartWithCarry);
    187 //        iBuilder->CallPrintInt("----store", iBuilder->getSize(0));
    188 //        iBuilder->CallPrintInt("carry", phiCarryBit);
    189 //        iBuilder->CallPrintRegister("newBlockEndData", newBlockEndData);
    190 //        iBuilder->CallPrintRegister("newBlockStartWithCarry", newBlockStartWithCarry);
    191 //        iBuilder->CallPrintInt("----outputPtr", iBuilder->CreateGEP(outputBasePtr, phiCurrentOutputIndex));
    192 //        iBuilder->CallPrintRegister("outputData", outputData);
    193         iBuilder->CreateBlockAlignedStore(outputData, iBuilder->CreateGEP(outputBasePtr, phiCurrentOutputIndex));
     167
     168        iBuilder->CreateBlockAlignedStore(outputData, iBuilder->getOutputStreamBlockPtr(OUTPUT_BIT_STREAM_NAME, SIZE_ZERO, phiCurrentOutputIndex));
    194169
    195170        // Handle PHINodes
     
    263238                iBuilder->simd_add(BIT_BLOCK_WIDTH, phiCurrentBlockStartData, carryBitIntVec)
    264239        );
    265 //        iBuilder->CallPrintRegister("%%%phiCurrentBlockEndData", phiCurrentBlockEndData);
    266 //            iBuilder->CallPrintInt("----outputPtrFinal", iBuilder->CreateGEP(outputBasePtr, phiCurrentOutputIndex));
    267240
    268241        BasicBlock *storeFinalBlock = iBuilder->CreateBasicBlock("storeFinalBlock");
     
    273246
    274247//        iBuilder->CallPrintRegister("finalOutputData", finalOutputData);
    275         iBuilder->CreateBlockAlignedStore(finalOutputData, iBuilder->CreateGEP(outputBasePtr,
    276                                                                    phiCurrentOutputIndex)); //Possible overflow here if this store always happen
     248        iBuilder->CreateBlockAlignedStore(finalOutputData, iBuilder->getOutputStreamBlockPtr(OUTPUT_BIT_STREAM_NAME, SIZE_ZERO, phiCurrentOutputIndex)); //Possible overflow here if this store always happen
    277249        iBuilder->CreateBr(storeFinalBlockEnd);
    278250        iBuilder->SetInsertPoint(storeFinalBlockEnd);
     
    287259        iBuilder->setProcessedItemCount(END_NUM_STREAM_NAME, newProcessedItemCount);
    288260
    289         Value *lastEndPos = iBuilder->CreateLoad(
    290                 iBuilder->CreateGEP(inputEndBasePtr, iBuilder->CreateSub(phiCurrentItemIndex, SIZE_ONE)));
    291 //        iBuilder->CallPrintInt("lastEndPos", lastEndPos);
     261        Value *lastEndPos = this->loadInt64NumberInput(iBuilder, END_NUM_STREAM_NAME, iBuilder->CreateSub(phiCurrentItemIndex, SIZE_ONE));
    292262
    293263        iBuilder->setProducedItemCount(OUTPUT_BIT_STREAM_NAME,
     
    312282    /*
    313283     * iBuilder: kernel builder
    314      * intVec: BitBlockType, <4 * i64>
    315      * pos: size_t, 0 - 256, position of bit 1
     284     * intVec: BitBlockType
     285     * pos: size_t, 0 - bitBlockWidth, position of bit 1
    316286     * isSet: i1, when isSet == true, bit 1 will be set, otherwise this function does nothing
    317287     * */
    318288    Value *LZ4NumbersToBitstreamKernel::setIntVectorBitOne(const std::unique_ptr<KernelBuilder> &iBuilder,
    319289                                                            llvm::Value *intVec, llvm::Value *pos, llvm::Value *isSet) {
    320         Value *SIZE_64 = iBuilder->getSize(64); //TODO assume bit block type will always be <4 * i64>
    321         Value *blockIndex = iBuilder->CreateUDiv(pos, SIZE_64);
    322         Value *blockOffset = iBuilder->CreateURem(pos, SIZE_64);
    323 
    324         Value *oldValue = iBuilder->CreateExtractElement(intVec, blockIndex);
    325         // Use select to avoid branch misprediction
    326         Value *bitOneValue = iBuilder->CreateShl(
    327                 iBuilder->CreateSelect(isSet, iBuilder->getInt64(1), iBuilder->getInt64(0)),
    328                 blockOffset
    329         );
    330         Value *newValue = iBuilder->CreateOr(oldValue, bitOneValue);
    331         return iBuilder->CreateInsertElement(intVec, newValue, blockIndex);
     290        Type* BIT_BLOCK_TYPE = iBuilder->getBitBlockType();
     291        Type* BIT_BLOCK_WIDTH_INT_TYPE = iBuilder->getIntNTy(iBuilder->getBitBlockWidth());
     292
     293        Value* sourceInt = iBuilder->CreateBitCast(intVec, BIT_BLOCK_WIDTH_INT_TYPE);
     294        Value *oneBit = iBuilder->CreateShl(
     295                iBuilder->CreateSelect(isSet, ConstantInt::get(BIT_BLOCK_WIDTH_INT_TYPE, 1),
     296                                       ConstantInt::get(BIT_BLOCK_WIDTH_INT_TYPE, 0)),
     297                iBuilder->CreateZExt(pos, BIT_BLOCK_WIDTH_INT_TYPE)
     298        );
     299        return iBuilder->CreateBitCast(iBuilder->CreateOr(sourceInt, oneBit), BIT_BLOCK_TYPE);
    332300    }
    333301
  • icGREP/icgrep-devel/icgrep/kernels/lz4/lz4_numbers_to_bitstream_kernel.h

    r5885 r5926  
    2323        llvm::Value* setIntVectorBitOne(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value* intVec, llvm::Value* pos, llvm::Value* isSet);
    2424        inline llvm::Value* intVecGT(const std::unique_ptr<KernelBuilder> &iBuilder, llvm::Value* intVec1, llvm::Value* intVec2);
     25        inline llvm::Value* loadInt64NumberInput(const std::unique_ptr<KernelBuilder> &iBuilder, std::string bufferName, llvm::Value* offset);
    2526    };
    2627}
  • icGREP/icgrep-devel/icgrep/lz4/LZ4Generator.cpp

    r5921 r5926  
    317317
    318318    Kernel * generateDepositK = pxDriver.addKernelInstance<LZ4GenerateDepositStreamKernel>(iBuilder);
    319     pxDriver.makeKernelCall(generateDepositK, {M0Marker}, {DepositMarker}); // TODO deposit
     319    pxDriver.makeKernelCall(generateDepositK, {M0Marker}, {DepositMarker});
    320320
    321321}
Note: See TracChangeset for help on using the changeset viewer.