Ignore:
Timestamp:
May 10, 2017, 4:26:11 PM (2 years ago)
Author:
nmedfort
Message:

Large refactoring step. Removed IR generation code from Kernel (formerly KernelBuilder) and moved it into the new KernelBuilder class.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/deletion.cpp

    r5436 r5440  
    1212namespace kernel {
    1313
    14 inline std::vector<Value *> parallel_prefix_deletion_masks(IDISA::IDISA_Builder * const iBuilder, const unsigned fw, Value * del_mask) {
     14inline std::vector<Value *> parallel_prefix_deletion_masks(const std::unique_ptr<KernelBuilder> & iBuilder, const unsigned fw, Value * del_mask) {
    1515    Value * m = iBuilder->simd_not(del_mask);
    1616    Value * mk = iBuilder->simd_slli(fw, del_mask, 1);
     
    2929}
    3030
    31 inline Value * apply_parallel_prefix_deletion(IDISA::IDISA_Builder * const iBuilder, const unsigned fw, Value * del_mask, const std::vector<Value *> & mv, Value * strm) {
     31inline Value * apply_parallel_prefix_deletion(const std::unique_ptr<KernelBuilder> & iBuilder, const unsigned fw, Value * del_mask, const std::vector<Value *> & mv, Value * strm) {
    3232    Value * s = iBuilder->simd_and(strm, iBuilder->simd_not(del_mask));
    3333    for (unsigned i = 0; i < mv.size(); i++) {
     
    3939}
    4040
    41 inline Value * partial_sum_popcount(IDISA::IDISA_Builder * const iBuilder, const unsigned fw, Value * mask) {
     41inline Value * partial_sum_popcount(const std::unique_ptr<KernelBuilder> & iBuilder, const unsigned fw, Value * mask) {
    4242    Value * field = iBuilder->simd_popcount(fw, mask);
    4343    const auto count = iBuilder->getBitBlockWidth() / fw;
     
    5252// Outputs: the deleted streams, plus a partial sum popcount
    5353
    54 void DeletionKernel::generateDoBlockMethod() {
    55     Value * delMask = loadInputStreamBlock("delMaskSet", iBuilder->getInt32(0));
     54void DeletionKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
     55    Value * delMask = iBuilder->loadInputStreamBlock("delMaskSet", iBuilder->getInt32(0));
    5656    const auto move_masks = parallel_prefix_deletion_masks(iBuilder, mDeletionFieldWidth, delMask);
    5757    for (unsigned j = 0; j < mStreamCount; ++j) {
    58         Value * input = loadInputStreamBlock("inputStreamSet", iBuilder->getInt32(j));
     58        Value * input = iBuilder->loadInputStreamBlock("inputStreamSet", iBuilder->getInt32(j));
    5959        Value * output = apply_parallel_prefix_deletion(iBuilder, mDeletionFieldWidth, delMask, move_masks, input);
    60         storeOutputStreamBlock("outputStreamSet", iBuilder->getInt32(j), output);
     60        iBuilder->storeOutputStreamBlock("outputStreamSet", iBuilder->getInt32(j), output);
    6161    }
    6262    Value * delCount = partial_sum_popcount(iBuilder, mDeletionFieldWidth, iBuilder->simd_not(delMask));
    63     storeOutputStreamBlock("deletionCounts", iBuilder->getInt32(0), iBuilder->bitCast(delCount));
    64 }
    65 
    66 void DeletionKernel::generateFinalBlockMethod(Value * remainingBytes) {
     63    iBuilder->storeOutputStreamBlock("deletionCounts", iBuilder->getInt32(0), iBuilder->bitCast(delCount));
     64}
     65
     66void DeletionKernel::generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder, Value * remainingBytes) {
    6767    IntegerType * vecTy = iBuilder->getIntNTy(iBuilder->getBitBlockWidth());
    6868    Value * remaining = iBuilder->CreateZExt(remainingBytes, vecTy);
    6969    Value * EOF_del = iBuilder->bitCast(iBuilder->CreateShl(Constant::getAllOnesValue(vecTy), remaining));
    70     Value * delMask = iBuilder->CreateOr(EOF_del, loadInputStreamBlock("delMaskSet", iBuilder->getInt32(0)));
     70    Value * delMask = iBuilder->CreateOr(EOF_del, iBuilder->loadInputStreamBlock("delMaskSet", iBuilder->getInt32(0)));
    7171    const auto move_masks = parallel_prefix_deletion_masks(iBuilder, mDeletionFieldWidth, delMask);
    7272    for (unsigned j = 0; j < mStreamCount; ++j) {
    73         Value * input = loadInputStreamBlock("inputStreamSet", iBuilder->getInt32(j));
     73        Value * input = iBuilder->loadInputStreamBlock("inputStreamSet", iBuilder->getInt32(j));
    7474        Value * output = apply_parallel_prefix_deletion(iBuilder, mDeletionFieldWidth, delMask, move_masks, input);
    75         storeOutputStreamBlock("outputStreamSet", iBuilder->getInt32(j), output);
     75        iBuilder->storeOutputStreamBlock("outputStreamSet", iBuilder->getInt32(j), output);
    7676    }
    7777    Value * delCount = partial_sum_popcount(iBuilder, mDeletionFieldWidth, iBuilder->simd_not(delMask));
    78     storeOutputStreamBlock("deletionCounts", iBuilder->getInt32(0), iBuilder->bitCast(delCount));
     78    iBuilder->storeOutputStreamBlock("deletionCounts", iBuilder->getInt32(0), iBuilder->bitCast(delCount));
    7979}
    8080
     
    9292const unsigned PEXT_width = 64;
    9393
    94 inline std::vector<Value *> get_PEXT_masks(IDISA::IDISA_Builder * const iBuilder, Value * del_mask) {
     94inline std::vector<Value *> get_PEXT_masks(const std::unique_ptr<KernelBuilder> & iBuilder, Value * del_mask) {
    9595    Value * m = iBuilder->fwCast(PEXT_width, iBuilder->simd_not(del_mask));
    9696    std::vector<Value *> masks;
     
    103103// Apply PEXT deletion to a collection of blocks and swizzle the result.
    104104// strms contains the blocks to process
    105 inline std::vector<Value *> apply_PEXT_deletion_with_swizzle(IDISA::IDISA_Builder * const iBuilder, const std::vector<Value *> & masks, std::vector<Value *> strms) {
     105inline std::vector<Value *> apply_PEXT_deletion_with_swizzle(const std::unique_ptr<KernelBuilder> & iBuilder, const std::vector<Value *> & masks, std::vector<Value *> strms) {
    106106    Value * PEXT_func = nullptr;
    107107    if (PEXT_width == 64) {
     
    146146}
    147147
    148 inline Value * apply_PEXT_deletion(IDISA::IDISA_Builder * const iBuilder, const std::vector<Value *> & masks, Value * strm) {
     148inline Value * apply_PEXT_deletion(const std::unique_ptr<KernelBuilder> & iBuilder, const std::vector<Value *> & masks, Value * strm) {
    149149    Value * PEXT_func = nullptr;
    150150    if (PEXT_width == 64) {
     
    168168// Outputs: swizzles containing the swizzled deleted streams, plus a partial sum popcount
    169169
    170 void DeleteByPEXTkernel::generateDoBlockMethod() {
    171     Value * delMask = loadInputStreamBlock("delMaskSet", iBuilder->getInt32(0));
     170void DeleteByPEXTkernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
     171    Value * delMask = iBuilder->loadInputStreamBlock("delMaskSet", iBuilder->getInt32(0));
    172172    const auto masks = get_PEXT_masks(iBuilder, delMask);
    173     generateProcessingLoop(masks, delMask);
    174 }
    175 
    176 void DeleteByPEXTkernel::generateFinalBlockMethod(Value * remainingBytes) {
     173    generateProcessingLoop(iBuilder, masks, delMask);
     174}
     175
     176void DeleteByPEXTkernel::generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> &iBuilder, Value * remainingBytes) {
    177177    IntegerType * vecTy = iBuilder->getIntNTy(iBuilder->getBitBlockWidth());
    178178    Value * remaining = iBuilder->CreateZExt(remainingBytes, vecTy);
    179179    Value * EOF_del = iBuilder->bitCast(iBuilder->CreateShl(Constant::getAllOnesValue(vecTy), remaining));
    180     Value * delMask = iBuilder->CreateOr(EOF_del, loadInputStreamBlock("delMaskSet", iBuilder->getInt32(0)));
     180    Value * delMask = iBuilder->CreateOr(EOF_del, iBuilder->loadInputStreamBlock("delMaskSet", iBuilder->getInt32(0)));
    181181    const auto masks = get_PEXT_masks(iBuilder, delMask);
    182     generateProcessingLoop(masks, delMask);
    183 }
    184 
    185 void DeleteByPEXTkernel::generateProcessingLoop(const std::vector<Value *> & masks, Value * delMask) {
    186     if (mShouldSwizzle)    
    187         generatePEXTAndSwizzleLoop(masks);
    188     else
    189         generatePEXTLoop(masks);   
    190    
     182    generateProcessingLoop(iBuilder, masks, delMask);
     183}
     184
     185void DeleteByPEXTkernel::generateProcessingLoop(const std::unique_ptr<KernelBuilder> & iBuilder, const std::vector<Value *> & masks, Value * delMask) {
     186    if (mShouldSwizzle) {
     187        generatePEXTAndSwizzleLoop(iBuilder, masks);
     188    } else {
     189        generatePEXTLoop(iBuilder, masks);
     190    }
    191191    //Value * delCount = partial_sum_popcount(iBuilder, mDelCountFieldWidth, apply_PEXT_deletion(iBuilder, masks, iBuilder->simd_not(delMask)));
    192192    Value * delCount = iBuilder->simd_popcount(mDelCountFieldWidth, iBuilder->simd_not(delMask));
    193     storeOutputStreamBlock("deletionCounts", iBuilder->getInt32(0), iBuilder->bitCast(delCount));
    194 }
    195 
    196 void DeleteByPEXTkernel::generatePEXTLoop(const std::vector<Value *> & masks) {
     193    iBuilder->storeOutputStreamBlock("deletionCounts", iBuilder->getInt32(0), iBuilder->bitCast(delCount));
     194}
     195
     196void DeleteByPEXTkernel::generatePEXTLoop(const std::unique_ptr<KernelBuilder> &iBuilder, const std::vector<Value *> & masks) {
    197197    for (unsigned j = 0; j < mStreamCount; ++j) {
    198         Value * input = loadInputStreamBlock("inputStreamSet", iBuilder->getInt32(j));
     198        Value * input = iBuilder->loadInputStreamBlock("inputStreamSet", iBuilder->getInt32(j));
    199199        Value * output = apply_PEXT_deletion(iBuilder, masks, input);
    200         storeOutputStreamBlock("outputStreamSet", iBuilder->getInt32(j), output);
    201     }
    202 }
    203 
    204 void DeleteByPEXTkernel::generatePEXTAndSwizzleLoop(const std::vector<Value *> & masks) {
     200        iBuilder->storeOutputStreamBlock("outputStreamSet", iBuilder->getInt32(j), output);
     201    }
     202}
     203
     204void DeleteByPEXTkernel::generatePEXTAndSwizzleLoop(const std::unique_ptr<KernelBuilder> & iBuilder, const std::vector<Value *> & masks) {
    205205    // Group blocks together into input vector. Input should contain mStreamCount/mSwizzleFactor blocks (e.g. for U8U16 16/4=4)
    206206    // mStreamCount/mSwizzleFactor -> (mStreamCount + mSwizzleFactor - 1) / mSwizzleFactor
     
    210210        for (unsigned i = streamSelectionIndex; i < (streamSelectionIndex + mSwizzleFactor); ++i) {
    211211                // Check if i > mStreamCount. If it is, add null streams until we get mStreamCount/mSwizzleFactor streams in the input vector
    212             if ( i >= mStreamCount)
     212            if ( i >= mStreamCount) {
    213213                                input.push_back(iBuilder->allZeroes());
    214                         else
    215                         input.push_back(loadInputStreamBlock("inputStreamSet", iBuilder->getInt32(i)));
     214            } else {
     215                input.push_back(iBuilder->loadInputStreamBlock("inputStreamSet", iBuilder->getInt32(i)));
     216            }
    216217        }
    217218        std::vector<Value *> output = apply_PEXT_deletion_with_swizzle(iBuilder, masks, input);
    218219        for (unsigned i = 0; i < mSwizzleFactor; i++) {
    219              storeOutputStreamBlock(std::string(mOutputSwizzleNameBase) + std::to_string(j), iBuilder->getInt32(i), output[i]);
     220             iBuilder->storeOutputStreamBlock(std::string(mOutputSwizzleNameBase) + std::to_string(j), iBuilder->getInt32(i), output[i]);
    220221        }
    221222    }
     
    280281}
    281282   
    282 void SwizzledBitstreamCompressByCount::generateDoBlockMethod() {
     283void SwizzledBitstreamCompressByCount::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    283284       
    284     Value * countStreamPtr = iBuilder->CreateBitCast(getInputStreamBlockPtr("countsPerStride", iBuilder->getInt32(0)), iBuilder->getIntNTy(mFieldWidth)->getPointerTo());
     285    Value * countsPerStridePtr = iBuilder->getInputStreamBlockPtr("countsPerStride", iBuilder->getInt32(0));
     286    Value * countStreamPtr = iBuilder->CreatePointerCast(countsPerStridePtr, iBuilder->getIntNTy(mFieldWidth)->getPointerTo());
    285287   
    286288    // Output is written and committed to the output buffer one swizzle at a time.
     
    289291    Constant * outputIndexShift = iBuilder->getSize(std::log2(mFieldWidth));
    290292   
    291     Value * outputProduced = getProducedItemCount("outputSwizzle0"); // All output groups have the same count.
     293    Value * outputProduced = iBuilder->getProducedItemCount("outputSwizzle0"); // All output groups have the same count.
    292294    Value * producedOffset = iBuilder->CreateAnd(outputProduced, blockOffsetMask);
    293295    Value * outputIndex = iBuilder->CreateLShr(producedOffset, outputIndexShift);
    294296
    295297    // There may be pending data in the kernel state, for up to mFieldWidth-1 bits per stream.
    296     Value * pendingOffset = getScalarField("pendingOffset");
     298    Value * pendingOffset = iBuilder->getScalarField("pendingOffset");
    297299    // There is a separate vector of pending data for each swizzle group.
    298300    std::vector<Value *> pendingData;
    299301    std::vector<Value *> outputStreamPtr;
    300302    for (unsigned i = 0; i < mSwizzleSetCount; i++) {
    301         pendingData.push_back(getScalarField("pendingSwizzleData" + std::to_string(i)));
    302         outputStreamPtr.push_back(getOutputStreamBlockPtr("outputSwizzle" + std::to_string(i), iBuilder->getInt32(0)));
     303        pendingData.push_back(iBuilder->getScalarField("pendingSwizzleData" + std::to_string(i)));
     304        outputStreamPtr.push_back(iBuilder->getOutputStreamBlockPtr("outputSwizzle" + std::to_string(i), iBuilder->getInt32(0)));
    303305    }
    304306   
     
    313315        // according to the same newItemCount, pendingSpace, ...
    314316        for (unsigned j = 0; j < mSwizzleSetCount; j++) {
    315             Value * newItems = loadInputStreamBlock("inputSwizzle" + std::to_string(j), iBuilder->getInt32(i));
     317            Value * newItems = iBuilder->loadInputStreamBlock("inputSwizzle" + std::to_string(j), iBuilder->getInt32(i));
    316318            // Combine as many of the new items as possible into the pending group.
    317319            Value * combinedGroup = iBuilder->CreateOr(pendingData[j], iBuilder->CreateShl(newItems, iBuilder->simd_fill(mFieldWidth, pendingOffset)));
     
    327329        pendingOffset = iBuilder->CreateAnd(iBuilder->CreateAdd(newItemCount, pendingOffset), iBuilder->getSize(mFieldWidth-1));
    328330    }
    329     setScalarField("pendingOffset", pendingOffset);
     331    iBuilder->setScalarField("pendingOffset", pendingOffset);
    330332   
    331333    Value * newlyProduced = iBuilder->CreateSub(iBuilder->CreateShl(outputIndex, outputIndexShift), producedOffset);
    332334    Value * produced = iBuilder->CreateAdd(outputProduced, newlyProduced);
    333335    for (unsigned j = 0; j < mSwizzleSetCount; j++) {
    334         setScalarField("pendingSwizzleData" + std::to_string(j), pendingData[j]);
    335     }
    336     setProducedItemCount("outputSwizzle0", produced);
    337 }
    338 
    339 void SwizzledBitstreamCompressByCount::generateFinalBlockMethod(Value * remainingBytes) {
    340     CreateDoBlockMethodCall();
     336        iBuilder->setScalarField("pendingSwizzleData" + std::to_string(j), pendingData[j]);
     337    }
     338    iBuilder->setProducedItemCount("outputSwizzle0", produced);
     339}
     340
     341void SwizzledBitstreamCompressByCount::generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder, Value * /* remainingBytes */) {
     342    CreateDoBlockMethodCall(iBuilder);
    341343    Constant * blockOffsetMask = iBuilder->getSize(iBuilder->getBitBlockWidth() - 1);
    342344    Constant * outputIndexShift = iBuilder->getSize(std::log2(mFieldWidth));
    343345   
    344     Value * outputProduced = getProducedItemCount("outputSwizzle0"); // All output groups have the same count.
     346    Value * outputProduced = iBuilder->getProducedItemCount("outputSwizzle0"); // All output groups have the same count.
    345347    Value * producedOffset = iBuilder->CreateAnd(outputProduced, blockOffsetMask);
    346348    Value * outputIndex = iBuilder->CreateLShr(producedOffset, outputIndexShift);
    347     Value * pendingOffset = getScalarField("pendingOffset");
     349    Value * pendingOffset = iBuilder->getScalarField("pendingOffset");
    348350
    349351    // Write the pending data.
    350352    for (unsigned i = 0; i < mSwizzleSetCount; i++) {
    351         Value * pendingData = getScalarField("pendingSwizzleData" + std::to_string(i));
    352         Value * outputStreamPtr = getOutputStreamBlockPtr("outputSwizzle" + std::to_string(i), iBuilder->getInt32(0));
     353        Value * pendingData = iBuilder->getScalarField("pendingSwizzleData" + std::to_string(i));
     354        Value * outputStreamPtr = iBuilder->getOutputStreamBlockPtr("outputSwizzle" + std::to_string(i), iBuilder->getInt32(0));
    353355        iBuilder->CreateBlockAlignedStore(pendingData, iBuilder->CreateGEP(outputStreamPtr, outputIndex));
    354356    }
    355     setProducedItemCount("outputSwizzle0", iBuilder->CreateAdd(pendingOffset, outputProduced));
    356 }
    357 }
     357    iBuilder->setProducedItemCount("outputSwizzle0", iBuilder->CreateAdd(pendingOffset, outputProduced));
     358}
     359}
Note: See TracChangeset for help on using the changeset viewer.