Ignore:
Timestamp:
Apr 30, 2018, 7:47:31 AM (17 months ago)
Author:
cameron
Message:

Restructuring step for DeletionKernel: move partial sum popcount in p2sWithCompress

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/deletion.cpp

    r5985 r6004  
    3939}
    4040
    41 inline Value * partial_sum_popcount(const std::unique_ptr<KernelBuilder> & iBuilder, const unsigned fw, Value * mask) {
    42     Value * field = iBuilder->simd_popcount(fw, mask);
    43     const auto count = iBuilder->getBitBlockWidth() / fw;
    44     for (unsigned move = 1; move < count; move *= 2) {
    45         field = iBuilder->simd_add(fw, field, iBuilder->mvmd_slli(fw, field, move));
    46     }
    47     return field;
    48 }
    49 
     41// Apply deletion to a set of stream_count input streams to produce a set of output streams.
     42// Kernel inputs: stream_count data streams plus one del_mask stream
     43// Outputs: the deleted streams, plus a partial sum popcount
     44
     45void DeletionKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
     46    Value * delMask = iBuilder->loadInputStreamBlock("delMaskSet", iBuilder->getInt32(0));
     47    const auto move_masks = parallel_prefix_deletion_masks(iBuilder, mDeletionFieldWidth, delMask);
     48    for (unsigned j = 0; j < mStreamCount; ++j) {
     49        Value * input = iBuilder->loadInputStreamBlock("inputStreamSet", iBuilder->getInt32(j));
     50        Value * output = apply_parallel_prefix_deletion(iBuilder, mDeletionFieldWidth, delMask, move_masks, input);
     51        iBuilder->storeOutputStreamBlock("outputStreamSet", iBuilder->getInt32(j), output);
     52    }
     53    Value * unitCount = iBuilder->simd_popcount(mDeletionFieldWidth, iBuilder->simd_not(delMask));
     54    iBuilder->storeOutputStreamBlock("unitCounts", iBuilder->getInt32(0), iBuilder->bitCast(unitCount));
     55}
     56
     57void DeletionKernel::generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder, Value * remainingBytes) {
     58    IntegerType * vecTy = iBuilder->getIntNTy(iBuilder->getBitBlockWidth());
     59    Value * remaining = iBuilder->CreateZExt(remainingBytes, vecTy);
     60    Value * EOF_del = iBuilder->bitCast(iBuilder->CreateShl(Constant::getAllOnesValue(vecTy), remaining));
     61    Value * delMask = iBuilder->CreateOr(EOF_del, iBuilder->loadInputStreamBlock("delMaskSet", iBuilder->getInt32(0)));
     62    const auto move_masks = parallel_prefix_deletion_masks(iBuilder, mDeletionFieldWidth, delMask);
     63    for (unsigned j = 0; j < mStreamCount; ++j) {
     64        Value * input = iBuilder->loadInputStreamBlock("inputStreamSet", iBuilder->getInt32(j));
     65        Value * output = apply_parallel_prefix_deletion(iBuilder, mDeletionFieldWidth, delMask, move_masks, input);
     66        iBuilder->storeOutputStreamBlock("outputStreamSet", iBuilder->getInt32(j), output);
     67    }
     68    Value * const unitCount = iBuilder->simd_popcount(mDeletionFieldWidth, iBuilder->simd_not(delMask));
     69    iBuilder->storeOutputStreamBlock("unitCounts", iBuilder->getInt32(0), iBuilder->bitCast(unitCount));
     70}
     71
     72DeletionKernel::DeletionKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const unsigned fieldWidth, const unsigned streamCount)
     73: BlockOrientedKernel("del" + std::to_string(fieldWidth) + "_" + std::to_string(streamCount),
     74                      {Binding{iBuilder->getStreamSetTy(streamCount), "inputStreamSet"},
     75                          Binding{iBuilder->getStreamSetTy(), "delMaskSet"}},
     76                      {Binding{iBuilder->getStreamSetTy(streamCount), "outputStreamSet"},
     77                          Binding{iBuilder->getStreamSetTy(), "unitCounts", FixedRate(), RoundUpTo(iBuilder->getBitBlockWidth())}},
     78                      {}, {}, {})
     79, mDeletionFieldWidth(fieldWidth)
     80, mStreamCount(streamCount) {
     81}
     82
     83   
     84   
    5085SwizzledDeleteByPEXTkernel::SwizzledDeleteByPEXTkernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned streamCount, unsigned PEXT_width)
    5186: BlockOrientedKernel("PEXTdel" + std::to_string(PEXT_width) + "_" + std::to_string(streamCount),
     
    290325
    291326    return swizzleSets;
    292 }
    293 
    294 // Apply deletion to a set of stream_count input streams to produce a set of output streams.
    295 // Kernel inputs: stream_count data streams plus one del_mask stream
    296 // Outputs: the deleted streams, plus a partial sum popcount
    297 
    298 void DeletionKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    299     Value * delMask = iBuilder->loadInputStreamBlock("delMaskSet", iBuilder->getInt32(0));
    300     const auto move_masks = parallel_prefix_deletion_masks(iBuilder, mDeletionFieldWidth, delMask);
    301     for (unsigned j = 0; j < mStreamCount; ++j) {
    302         Value * input = iBuilder->loadInputStreamBlock("inputStreamSet", iBuilder->getInt32(j));
    303         Value * output = apply_parallel_prefix_deletion(iBuilder, mDeletionFieldWidth, delMask, move_masks, input);
    304         iBuilder->storeOutputStreamBlock("outputStreamSet", iBuilder->getInt32(j), output);
    305     }
    306     Value * delCount = partial_sum_popcount(iBuilder, mDeletionFieldWidth, iBuilder->simd_not(delMask));
    307     iBuilder->storeOutputStreamBlock("deletionCounts", iBuilder->getInt32(0), iBuilder->bitCast(delCount));
    308 }
    309 
    310 void DeletionKernel::generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder, Value * remainingBytes) {
    311     IntegerType * vecTy = iBuilder->getIntNTy(iBuilder->getBitBlockWidth());
    312     Value * remaining = iBuilder->CreateZExt(remainingBytes, vecTy);
    313     Value * EOF_del = iBuilder->bitCast(iBuilder->CreateShl(Constant::getAllOnesValue(vecTy), remaining));
    314     Value * delMask = iBuilder->CreateOr(EOF_del, iBuilder->loadInputStreamBlock("delMaskSet", iBuilder->getInt32(0)));
    315     const auto move_masks = parallel_prefix_deletion_masks(iBuilder, mDeletionFieldWidth, delMask);
    316     for (unsigned j = 0; j < mStreamCount; ++j) {
    317         Value * input = iBuilder->loadInputStreamBlock("inputStreamSet", iBuilder->getInt32(j));
    318         Value * output = apply_parallel_prefix_deletion(iBuilder, mDeletionFieldWidth, delMask, move_masks, input);
    319         iBuilder->storeOutputStreamBlock("outputStreamSet", iBuilder->getInt32(j), output);
    320     }
    321     Value * const delCount = partial_sum_popcount(iBuilder, mDeletionFieldWidth, iBuilder->simd_not(delMask));
    322     iBuilder->storeOutputStreamBlock("deletionCounts", iBuilder->getInt32(0), iBuilder->bitCast(delCount));
    323 }
    324 
    325 DeletionKernel::DeletionKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, const unsigned fieldWidth, const unsigned streamCount)
    326 : BlockOrientedKernel("del" + std::to_string(fieldWidth) + "_" + std::to_string(streamCount),
    327               {Binding{iBuilder->getStreamSetTy(streamCount), "inputStreamSet"},
    328                Binding{iBuilder->getStreamSetTy(), "delMaskSet"}},
    329               {Binding{iBuilder->getStreamSetTy(streamCount), "outputStreamSet"},
    330                Binding{iBuilder->getStreamSetTy(), "deletionCounts", FixedRate(), RoundUpTo(iBuilder->getBitBlockWidth())}},
    331               {}, {}, {})
    332 , mDeletionFieldWidth(fieldWidth)
    333 , mStreamCount(streamCount) {
    334327}
    335328
Note: See TracChangeset for help on using the changeset viewer.