Ignore:
Timestamp:
Apr 30, 2018, 7:47:31 AM (16 months ago)
Author:
cameron
Message:

Restructuring step for DeletionKernel: move partial sum popcount into p2sWithCompress

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.cpp

    r5771 r6004  
    5151}
    5252
     53inline Value * partial_sum_popcounts(const std::unique_ptr<KernelBuilder> & iBuilder, const unsigned fw, Value * popcounts) {
     54    Value * summed_counts = popcounts;
     55    const auto count = iBuilder->getBitBlockWidth() / fw;
     56    for (unsigned move = 1; move < count; move *= 2) {
     57        summed_counts = iBuilder->simd_add(fw, summed_counts, iBuilder->mvmd_slli(fw, summed_counts, move));
     58    }
     59    return summed_counts;
     60}
     61
    5362void P2SKernelWithCompressedOutput::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & b) {
    5463    IntegerType * i32 = b->getInt32Ty();
    5564    PointerType * bitBlockPtrTy = PointerType::get(b->getBitBlockType(), 0);
     65    unsigned const unitsPerRegister = b->getBitBlockWidth()/8;
    5666
    5767    Value * basisBits[8];
     
    6272    p2s(b, basisBits, bytePack);
    6373
    64     unsigned units_per_register = b->getBitBlockWidth()/8;
    65     Value * delCountBlock_ptr = b->getInputStreamBlockPtr("deletionCounts", b->getInt32(0));
    66     Value * unit_counts = b->fwCast(units_per_register, b->CreateBlockAlignedLoad(delCountBlock_ptr));
     74    Value * const fieldCounts = b->loadInputStreamBlock("fieldCounts", b->getInt32(0));
     75    Value * unitCounts = partial_sum_popcounts(b, unitsPerRegister, fieldCounts);
    6776
    6877    Value * output_ptr = b->getOutputStreamBlockPtr("byteStream", b->getInt32(0));
     
    7180    for (unsigned j = 0; j < 8; ++j) {
    7281        b->CreateStore(bytePack[j], b->CreateBitCast(b->CreateGEP(output_ptr, offset), bitBlockPtrTy));
    73         offset = b->CreateZExt(b->CreateExtractElement(unit_counts, b->getInt32(j)), i32);
     82        offset = b->CreateZExt(b->CreateExtractElement(unitCounts, b->getInt32(j)), i32);
    7483    }
    7584
     
    99108    }
    100109}
    101        
     110   
    102111void P2S16KernelWithCompressedOutput::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & b) {
    103112    IntegerType * i32Ty = b->getInt32Ty();
     
    105114    PointerType * bitBlockPtrTy = b->getBitBlockType()->getPointerTo();
    106115    ConstantInt * blockMask = b->getSize(b->getBitBlockWidth() - 1);
    107 
     116    unsigned const unitsPerRegister = b->getBitBlockWidth()/16;
     117   
    108118    Value * hi_input[8];
    109119    for (unsigned j = 0; j < 8; ++j) {
     
    120130    p2s(b, lo_input, lo_bytes);
    121131
    122     Value * const delCount = b->loadInputStreamBlock("deletionCounts", b->getInt32(0));
    123     Value * const unitCounts = b->fwCast(b->getBitBlockWidth() / 16, delCount);
     132    Value * const fieldCounts = b->loadInputStreamBlock("fieldCounts", b->getInt32(0));
     133    Value * unitCounts = partial_sum_popcounts(b, unitsPerRegister, fieldCounts);
     134   
    124135    Value * outputPtr = b->getOutputStreamBlockPtr("i16Stream", b->getInt32(0));
    125136    outputPtr = b->CreatePointerCast(outputPtr, int16PtrTy);
     
    158169P2SKernelWithCompressedOutput::P2SKernelWithCompressedOutput(const std::unique_ptr<kernel::KernelBuilder> & b)
    159170: BlockOrientedKernel("p2s_compress",
    160               {Binding{b->getStreamSetTy(8, 1), "basisBits"}, Binding{b->getStreamSetTy(1, 1), "deletionCounts"}},
     171              {Binding{b->getStreamSetTy(8, 1), "basisBits"}, Binding{b->getStreamSetTy(1, 1), "fieldCounts"}},
    161172              {Binding{b->getStreamSetTy(1, 8), "byteStream", BoundedRate(0, 1)}},
    162173              {}, {}, {}) {
     
    173184P2S16KernelWithCompressedOutput::P2S16KernelWithCompressedOutput(const std::unique_ptr<kernel::KernelBuilder> & b)
    174185: BlockOrientedKernel("p2s_16_compress",
    175               {Binding{b->getStreamSetTy(16, 1), "basisBits"}, Binding{b->getStreamSetTy(1, 1), "deletionCounts"}},
     186              {Binding{b->getStreamSetTy(16, 1), "basisBits"}, Binding{b->getStreamSetTy(1, 1), "fieldCounts"}},
    176187              {Binding{b->getStreamSetTy(1, 16), "i16Stream", BoundedRate(0, 1)}},
    177188              {},
Note: See TracChangeset for help on using the changeset viewer.