Ignore:
Timestamp:
Dec 3, 2017, 12:40:40 PM (22 months ago)
Author:
nmedfort
Message:

Bug fixes and simplified MultiBlockKernel logic

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.cpp

    r5706 r5755  
    3737}
    3838               
    39 void P2SKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
     39void P2SKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & b) {
    4040    Value * p_bitblock[8];
    4141    for (unsigned i = 0; i < 8; i++) {
    42         p_bitblock[i] = iBuilder->loadInputStreamBlock("basisBits", iBuilder->getInt32(i));
     42        p_bitblock[i] = b->loadInputStreamBlock("basisBits", b->getInt32(i));
    4343    }
    4444    Value * s_bytepack[8];
    45     p2s(iBuilder, p_bitblock, s_bytepack);
     45    p2s(b, p_bitblock, s_bytepack);
    4646    for (unsigned j = 0; j < 8; ++j) {
    47         iBuilder->storeOutputStreamPack("byteStream", iBuilder->getInt32(0), iBuilder->getInt32(j), s_bytepack[j]);
     47        b->storeOutputStreamPack("byteStream", b->getInt32(0), b->getInt32(j), s_bytepack[j]);
    4848    }
    4949}
    5050
    51 void P2SKernelWithCompressedOutput::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    52     IntegerType * i32 = iBuilder->getInt32Ty();
    53     PointerType * bitBlockPtrTy = PointerType::get(iBuilder->getBitBlockType(), 0);
     51void P2SKernelWithCompressedOutput::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & b) {
     52    IntegerType * i32 = b->getInt32Ty();
     53    PointerType * bitBlockPtrTy = PointerType::get(b->getBitBlockType(), 0);
    5454
    5555    Value * basisBits[8];
    5656    for (unsigned i = 0; i < 8; i++) {
    57         basisBits[i] = iBuilder->loadInputStreamBlock("basisBits", iBuilder->getInt32(i));
     57        basisBits[i] = b->loadInputStreamBlock("basisBits", b->getInt32(i));
    5858    }
    5959    Value * bytePack[8];
    60     p2s(iBuilder, basisBits, bytePack);
     60    p2s(b, basisBits, bytePack);
    6161
    62     unsigned units_per_register = iBuilder->getBitBlockWidth()/8;
    63     Value * delCountBlock_ptr = iBuilder->getInputStreamBlockPtr("deletionCounts", iBuilder->getInt32(0));
    64     Value * unit_counts = iBuilder->fwCast(units_per_register, iBuilder->CreateBlockAlignedLoad(delCountBlock_ptr));
     62    unsigned units_per_register = b->getBitBlockWidth()/8;
     63    Value * delCountBlock_ptr = b->getInputStreamBlockPtr("deletionCounts", b->getInt32(0));
     64    Value * unit_counts = b->fwCast(units_per_register, b->CreateBlockAlignedLoad(delCountBlock_ptr));
    6565
    66     Value * output_ptr = iBuilder->getOutputStreamBlockPtr("byteStream", iBuilder->getInt32(0));
    67     output_ptr = iBuilder->CreatePointerCast(output_ptr, iBuilder->getInt8PtrTy());
    68     Value * offset = iBuilder->getInt32(0);
     66    Value * output_ptr = b->getOutputStreamBlockPtr("byteStream", b->getInt32(0));
     67    output_ptr = b->CreatePointerCast(output_ptr, b->getInt8PtrTy());
     68    Value * offset = b->getInt32(0);
    6969    for (unsigned j = 0; j < 8; ++j) {
    70         iBuilder->CreateStore(bytePack[j], iBuilder->CreateBitCast(iBuilder->CreateGEP(output_ptr, offset), bitBlockPtrTy));
    71         offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(j)), i32);
     70        b->CreateStore(bytePack[j], b->CreateBitCast(b->CreateGEP(output_ptr, offset), bitBlockPtrTy));
     71        offset = b->CreateZExt(b->CreateExtractElement(unit_counts, b->getInt32(j)), i32);
    7272    }
    7373
    74     Value * unitsGenerated = iBuilder->getProducedItemCount("byteStream"); // units generated to buffer
    75     unitsGenerated = iBuilder->CreateAdd(unitsGenerated, iBuilder->CreateZExt(offset, iBuilder->getSizeTy()));
    76     iBuilder->setProducedItemCount("byteStream", unitsGenerated);
     74    Value * unitsGenerated = b->getProducedItemCount("byteStream"); // units generated to buffer
     75    unitsGenerated = b->CreateAdd(unitsGenerated, b->CreateZExt(offset, b->getSizeTy()));
     76    b->setProducedItemCount("byteStream", unitsGenerated);
    7777}
    7878
    79 void P2S16Kernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
     79void P2S16Kernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & b) {
    8080    Value * hi_input[8];
    8181    for (unsigned j = 0; j < 8; ++j) {
    82         hi_input[j] = iBuilder->loadInputStreamBlock("basisBits", iBuilder->getInt32(j));
     82        hi_input[j] = b->loadInputStreamBlock("basisBits", b->getInt32(j));
    8383    }
    8484    Value * hi_bytes[8];
    85     p2s(iBuilder, hi_input, hi_bytes);   
     85    p2s(b, hi_input, hi_bytes);
    8686    Value * lo_input[8];
    8787    for (unsigned j = 0; j < 8; ++j) {
    88         lo_input[j] = iBuilder->loadInputStreamBlock("basisBits", iBuilder->getInt32(j + 8));
     88        lo_input[j] = b->loadInputStreamBlock("basisBits", b->getInt32(j + 8));
    8989    }
    9090    Value * lo_bytes[8];
    91     p2s(iBuilder, lo_input, lo_bytes);   
     91    p2s(b, lo_input, lo_bytes);
    9292    for (unsigned j = 0; j < 8; ++j) {
    93         Value * merge0 = iBuilder->bitCast(iBuilder->esimd_mergel(8, hi_bytes[j], lo_bytes[j]));
    94         Value * merge1 = iBuilder->bitCast(iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
    95         iBuilder->storeOutputStreamPack("i16Stream", iBuilder->getInt32(0), iBuilder->getInt32(2 * j), merge0);
    96         iBuilder->storeOutputStreamPack("i16Stream", iBuilder->getInt32(0), iBuilder->getInt32(2 * j + 1), merge1);
     93        Value * merge0 = b->bitCast(b->esimd_mergel(8, hi_bytes[j], lo_bytes[j]));
     94        Value * merge1 = b->bitCast(b->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
     95        b->storeOutputStreamPack("i16Stream", b->getInt32(0), b->getInt32(2 * j), merge0);
     96        b->storeOutputStreamPack("i16Stream", b->getInt32(0), b->getInt32(2 * j + 1), merge1);
    9797    }
    9898}
    9999       
    100 void P2S16KernelWithCompressedOutput::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
    101     IntegerType * i32Ty = iBuilder->getInt32Ty();
    102     PointerType * int16PtrTy = iBuilder->getInt16Ty()->getPointerTo();
    103     PointerType * bitBlockPtrTy = iBuilder->getBitBlockType()->getPointerTo();
    104     ConstantInt * blockMask = iBuilder->getSize(iBuilder->getBitBlockWidth() - 1);
     100void P2S16KernelWithCompressedOutput::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & b) {
     101    IntegerType * i32Ty = b->getInt32Ty();
     102    PointerType * int16PtrTy = b->getInt16Ty()->getPointerTo();
     103    PointerType * bitBlockPtrTy = b->getBitBlockType()->getPointerTo();
     104    ConstantInt * blockMask = b->getSize(b->getBitBlockWidth() - 1);
    105105
    106106    Value * hi_input[8];
    107107    for (unsigned j = 0; j < 8; ++j) {
    108         hi_input[j] = iBuilder->loadInputStreamBlock("basisBits", iBuilder->getInt32(j));
     108        hi_input[j] = b->loadInputStreamBlock("basisBits", b->getInt32(j));
    109109    }
    110110    Value * hi_bytes[8];
    111     p2s(iBuilder, hi_input, hi_bytes);
     111    p2s(b, hi_input, hi_bytes);
    112112
    113113    Value * lo_input[8];
    114114    for (unsigned j = 0; j < 8; ++j) {
    115         lo_input[j] = iBuilder->loadInputStreamBlock("basisBits", iBuilder->getInt32(j + 8));
     115        lo_input[j] = b->loadInputStreamBlock("basisBits", b->getInt32(j + 8));
    116116    }
    117117    Value * lo_bytes[8];
    118     p2s(iBuilder, lo_input, lo_bytes);
     118    p2s(b, lo_input, lo_bytes);
    119119
    120     Value * delCount = iBuilder->loadInputStreamBlock("deletionCounts", iBuilder->getInt32(0));
    121     Value * unitCounts = iBuilder->fwCast(iBuilder->getBitBlockWidth() / 16, delCount);
    122     Value * outputPtr = iBuilder->getOutputStreamBlockPtr("i16Stream", iBuilder->getInt32(0));
    123     outputPtr = iBuilder->CreatePointerCast(outputPtr, int16PtrTy);
    124     Value * i16UnitsGenerated = iBuilder->getProducedItemCount("i16Stream"); // units generated to buffer
    125     outputPtr = iBuilder->CreateGEP(outputPtr, iBuilder->CreateAnd(i16UnitsGenerated, blockMask));
     120    Value * const delCount = b->loadInputStreamBlock("deletionCounts", b->getInt32(0));
     121    Value * const unitCounts = b->fwCast(b->getBitBlockWidth() / 16, delCount);
     122    Value * outputPtr = b->getOutputStreamBlockPtr("i16Stream", b->getInt32(0));
     123    outputPtr = b->CreatePointerCast(outputPtr, int16PtrTy);
     124    Value * const i16UnitsGenerated = b->getProducedItemCount("i16Stream"); // units generated to buffer
     125    outputPtr = b->CreateGEP(outputPtr, b->CreateAnd(i16UnitsGenerated, blockMask));
    126126
    127     Value * offset = ConstantInt::get(i32Ty, 0);
     127    Value * offset = b->getInt32(0);
    128128
    129129    for (unsigned j = 0; j < 8; ++j) {
    130         Value * merge0 = iBuilder->bitCast(iBuilder->esimd_mergel(8, hi_bytes[j], lo_bytes[j]));
    131         iBuilder->CreateAlignedStore(merge0, iBuilder->CreateBitCast(iBuilder->CreateGEP(outputPtr, offset), bitBlockPtrTy), 1);
    132         offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unitCounts, iBuilder->getInt32(2 * j)), i32Ty);
     130        Value * const merge0 = b->bitCast(b->esimd_mergel(8, hi_bytes[j], lo_bytes[j]));
     131        b->CreateAlignedStore(merge0, b->CreateBitCast(b->CreateGEP(outputPtr, offset), bitBlockPtrTy), 1);
     132        Value * const nextOffset1 = b->CreateZExt(b->CreateExtractElement(unitCounts, b->getInt32(2 * j)), i32Ty);
     133        b->CreateAssert(b->CreateICmpULE(offset, nextOffset1), "deletion offset is not monotonically non-decreasing");
    133134
    134         Value * merge1 = iBuilder->bitCast(iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
    135         iBuilder->CreateAlignedStore(merge1, iBuilder->CreateBitCast(iBuilder->CreateGEP(outputPtr, offset), bitBlockPtrTy), 1);
    136         offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unitCounts, iBuilder->getInt32(2 * j + 1)), i32Ty);
     135        Value * const merge1 = b->bitCast(b->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
     136        b->CreateAlignedStore(merge1, b->CreateBitCast(b->CreateGEP(outputPtr, nextOffset1), bitBlockPtrTy), 1);
     137        Value * const nextOffset2 = b->CreateZExt(b->CreateExtractElement(unitCounts, b->getInt32(2 * j + 1)), i32Ty);
     138        b->CreateAssert(b->CreateICmpULE(nextOffset1, nextOffset2), "deletion offset is not monotonically non-decreasing");
     139
     140        offset = nextOffset2;
    137141    }
    138142
    139     Value * i16UnitsFinal = iBuilder->CreateAdd(i16UnitsGenerated, iBuilder->CreateZExt(offset, iBuilder->getSizeTy()));
    140     iBuilder->setProducedItemCount("i16Stream", i16UnitsFinal);
     143    Value * const i16UnitsFinal = b->CreateAdd(i16UnitsGenerated, b->CreateZExt(offset, b->getSizeTy()));
     144    b->setProducedItemCount("i16Stream", i16UnitsFinal);
    141145}
    142146
    143 P2SKernel::P2SKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder)
     147P2SKernel::P2SKernel(const std::unique_ptr<kernel::KernelBuilder> & b)
    144148: BlockOrientedKernel("p2s",
    145               {Binding{iBuilder->getStreamSetTy(8, 1), "basisBits"}},
    146               {Binding{iBuilder->getStreamSetTy(1, 8), "byteStream"}},
     149              {Binding{b->getStreamSetTy(8, 1), "basisBits"}},
     150              {Binding{b->getStreamSetTy(1, 8), "byteStream"}},
    147151              {}, {}, {}) {
    148152}
    149153
    150 P2SKernelWithCompressedOutput::P2SKernelWithCompressedOutput(const std::unique_ptr<kernel::KernelBuilder> & iBuilder)
     154P2SKernelWithCompressedOutput::P2SKernelWithCompressedOutput(const std::unique_ptr<kernel::KernelBuilder> & b)
    151155: BlockOrientedKernel("p2s_compress",
    152               {Binding{iBuilder->getStreamSetTy(8, 1), "basisBits"}, Binding{iBuilder->getStreamSetTy(1, 1), "deletionCounts"}},
    153               {Binding{iBuilder->getStreamSetTy(1, 8), "byteStream", BoundedRate(0, 1)}},
     156              {Binding{b->getStreamSetTy(8, 1), "basisBits"}, Binding{b->getStreamSetTy(1, 1), "deletionCounts"}},
     157              {Binding{b->getStreamSetTy(1, 8), "byteStream", BoundedRate(0, 1)}},
    154158              {}, {}, {}) {
    155159}
    156160
    157 P2S16Kernel::P2S16Kernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder)
     161P2S16Kernel::P2S16Kernel(const std::unique_ptr<kernel::KernelBuilder> & b)
    158162: BlockOrientedKernel("p2s_16",
    159               {Binding{iBuilder->getStreamSetTy(16, 1), "basisBits"}},
    160               {Binding{iBuilder->getStreamSetTy(1, 16), "i16Stream"}},
     163              {Binding{b->getStreamSetTy(16, 1), "basisBits"}},
     164              {Binding{b->getStreamSetTy(1, 16), "i16Stream"}},
    161165              {}, {}, {}) {
    162166}
Note: See TracChangeset for help on using the changeset viewer.