Ignore:
Timestamp:
Jun 23, 2016, 8:26:09 PM (3 years ago)
Author:
cameron
Message:

Update u8u16 to use new kernel infrastructure

Location:
icGREP/icgrep-devel/icgrep/kernels
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/deletion.cpp

    r5009 r5071  
    88#include <IDISA/idisa_builder.h>
    99#include <llvm/IR/Value.h>
    10 
    11 namespace kernel {
    1210
    1311std::vector<Value *> parallel_prefix_deletion_masks(IDISA::IDISA_Builder * iBuilder, unsigned fw, Value * del_mask) {
     
    4947// Kernel inputs: stream_count data streams plus one del_mask stream
    5048// Outputs: the deleted streams, plus a partial sum popcount
    51 void generateDeletionKernel(Module * m, IDISA::IDISA_Builder * iBuilder, unsigned fw, unsigned stream_count, KernelBuilder * kBuilder) {
     49
     50
     51void deletionKernel::generateKernel() {
     52    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
     53    if (mKernelStateType == nullptr) finalizeKernelStateType();
     54    KernelBuilder::generateKernel();
    5255   
    53     for(unsigned i = 0; i < stream_count; ++i) {
    54         kBuilder->addInputStream(1);
    55         kBuilder->addOutputStream(1);
     56    Module * m = iBuilder->getModule();
     57    unsigned blockSize = iBuilder->getBitBlockWidth();
     58    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
     59    Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
     60   
     61    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
     62   
     63    Value * inputStreamBlock = getParameter(doBlockFunction, "inputStreamSet");
     64    Value * outputStreamBlock = getParameter(doBlockFunction, "outputStreamSet");
     65    Value * delCountBlock = getParameter(doBlockFunction, "deletionCounts");
     66   
     67    Value * del_mask = iBuilder->CreateBlockAlignedLoad(inputStreamBlock, {iBuilder->getInt32(0), iBuilder->getInt32(mStreamCount)});
     68   
     69    std::vector<Value *> move_masks = parallel_prefix_deletion_masks(iBuilder, mDeletionFieldWidth, del_mask);
     70       
     71    for (unsigned j = 0; j < mStreamCount; ++j) {
     72        Value * input = iBuilder->CreateBlockAlignedLoad(inputStreamBlock, {iBuilder->getInt32(0), iBuilder->getInt32(j)});
     73        Value * output = apply_parallel_prefix_deletion(iBuilder, mDeletionFieldWidth, del_mask, move_masks, input);
     74        iBuilder->CreateBlockAlignedStore(output, outputStreamBlock, {iBuilder->getInt32(0), iBuilder->getInt32(j)});
    5675    }
    57     kBuilder->addInputStream(1, "del_mask");
    58     kBuilder->addOutputStream(1);  // partial_sum popcount
    59     kBuilder->prepareFunction();
     76    Value * counts = partial_sum_popcount(iBuilder, mDeletionFieldWidth, iBuilder->simd_not(del_mask));
     77    iBuilder->CreateBlockAlignedStore(iBuilder->bitCast(counts), delCountBlock, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
     78                                         
     79    iBuilder->CreateRetVoid();
    6080   
    61     Value * del_mask = iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(stream_count));
    62    
    63     std::vector<Value *> move_masks = parallel_prefix_deletion_masks(iBuilder, fw, del_mask);
    64     for (unsigned j = 0; j < stream_count; ++j) {
    65         Value * input = iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(j));
    66         Value * output = apply_parallel_prefix_deletion(iBuilder, fw, del_mask, move_masks, input);
    67         iBuilder->CreateBlockAlignedStore(output, kBuilder->getOutputStream(j));
     81    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", finalBlockFunction, 0));
     82    Value * remainingBytes = getParameter(finalBlockFunction, "remainingBytes");
     83    inputStreamBlock = getParameter(finalBlockFunction, "inputStreamSet");
     84    outputStreamBlock = getParameter(finalBlockFunction, "outputStreamSet");
     85    delCountBlock = getParameter(finalBlockFunction, "deletionCounts");
     86    Value * remaining = iBuilder->CreateZExt(remainingBytes, iBuilder->getIntNTy(blockSize));
     87    Value * EOF_del = iBuilder->bitCast(iBuilder->CreateShl(Constant::getAllOnesValue(iBuilder->getIntNTy(blockSize)), remaining));
     88    Value * const delmaskPtr = iBuilder->CreateGEP(inputStreamBlock, {iBuilder->getInt32(0), iBuilder->getInt32(16)});
     89    Value * const delmaskVal = iBuilder->CreateBlockAlignedLoad(delmaskPtr);
     90    iBuilder->CreateBlockAlignedStore(iBuilder->CreateOr(EOF_del, delmaskVal), delmaskPtr);
     91    Function::arg_iterator args = finalBlockFunction->arg_begin();
     92    Value * self = &*(args++);
     93    /* Skip "remaining" arg */ args++;
     94    std::vector<Value *> doBlockArgs = {self};
     95    while (args != finalBlockFunction->arg_end()){
     96        doBlockArgs.push_back(&*args++);
    6897    }
    69     Value * counts = partial_sum_popcount(iBuilder, fw, iBuilder->simd_not(del_mask));
    70    
    71     iBuilder->CreateBlockAlignedStore(iBuilder->bitCast(counts), kBuilder->getOutputStream(stream_count));
    72 
    73     kBuilder->finalize();
    74 }
     98    iBuilder->CreateCall(doBlockFunction, doBlockArgs);   
     99    iBuilder->CreateRetVoid();
     100    iBuilder->restoreIP(savePoint);
    75101}
    76102
     103
  • icGREP/icgrep-devel/icgrep/kernels/deletion.h

    r5002 r5071  
    66#define DELETION_H
    77
     8#include "streamset.h"
     9#include "interface.h"
     10#include "kernel.h"
    811
    912
     
    3033llvm::Value * apply_parallel_prefix_deletion(IDISA::IDISA_Builder * iBuilder, unsigned fw, llvm::Value * del_mask, std::vector<llvm::Value *> mv, llvm::Value * strm);
    3134
    32 namespace kernel {
    33  
    34     class KernelBuilder;
     35using namespace kernel;
    3536
    36     void generateDeletionKernel(llvm::Module * m, IDISA::IDISA_Builder * iBuilder, unsigned fw, unsigned stream_count, KernelBuilder * kBuilder);
    37 
    38 }
     // deletionKernel: KernelBuilder subclass registered under the kernel name "del".
     //
     // Stream-set bindings passed to the KernelBuilder constructor:
     //   input  "inputStreamSet"  - StreamSetType(stream_count + 2, 1)
     //   output "outputStreamSet" - StreamSetType(stream_count, 1)
     //   output "deletionCounts"  - StreamSetType(1, 1)
     // (presumably StreamSetType(n, w) means n streams of w-bit fields - TODO confirm
     //  against streamset.h)
     //
     // As shown in the deletion.cpp part of this changeset, generateKernel() emits a
     // do-block function that loads the deletion mask from element mStreamCount of
     // inputStreamSet, runs apply_parallel_prefix_deletion over elements
     // 0 .. mStreamCount-1 and stores each result to the same index of outputStreamSet,
     // then stores partial_sum_popcount of the inverted mask into deletionCounts.
     //
     // NOTE(review): the input set is declared with stream_count + 2 elements, but the
     // visible generateKernel() only reads indices 0 .. stream_count; the role of the
     // extra element is not visible here - confirm with the producer of this stream set.
     // NOTE(review): the final-block code in generateKernel() addresses the deletion
     // mask with the fixed index 16, while the do-block code uses mStreamCount. The two
     // agree only when stream_count == 16 - confirm whether other counts are intended.
     37class deletionKernel : public kernel::KernelBuilder {
     38public:
     // iBuilder:     forwarded to KernelBuilder.
     // fw:           field width handed to parallel_prefix_deletion_masks,
     //               apply_parallel_prefix_deletion and partial_sum_popcount.
     // stream_count: number of data streams that deletion is applied to.
     39    deletionKernel(IDISA::IDISA_Builder * iBuilder, unsigned fw, unsigned stream_count) :
     40    KernelBuilder(iBuilder, "del",
     41                  {StreamSetBinding{StreamSetType(stream_count + 2, 1), "inputStreamSet"}},
     42                  {StreamSetBinding{StreamSetType(stream_count, 1), "outputStreamSet"}, StreamSetBinding{StreamSetType(1, 1), "deletionCounts"}},
     43                  {}, {}, {}),
     44    mDeletionFieldWidth(fw),
     45    mStreamCount(stream_count) {}
     46   
     // Emits the do-block and final-block function bodies for this kernel.
     47    void generateKernel() override;
     48private:
     // Field width (constructor argument fw) used by the deletion helpers.
     49    unsigned mDeletionFieldWidth;
     // Number of data streams; also the index at which the do-block code reads the mask.
     50    unsigned mStreamCount;
     51};
     52   
    3953#endif
    4054
  • icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.cpp

    r5040 r5071  
    4747}
    4848               
    49 void generateP2SKernel(Module * m, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder) {
    50     for (unsigned i = 0; i < 8; ++i) {
    51         kBuilder->addInputStream(1);
     49void p2sKernel::generateKernel() {
     50    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
     51    if (mKernelStateType == nullptr) finalizeKernelStateType();
     52    KernelBuilder::generateKernel();
     53
     54    Module * m = iBuilder->getModule();
     55    addTrivialFinalBlockMethod(m);
     56    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
     57   
     58    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
     59   
     60    Value * basisBitsBlock_ptr = getParameter(doBlockFunction, "basisBits");  // input
     61    Value * byteStreamBlock_ptr = getParameter(doBlockFunction, "byteStream"); // output
     62    Value * p_bitblock[8];
     63    for (unsigned i = 0; i < 8; i++) {
     64        p_bitblock[i] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(i)});
    5265    }
    53     kBuilder->addOutputStream(8);
    54     kBuilder->prepareFunction();
    55     Value * input[8];
     66    Value * s_bytepack[8];
     67    p2s(iBuilder, p_bitblock, s_bytepack);
    5668    for (unsigned j = 0; j < 8; ++j) {
    57         input[j] = iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(j));
     69        iBuilder->CreateBlockAlignedStore(s_bytepack[j], byteStreamBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(j)});
    5870    }
    59     Value * output[8];
    60     p2s(iBuilder, input, output);
    61     Value * output_ptr = kBuilder->getOutputStream(0);
    62     for (unsigned j = 0; j < 8; ++j) {
     71    iBuilder->CreateRetVoid();
     72    iBuilder->restoreIP(savePoint);
     73}
     74       
     75void p2s_16Kernel::generateKernel() {
     76    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
     77    if (mKernelStateType == nullptr) finalizeKernelStateType();
     78    KernelBuilder::generateKernel();
    6379
    64         iBuilder->CreateBlockAlignedStore(output[j], iBuilder->CreateGEP(output_ptr, std::vector<Value *>({ iBuilder->getInt32(0), iBuilder->getInt32(j) })));
    65     }
    66     kBuilder->finalize();
    67 }
    68 
    69 void generateP2S_16Kernel(Module * m, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder) {
    70     for (unsigned i = 0; i < 16; ++i) {
    71         kBuilder->addInputStream(1);
    72     }
    73     kBuilder->addOutputStream(16);
    74     kBuilder->prepareFunction();
     80    Module * m = iBuilder->getModule();
     81    addTrivialFinalBlockMethod(m);
     82    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
     83   
     84    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
     85   
     86    Value * basisBitsBlock_ptr = getParameter(doBlockFunction, "basisBits");  // input
     87    Value * i16StreamBlock_ptr = getParameter(doBlockFunction, "i16Stream"); // output
     88   
    7589    Value * hi_input[8];
    7690    for (unsigned j = 0; j < 8; ++j) {
    77         hi_input[j] = iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(j));
     91        hi_input[j] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(j)});
    7892    }
    7993    Value * hi_bytes[8];
     
    8296    Value * lo_input[8];
    8397    for (unsigned j = 0; j < 8; ++j) {
    84         lo_input[j] = iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(j+8));
     98        lo_input[j] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(j+8)});
    8599    }
    86100    Value * lo_bytes[8];
    87101    p2s(iBuilder, lo_input, lo_bytes);
    88102   
    89     Value * output_ptr = kBuilder->getOutputStream(0);
    90103    for (unsigned j = 0; j < 8; ++j) {
    91104        Value * merge0 = iBuilder->bitCast(iBuilder->esimd_mergel(8, hi_bytes[j], lo_bytes[j]));
    92105        Value * merge1 = iBuilder->bitCast(iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
    93         iBuilder->CreateBlockAlignedStore(merge0, iBuilder->CreateGEP(output_ptr, std::vector<Value *>({ iBuilder->getInt32(0), iBuilder->getInt32(2*j) })));
    94         iBuilder->CreateBlockAlignedStore(merge1, iBuilder->CreateGEP(output_ptr, std::vector<Value *>({ iBuilder->getInt32(0), iBuilder->getInt32(2*j+1) })));
     106        iBuilder->CreateBlockAlignedStore(merge0, i16StreamBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(2*j)});
     107        iBuilder->CreateBlockAlignedStore(merge1, i16StreamBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(2*j+1)});
    95108    }
    96     kBuilder->finalize();
     109    iBuilder->CreateRetVoid();
     110    iBuilder->restoreIP(savePoint);
    97111}
     112       
    98113   
    99114Function * create_write(Module * const mod) {
     
    110125const size_t OutputBufferSize=65536;
    111126
    112 void generateP2S_16_withCompressedOutputKernel(Module * m, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder) {
     127void p2s_16Kernel_withCompressedOutputKernel::generateKernel() {
    113128    outs().SetBufferSize(OutputBufferSize);
    114     for (unsigned i = 0; i < 16; ++i) {
    115         kBuilder->addInputStream(1);
    116     }       
    117     kBuilder->addInputStream(1);  // partial popcounts
    118     kBuilder->addOutputStream(16);
     129    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
     130    if (mKernelStateType == nullptr) finalizeKernelStateType();
     131    KernelBuilder::generateKernel();
    119132
    120     kBuilder->prepareFunction();
    121133   
     134    Module * m = iBuilder->getModule();
    122135    Type * i8PtrTy = iBuilder->getInt8PtrTy();
    123136    Type * i64 = iBuilder->getIntNTy(64);
     
    125138   
    126139    Function * writefn = cast<Function>(m->getOrInsertFunction("buffered_write", iBuilder->getVoidTy(), i8PtrTy, i64, nullptr));
     140
     141    addTrivialFinalBlockMethod(m);
     142    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
    127143   
     144    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
     145   
     146    Value * basisBitsBlock_ptr = getParameter(doBlockFunction, "basisBits");  // input
     147    Value * delCountBlock_ptr = getParameter(doBlockFunction, "deletionCounts");
     148    Value * i16StreamBlock_ptr = getParameter(doBlockFunction, "i16Stream"); // output
     149
    128150    Value * hi_input[8];
    129151    for (unsigned j = 0; j < 8; ++j) {
    130         hi_input[j] = iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(j));
     152        hi_input[j] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(j)});
    131153    }
    132154    Value * hi_bytes[8];
     
    135157    Value * lo_input[8];
    136158    for (unsigned j = 0; j < 8; ++j) {
    137         lo_input[j] = iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(j+8));
     159        lo_input[j] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(j+8)});
    138160    }
    139161    Value * lo_bytes[8];
     
    142164    unsigned UTF_16_units_per_register = iBuilder->getBitBlockWidth()/16;
    143165   
    144     Value * partial_counts = iBuilder->fwCast(UTF_16_units_per_register, iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(16)));
    145     if (UTF_16_units_per_register < 16) {
    146         partial_counts = iBuilder->CreateZExt(partial_counts, VectorType::get(iBuilder->getIntNTy(16), iBuilder->getBitBlockWidth()/UTF_16_units_per_register));
    147     }
    148     Value * byte_counts = iBuilder->CreateAdd(partial_counts, partial_counts); // double the code unit count to get byte counts
     166    Value * unit_counts = iBuilder->fwCast(UTF_16_units_per_register, iBuilder->CreateBlockAlignedLoad(delCountBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
    149167   
    150     Value * output_ptr = iBuilder->CreateBitCast(kBuilder->getOutputStream(0), i8PtrTy);
    151     Value * byte_offset = ConstantInt::get(i64, 0);
     168    Value * u16_output_ptr = iBuilder->CreateBitCast(i16StreamBlock_ptr, PointerType::get(iBuilder->getInt16Ty(), 0));
     169    Value * offset = ConstantInt::get(i64, 0);
    152170   
    153171    for (unsigned j = 0; j < 8; ++j) {
     
    155173        Value * merge1 = iBuilder->bitCast(iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
    156174        //iBuilder->CallPrintRegister("merge0", merge0);
    157         iBuilder->CreateAlignedStore(merge0, iBuilder->CreateBitCast(iBuilder->CreateGEP(output_ptr, byte_offset), bitBlockPtrTy), 1);
    158         byte_offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(byte_counts, iBuilder->getInt32(2*j)), i64);
    159         //iBuilder->CallPrintInt("byte_offset", byte_offset);
    160         iBuilder->CreateAlignedStore(merge1, iBuilder->CreateBitCast(iBuilder->CreateGEP(output_ptr, byte_offset), bitBlockPtrTy), 1);
     175        iBuilder->CreateAlignedStore(merge0, iBuilder->CreateBitCast(iBuilder->CreateGEP(u16_output_ptr, offset), bitBlockPtrTy), 1);
     176        offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(2*j)), i64);
     177        //iBuilder->CallPrintInt("offset", offset);
     178        iBuilder->CreateAlignedStore(merge1, iBuilder->CreateBitCast(iBuilder->CreateGEP(u16_output_ptr, offset), bitBlockPtrTy), 1);
    161179        //iBuilder->CallPrintRegister("merge1", merge1);
    162         byte_offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(byte_counts, iBuilder->getInt32(2*j+1)), i64);
    163         //iBuilder->CallPrintInt("byte_offset", byte_offset);
     180        offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(2*j+1)), i64);
     181        //iBuilder->CallPrintInt("offset", offset);
    164182    }
    165     iBuilder->CreateCall(writefn, std::vector<Value *>({output_ptr, byte_offset}));
    166    
    167     kBuilder->finalize();
     183    Value * byte_offset = iBuilder->CreateAdd(offset, offset);
     184    iBuilder->CreateCall(writefn, std::vector<Value *>({iBuilder->CreateBitCast(i16StreamBlock_ptr, i8PtrTy), byte_offset}));
     185    iBuilder->CreateRetVoid();
     186    iBuilder->restoreIP(savePoint);
    168187}
    169 
     188       
    170189}
  • icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.h

    r5009 r5071  
    66#define P2S_KERNEL_H
    77
     8#include "streamset.h"
     9#include "interface.h"
     10#include "kernel.h"
     11
    812namespace llvm { class Module; }
    913
     
    1216namespace kernel {
    1317
    14 class KernelBuilder;
    1518
    16     void generateP2SKernel(llvm::Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder);
     19//    void generateP2SKernel(llvm::Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder);
    1720
    18     void generateP2S_16Kernel(llvm::Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder);
     21//    void generateP2S_16Kernel(llvm::Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder);
    1922
    20     void generateP2S_16_withCompressedOutputKernel(llvm::Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder);
     23//    void generateP2S_16_withCompressedOutputKernel(llvm::Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder);
     24   
     25   
     // p2sKernel: KernelBuilder subclass registered under the kernel name "p2s".
     //
     // Stream-set bindings passed to the KernelBuilder constructor:
     //   input  "basisBits"  - StreamSetType(8, 1)
     //   output "byteStream" - StreamSetType(1, 8)
     //
     // As shown in the p2s_kernel.cpp part of this changeset, generateKernel() emits a
     // do-block function that loads elements 0..7 of basisBits, passes them through
     // p2s(), and stores the eight resulting packs into byteStream. The final-block
     // method is supplied by addTrivialFinalBlockMethod.
     26class p2sKernel : public KernelBuilder {
     27public:
     // iBuilder is forwarded to KernelBuilder; the kernel takes no other configuration.
     28    p2sKernel(IDISA::IDISA_Builder * iBuilder) :
     29    KernelBuilder(iBuilder, "p2s",
     30                  {StreamSetBinding{StreamSetType(8, 1), "basisBits"}},
     31                  {StreamSetBinding{StreamSetType(1, 8), "byteStream"}},
     32                  {}, {}, {}) {}
     33   
     // Emits the do-block function body for this kernel.
     34    void generateKernel() override;
     35   
     36};
    2137
     // p2s_16Kernel: KernelBuilder subclass registered under the kernel name "p2s_16".
     //
     // Stream-set bindings passed to the KernelBuilder constructor:
     //   input  "basisBits" - StreamSetType(16, 1)
     //   output "i16Stream" - StreamSetType(1, 16)
     //
     // As shown in the p2s_kernel.cpp part of this changeset, generateKernel() loads
     // basisBits elements 0..7 as the "hi" inputs and 8..15 as the "lo" inputs, then
     // interleaves hi_bytes[j] / lo_bytes[j] with esimd_mergel and esimd_mergeh (field
     // width 8) and stores the two results at positions 2*j and 2*j+1 of i16Stream.
     // The final-block method is supplied by addTrivialFinalBlockMethod.
     38class p2s_16Kernel : public KernelBuilder {
     39public:
     // iBuilder is forwarded to KernelBuilder; the kernel takes no other configuration.
     40    p2s_16Kernel(IDISA::IDISA_Builder * iBuilder) :
     41    KernelBuilder(iBuilder, "p2s_16",
     42                  {StreamSetBinding{StreamSetType(16, 1), "basisBits"}},
     43                  {StreamSetBinding{StreamSetType(1, 16), "i16Stream"}},
     44                  {}, {}, {}) {}
     45   
     // Emits the do-block function body for this kernel.
     46    void generateKernel() override;
     47   
     48};
     49
     50   
     // p2s_16Kernel_withCompressedOutputKernel: KernelBuilder subclass registered under
     // the kernel name "p2s_16_compress".
     //
     // Stream-set bindings passed to the KernelBuilder constructor:
     //   input  "basisBits"      - StreamSetType(16, 1)
     //   input  "deletionCounts" - StreamSetType(1, 1)
     //   output "i16Stream"      - StreamSetType(1, 16)
     //
     // As shown in the p2s_kernel.cpp part of this changeset, generateKernel() sets the
     // buffer size of outs() to OutputBufferSize, merges the hi/lo byte packs as in the
     // plain 16-bit kernel, and stores each merged block with alignment 1 at an offset
     // (in 16-bit units) extracted from the deletionCounts block. It then calls the
     // external function "buffered_write" with the i16Stream pointer and twice the
     // final offset as the byte count.
     // NOTE(review): offsets come straight from deletionCounts; presumably these are the
     // running counts stored by the "del" kernel - confirm at the pipeline level.
     51class p2s_16Kernel_withCompressedOutputKernel : public KernelBuilder {
     52public:
     // iBuilder is forwarded to KernelBuilder; the kernel takes no other configuration.
     53    p2s_16Kernel_withCompressedOutputKernel(IDISA::IDISA_Builder * iBuilder) :
     54    KernelBuilder(iBuilder, "p2s_16_compress",
     55                  {StreamSetBinding{StreamSetType(16, 1), "basisBits"}, StreamSetBinding{StreamSetType(1, 1), "deletionCounts"}},
     56                  {StreamSetBinding{StreamSetType(1, 16), "i16Stream"}},
     57                  {}, {}, {}) {}
     58       
     // Emits the do-block function body for this kernel.
     59    void generateKernel() override;
     60};
     61   
    2262}
    2363
Note: See TracChangeset for help on using the changeset viewer.