Ignore:
Timestamp:
Jun 23, 2016, 8:26:09 PM (3 years ago)
Author:
cameron
Message:

Update u8u16 to use new kernel infrastructure

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/deletion.cpp

    r5009 r5071  
    88#include <IDISA/idisa_builder.h>
    99#include <llvm/IR/Value.h>
    10 
    11 namespace kernel {
    1210
    1311std::vector<Value *> parallel_prefix_deletion_masks(IDISA::IDISA_Builder * iBuilder, unsigned fw, Value * del_mask) {
     
    4947// Kernel inputs: stream_count data streams plus one del_mask stream
    5048// Outputs: the deleted streams, plus a partial sum popcount
    51 void generateDeletionKernel(Module * m, IDISA::IDISA_Builder * iBuilder, unsigned fw, unsigned stream_count, KernelBuilder * kBuilder) {
     49
     50
     51void deletionKernel::generateKernel() {
     52    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
     53    if (mKernelStateType == nullptr) finalizeKernelStateType();
     54    KernelBuilder::generateKernel();
    5255   
    53     for(unsigned i = 0; i < stream_count; ++i) {
    54         kBuilder->addInputStream(1);
    55         kBuilder->addOutputStream(1);
     56    Module * m = iBuilder->getModule();
     57    unsigned blockSize = iBuilder->getBitBlockWidth();
     58    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
     59    Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
     60   
     61    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
     62   
     63    Value * inputStreamBlock = getParameter(doBlockFunction, "inputStreamSet");
     64    Value * outputStreamBlock = getParameter(doBlockFunction, "outputStreamSet");
     65    Value * delCountBlock = getParameter(doBlockFunction, "deletionCounts");
     66   
     67    Value * del_mask = iBuilder->CreateBlockAlignedLoad(inputStreamBlock, {iBuilder->getInt32(0), iBuilder->getInt32(mStreamCount)});
     68   
     69    std::vector<Value *> move_masks = parallel_prefix_deletion_masks(iBuilder, mDeletionFieldWidth, del_mask);
     70       
     71    for (unsigned j = 0; j < mStreamCount; ++j) {
     72        Value * input = iBuilder->CreateBlockAlignedLoad(inputStreamBlock, {iBuilder->getInt32(0), iBuilder->getInt32(j)});
     73        Value * output = apply_parallel_prefix_deletion(iBuilder, mDeletionFieldWidth, del_mask, move_masks, input);
     74        iBuilder->CreateBlockAlignedStore(output, outputStreamBlock, {iBuilder->getInt32(0), iBuilder->getInt32(j)});
    5675    }
    57     kBuilder->addInputStream(1, "del_mask");
    58     kBuilder->addOutputStream(1);  // partial_sum popcount
    59     kBuilder->prepareFunction();
     76    Value * counts = partial_sum_popcount(iBuilder, mDeletionFieldWidth, iBuilder->simd_not(del_mask));
     77    iBuilder->CreateBlockAlignedStore(iBuilder->bitCast(counts), delCountBlock, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
     78                                         
     79    iBuilder->CreateRetVoid();
    6080   
    61     Value * del_mask = iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(stream_count));
    62    
    63     std::vector<Value *> move_masks = parallel_prefix_deletion_masks(iBuilder, fw, del_mask);
    64     for (unsigned j = 0; j < stream_count; ++j) {
    65         Value * input = iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(j));
    66         Value * output = apply_parallel_prefix_deletion(iBuilder, fw, del_mask, move_masks, input);
    67         iBuilder->CreateBlockAlignedStore(output, kBuilder->getOutputStream(j));
     81    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", finalBlockFunction, 0));
     82    Value * remainingBytes = getParameter(finalBlockFunction, "remainingBytes");
     83    inputStreamBlock = getParameter(finalBlockFunction, "inputStreamSet");
     84    outputStreamBlock = getParameter(finalBlockFunction, "outputStreamSet");
     85    delCountBlock = getParameter(finalBlockFunction, "deletionCounts");
     86    Value * remaining = iBuilder->CreateZExt(remainingBytes, iBuilder->getIntNTy(blockSize));
     87    Value * EOF_del = iBuilder->bitCast(iBuilder->CreateShl(Constant::getAllOnesValue(iBuilder->getIntNTy(blockSize)), remaining));
     88    Value * const delmaskPtr = iBuilder->CreateGEP(inputStreamBlock, {iBuilder->getInt32(0), iBuilder->getInt32(16)});
     89    Value * const delmaskVal = iBuilder->CreateBlockAlignedLoad(delmaskPtr);
     90    iBuilder->CreateBlockAlignedStore(iBuilder->CreateOr(EOF_del, delmaskVal), delmaskPtr);
     91    Function::arg_iterator args = finalBlockFunction->arg_begin();
     92    Value * self = &*(args++);
     93    /* Skip "remaining" arg */ args++;
     94    std::vector<Value *> doBlockArgs = {self};
     95    while (args != finalBlockFunction->arg_end()){
     96        doBlockArgs.push_back(&*args++);
    6897    }
    69     Value * counts = partial_sum_popcount(iBuilder, fw, iBuilder->simd_not(del_mask));
    70    
    71     iBuilder->CreateBlockAlignedStore(iBuilder->bitCast(counts), kBuilder->getOutputStream(stream_count));
    72 
    73     kBuilder->finalize();
    74 }
     98    iBuilder->CreateCall(doBlockFunction, doBlockArgs);   
     99    iBuilder->CreateRetVoid();
     100    iBuilder->restoreIP(savePoint);
    75101}
    76102
     103
Note: See TracChangeset for help on using the changeset viewer.