Ignore:
Timestamp:
Apr 8, 2016, 2:40:24 PM (3 years ago)
Author:
cameron
Message:

u8u16 progress

Location:
icGREP/icgrep-devel/icgrep/kernels
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/deletion.cpp

    r5004 r5007  
    3838}
    3939
     40Value * partial_sum_popcount(IDISA::IDISA_Builder * iBuilder, unsigned fw, Value * mask) {
     41    Value * per_field = iBuilder->simd_popcount(fw, mask);
     42    for (unsigned move = 1; move < iBuilder->getBitBlockWidth()/fw; move *= 2) {
     43        per_field = iBuilder->simd_add(fw, per_field, iBuilder->mvmd_slli(fw, per_field, move));
     44    }
     45    return per_field;
     46}
     47
    4048// Apply deletion to a set of stream_count input streams to produce a set of output streams.
    4149// Kernel inputs: stream_count data streams plus one del_mask stream
     50// Outputs: the deleted streams, plus a partial sum popcount
    4251void generateDeletionKernel(Module * m, IDISA::IDISA_Builder * iBuilder, unsigned fw, unsigned stream_count, KernelBuilder * kBuilder) {
    4352   
     
    4756    }
    4857    kBuilder->addInputStream(1, "del_mask");
     58    kBuilder->addOutputStream(1);  // partial_sum popcount
    4959    kBuilder->prepareFunction();
    5060   
     
    5767        iBuilder->CreateBlockAlignedStore(output, kBuilder->getOutputStream(j));
    5868    }
     69    Value * counts = partial_sum_popcount(iBuilder, fw, del_mask);
     70   
     71    iBuilder->CreateBlockAlignedStore(iBuilder->bitCast(counts), kBuilder->getOutputStream(stream_count));
     72
    5973    kBuilder->finalize();
    6074}
    6175}
     76
  • icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.cpp

    r5006 r5007  
    7575    Value * output_ptr = kBuilder->getOutputStream(0);
    7676    for (unsigned j = 0; j < 8; ++j) {
    77         Value * merge0 = iBuilder->esimd_mergel(8, hi_bytes[j], lo_bytes[j]);
    78         Value * merge1 = iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]);
     77        Value * merge0 = iBuilder->bitCast(iBuilder->esimd_mergel(8, hi_bytes[j], lo_bytes[j]));
     78        Value * merge1 = iBuilder->bitCast(iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
    7979        iBuilder->CreateBlockAlignedStore(merge0, iBuilder->CreateGEP(output_ptr, std::vector<Value *>({ iBuilder->getInt32(0), iBuilder->getInt32(2*j) })));
    8080        iBuilder->CreateBlockAlignedStore(merge1, iBuilder->CreateGEP(output_ptr, std::vector<Value *>({ iBuilder->getInt32(0), iBuilder->getInt32(2*j+1) })));
  • icGREP/icgrep-devel/icgrep/kernels/stdout_kernel.cpp

    r4995 r5007  
    2222   
    2323
    24 void generateStdOutKernel(Module * m, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder) {
     24void generateStdOutKernel(Module * m, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder, unsigned fw) {
    2525    LLVMContext & ctxt = m->getContext();
    2626
     
    3030    // Insert this declaration in the module (if necessary):  declare i64 @write(i32, i8*, i64)
    3131    Function * writefn = create_write(m);
    32     kBuilder->addInputStream(8, "byte_pack");
     32    kBuilder->addInputStream(fw, "byte_pack");
    3333    // No output streams.
    3434    kBuilder->addInternalState(i64, "RemainingBytes");
     
    3939    BasicBlock * final_block_write = BasicBlock::Create(ctxt, "final_block_write", function, 0);
    4040    BasicBlock * exit_block = BasicBlock::Create(ctxt, "exit_stdout", function, 0);
    41 
    4241    Value * bytes = iBuilder->CreateLoad(kBuilder->getInternalState("RemainingBytes"));
    43     //iBuilder->CallPrintInt("bytes", iBuilder->CreatePtrToInt(bytes, iBuilder->getInt64Ty()));
    4442
    4543    Value * input = iBuilder->CreateBitCast(kBuilder->getInputStream(0), iBuilder->getInt8PtrTy());
     
    5048   
    5149    iBuilder->SetInsertPoint(full_block_write);
    52     iBuilder->CreateCall(writefn, std::vector<Value *>({ConstantInt::get(i32, 1), input, blockSize}));
     50    Value * outputBytes = ConstantInt::get(i64, iBuilder->getBitBlockWidth() * fw/8);
     51    iBuilder->CreateCall(writefn, std::vector<Value *>({ConstantInt::get(i32, 1), input, outputBytes}));
    5352    Value * remain = iBuilder->CreateSub(bytes, blockSize);
    5453    kBuilder->setInternalState("RemainingBytes", remain);
    55     //iBuilder->CallPrintInt("remain", iBuilder->CreatePtrToInt(remain, iBuilder->getInt64Ty()));
    56                  
     54    iBuilder->CreatePtrToInt(remain, iBuilder->getInt64Ty());
    5755    iBuilder->CreateBr(exit_block);
    5856   
    59    
    6057    iBuilder->SetInsertPoint(final_block_write);
    61     iBuilder->CreateCall(writefn, std::vector<Value *>({ConstantInt::get(i32, 1), input, bytes}));
     58    outputBytes = iBuilder->CreateMul(bytes, ConstantInt::get(i64, fw/8));
     59    iBuilder->CreateCall(writefn, std::vector<Value *>({ConstantInt::get(i32, 1), input, outputBytes}));
    6260    kBuilder->setInternalState("RemainingBytes", ConstantInt::getNullValue(i64));
    6361    iBuilder->CreateBr(exit_block);
     
    6866    kBuilder->finalize();
    6967}
     68
     69
    7070   
    7171}
  • icGREP/icgrep-devel/icgrep/kernels/stdout_kernel.h

    r4988 r5007  
    1414class KernelBuilder;
    1515
    16     void generateStdOutKernel(llvm::Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder);
     16    void generateStdOutKernel(llvm::Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder, unsigned fw = 8);
    1717}
    1818
  • icGREP/icgrep-devel/icgrep/kernels/u8u16_pipeline.cpp

    r5005 r5007  
    3939
    4040void PipelineBuilder::CreateKernels(PabloFunction * function){
    41     mS2PKernel = new KernelBuilder("s2p", mMod, iBuilder, SegmentSize);
    42     mU8U16Kernel = new KernelBuilder("u8u16", mMod, iBuilder, SegmentSize);
    43     mDelKernel = new KernelBuilder("del", mMod, iBuilder, SegmentSize);
    44     mP2SKernel = new KernelBuilder("p2s", mMod, iBuilder, SegmentSize);   
    45     mStdOutKernel = new KernelBuilder("stdout", mMod, iBuilder, SegmentSize);
     41    mS2PKernel = new KernelBuilder(iBuilder, "s2p", SegmentSize);
     42    mU8U16Kernel = new KernelBuilder(iBuilder, "u8u16", SegmentSize);
     43    mDelKernel = new KernelBuilder(iBuilder, "del", SegmentSize);
     44    mP2SKernel = new KernelBuilder(iBuilder, "p2s", SegmentSize);   
     45    mStdOutKernel = new KernelBuilder(iBuilder, "stdout", SegmentSize);
    4646
    4747    generateS2PKernel(mMod, iBuilder, mS2PKernel);
    48     generateP2SKernel(mMod, iBuilder, mP2SKernel);
     48    generateP2S_16Kernel(mMod, iBuilder, mP2SKernel);
    4949    generateDeletionKernel(mMod, iBuilder, /*fw=*/8, /*stream_count=*/16, mDelKernel);
    50     generateStdOutKernel(mMod, iBuilder, mStdOutKernel);
     50    generateStdOutKernel(mMod, iBuilder, mStdOutKernel, 16);
    5151
    5252    pablo_function_passes(function);
     
    6969Function *  PipelineBuilder::ExecuteKernels() {
    7070    Type * const int64ty = iBuilder->getInt64Ty();
    71     Type * const int8PtrTy = iBuilder->getInt8PtrTy();
    7271    Type * const inputType = PointerType::get(ArrayType::get(StructType::get(mMod->getContext(), std::vector<Type *>({ArrayType::get(mBitBlockType, 8)})), 1), 0);
    7372   
     
    105104    Instance * stdOutInstance = mStdOutKernel->instantiate(p2sInstance->getOutputStreamSet());
    106105
    107     stdOutInstance->setInternalState("RemainingBytes", bufferSize);  // The total number of bytes to be sent to stdout.
     106    stdOutInstance->setInternalState("RemainingBytes", bufferSize);  // The total number of bytes remaining in input.
    108107
    109108   
Note: See TracChangeset for help on using the changeset viewer.