Ignore:
Timestamp:
Jun 23, 2016, 8:26:09 PM (3 years ago)
Author:
cameron
Message:

Update u8u16 to use new kernel infrastructure

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.cpp

    r5040 r5071  
    47 47 }
    48 48               
    49 void generateP2SKernel(Module * m, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder) {
    50     for (unsigned i = 0; i < 8; ++i) {
    51         kBuilder->addInputStream(1);
     49 void p2sKernel::generateKernel() {
     50    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
     51    if (mKernelStateType == nullptr) finalizeKernelStateType();
     52    KernelBuilder::generateKernel();
     53
     54    Module * m = iBuilder->getModule();
     55    addTrivialFinalBlockMethod(m);
     56    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
     57   
     58    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
     59   
     60    Value * basisBitsBlock_ptr = getParameter(doBlockFunction, "basisBits");  // input
     61    Value * byteStreamBlock_ptr = getParameter(doBlockFunction, "byteStream"); // output
     62    Value * p_bitblock[8];
     63    for (unsigned i = 0; i < 8; i++) {
     64        p_bitblock[i] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(i)});
    5265    }
    53     kBuilder->addOutputStream(8);
    54     kBuilder->prepareFunction();
    55     Value * input[8];
     66    Value * s_bytepack[8];
     67    p2s(iBuilder, p_bitblock, s_bytepack);
    5668    for (unsigned j = 0; j < 8; ++j) {
    57         input[j] = iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(j));
     69        iBuilder->CreateBlockAlignedStore(s_bytepack[j], byteStreamBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(j)});
    5870    }
    59     Value * output[8];
    60     p2s(iBuilder, input, output);
    61     Value * output_ptr = kBuilder->getOutputStream(0);
    62     for (unsigned j = 0; j < 8; ++j) {
     71    iBuilder->CreateRetVoid();
     72    iBuilder->restoreIP(savePoint);
     73}
     74       
     75 void p2s_16Kernel::generateKernel() {
     76    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
     77    if (mKernelStateType == nullptr) finalizeKernelStateType();
     78    KernelBuilder::generateKernel();
    6379
    64         iBuilder->CreateBlockAlignedStore(output[j], iBuilder->CreateGEP(output_ptr, std::vector<Value *>({ iBuilder->getInt32(0), iBuilder->getInt32(j) })));
    65     }
    66     kBuilder->finalize();
    67 }
    68 
    69 void generateP2S_16Kernel(Module * m, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder) {
    70     for (unsigned i = 0; i < 16; ++i) {
    71         kBuilder->addInputStream(1);
    72     }
    73     kBuilder->addOutputStream(16);
    74     kBuilder->prepareFunction();
     80    Module * m = iBuilder->getModule();
     81    addTrivialFinalBlockMethod(m);
     82    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
     83   
     84    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
     85   
     86    Value * basisBitsBlock_ptr = getParameter(doBlockFunction, "basisBits");  // input
     87    Value * i16StreamBlock_ptr = getParameter(doBlockFunction, "i16Stream"); // output
     88   
    7589    Value * hi_input[8];
    7690    for (unsigned j = 0; j < 8; ++j) {
    77         hi_input[j] = iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(j));
     91        hi_input[j] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(j)});
    7892    }
    7993    Value * hi_bytes[8];
     
    8296    Value * lo_input[8];
    8397    for (unsigned j = 0; j < 8; ++j) {
    84         lo_input[j] = iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(j+8));
     98        lo_input[j] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(j+8)});
    85 99    }
    86 100    Value * lo_bytes[8];
    87 101    p2s(iBuilder, lo_input, lo_bytes);
    88 102   
    89     Value * output_ptr = kBuilder->getOutputStream(0);
    90103    for (unsigned j = 0; j < 8; ++j) {
    91104        Value * merge0 = iBuilder->bitCast(iBuilder->esimd_mergel(8, hi_bytes[j], lo_bytes[j]));
    92105        Value * merge1 = iBuilder->bitCast(iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
    93         iBuilder->CreateBlockAlignedStore(merge0, iBuilder->CreateGEP(output_ptr, std::vector<Value *>({ iBuilder->getInt32(0), iBuilder->getInt32(2*j) })));
    94         iBuilder->CreateBlockAlignedStore(merge1, iBuilder->CreateGEP(output_ptr, std::vector<Value *>({ iBuilder->getInt32(0), iBuilder->getInt32(2*j+1) })));
     106        iBuilder->CreateBlockAlignedStore(merge0, i16StreamBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(2*j)});
     107        iBuilder->CreateBlockAlignedStore(merge1, i16StreamBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(2*j+1)});
    95108    }
    96     kBuilder->finalize();
     109    iBuilder->CreateRetVoid();
     110    iBuilder->restoreIP(savePoint);
    97 111 }
     112       
    98 113   
    99 114 Function * create_write(Module * const mod) {
     
    110 125 const size_t OutputBufferSize=65536;
    111 126
    112 void generateP2S_16_withCompressedOutputKernel(Module * m, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder) {
     127 void p2s_16Kernel_withCompressedOutputKernel::generateKernel() {
    113128    outs().SetBufferSize(OutputBufferSize);
    114     for (unsigned i = 0; i < 16; ++i) {
    115         kBuilder->addInputStream(1);
    116     }       
    117     kBuilder->addInputStream(1);  // partial popcounts
    118     kBuilder->addOutputStream(16);
     129    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
     130    if (mKernelStateType == nullptr) finalizeKernelStateType();
     131    KernelBuilder::generateKernel();
    119132
    120     kBuilder->prepareFunction();
    121133   
     134    Module * m = iBuilder->getModule();
    122135    Type * i8PtrTy = iBuilder->getInt8PtrTy();
    123136    Type * i64 = iBuilder->getIntNTy(64);
     
    125138   
    126139    Function * writefn = cast<Function>(m->getOrInsertFunction("buffered_write", iBuilder->getVoidTy(), i8PtrTy, i64, nullptr));
     140
     141    addTrivialFinalBlockMethod(m);
     142    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
    127143   
     144    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
     145   
     146    Value * basisBitsBlock_ptr = getParameter(doBlockFunction, "basisBits");  // input
     147    Value * delCountBlock_ptr = getParameter(doBlockFunction, "deletionCounts");
     148    Value * i16StreamBlock_ptr = getParameter(doBlockFunction, "i16Stream"); // output
     149
    128150    Value * hi_input[8];
    129151    for (unsigned j = 0; j < 8; ++j) {
    130         hi_input[j] = iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(j));
     152        hi_input[j] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(j)});
    131153    }
    132154    Value * hi_bytes[8];
     
    135157    Value * lo_input[8];
    136158    for (unsigned j = 0; j < 8; ++j) {
    137         lo_input[j] = iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(j+8));
     159        lo_input[j] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(j+8)});
    138160    }
    139161    Value * lo_bytes[8];
     
    142164    unsigned UTF_16_units_per_register = iBuilder->getBitBlockWidth()/16;
    143165   
    144     Value * partial_counts = iBuilder->fwCast(UTF_16_units_per_register, iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(16)));
    145     if (UTF_16_units_per_register < 16) {
    146         partial_counts = iBuilder->CreateZExt(partial_counts, VectorType::get(iBuilder->getIntNTy(16), iBuilder->getBitBlockWidth()/UTF_16_units_per_register));
    147     }
    148     Value * byte_counts = iBuilder->CreateAdd(partial_counts, partial_counts); // double the code unit count to get byte counts
     166    Value * unit_counts = iBuilder->fwCast(UTF_16_units_per_register, iBuilder->CreateBlockAlignedLoad(delCountBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
    149167   
    150     Value * output_ptr = iBuilder->CreateBitCast(kBuilder->getOutputStream(0), i8PtrTy);
    151     Value * byte_offset = ConstantInt::get(i64, 0);
     168    Value * u16_output_ptr = iBuilder->CreateBitCast(i16StreamBlock_ptr, PointerType::get(iBuilder->getInt16Ty(), 0));
     169    Value * offset = ConstantInt::get(i64, 0);
    152170   
    153171    for (unsigned j = 0; j < 8; ++j) {
     
    155173        Value * merge1 = iBuilder->bitCast(iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
    156174        //iBuilder->CallPrintRegister("merge0", merge0);
    157         iBuilder->CreateAlignedStore(merge0, iBuilder->CreateBitCast(iBuilder->CreateGEP(output_ptr, byte_offset), bitBlockPtrTy), 1);
    158         byte_offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(byte_counts, iBuilder->getInt32(2*j)), i64);
    159         //iBuilder->CallPrintInt("byte_offset", byte_offset);
    160         iBuilder->CreateAlignedStore(merge1, iBuilder->CreateBitCast(iBuilder->CreateGEP(output_ptr, byte_offset), bitBlockPtrTy), 1);
     175        iBuilder->CreateAlignedStore(merge0, iBuilder->CreateBitCast(iBuilder->CreateGEP(u16_output_ptr, offset), bitBlockPtrTy), 1);
     176        offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(2*j)), i64);
     177        //iBuilder->CallPrintInt("offset", offset);
     178        iBuilder->CreateAlignedStore(merge1, iBuilder->CreateBitCast(iBuilder->CreateGEP(u16_output_ptr, offset), bitBlockPtrTy), 1);
    161179        //iBuilder->CallPrintRegister("merge1", merge1);
    162         byte_offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(byte_counts, iBuilder->getInt32(2*j+1)), i64);
    163         //iBuilder->CallPrintInt("byte_offset", byte_offset);
     180        offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(2*j+1)), i64);
     181        //iBuilder->CallPrintInt("offset", offset);
    164182    }
    165     iBuilder->CreateCall(writefn, std::vector<Value *>({output_ptr, byte_offset}));
    166    
    167     kBuilder->finalize();
     183    Value * byte_offset = iBuilder->CreateAdd(offset, offset);
     184    iBuilder->CreateCall(writefn, std::vector<Value *>({iBuilder->CreateBitCast(i16StreamBlock_ptr, i8PtrTy), byte_offset}));
     185    iBuilder->CreateRetVoid();
     186    iBuilder->restoreIP(savePoint);
    168 187 }
    169 
     188       
    170 189 }
Note: See TracChangeset for help on using the changeset viewer.