Changeset 5009


Ignore:
Timestamp: Apr 8, 2016, 5:36:43 PM (20 months ago)
Author: cameron
Message:

The u8u16 transcoder demo program is now working.

Location: icGREP/icgrep-devel/icgrep/kernels
Files: 4 edited

Legend: Unmodified, Added, Removed
  • icGREP/icgrep-devel/icgrep/kernels/deletion.cpp

    r5007 r5009  
    67 67        iBuilder->CreateBlockAlignedStore(output, kBuilder->getOutputStream(j));
    68 68    }
    69     Value * counts = partial_sum_popcount(iBuilder, fw, del_mask);
     69    Value * counts = partial_sum_popcount(iBuilder, fw, iBuilder->simd_not(del_mask));
    70 70   
    71 71    iBuilder->CreateBlockAlignedStore(iBuilder->bitCast(counts), kBuilder->getOutputStream(stream_count));
  • icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.cpp

    r5007 r5009  
    2 2#include "kernels/kernel.h"
    3 3#include "IDISA/idisa_builder.h"
     4#include <llvm/IR/TypeBuilder.h>
     5#include <llvm/IR/Type.h>
     6#include <iostream>
    4 7
    5 8namespace kernel{
     
    82 85    kBuilder->finalize();
    83 86}
     87   
     88Function * create_write(Module * const mod) {
     89    Function * write = mod->getFunction("write");
     90    if (write == nullptr) {
     91        FunctionType *write_type =
     92        TypeBuilder<long(int, char *, long), false>::get(mod->getContext());
     93        write = cast<Function>(mod->getOrInsertFunction("write", write_type,
     94                                                        AttributeSet().addAttribute(mod->getContext(), 2U, Attribute::NoAlias)));
     95    }
     96    return write;
     97}
     98
     99void generateP2S_16_withCompressedOutputKernel(Module * m, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder) {
     100    for (unsigned i = 0; i < 16; ++i) {
     101        kBuilder->addInputStream(1);
     102    }       
     103    kBuilder->addInputStream(1);  // partial popcounts
     104    kBuilder->addOutputStream(16);
     105
     106    kBuilder->prepareFunction();
     107    Function * writefn = create_write(m);
     108   
     109    Type * i8PtrTy = iBuilder->getInt8PtrTy();
     110    Type * i64 = iBuilder->getIntNTy(64);
     111    Type * bitBlockPtrTy = llvm::PointerType::get(iBuilder->getBitBlockType(), 0);
     112   
     113    Value * hi_input[8];
     114    for (unsigned j = 0; j < 8; ++j) {
     115        hi_input[j] = iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(j));
     116    }
     117    Value * hi_bytes[8];
     118    p2s(iBuilder, hi_input, hi_bytes);
     119   
     120    Value * lo_input[8];
     121    for (unsigned j = 0; j < 8; ++j) {
     122        lo_input[j] = iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(j+8));
     123    }
     124    Value * lo_bytes[8];
     125    p2s(iBuilder, lo_input, lo_bytes);
     126   
     127    unsigned UTF_16_units_per_register = iBuilder->getBitBlockWidth()/16;
     128   
     129    Value * partial_counts = iBuilder->fwCast(UTF_16_units_per_register, iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(16)));
     130    Value * byte_counts = iBuilder->CreateAdd(partial_counts, partial_counts); // double the code unit count to get byte counts
     131   
     132    Value * output_ptr = iBuilder->CreateBitCast(kBuilder->getOutputStream(0), i8PtrTy);
     133    Value * byte_offset = ConstantInt::get(i64, 0);
     134   
     135    for (unsigned j = 0; j < 8; ++j) {
     136        Value * merge0 = iBuilder->bitCast(iBuilder->esimd_mergel(8, hi_bytes[j], lo_bytes[j]));
     137        Value * merge1 = iBuilder->bitCast(iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
     138        //iBuilder->CallPrintRegister("merge0", merge0);
     139        iBuilder->CreateAlignedStore(merge0, iBuilder->CreateBitCast(iBuilder->CreateGEP(output_ptr, byte_offset), bitBlockPtrTy), 1);
     140        byte_offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(byte_counts, iBuilder->getInt32(2*j)), i64);
     141        //iBuilder->CallPrintInt("byte_offset", byte_offset);
     142        iBuilder->CreateAlignedStore(merge1, iBuilder->CreateBitCast(iBuilder->CreateGEP(output_ptr, byte_offset), bitBlockPtrTy), 1);
     143        //iBuilder->CallPrintRegister("merge1", merge1);
     144        byte_offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(byte_counts, iBuilder->getInt32(2*j+1)), i64);
     145        //iBuilder->CallPrintInt("byte_offset", byte_offset);
     146    }
     147    iBuilder->CreateCall(writefn, std::vector<Value *>({iBuilder->getInt32(1), output_ptr, byte_offset}));
     148   
     149    kBuilder->finalize();
     150}
    84 151
    85 152}
  • icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.h

    r5006 r5009  
    18 18    void generateP2S_16Kernel(llvm::Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder);
    19 19
     20    void generateP2S_16_withCompressedOutputKernel(llvm::Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder);
     21
    20 22}
    21 23
  • icGREP/icgrep-devel/icgrep/kernels/u8u16_pipeline.cpp

    r5007 r5009  
    34 34    delete mS2PKernel;
    35 35    delete mU8U16Kernel;
     36    delete mDelKernel;
    36 37    delete mP2SKernel;
    37     delete mStdOutKernel;
     38    //delete mStdOutKernel;
    38 39}
    39 40
     
    43 44    mDelKernel = new KernelBuilder(iBuilder, "del", SegmentSize);
    44 45    mP2SKernel = new KernelBuilder(iBuilder, "p2s", SegmentSize);   
    45     mStdOutKernel = new KernelBuilder(iBuilder, "stdout", SegmentSize);
     46    //mStdOutKernel = new KernelBuilder(iBuilder, "stdout", SegmentSize);
    46 47
    47 48    generateS2PKernel(mMod, iBuilder, mS2PKernel);
    48     generateP2S_16Kernel(mMod, iBuilder, mP2SKernel);
    49     generateDeletionKernel(mMod, iBuilder, /*fw=*/8, /*stream_count=*/16, mDelKernel);
    50     generateStdOutKernel(mMod, iBuilder, mStdOutKernel, 16);
     49    generateP2S_16_withCompressedOutputKernel(mMod, iBuilder, mP2SKernel);
     50    generateDeletionKernel(mMod, iBuilder, iBuilder->getBitBlockWidth()/16, /*stream_count=*/16, mDelKernel);
     51    //generateStdOutKernel(mMod, iBuilder, mStdOutKernel, 16);
    51 52
    52 53    pablo_function_passes(function);
     
    102 103    Instance * delInstance = mDelKernel->instantiate(u8u16Instance->getOutputStreamSet());
    103 104    Instance * p2sInstance = mP2SKernel->instantiate(delInstance->getOutputStreamSet());
    104     Instance * stdOutInstance = mStdOutKernel->instantiate(p2sInstance->getOutputStreamSet());
    105 
    106     stdOutInstance->setInternalState("RemainingBytes", bufferSize);  // The total number of bytes remaining in input.
    107 105
    108 106   
     
    131 129            p2sInstance->CreateDoBlockCall();
    132 130        }
    133         for (unsigned i = 0; i < segmentSize; ++i) {
    134             stdOutInstance->CreateDoBlockCall();
    135         }
    136 131        remainingBytes->addIncoming(iBuilder->CreateSub(remainingBytes, step), segmentBodyBlock);
    137 132        iBuilder->CreateBr(segmentCondBlock);
     
    158 153    delInstance->CreateDoBlockCall();
    159 154    p2sInstance->CreateDoBlockCall();
    160     stdOutInstance->CreateDoBlockCall();
    161 155
    162 156    Value * diff = iBuilder->CreateSub(remainingBytes, step);
     
    181 175
    182 176    u8u16Instance->CreateDoBlockCall();
     177    Value * remaining = iBuilder->CreateZExt(remainingBytes, iBuilder->getIntNTy(mBlockSize));
     178    Value * EOF_del = iBuilder->bitCast(iBuilder->CreateShl(Constant::getAllOnesValue(iBuilder->getIntNTy(mBlockSize)), remaining));
     179    Value * const delmask = u8u16Instance->getOutputStream(16);
     180    iBuilder->CreateBlockAlignedStore(iBuilder->CreateOr(EOF_del, iBuilder->CreateBlockAlignedLoad(delmask)), delmask);
    183 181    delInstance->CreateDoBlockCall();
    184 182    p2sInstance->CreateDoBlockCall();
    185     stdOutInstance->CreateDoBlockCall();
    186 183    iBuilder->CreateRetVoid();
    187 184    return main;
Note: See TracChangeset for help on using the changeset viewer.