Changeset 5007


Ignore:
Timestamp: Apr 8, 2016, 2:40:24 PM (18 months ago)
Author: cameron
Message: u8u16 progress
Location: icGREP/icgrep-devel/icgrep
Files: 8 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IDISA/idisa_builder.cpp

    r5001 r5007  
    343343}
    344344
     345Value * IDISA_Builder::mvmd_slli(unsigned fw, Value * a, unsigned shift) {
     346    unsigned field_count = mBitBlockWidth/fw;
     347    return mvmd_dslli(fw, a, Constant::getNullValue(fwVectorType(fw)), field_count - shift);
     348}
     349
     350Value * IDISA_Builder::mvmd_srli(unsigned fw, Value * a, unsigned shift) {
     351    unsigned field_count = mBitBlockWidth/fw;
     352    return mvmd_dslli(fw, Constant::getNullValue(fwVectorType(fw)), a, shift);
     353}
     354
    345355Value * IDISA_Builder::mvmd_dslli(unsigned fw, Value * a, Value * b, unsigned shift) {
    346356    unsigned field_count = mBitBlockWidth/fw;
  • icGREP/icgrep-devel/icgrep/IDISA/idisa_builder.h

    r4986 r5007  
    5959    LoadInst * CreateBlockAlignedLoad(Value * const ptr);
    6060    LoadInst * CreateBlockAlignedLoad(Value * const ptr, Value * const index);
    61     LoadInst * CreateBlockAlignedLoad(Value * const ptr, std::initializer_list<Value *> indicies);
     61    LoadInst * CreateBlockAlignedLoad(Value * const ptr, std::initializer_list<Value *> indices);
    6262
    6363    void CreateBlockAlignedStore(Value * const value, Value * const ptr);
    6464    void CreateBlockAlignedStore(Value * const value, Value * const ptr, Value * const index);
    65     void CreateBlockAlignedStore(Value * const value, Value * const ptr, std::initializer_list<Value *> indicies);
     65    void CreateBlockAlignedStore(Value * const value, Value * const ptr, std::initializer_list<Value *> indices);
    6666
    6767    void CallPrintRegister(const std::string & regName, Value * const value);
     
    105105    virtual Value * mvmd_extract(unsigned fw, Value * a, unsigned fieldIndex);
    106106    virtual Value * mvmd_insert(unsigned fw, Value * blk, Value * elt, unsigned fieldIndex);
     107    virtual Value * mvmd_slli(unsigned fw, Value * a, unsigned shift);
     108    virtual Value * mvmd_srli(unsigned fw, Value * a, unsigned shift);
    107109    virtual Value * mvmd_dslli(unsigned fw, Value * a, Value * b, unsigned shift);
    108110   
  • icGREP/icgrep-devel/icgrep/kernels/deletion.cpp

    r5004 r5007  
    3838}
    3939
     40Value * partial_sum_popcount(IDISA::IDISA_Builder * iBuilder, unsigned fw, Value * mask) {
     41    Value * per_field = iBuilder->simd_popcount(fw, mask);
     42    for (unsigned move = 1; move < iBuilder->getBitBlockWidth()/fw; move *= 2) {
     43        per_field = iBuilder->simd_add(fw, per_field, iBuilder->mvmd_slli(fw, per_field, move));
     44    }
     45    return per_field;
     46}
     47
    4048// Apply deletion to a set of stream_count input streams to produce a set of output streams.
    4149// Kernel inputs: stream_count data streams plus one del_mask stream
     50// Outputs: the deleted streams, plus a partial sum popcount
    4251void generateDeletionKernel(Module * m, IDISA::IDISA_Builder * iBuilder, unsigned fw, unsigned stream_count, KernelBuilder * kBuilder) {
    4352   
     
    4756    }
    4857    kBuilder->addInputStream(1, "del_mask");
     58    kBuilder->addOutputStream(1);  // partial_sum popcount
    4959    kBuilder->prepareFunction();
    5060   
     
    5767        iBuilder->CreateBlockAlignedStore(output, kBuilder->getOutputStream(j));
    5868    }
     69    Value * counts = partial_sum_popcount(iBuilder, fw, del_mask);
     70   
     71    iBuilder->CreateBlockAlignedStore(iBuilder->bitCast(counts), kBuilder->getOutputStream(stream_count));
     72
    5973    kBuilder->finalize();
    6074}
    6175}
     76
  • icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.cpp

    r5006 r5007  
    7575    Value * output_ptr = kBuilder->getOutputStream(0);
    7676    for (unsigned j = 0; j < 8; ++j) {
    77         Value * merge0 = iBuilder->esimd_mergel(8, hi_bytes[j], lo_bytes[j]);
    78         Value * merge1 = iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]);
     77        Value * merge0 = iBuilder->bitCast(iBuilder->esimd_mergel(8, hi_bytes[j], lo_bytes[j]));
     78        Value * merge1 = iBuilder->bitCast(iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
    7979        iBuilder->CreateBlockAlignedStore(merge0, iBuilder->CreateGEP(output_ptr, std::vector<Value *>({ iBuilder->getInt32(0), iBuilder->getInt32(2*j) })));
    8080        iBuilder->CreateBlockAlignedStore(merge1, iBuilder->CreateGEP(output_ptr, std::vector<Value *>({ iBuilder->getInt32(0), iBuilder->getInt32(2*j+1) })));
  • icGREP/icgrep-devel/icgrep/kernels/stdout_kernel.cpp

    r4995 r5007  
    2222   
    2323
    24 void generateStdOutKernel(Module * m, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder) {
     24void generateStdOutKernel(Module * m, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder, unsigned fw) {
    2525    LLVMContext & ctxt = m->getContext();
    2626
     
    3030    // Insert this declaration in the module (if necessary):  declare i64 @write(i32, i8*, i64)
    3131    Function * writefn = create_write(m);
    32     kBuilder->addInputStream(8, "byte_pack");
     32    kBuilder->addInputStream(fw, "byte_pack");
    3333    // No output streams.
    3434    kBuilder->addInternalState(i64, "RemainingBytes");
     
    3939    BasicBlock * final_block_write = BasicBlock::Create(ctxt, "final_block_write", function, 0);
    4040    BasicBlock * exit_block = BasicBlock::Create(ctxt, "exit_stdout", function, 0);
    41 
    4241    Value * bytes = iBuilder->CreateLoad(kBuilder->getInternalState("RemainingBytes"));
    43     //iBuilder->CallPrintInt("bytes", iBuilder->CreatePtrToInt(bytes, iBuilder->getInt64Ty()));
    4442
    4543    Value * input = iBuilder->CreateBitCast(kBuilder->getInputStream(0), iBuilder->getInt8PtrTy());
     
    5048   
    5149    iBuilder->SetInsertPoint(full_block_write);
    52     iBuilder->CreateCall(writefn, std::vector<Value *>({ConstantInt::get(i32, 1), input, blockSize}));
     50    Value * outputBytes = ConstantInt::get(i64, iBuilder->getBitBlockWidth() * fw/8);
     51    iBuilder->CreateCall(writefn, std::vector<Value *>({ConstantInt::get(i32, 1), input, outputBytes}));
    5352    Value * remain = iBuilder->CreateSub(bytes, blockSize);
    5453    kBuilder->setInternalState("RemainingBytes", remain);
    55     //iBuilder->CallPrintInt("remain", iBuilder->CreatePtrToInt(remain, iBuilder->getInt64Ty()));
    56                  
     54    iBuilder->CreatePtrToInt(remain, iBuilder->getInt64Ty());
    5755    iBuilder->CreateBr(exit_block);
    5856   
    59    
    6057    iBuilder->SetInsertPoint(final_block_write);
    61     iBuilder->CreateCall(writefn, std::vector<Value *>({ConstantInt::get(i32, 1), input, bytes}));
     58    outputBytes = iBuilder->CreateMul(bytes, ConstantInt::get(i64, fw/8));
     59    iBuilder->CreateCall(writefn, std::vector<Value *>({ConstantInt::get(i32, 1), input, outputBytes}));
    6260    kBuilder->setInternalState("RemainingBytes", ConstantInt::getNullValue(i64));
    6361    iBuilder->CreateBr(exit_block);
     
    6866    kBuilder->finalize();
    6967}
     68
     69
    7070   
    7171}
  • icGREP/icgrep-devel/icgrep/kernels/stdout_kernel.h

    r4988 r5007  
    1414class KernelBuilder;
    1515
    16     void generateStdOutKernel(llvm::Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder);
     16    void generateStdOutKernel(llvm::Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder, unsigned fw = 8);
    1717}
    1818
  • icGREP/icgrep-devel/icgrep/kernels/u8u16_pipeline.cpp

    r5005 r5007  
    3939
    4040void PipelineBuilder::CreateKernels(PabloFunction * function){
    41     mS2PKernel = new KernelBuilder("s2p", mMod, iBuilder, SegmentSize);
    42     mU8U16Kernel = new KernelBuilder("u8u16", mMod, iBuilder, SegmentSize);
    43     mDelKernel = new KernelBuilder("del", mMod, iBuilder, SegmentSize);
    44     mP2SKernel = new KernelBuilder("p2s", mMod, iBuilder, SegmentSize);   
    45     mStdOutKernel = new KernelBuilder("stdout", mMod, iBuilder, SegmentSize);
     41    mS2PKernel = new KernelBuilder(iBuilder, "s2p", SegmentSize);
     42    mU8U16Kernel = new KernelBuilder(iBuilder, "u8u16", SegmentSize);
     43    mDelKernel = new KernelBuilder(iBuilder, "del", SegmentSize);
     44    mP2SKernel = new KernelBuilder(iBuilder, "p2s", SegmentSize);   
     45    mStdOutKernel = new KernelBuilder(iBuilder, "stdout", SegmentSize);
    4646
    4747    generateS2PKernel(mMod, iBuilder, mS2PKernel);
    48     generateP2SKernel(mMod, iBuilder, mP2SKernel);
     48    generateP2S_16Kernel(mMod, iBuilder, mP2SKernel);
    4949    generateDeletionKernel(mMod, iBuilder, /*fw=*/8, /*stream_count=*/16, mDelKernel);
    50     generateStdOutKernel(mMod, iBuilder, mStdOutKernel);
     50    generateStdOutKernel(mMod, iBuilder, mStdOutKernel, 16);
    5151
    5252    pablo_function_passes(function);
     
    6969Function *  PipelineBuilder::ExecuteKernels() {
    7070    Type * const int64ty = iBuilder->getInt64Ty();
    71     Type * const int8PtrTy = iBuilder->getInt8PtrTy();
    7271    Type * const inputType = PointerType::get(ArrayType::get(StructType::get(mMod->getContext(), std::vector<Type *>({ArrayType::get(mBitBlockType, 8)})), 1), 0);
    7372   
     
    105104    Instance * stdOutInstance = mStdOutKernel->instantiate(p2sInstance->getOutputStreamSet());
    106105
    107     stdOutInstance->setInternalState("RemainingBytes", bufferSize);  // The total number of bytes to be sent to stdout.
     106    stdOutInstance->setInternalState("RemainingBytes", bufferSize);  // The total number of bytes remaining in input.
    108107
    109108   
  • icGREP/icgrep-devel/icgrep/u8u16.cpp

    r5005 r5007  
    1515#include <llvm/ExecutionEngine/MCJIT.h>
    1616#include <llvm/IRReader/IRReader.h>
     17#include <llvm/IR/Verifier.h>
     18#include <llvm/Support/Debug.h>
     19
    1720#include <llvm/Support/CommandLine.h>
    1821#include <llvm/CodeGen/CommandFlags.h>
     
    4548static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<input file ...>"), cl::OneOrMore);
    4649
     50static cl::OptionCategory eIRDumpOptions("LLVM IR Dump Options", "These options control dumping of LLVM IR.");
     51static cl::opt<bool> DumpGeneratedIR("dump-generated-IR", cl::init(false), cl::desc("Print LLVM IR generated by Pablo Compiler."), cl::cat(eIRDumpOptions));
    4752
    4853static cl::OptionCategory cMachineCodeOptimization("Machine Code Optimizations", "These options control back-end compilier optimization levels.");
     
    287292    pipelineBuilder.CreateKernels(function);
    288293
    289     pipelineBuilder.ExecuteKernels();
    290 
     294    llvm::Function * main_IR = pipelineBuilder.ExecuteKernels();
     295   
     296    if (DumpGeneratedIR) {
     297        M->dump();
     298    }
     299   
     300    verifyModule(*M, &dbgs());
    291301    //std::cerr << "ExecuteKernels(); done\n";
    292     llvm::Function * main_IR = M->getFunction("Main");
    293302    ExecutionEngine * mEngine = JIT_to_ExecutionEngine(M);
    294303   
     
    297306
    298307    delete idb;
    299 
    300308    return reinterpret_cast<u8u16FunctionType>(mEngine->getPointerToFunction(main_IR));
    301309}
    302310
    303 void doCaseFold(u8u16FunctionType fn_ptr, const std::string & fileName) {
     311void u8u16(u8u16FunctionType fn_ptr, const std::string & fileName) {
    304312    std::string mFileName = fileName;
    305313    size_t mFileSize;
     
    372380
    373381    for (unsigned i = 0; i != inputFiles.size(); ++i) {
    374         std::cerr << inputFiles[i] << " beginning\n";
    375         doCaseFold(fn_ptr, inputFiles[i]);
     382        u8u16(fn_ptr, inputFiles[i]);
    376383    }
    377384
Note: See TracChangeset for help on using the changeset viewer.