Ignore:
Timestamp:
Apr 8, 2016, 5:36:43 PM (23 months ago)
Author:
cameron
Message:

u8u16 transcoder demo program now working

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.cpp

    r5007 r5009  
    22#include "kernels/kernel.h"
    33#include "IDISA/idisa_builder.h"
     4#include <llvm/IR/TypeBuilder.h>
     5#include <llvm/IR/Type.h>
     6#include <iostream>
    47
    58namespace kernel{
     
    8285    kBuilder->finalize();
    8386}
     87   
     // Returns the declaration of a function named "write" in module `mod`,
     // inserting one if the module does not already contain it.
     //
     // When the lookup by name fails, a declaration is created with the
     // function type long(int, char *, long) and a NoAlias attribute at
     // attribute index 2.
     // NOTE(review): index 2 is presumably the second parameter (the char *
     // buffer) under LLVM's attribute indexing where 0 is the return value --
     // confirm against the LLVM version in use.
     //
     // mod: module that is searched and, if needed, extended.
     // Returns: the existing or newly inserted Function.
     88Function * create_write(Module * const mod) {
     89    Function * write = mod->getFunction("write");
     90    if (write == nullptr) {
            // Not declared yet: build the signature and insert the declaration.
     91        FunctionType *write_type =
     92        TypeBuilder<long(int, char *, long), false>::get(mod->getContext());
     93        write = cast<Function>(mod->getOrInsertFunction("write", write_type,
     94                                                        AttributeSet().addAttribute(mod->getContext(), 2U, Attribute::NoAlias)));
     95    }
     96    return write;
     97}
     98
     // Builds a kernel that reads 16 input streams plus one stream of partial
     // popcounts, converts the two groups of 8 streams with p2s, interleaves the
     // resulting hi/lo bytes, stores them at count-derived byte offsets in the
     // output buffer, and finally emits a call to write() on that buffer.
     //
     // m:        module in which the "write" declaration is looked up/created.
     // iBuilder: IDISA builder used to emit all loads, merges, stores and the call.
     // kBuilder: kernel builder that receives the stream declarations and is
     //           finalized at the end.
     //
     // NOTE(review): partial_counts is doubled while still in its fwCast field
     // width, before the zero-extension to i64; if a count can exceed half the
     // field's maximum value the doubled value would wrap -- confirm the range
     // of the popcount stream.
     // NOTE(review): the value returned by the emitted write() call is not used,
     // so short writes or errors would go unnoticed -- confirm this is intended.
     99void generateP2S_16_withCompressedOutputKernel(Module * m, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder) {
     100    for (unsigned i = 0; i < 16; ++i) {
     101        kBuilder->addInputStream(1);
     102    }       
     103    kBuilder->addInputStream(1);  // partial popcounts
     104    kBuilder->addOutputStream(16);
     105
     106    kBuilder->prepareFunction();
     107    Function * writefn = create_write(m);
     108   
     109    Type * i8PtrTy = iBuilder->getInt8PtrTy();
     110    Type * i64 = iBuilder->getIntNTy(64);
     111    Type * bitBlockPtrTy = llvm::PointerType::get(iBuilder->getBitBlockType(), 0);
     112   
            // Input streams 0..7 feed the "hi" p2s conversion.
     113    Value * hi_input[8];
     114    for (unsigned j = 0; j < 8; ++j) {
     115        hi_input[j] = iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(j));
     116    }
     117    Value * hi_bytes[8];
     118    p2s(iBuilder, hi_input, hi_bytes);
     119   
            // Input streams 8..15 feed the "lo" p2s conversion.
     120    Value * lo_input[8];
     121    for (unsigned j = 0; j < 8; ++j) {
     122        lo_input[j] = iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(j+8));
     123    }
     124    Value * lo_bytes[8];
     125    p2s(iBuilder, lo_input, lo_bytes);
     126   
     127    unsigned UTF_16_units_per_register = iBuilder->getBitBlockWidth()/16;
     128   
            // Input stream 16 (the 17th input) is loaded and reinterpreted with a
            // field width equal to UTF_16_units_per_register.
     129    Value * partial_counts = iBuilder->fwCast(UTF_16_units_per_register, iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(16)));
     130    Value * byte_counts = iBuilder->CreateAdd(partial_counts, partial_counts); // double the code unit count to get byte counts
     131   
            // The output stream is addressed as raw bytes; writing starts at offset 0.
     132    Value * output_ptr = iBuilder->CreateBitCast(kBuilder->getOutputStream(0), i8PtrTy);
     133    Value * byte_offset = ConstantInt::get(i64, 0);
     134   
            // Each iteration stores two merged blocks with alignment 1. The offset
            // for the next store is taken from byte_counts elements 2*j and 2*j+1.
            // NOTE(review): every store writes a whole bit block, so a later store
            // presumably overwrites the unused tail of the previous one -- confirm.
     135    for (unsigned j = 0; j < 8; ++j) {
     136        Value * merge0 = iBuilder->bitCast(iBuilder->esimd_mergel(8, hi_bytes[j], lo_bytes[j]));
     137        Value * merge1 = iBuilder->bitCast(iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
     138        //iBuilder->CallPrintRegister("merge0", merge0);
     139        iBuilder->CreateAlignedStore(merge0, iBuilder->CreateBitCast(iBuilder->CreateGEP(output_ptr, byte_offset), bitBlockPtrTy), 1);
     140        byte_offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(byte_counts, iBuilder->getInt32(2*j)), i64);
     141        //iBuilder->CallPrintInt("byte_offset", byte_offset);
     142        iBuilder->CreateAlignedStore(merge1, iBuilder->CreateBitCast(iBuilder->CreateGEP(output_ptr, byte_offset), bitBlockPtrTy), 1);
     143        //iBuilder->CallPrintRegister("merge1", merge1);
     144        byte_offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(byte_counts, iBuilder->getInt32(2*j+1)), i64);
     145        //iBuilder->CallPrintInt("byte_offset", byte_offset);
     146    }
            // After the loop byte_offset holds byte_counts element 15; it is passed
            // as the length argument. The first argument is the constant 1
            // (presumably the stdout file descriptor -- confirm).
     147    iBuilder->CreateCall(writefn, std::vector<Value *>({iBuilder->getInt32(1), output_ptr, byte_offset}));
     148   
     149    kBuilder->finalize();
     150}
    84151
    85152}
Note: See TracChangeset for help on using the changeset viewer.