source: icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.cpp @ 5009

Last change on this file since 5009 was 5009, checked in by cameron, 23 months ago

u8u16 transcoder demo program now working

File size: 6.7 KB
Line 
1#include "p2s_kernel.h"
2#include "kernels/kernel.h"
3#include "IDISA/idisa_builder.h"
4#include <llvm/IR/TypeBuilder.h>
5#include <llvm/IR/Type.h>
6#include <iostream>
7
8namespace kernel{
9       
10void p2s_step(IDISA::IDISA_Builder * iBuilder, Value * p0, Value * p1, Value * hi_mask, unsigned shift, Value * &s1, Value * &s0) {
11    Value * t0 = iBuilder->simd_if(1, hi_mask, p0, iBuilder->simd_srli(16, p1, shift));
12    Value * t1 = iBuilder->simd_if(1, hi_mask, iBuilder->simd_slli(16, p0, shift), p1);
13    s1 = iBuilder->esimd_mergeh(8, t1, t0);
14    s0 = iBuilder->esimd_mergel(8, t1, t0);
15}
16
17inline void p2s(IDISA::IDISA_Builder * iBuilder, Value * p[], Value * s[]) {
18    Value * bit00004444[2];
19    Value * bit22226666[2];
20    Value * bit11115555[2];
21    Value * bit33337777[2];
22    p2s_step(iBuilder, p[0], p[4], iBuilder->simd_himask(8), 4, bit00004444[1], bit00004444[0]);
23    p2s_step(iBuilder, p[1], p[5], iBuilder->simd_himask(8), 4, bit11115555[1], bit11115555[0]);
24    p2s_step(iBuilder, p[2], p[6], iBuilder->simd_himask(8), 4, bit22226666[1], bit22226666[0]);
25    p2s_step(iBuilder, p[3], p[7], iBuilder->simd_himask(8), 4, bit33337777[1], bit33337777[0]);
26
27    Value * bit00224466[4];
28    Value * bit11335577[4];
29    for (unsigned j = 0; j<2; j++) {
30        p2s_step(iBuilder, bit00004444[j], bit22226666[j],iBuilder->simd_himask(4), 2, bit00224466[2*j+1], bit00224466[2*j]);
31        p2s_step(iBuilder, bit11115555[j], bit33337777[j],iBuilder->simd_himask(4), 2, bit11335577[2*j+1], bit11335577[2*j]);
32    }
33    for (unsigned j = 0; j<4; j++) {
34        p2s_step(iBuilder, bit00224466[j], bit11335577[j], iBuilder->simd_himask(2), 1, s[2*j+1], s[2*j]);
35    }
36}
37               
38void generateP2SKernel(Module * m, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder) {
39    for (unsigned i = 0; i < 8; ++i) {
40        kBuilder->addInputStream(1);
41    }
42    kBuilder->addOutputStream(8);
43    kBuilder->prepareFunction();
44    Value * input[8];
45    for (unsigned j = 0; j < 8; ++j) {
46        input[j] = iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(j));
47    }
48    Value * output[8];
49    p2s(iBuilder, input, output);
50    Value * output_ptr = kBuilder->getOutputStream(0);
51    for (unsigned j = 0; j < 8; ++j) {
52
53        iBuilder->CreateBlockAlignedStore(output[j], iBuilder->CreateGEP(output_ptr, std::vector<Value *>({ iBuilder->getInt32(0), iBuilder->getInt32(j) })));
54    }
55    kBuilder->finalize();
56}
57
58void generateP2S_16Kernel(Module * m, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder) {
59    for (unsigned i = 0; i < 16; ++i) {
60        kBuilder->addInputStream(1);
61    }
62    kBuilder->addOutputStream(16);
63    kBuilder->prepareFunction();
64    Value * hi_input[8];
65    for (unsigned j = 0; j < 8; ++j) {
66        hi_input[j] = iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(j));
67    }
68    Value * hi_bytes[8];
69    p2s(iBuilder, hi_input, hi_bytes);
70   
71    Value * lo_input[8];
72    for (unsigned j = 0; j < 8; ++j) {
73        lo_input[j] = iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(j+8));
74    }
75    Value * lo_bytes[8];
76    p2s(iBuilder, lo_input, lo_bytes);
77   
78    Value * output_ptr = kBuilder->getOutputStream(0);
79    for (unsigned j = 0; j < 8; ++j) {
80        Value * merge0 = iBuilder->bitCast(iBuilder->esimd_mergel(8, hi_bytes[j], lo_bytes[j]));
81        Value * merge1 = iBuilder->bitCast(iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
82        iBuilder->CreateBlockAlignedStore(merge0, iBuilder->CreateGEP(output_ptr, std::vector<Value *>({ iBuilder->getInt32(0), iBuilder->getInt32(2*j) })));
83        iBuilder->CreateBlockAlignedStore(merge1, iBuilder->CreateGEP(output_ptr, std::vector<Value *>({ iBuilder->getInt32(0), iBuilder->getInt32(2*j+1) })));
84    }
85    kBuilder->finalize();
86}
87   
88Function * create_write(Module * const mod) {
89    Function * write = mod->getFunction("write");
90    if (write == nullptr) {
91        FunctionType *write_type =
92        TypeBuilder<long(int, char *, long), false>::get(mod->getContext());
93        write = cast<Function>(mod->getOrInsertFunction("write", write_type,
94                                                        AttributeSet().addAttribute(mod->getContext(), 2U, Attribute::NoAlias)));
95    }
96    return write;
97}
98
99void generateP2S_16_withCompressedOutputKernel(Module * m, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder) {
100    for (unsigned i = 0; i < 16; ++i) {
101        kBuilder->addInputStream(1);
102    }       
103    kBuilder->addInputStream(1);  // partial popcounts
104    kBuilder->addOutputStream(16);
105
106    kBuilder->prepareFunction();
107    Function * writefn = create_write(m);
108   
109    Type * i8PtrTy = iBuilder->getInt8PtrTy(); 
110    Type * i64 = iBuilder->getIntNTy(64); 
111    Type * bitBlockPtrTy = llvm::PointerType::get(iBuilder->getBitBlockType(), 0); 
112   
113    Value * hi_input[8];
114    for (unsigned j = 0; j < 8; ++j) {
115        hi_input[j] = iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(j));
116    }
117    Value * hi_bytes[8];
118    p2s(iBuilder, hi_input, hi_bytes);
119   
120    Value * lo_input[8];
121    for (unsigned j = 0; j < 8; ++j) {
122        lo_input[j] = iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(j+8));
123    }
124    Value * lo_bytes[8];
125    p2s(iBuilder, lo_input, lo_bytes);
126   
127    unsigned UTF_16_units_per_register = iBuilder->getBitBlockWidth()/16;
128   
129    Value * partial_counts = iBuilder->fwCast(UTF_16_units_per_register, iBuilder->CreateBlockAlignedLoad(kBuilder->getInputStream(16)));
130    Value * byte_counts = iBuilder->CreateAdd(partial_counts, partial_counts); // double the code unit count to get byte counts
131   
132    Value * output_ptr = iBuilder->CreateBitCast(kBuilder->getOutputStream(0), i8PtrTy);
133    Value * byte_offset = ConstantInt::get(i64, 0);
134   
135    for (unsigned j = 0; j < 8; ++j) {
136        Value * merge0 = iBuilder->bitCast(iBuilder->esimd_mergel(8, hi_bytes[j], lo_bytes[j]));
137        Value * merge1 = iBuilder->bitCast(iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
138        //iBuilder->CallPrintRegister("merge0", merge0);
139        iBuilder->CreateAlignedStore(merge0, iBuilder->CreateBitCast(iBuilder->CreateGEP(output_ptr, byte_offset), bitBlockPtrTy), 1);
140        byte_offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(byte_counts, iBuilder->getInt32(2*j)), i64);
141        //iBuilder->CallPrintInt("byte_offset", byte_offset);
142        iBuilder->CreateAlignedStore(merge1, iBuilder->CreateBitCast(iBuilder->CreateGEP(output_ptr, byte_offset), bitBlockPtrTy), 1);
143        //iBuilder->CallPrintRegister("merge1", merge1);
144        byte_offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(byte_counts, iBuilder->getInt32(2*j+1)), i64);
145        //iBuilder->CallPrintInt("byte_offset", byte_offset);
146    }
147    iBuilder->CreateCall(writefn, std::vector<Value *>({iBuilder->getInt32(1), output_ptr, byte_offset}));
148   
149    kBuilder->finalize();
150}
151
152}
Note: See TracBrowser for help on using the repository browser.