source: icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.cpp @ 5079

Last change on this file since 5079 was 5079, checked in by cameron, 3 years ago

Minor u8u16 updates, 8-bit p2sKernel with compressed output

File size: 9.2 KB
Line 
1#include "p2s_kernel.h"
2#include "kernels/kernel.h"
3#include "IDISA/idisa_builder.h"
4#include <llvm/IR/TypeBuilder.h>
5#include <llvm/IR/Type.h>
6#include <iostream>
7#include <stdint.h>
8#include <llvm/Support/FileSystem.h>
9#include <llvm/Support/raw_ostream.h>
10
11
12
13namespace kernel{
14       
15void p2s_step(IDISA::IDISA_Builder * iBuilder, Value * p0, Value * p1, Value * hi_mask, unsigned shift, Value * &s1, Value * &s0) {
16    Value * t0 = iBuilder->simd_if(1, hi_mask, p0, iBuilder->simd_srli(16, p1, shift));
17    Value * t1 = iBuilder->simd_if(1, hi_mask, iBuilder->simd_slli(16, p0, shift), p1);
18    s1 = iBuilder->esimd_mergeh(8, t1, t0);
19    s0 = iBuilder->esimd_mergel(8, t1, t0);
20}
21
22inline void p2s(IDISA::IDISA_Builder * iBuilder, Value * p[], Value * s[]) {
23    Value * bit00004444[2];
24    Value * bit22226666[2];
25    Value * bit11115555[2];
26    Value * bit33337777[2];
27    p2s_step(iBuilder, p[0], p[4], iBuilder->simd_himask(8), 4, bit00004444[1], bit00004444[0]);
28    p2s_step(iBuilder, p[1], p[5], iBuilder->simd_himask(8), 4, bit11115555[1], bit11115555[0]);
29    p2s_step(iBuilder, p[2], p[6], iBuilder->simd_himask(8), 4, bit22226666[1], bit22226666[0]);
30    p2s_step(iBuilder, p[3], p[7], iBuilder->simd_himask(8), 4, bit33337777[1], bit33337777[0]);
31
32    Value * bit00224466[4];
33    Value * bit11335577[4];
34    for (unsigned j = 0; j<2; j++) {
35        p2s_step(iBuilder, bit00004444[j], bit22226666[j],iBuilder->simd_himask(4), 2, bit00224466[2*j+1], bit00224466[2*j]);
36        p2s_step(iBuilder, bit11115555[j], bit33337777[j],iBuilder->simd_himask(4), 2, bit11335577[2*j+1], bit11335577[2*j]);
37    }
38    for (unsigned j = 0; j<4; j++) {
39        p2s_step(iBuilder, bit00224466[j], bit11335577[j], iBuilder->simd_himask(2), 1, s[2*j+1], s[2*j]);
40    }
41}
42               
43void p2sKernel::generateDoBlockMethod() {
44    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
45    Module * m = iBuilder->getModule();
46   
47    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
48   
49    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
50   
51    Value * basisBitsBlock_ptr = getParameter(doBlockFunction, "basisBits");  // input
52    Value * byteStreamBlock_ptr = getParameter(doBlockFunction, "byteStream"); // output
53    Value * p_bitblock[8];
54    for (unsigned i = 0; i < 8; i++) {
55        p_bitblock[i] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(i)});
56    }
57    Value * s_bytepack[8];
58    p2s(iBuilder, p_bitblock, s_bytepack);
59    for (unsigned j = 0; j < 8; ++j) {
60        iBuilder->CreateBlockAlignedStore(s_bytepack[j], byteStreamBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(j)});
61    }
62    iBuilder->CreateRetVoid();
63    iBuilder->restoreIP(savePoint);
64}
65       
66   
67void p2sKernel_withCompressedOutput::prepareKernel() {
68    setDoBlockReturnType(iBuilder->getInt32Ty());
69    KernelBuilder::prepareKernel();
70}
71
72void p2sKernel_withCompressedOutput::generateDoBlockMethod() {
73    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
74    Module * m = iBuilder->getModule();
75    Type * i8PtrTy = iBuilder->getInt8PtrTy(); 
76    Type * i32 = iBuilder->getIntNTy(32); 
77    Type * bitBlockPtrTy = llvm::PointerType::get(iBuilder->getBitBlockType(), 0); 
78   
79    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
80   
81    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
82   
83    Value * basisBitsBlock_ptr = getParameter(doBlockFunction, "basisBits");  // input
84    Value * delCountBlock_ptr = getParameter(doBlockFunction, "deletionCounts");
85    Value * byteStreamBlock_ptr = getParameter(doBlockFunction, "byteStream"); // output
86   
87    Value * p_bitblock[8];
88    for (unsigned i = 0; i < 8; i++) {
89        p_bitblock[i] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(i)});
90    }
91    Value * s_bytepack[8];
92    p2s(iBuilder, p_bitblock, s_bytepack);
93   
94    unsigned units_per_register = iBuilder->getBitBlockWidth()/8;
95   
96    Value * unit_counts = iBuilder->fwCast(units_per_register, iBuilder->CreateBlockAlignedLoad(delCountBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
97   
98    Value * output_ptr = iBuilder->CreateBitCast(byteStreamBlock_ptr, i8PtrTy);
99    Value * offset = ConstantInt::get(i32, 0);
100   
101    for (unsigned j = 0; j < 8; ++j) {
102        iBuilder->CreateAlignedStore(s_bytepack[j], iBuilder->CreateBitCast(iBuilder->CreateGEP(output_ptr, offset), bitBlockPtrTy), 1);
103        offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(j)), i32);
104    }
105    iBuilder->CreateRet(offset);
106    iBuilder->restoreIP(savePoint);
107}
108   
109   
110void p2s_16Kernel::generateDoBlockMethod() {
111    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
112    Module * m = iBuilder->getModule();
113   
114    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
115   
116    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
117   
118    Value * basisBitsBlock_ptr = getParameter(doBlockFunction, "basisBits");  // input
119    Value * i16StreamBlock_ptr = getParameter(doBlockFunction, "i16Stream"); // output
120   
121    Value * hi_input[8];
122    for (unsigned j = 0; j < 8; ++j) {
123        hi_input[j] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(j)});
124    }
125    Value * hi_bytes[8];
126    p2s(iBuilder, hi_input, hi_bytes);
127   
128    Value * lo_input[8];
129    for (unsigned j = 0; j < 8; ++j) {
130        lo_input[j] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(j+8)});
131    }
132    Value * lo_bytes[8];
133    p2s(iBuilder, lo_input, lo_bytes);
134   
135    for (unsigned j = 0; j < 8; ++j) {
136        Value * merge0 = iBuilder->bitCast(iBuilder->esimd_mergel(8, hi_bytes[j], lo_bytes[j]));
137        Value * merge1 = iBuilder->bitCast(iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
138        iBuilder->CreateBlockAlignedStore(merge0, i16StreamBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(2*j)});
139        iBuilder->CreateBlockAlignedStore(merge1, i16StreamBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(2*j+1)});
140    }
141    iBuilder->CreateRetVoid();
142    iBuilder->restoreIP(savePoint);
143}
144       
145
146void p2s_16Kernel_withCompressedOutput::prepareKernel() {
147    setDoBlockReturnType(iBuilder->getInt32Ty());
148    KernelBuilder::prepareKernel();
149}
150   
151void p2s_16Kernel_withCompressedOutput::generateDoBlockMethod() {
152    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
153    Module * m = iBuilder->getModule();
154    Type * i32 = iBuilder->getIntNTy(32); 
155    Type * bitBlockPtrTy = llvm::PointerType::get(iBuilder->getBitBlockType(), 0); 
156   
157    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
158   
159    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
160   
161    Value * basisBitsBlock_ptr = getParameter(doBlockFunction, "basisBits");  // input
162    Value * delCountBlock_ptr = getParameter(doBlockFunction, "deletionCounts");
163    Value * i16StreamBlock_ptr = getParameter(doBlockFunction, "i16Stream"); // output
164
165    Value * hi_input[8];
166    for (unsigned j = 0; j < 8; ++j) {
167        hi_input[j] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(j)});
168    }
169    Value * hi_bytes[8];
170    p2s(iBuilder, hi_input, hi_bytes);
171   
172    Value * lo_input[8];
173    for (unsigned j = 0; j < 8; ++j) {
174        lo_input[j] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(j+8)});
175    }
176    Value * lo_bytes[8];
177    p2s(iBuilder, lo_input, lo_bytes);
178   
179    unsigned UTF_16_units_per_register = iBuilder->getBitBlockWidth()/16;
180   
181    Value * unit_counts = iBuilder->fwCast(UTF_16_units_per_register, iBuilder->CreateBlockAlignedLoad(delCountBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
182   
183    Value * u16_output_ptr = iBuilder->CreateBitCast(i16StreamBlock_ptr, PointerType::get(iBuilder->getInt16Ty(), 0));
184    Value * offset = ConstantInt::get(i32, 0);
185   
186    for (unsigned j = 0; j < 8; ++j) {
187        Value * merge0 = iBuilder->bitCast(iBuilder->esimd_mergel(8, hi_bytes[j], lo_bytes[j]));
188        Value * merge1 = iBuilder->bitCast(iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
189        //iBuilder->CallPrintRegister("merge0", merge0);
190        iBuilder->CreateAlignedStore(merge0, iBuilder->CreateBitCast(iBuilder->CreateGEP(u16_output_ptr, offset), bitBlockPtrTy), 1);
191        offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(2*j)), i32);
192        //iBuilder->CallPrintInt("offset", offset);
193        iBuilder->CreateAlignedStore(merge1, iBuilder->CreateBitCast(iBuilder->CreateGEP(u16_output_ptr, offset), bitBlockPtrTy), 1);
194        //iBuilder->CallPrintRegister("merge1", merge1);
195        offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(2*j+1)), i32);
196        //iBuilder->CallPrintInt("offset", offset);
197    }
198    iBuilder->CreateRet(offset);
199    iBuilder->restoreIP(savePoint);
200}
201       
202}
Note: See TracBrowser for help on using the repository browser.