source: icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.cpp @ 5111

Last change on this file since 5111 was 5111, checked in by cameron, 3 years ago

DoBlock/FinalBlock/DoSegment Kernel functions always return void; clean-up

File size: 13.7 KB
Line 
1#include "p2s_kernel.h"
2#include "kernels/kernel.h"
3#include "IDISA/idisa_builder.h"
4#include <llvm/IR/TypeBuilder.h>
5#include <llvm/IR/Type.h>
6#include <iostream>
7#include <stdint.h>
8#include <llvm/Support/FileSystem.h>
9#include <llvm/Support/raw_ostream.h>
10
11
12
13namespace kernel{
14       
15void p2s_step(IDISA::IDISA_Builder * iBuilder, Value * p0, Value * p1, Value * hi_mask, unsigned shift, Value * &s1, Value * &s0) {
16    Value * t0 = iBuilder->simd_if(1, hi_mask, p0, iBuilder->simd_srli(16, p1, shift));
17    Value * t1 = iBuilder->simd_if(1, hi_mask, iBuilder->simd_slli(16, p0, shift), p1);
18    s1 = iBuilder->esimd_mergeh(8, t1, t0);
19    s0 = iBuilder->esimd_mergel(8, t1, t0);
20}
21
22inline void p2s(IDISA::IDISA_Builder * iBuilder, Value * p[], Value * s[]) {
23    Value * bit00004444[2];
24    Value * bit22226666[2];
25    Value * bit11115555[2];
26    Value * bit33337777[2];
27    p2s_step(iBuilder, p[0], p[4], iBuilder->simd_himask(8), 4, bit00004444[1], bit00004444[0]);
28    p2s_step(iBuilder, p[1], p[5], iBuilder->simd_himask(8), 4, bit11115555[1], bit11115555[0]);
29    p2s_step(iBuilder, p[2], p[6], iBuilder->simd_himask(8), 4, bit22226666[1], bit22226666[0]);
30    p2s_step(iBuilder, p[3], p[7], iBuilder->simd_himask(8), 4, bit33337777[1], bit33337777[0]);
31
32    Value * bit00224466[4];
33    Value * bit11335577[4];
34    for (unsigned j = 0; j<2; j++) {
35        p2s_step(iBuilder, bit00004444[j], bit22226666[j],iBuilder->simd_himask(4), 2, bit00224466[2*j+1], bit00224466[2*j]);
36        p2s_step(iBuilder, bit11115555[j], bit33337777[j],iBuilder->simd_himask(4), 2, bit11335577[2*j+1], bit11335577[2*j]);
37    }
38    for (unsigned j = 0; j<4; j++) {
39        p2s_step(iBuilder, bit00224466[j], bit11335577[j], iBuilder->simd_himask(2), 1, s[2*j+1], s[2*j]);
40    }
41}
42               
43void p2sKernel::generateDoBlockMethod() {
44    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
45    Module * m = iBuilder->getModule();
46   
47    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
48   
49    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
50   
51    Value * self = getParameter(doBlockFunction, "self");
52    Value * blockNo = getScalarField(self, blockNoScalar);
53    Value * basisBitsBlock_ptr = getStreamSetBlockPtr(self, "basisBits", blockNo);
54    Value * byteStreamBlock_ptr = getStreamSetBlockPtr(self, "byteStream", blockNo);
55
56    Value * p_bitblock[8];
57    for (unsigned i = 0; i < 8; i++) {
58        p_bitblock[i] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(i)});
59    }
60    Value * s_bytepack[8];
61    p2s(iBuilder, p_bitblock, s_bytepack);
62    for (unsigned j = 0; j < 8; ++j) {
63        iBuilder->CreateBlockAlignedStore(s_bytepack[j], byteStreamBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(j)});
64    }
65    iBuilder->CreateRetVoid();
66    iBuilder->restoreIP(savePoint);
67}
68       
69   
70void p2sKernel_withCompressedOutput::prepareKernel() {
71    KernelBuilder::prepareKernel();
72}
73
74void p2sKernel_withCompressedOutput::generateDoBlockMethod() {
75    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
76    Module * m = iBuilder->getModule();
77    Type * i8PtrTy = iBuilder->getInt8PtrTy(); 
78    Type * i32 = iBuilder->getIntNTy(32); 
79    Type * bitBlockPtrTy = llvm::PointerType::get(iBuilder->getBitBlockType(), 0); 
80   
81    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
82   
83    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
84    Value * self = getParameter(doBlockFunction, "self");
85    Value * blockNo = getScalarField(self, blockNoScalar);
86    Value * basisBitsBlock_ptr = getStreamSetBlockPtr(self, "basisBits", blockNo);
87    Value * delCountBlock_ptr = getStreamSetBlockPtr(self, "deletionCounts", blockNo);
88    Value * byteStreamBlock_ptr = getStreamSetBlockPtr(self, "byteStream", blockNo);
89   
90    Value * p_bitblock[8];
91    for (unsigned i = 0; i < 8; i++) {
92        p_bitblock[i] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(i)});
93    }
94    Value * s_bytepack[8];
95    p2s(iBuilder, p_bitblock, s_bytepack);
96   
97    unsigned units_per_register = iBuilder->getBitBlockWidth()/8;
98   
99    Value * unit_counts = iBuilder->fwCast(units_per_register, iBuilder->CreateBlockAlignedLoad(delCountBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
100   
101    Value * output_ptr = iBuilder->CreateBitCast(byteStreamBlock_ptr, i8PtrTy);
102    Value * offset = ConstantInt::get(i32, 0);
103   
104    for (unsigned j = 0; j < 8; ++j) {
105        iBuilder->CreateAlignedStore(s_bytepack[j], iBuilder->CreateBitCast(iBuilder->CreateGEP(output_ptr, offset), bitBlockPtrTy), 1);
106        offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(j)), i32);
107    }
108    iBuilder->CreateRetVoid();
109    iBuilder->restoreIP(savePoint);
110}
111   
112   
113void p2s_16Kernel::generateDoBlockMethod() {
114    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
115    Module * m = iBuilder->getModule();
116   
117    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
118   
119    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
120    Value * self = getParameter(doBlockFunction, "self");
121    Value * blockNo = getScalarField(self, blockNoScalar);
122    Value * basisBitsBlock_ptr = getStreamSetBlockPtr(self, "basisBits", blockNo);
123    Value * i16StreamBlock_ptr = getStreamSetBlockPtr(self, "i16Stream", blockNo);
124   
125    Value * hi_input[8];
126    for (unsigned j = 0; j < 8; ++j) {
127        hi_input[j] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(j)});
128    }
129    Value * hi_bytes[8];
130    p2s(iBuilder, hi_input, hi_bytes);
131   
132    Value * lo_input[8];
133    for (unsigned j = 0; j < 8; ++j) {
134        lo_input[j] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(j+8)});
135    }
136    Value * lo_bytes[8];
137    p2s(iBuilder, lo_input, lo_bytes);
138   
139    for (unsigned j = 0; j < 8; ++j) {
140        Value * merge0 = iBuilder->bitCast(iBuilder->esimd_mergel(8, hi_bytes[j], lo_bytes[j]));
141        Value * merge1 = iBuilder->bitCast(iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
142        iBuilder->CreateBlockAlignedStore(merge0, i16StreamBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(2*j)});
143        iBuilder->CreateBlockAlignedStore(merge1, i16StreamBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(2*j+1)});
144    }
145    iBuilder->CreateRetVoid();
146    iBuilder->restoreIP(savePoint);
147}
148       
149
150void p2s_16Kernel_withCompressedOutput::prepareKernel() {
151    KernelBuilder::prepareKernel();
152}
153   
154static Function * create_write(Module * const mod) {
155    Function * write = mod->getFunction("write");
156    if (write == nullptr) {
157        FunctionType *write_type =
158        TypeBuilder<long(int, char *, long), false>::get(mod->getContext());
159        write = cast<Function>(mod->getOrInsertFunction("write", write_type,
160                                                        AttributeSet().addAttribute(mod->getContext(), 2U, Attribute::NoAlias)));
161    }
162    return write;
163}
164   
165   
166   
167void p2s_16Kernel_withCompressedOutput::generateDoBlockMethod() {
168    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
169    Module * m = iBuilder->getModule();
170    Type * i32 = iBuilder->getIntNTy(32); 
171    Type * bitBlockPtrTy = llvm::PointerType::get(iBuilder->getBitBlockType(), 0); 
172    Type * i8PtrTy = iBuilder->getInt8PtrTy();
173
174    Function * writefn = create_write(m);
175
176    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
177   
178    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
179    Value * self = getParameter(doBlockFunction, "self");
180    Value * blockNo = getScalarField(self, blockNoScalar);
181    Value * basisBitsBlock_ptr = getStreamSetBlockPtr(self, "basisBits", blockNo);
182    Value * delCountBlock_ptr = getStreamSetBlockPtr(self, "deletionCounts", blockNo);
183    Value * i16UnitsGenerated = getScalarField(self, "unitsGenerated");  // units generated to buffer
184    Value * i16UnitsWritten = getScalarField(self, "unitsWritten");  // units written to stdout
185   
186    Value * i16StreamBase_ptr = iBuilder->CreateBitCast(getStreamSetBasePtr(self, "i16Stream"), PointerType::get(iBuilder->getInt16Ty(), 0));
187    Value * u16_output_ptr = iBuilder->CreateGEP(i16StreamBase_ptr, iBuilder->CreateSub(i16UnitsGenerated, i16UnitsWritten));
188
189   
190    Value * hi_input[8];
191    for (unsigned j = 0; j < 8; ++j) {
192        hi_input[j] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(j)});
193    }
194    Value * hi_bytes[8];
195    p2s(iBuilder, hi_input, hi_bytes);
196   
197    Value * lo_input[8];
198    for (unsigned j = 0; j < 8; ++j) {
199        lo_input[j] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(j+8)});
200    }
201    Value * lo_bytes[8];
202    p2s(iBuilder, lo_input, lo_bytes);
203   
204    unsigned UTF_16_units_per_register = iBuilder->getBitBlockWidth()/16;
205   
206    Value * unit_counts = iBuilder->fwCast(UTF_16_units_per_register, iBuilder->CreateBlockAlignedLoad(delCountBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
207   
208    Value * offset = ConstantInt::get(i32, 0);
209   
210    for (unsigned j = 0; j < 8; ++j) {
211        Value * merge0 = iBuilder->bitCast(iBuilder->esimd_mergel(8, hi_bytes[j], lo_bytes[j]));
212        Value * merge1 = iBuilder->bitCast(iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
213        //iBuilder->CallPrintRegister("merge0", merge0);
214        iBuilder->CreateAlignedStore(merge0, iBuilder->CreateBitCast(iBuilder->CreateGEP(u16_output_ptr, offset), bitBlockPtrTy), 1);
215        offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(2*j)), i32);
216        //iBuilder->CallPrintInt("offset", offset);
217        iBuilder->CreateAlignedStore(merge1, iBuilder->CreateBitCast(iBuilder->CreateGEP(u16_output_ptr, offset), bitBlockPtrTy), 1);
218        //iBuilder->CallPrintRegister("merge1", merge1);
219        offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(2*j+1)), i32);
220        //iBuilder->CallPrintInt("offset", offset);
221    }
222   
223    i16UnitsGenerated = iBuilder->CreateAdd(i16UnitsGenerated, iBuilder->CreateZExt(offset, iBuilder->getSizeTy()));
224    setScalarField(self, "unitsGenerated", i16UnitsGenerated);
225   
226    Value * unitsInBuffer = iBuilder->CreateSub(i16UnitsGenerated, i16UnitsWritten);
227    Value * lessThanABlockRemaining = iBuilder->CreateICmpUGT(unitsInBuffer, ConstantInt::get(iBuilder->getSizeTy(), (getStreamSetBufferSize(self, "i16Stream") - 1) * iBuilder->getBitBlockWidth()));
228    BasicBlock * flushStmts = BasicBlock::Create(iBuilder->getContext(), "flush", doBlockFunction, 0);
229    BasicBlock * exitStmts = BasicBlock::Create(iBuilder->getContext(), "exit", doBlockFunction, 0);
230    iBuilder->CreateCondBr(lessThanABlockRemaining, flushStmts, exitStmts);
231   
232    iBuilder->SetInsertPoint(flushStmts);
233    iBuilder->CreateCall(writefn, std::vector<Value *>({iBuilder->getInt32(1), iBuilder->CreateBitCast(i16StreamBase_ptr, i8PtrTy), iBuilder->CreateAdd(unitsInBuffer, unitsInBuffer)}));
234    setScalarField(self, "unitsWritten", i16UnitsGenerated); // Everything generated has now been written.
235    iBuilder->CreateBr(exitStmts);
236   
237    iBuilder->SetInsertPoint(exitStmts);
238    iBuilder->CreateRetVoid();
239    iBuilder->restoreIP(savePoint);
240}
241
242void p2s_16Kernel_withCompressedOutput::generateFinalBlockMethod() {
243    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
244    Module * m = iBuilder->getModule();
245    Type * bitBlockPtrTy = llvm::PointerType::get(iBuilder->getBitBlockType(), 0); 
246    Type * i8PtrTy = iBuilder->getInt8PtrTy();
247    Function * writefn = create_write(m);
248    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
249    Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
250    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "fb_entry", finalBlockFunction, 0));
251    // Final Block arguments: self, remaining, then the standard DoBlock args.
252    Function::arg_iterator args = finalBlockFunction->arg_begin();
253    Value * self = &*(args++);
254    /* Skip "remaining" arg */ args++;
255    std::vector<Value *> doBlockArgs = {self};
256    while (args != finalBlockFunction->arg_end()){
257        doBlockArgs.push_back(&*args++);
258    }
259    iBuilder->CreateCall(doBlockFunction, doBlockArgs);
260    Value * i16UnitsGenerated = getScalarField(self, "unitsGenerated");  // units generated to buffer
261    Value * i16UnitsWritten = getScalarField(self, "unitsWritten");  // units written to stdout
262    Value * unitsInBuffer = iBuilder->CreateSub(i16UnitsGenerated, i16UnitsWritten);
263    Value * mustFlush = iBuilder->CreateICmpUGT(unitsInBuffer, ConstantInt::get(iBuilder->getSizeTy(), 0));
264   
265    BasicBlock * flushStmts = BasicBlock::Create(iBuilder->getContext(), "flush", finalBlockFunction, 0);
266    BasicBlock * exitStmts = BasicBlock::Create(iBuilder->getContext(), "exit", finalBlockFunction, 0);
267    iBuilder->CreateCondBr(mustFlush, flushStmts, exitStmts);
268   
269    iBuilder->SetInsertPoint(flushStmts);
270    Value * i16StreamBase_ptr = iBuilder->CreateBitCast(getStreamSetBasePtr(self, "i16Stream"), PointerType::get(iBuilder->getInt16Ty(), 0));
271    iBuilder->CreateCall(writefn, std::vector<Value *>({iBuilder->getInt32(1), iBuilder->CreateBitCast(i16StreamBase_ptr, i8PtrTy), iBuilder->CreateAdd(unitsInBuffer, unitsInBuffer)}));
272    setScalarField(self, "unitsWritten", i16UnitsGenerated); // Everything generated has now been written.
273    iBuilder->CreateBr(exitStmts);
274   
275    iBuilder->SetInsertPoint(exitStmts);
276    iBuilder->CreateRetVoid();
277    iBuilder->restoreIP(savePoint);
278}
279   
280   
281}
Note: See TracBrowser for help on using the repository browser.