source: icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.cpp @ 5074

Last change on this file since 5074 was 5074, checked in by cameron, 3 years ago

Kernel infrastructure: move common logic into the KernelBuilder base class; demo linking in wc

File size: 8.1 KB
Line 
1#include "p2s_kernel.h"
2#include "kernels/kernel.h"
3#include "IDISA/idisa_builder.h"
4#include <llvm/IR/TypeBuilder.h>
5#include <llvm/IR/Type.h>
6#include <iostream>
7#include <stdint.h>
8#include <llvm/Support/FileSystem.h>
9#include <llvm/Support/raw_ostream.h>
10
11
12
13extern "C" {
14    void buffered_write(const char * ptr, size_t bytes) {
15        outs().write(ptr, bytes);
16    }
17};
18
19namespace kernel{
20       
21void p2s_step(IDISA::IDISA_Builder * iBuilder, Value * p0, Value * p1, Value * hi_mask, unsigned shift, Value * &s1, Value * &s0) {
22    Value * t0 = iBuilder->simd_if(1, hi_mask, p0, iBuilder->simd_srli(16, p1, shift));
23    Value * t1 = iBuilder->simd_if(1, hi_mask, iBuilder->simd_slli(16, p0, shift), p1);
24    s1 = iBuilder->esimd_mergeh(8, t1, t0);
25    s0 = iBuilder->esimd_mergel(8, t1, t0);
26}
27
28inline void p2s(IDISA::IDISA_Builder * iBuilder, Value * p[], Value * s[]) {
29    Value * bit00004444[2];
30    Value * bit22226666[2];
31    Value * bit11115555[2];
32    Value * bit33337777[2];
33    p2s_step(iBuilder, p[0], p[4], iBuilder->simd_himask(8), 4, bit00004444[1], bit00004444[0]);
34    p2s_step(iBuilder, p[1], p[5], iBuilder->simd_himask(8), 4, bit11115555[1], bit11115555[0]);
35    p2s_step(iBuilder, p[2], p[6], iBuilder->simd_himask(8), 4, bit22226666[1], bit22226666[0]);
36    p2s_step(iBuilder, p[3], p[7], iBuilder->simd_himask(8), 4, bit33337777[1], bit33337777[0]);
37
38    Value * bit00224466[4];
39    Value * bit11335577[4];
40    for (unsigned j = 0; j<2; j++) {
41        p2s_step(iBuilder, bit00004444[j], bit22226666[j],iBuilder->simd_himask(4), 2, bit00224466[2*j+1], bit00224466[2*j]);
42        p2s_step(iBuilder, bit11115555[j], bit33337777[j],iBuilder->simd_himask(4), 2, bit11335577[2*j+1], bit11335577[2*j]);
43    }
44    for (unsigned j = 0; j<4; j++) {
45        p2s_step(iBuilder, bit00224466[j], bit11335577[j], iBuilder->simd_himask(2), 1, s[2*j+1], s[2*j]);
46    }
47}
48               
49void p2sKernel::generateDoBlockMethod() {
50    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
51    Module * m = iBuilder->getModule();
52   
53    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
54   
55    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
56   
57    Value * basisBitsBlock_ptr = getParameter(doBlockFunction, "basisBits");  // input
58    Value * byteStreamBlock_ptr = getParameter(doBlockFunction, "byteStream"); // output
59    Value * p_bitblock[8];
60    for (unsigned i = 0; i < 8; i++) {
61        p_bitblock[i] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(i)});
62    }
63    Value * s_bytepack[8];
64    p2s(iBuilder, p_bitblock, s_bytepack);
65    for (unsigned j = 0; j < 8; ++j) {
66        iBuilder->CreateBlockAlignedStore(s_bytepack[j], byteStreamBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(j)});
67    }
68    iBuilder->CreateRetVoid();
69    iBuilder->restoreIP(savePoint);
70}
71       
72void p2s_16Kernel::generateDoBlockMethod() {
73    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
74    Module * m = iBuilder->getModule();
75   
76    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
77   
78    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
79   
80    Value * basisBitsBlock_ptr = getParameter(doBlockFunction, "basisBits");  // input
81    Value * i16StreamBlock_ptr = getParameter(doBlockFunction, "i16Stream"); // output
82   
83    Value * hi_input[8];
84    for (unsigned j = 0; j < 8; ++j) {
85        hi_input[j] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(j)});
86    }
87    Value * hi_bytes[8];
88    p2s(iBuilder, hi_input, hi_bytes);
89   
90    Value * lo_input[8];
91    for (unsigned j = 0; j < 8; ++j) {
92        lo_input[j] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(j+8)});
93    }
94    Value * lo_bytes[8];
95    p2s(iBuilder, lo_input, lo_bytes);
96   
97    for (unsigned j = 0; j < 8; ++j) {
98        Value * merge0 = iBuilder->bitCast(iBuilder->esimd_mergel(8, hi_bytes[j], lo_bytes[j]));
99        Value * merge1 = iBuilder->bitCast(iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
100        iBuilder->CreateBlockAlignedStore(merge0, i16StreamBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(2*j)});
101        iBuilder->CreateBlockAlignedStore(merge1, i16StreamBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(2*j+1)});
102    }
103    iBuilder->CreateRetVoid();
104    iBuilder->restoreIP(savePoint);
105}
106       
107   
108Function * create_write(Module * const mod) {
109    Function * write = mod->getFunction("write");
110    if (write == nullptr) {
111        FunctionType *write_type =
112        TypeBuilder<long(int, char *, long), false>::get(mod->getContext());
113        write = cast<Function>(mod->getOrInsertFunction("write", write_type,
114                                                        AttributeSet().addAttribute(mod->getContext(), 2U, Attribute::NoAlias)));
115    }
116    return write;
117}
118
// Buffer size, in bytes, handed to outs().SetBufferSize() by the
// compressed-output kernel.
constexpr size_t OutputBufferSize = 65536;
120
121void p2s_16Kernel_withCompressedOutputKernel::generateDoBlockMethod() {
122    outs().SetBufferSize(OutputBufferSize);
123    IDISA::IDISA_Builder::InsertPoint savePoint = iBuilder->saveIP();
124    Module * m = iBuilder->getModule();
125    Type * i8PtrTy = iBuilder->getInt8PtrTy(); 
126    Type * i64 = iBuilder->getIntNTy(64); 
127    Type * bitBlockPtrTy = llvm::PointerType::get(iBuilder->getBitBlockType(), 0); 
128   
129    Function * writefn = cast<Function>(m->getOrInsertFunction("buffered_write", iBuilder->getVoidTy(), i8PtrTy, i64, nullptr));
130
131    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
132   
133    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
134   
135    Value * basisBitsBlock_ptr = getParameter(doBlockFunction, "basisBits");  // input
136    Value * delCountBlock_ptr = getParameter(doBlockFunction, "deletionCounts");
137    Value * i16StreamBlock_ptr = getParameter(doBlockFunction, "i16Stream"); // output
138
139    Value * hi_input[8];
140    for (unsigned j = 0; j < 8; ++j) {
141        hi_input[j] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(j)});
142    }
143    Value * hi_bytes[8];
144    p2s(iBuilder, hi_input, hi_bytes);
145   
146    Value * lo_input[8];
147    for (unsigned j = 0; j < 8; ++j) {
148        lo_input[j] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(j+8)});
149    }
150    Value * lo_bytes[8];
151    p2s(iBuilder, lo_input, lo_bytes);
152   
153    unsigned UTF_16_units_per_register = iBuilder->getBitBlockWidth()/16;
154   
155    Value * unit_counts = iBuilder->fwCast(UTF_16_units_per_register, iBuilder->CreateBlockAlignedLoad(delCountBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
156   
157    Value * u16_output_ptr = iBuilder->CreateBitCast(i16StreamBlock_ptr, PointerType::get(iBuilder->getInt16Ty(), 0));
158    Value * offset = ConstantInt::get(i64, 0);
159   
160    for (unsigned j = 0; j < 8; ++j) {
161        Value * merge0 = iBuilder->bitCast(iBuilder->esimd_mergel(8, hi_bytes[j], lo_bytes[j]));
162        Value * merge1 = iBuilder->bitCast(iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
163        //iBuilder->CallPrintRegister("merge0", merge0);
164        iBuilder->CreateAlignedStore(merge0, iBuilder->CreateBitCast(iBuilder->CreateGEP(u16_output_ptr, offset), bitBlockPtrTy), 1);
165        offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(2*j)), i64);
166        //iBuilder->CallPrintInt("offset", offset);
167        iBuilder->CreateAlignedStore(merge1, iBuilder->CreateBitCast(iBuilder->CreateGEP(u16_output_ptr, offset), bitBlockPtrTy), 1);
168        //iBuilder->CallPrintRegister("merge1", merge1);
169        offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(2*j+1)), i64);
170        //iBuilder->CallPrintInt("offset", offset);
171    }
172    Value * byte_offset = iBuilder->CreateAdd(offset, offset);
173    iBuilder->CreateCall(writefn, std::vector<Value *>({iBuilder->CreateBitCast(i16StreamBlock_ptr, i8PtrTy), byte_offset}));
174    iBuilder->CreateRetVoid();
175    iBuilder->restoreIP(savePoint);
176}
177       
178}
Note: See TracBrowser for help on using the repository browser.