source: icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.cpp @ 5051

Last change on this file since 5051 was 5051, checked in by cameron, 3 years ago

s2p kernel with new infrastructure, includes s2p_FinalBlock

File size: 8.9 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5#include "s2p_kernel.h"
6#include <kernels/kernel.h>
7#include <IDISA/idisa_builder.h>
8
9namespace kernel {
10
// Lane count handed to the hsimd_pack*_in_lanes operations in s2p_step.
// s2p_step only takes its in-lane code path when this equals 2 (and the
// bit block width is 256); with the value 1 that path is never selected.
constexpr int PACK_LANES = 1;
12
13void s2p_step(IDISA::IDISA_Builder * iBuilder, Value * s0, Value * s1, Value * hi_mask, unsigned shift, Value * &p0, Value * &p1) {
14    Value * t0 = nullptr;
15    Value * t1 = nullptr;
16    if ((iBuilder->getBitBlockWidth() == 256) && (PACK_LANES == 2)) {
17        Value * x0 = iBuilder->esimd_mergel(128, s0, s1);
18        Value * x1 = iBuilder->esimd_mergeh(128, s0, s1);
19        t0 = iBuilder->hsimd_packh_in_lanes(PACK_LANES, 16, x0, x1);
20        t1 = iBuilder->hsimd_packl_in_lanes(PACK_LANES, 16, x0, x1);
21    }
22    else {
23        t0 = iBuilder->hsimd_packh(16, s0, s1);
24        t1 = iBuilder->hsimd_packl(16, s0, s1);
25    }
26    p0 = iBuilder->simd_if(1, hi_mask, t0, iBuilder->simd_srli(16, t1, shift));
27    p1 = iBuilder->simd_if(1, hi_mask, iBuilder->simd_slli(16, t0, shift), t1);
28}
29
30void s2p(IDISA::IDISA_Builder * iBuilder, Value * input[], Value * output[]) {
31    Value * bit00224466[4];
32    Value * bit11335577[4];
33
34    for (unsigned i = 0; i < 4; i++) {
35        Value * s0 = input[2 * i];
36        Value * s1 = input[2 * i + 1];
37        s2p_step(iBuilder, s0, s1, iBuilder->simd_himask(2), 1, bit00224466[i], bit11335577[i]);
38    }
39    Value * bit00004444[2];
40    Value * bit22226666[2];
41    Value * bit11115555[2];
42    Value * bit33337777[2];
43    for (unsigned j = 0; j<2; j++) {
44        s2p_step(iBuilder, bit00224466[2*j], bit00224466[2*j+1],
45                 iBuilder->simd_himask(4), 2, bit00004444[j], bit22226666[j]);
46        s2p_step(iBuilder, bit11335577[2*j], bit11335577[2*j+1],
47                 iBuilder->simd_himask(4), 2, bit11115555[j], bit33337777[j]);
48    }
49    s2p_step(iBuilder, bit00004444[0], bit00004444[1], iBuilder->simd_himask(8), 4, output[0], output[4]);
50    s2p_step(iBuilder, bit11115555[0], bit11115555[1], iBuilder->simd_himask(8), 4, output[1], output[5]);
51    s2p_step(iBuilder, bit22226666[0], bit22226666[1], iBuilder->simd_himask(8), 4, output[2], output[6]);
52    s2p_step(iBuilder, bit33337777[0], bit33337777[1], iBuilder->simd_himask(8), 4, output[3], output[7]);
53}
54
55void s2p(IDISA::IDISA_Builder * iBuilder, Value * input, Value * output[]) {
56    Value * bit[8];
57    for (unsigned i = 0; i < 8; i++) {
58        bit[i] = iBuilder->CreateBlockAlignedLoad(input, {iBuilder->getInt32(0), iBuilder->getInt32(i)});
59    }
60    s2p(iBuilder, bit, output);
61}
62
63void generateS2PKernel(Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder) {
64    kBuilder->addInputStream(8, "byte_pack");
65    for(unsigned i = 0; i < 8; ++i) {
66        kBuilder->addOutputStream(1);
67    }
68    kBuilder->prepareFunction();
69    Value * output[8];
70
71    Value * ptr = kBuilder->getInputStream(0);
72    //iBuilder->CallPrintInt("ptr", iBuilder->CreatePtrToInt(ptr, iBuilder->getInt64Ty()));
73    s2p(iBuilder, ptr, output);
74    for (unsigned j = 0; j < 8; ++j) {
75        //iBuilder->CallPrintRegister("bit" + std::to_string(j + 1), output[j]);
76        iBuilder->CreateBlockAlignedStore(output[j], kBuilder->getOutputStream(j));
77    }
78    kBuilder->finalize();
79}
80
81void generateS2P_16Kernel(Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder) {
82    kBuilder->addInputStream(16, "unit_pack");
83    for(unsigned i = 0; i < 16; i++) {
84            kBuilder->addOutputStream(1);
85    }
86    kBuilder->prepareFunction();
87
88    Value * ptr = kBuilder->getInputStream(0);
89
90    Value * lo[8];
91    Value * hi[8];
92    for (unsigned i = 0; i < 8; i++) {
93        Value * s0 = iBuilder->CreateBlockAlignedLoad(ptr, {iBuilder->getInt32(0), iBuilder->getInt32(2 * i)});
94        Value * s1 = iBuilder->CreateBlockAlignedLoad(ptr, {iBuilder->getInt32(0), iBuilder->getInt32(2 * i + 1)});
95        lo[i] = iBuilder->hsimd_packl(16, s0, s1);
96        hi[i] = iBuilder->hsimd_packh(16, s0, s1);
97    }
98
99    Value * output[16];
100    s2p(iBuilder, lo, output);
101    s2p(iBuilder, hi, output + 8);
102    for (unsigned j = 0; j < 16; j++) {
103        iBuilder->CreateBlockAlignedStore(output[j], kBuilder->getOutputStream(j));
104    }
105    kBuilder->finalize();
106}
107       
108void generateS2P_idealKernel(Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder) {
109    kBuilder->addInputStream(8, "byte_pack");
110    for(unsigned i = 0; i < 8; ++i) {
111        kBuilder->addOutputStream(1);
112    }
113    kBuilder->prepareFunction();
114    Value * input = kBuilder->getInputStream(0);
115    Value * output[8];
116    Value * hi_nybble[4];
117    Value * lo_nybble[4];
118    for (unsigned i = 0; i<4; i++) {
119        Value * s0 = iBuilder->CreateBlockAlignedLoad(input, {iBuilder->getInt32(0), iBuilder->getInt32(2 * i)});
120        Value * s1 = iBuilder->CreateBlockAlignedLoad(input, {iBuilder->getInt32(0), iBuilder->getInt32(2 * i + 1)});
121        hi_nybble[i] = iBuilder->hsimd_packh(8, s0, s1);
122        lo_nybble[i] = iBuilder->hsimd_packl(8, s0, s1);
123    }
124    Value * pair01[2];
125    Value * pair23[2];
126    Value * pair45[2];
127    Value * pair67[2];
128    for (unsigned i = 0; i<2; i++) {
129        pair01[i] = iBuilder->hsimd_packh(4, hi_nybble[2*i], hi_nybble[2*i+1]);
130        pair23[i] = iBuilder->hsimd_packl(4, hi_nybble[2*i], hi_nybble[2*i+1]);
131        pair45[i] = iBuilder->hsimd_packh(4, lo_nybble[2*i], lo_nybble[2*i+1]);
132        pair67[i] = iBuilder->hsimd_packl(4, lo_nybble[2*i], lo_nybble[2*i+1]);
133    }
134    output[0] = iBuilder->hsimd_packh(2, pair01[0], pair01[1]);
135    output[1] = iBuilder->hsimd_packl(2, pair01[0], pair01[1]);
136    output[2] = iBuilder->hsimd_packh(2, pair23[0], pair23[1]);
137    output[3] = iBuilder->hsimd_packl(2, pair23[0], pair23[1]);
138    output[4] = iBuilder->hsimd_packh(2, pair45[0], pair45[1]);
139    output[5] = iBuilder->hsimd_packl(2, pair45[0], pair45[1]);
140    output[6] = iBuilder->hsimd_packh(2, pair67[0], pair67[1]);
141    output[7] = iBuilder->hsimd_packl(2, pair67[0], pair67[1]);
142
143    s2p(iBuilder, kBuilder->getInputStream(0), output);
144    for (unsigned j = 0; j < 8; ++j) {
145        iBuilder->CreateBlockAlignedStore(output[j], kBuilder->getOutputStream(j));
146    }
147    kBuilder->finalize();
148}
149   
150std::unique_ptr<llvm::Module> s2pKernel::createKernelModule() {
151    std::unique_ptr<llvm::Module> theModule = KernelInterface::createKernelModule();
152   
153    /***********************
154     WARNING iBuilder has a different module than theModule at this point.
155    ***********************/
156    Function * doBlockFunction = theModule.get()->getFunction(mKernelName + "_DoBlock");
157   
158    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
159   
160    Value * byteStreamBlock_ptr = getParameter(doBlockFunction, "byteStream");
161    Value * basisBitsBlock_ptr = getParameter(doBlockFunction, "basisBits");
162    Value * s_bytepack[8];
163    for (unsigned i = 0; i < 8; i++) {
164        s_bytepack[i] = iBuilder->CreateBlockAlignedLoad(byteStreamBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(i)});
165    }
166    Value * p_bitblock[8];
167    s2p(iBuilder, s_bytepack, p_bitblock);
168    for (unsigned j = 0; j < 8; ++j) {
169        iBuilder->CreateBlockAlignedStore(p_bitblock[j], basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(j)});
170    }
171    iBuilder->CreateRetVoid();
172
173    /* Now the prepare the s2p final block function:
174     assumption: if remaining bytes is greater than 0, it is safe to read a full block of bytes.
175     if remaining bytes is zero, no read should be performed (e.g. for mmapped buffer).
176     */
177    Function * finalBlockFunction = theModule.get()->getFunction(mKernelName + "_FinalBlock");
178    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "fb_entry", finalBlockFunction, 0));
179
180    Value * self = getParameter(finalBlockFunction, "self");
181    Value * remainingBytes = getParameter(finalBlockFunction, "remainingBytes");
182    byteStreamBlock_ptr = getParameter(finalBlockFunction, "byteStream");
183    basisBitsBlock_ptr = getParameter(finalBlockFunction, "basisBits");
184   
185    BasicBlock * finalPartialBlock = BasicBlock::Create(iBuilder->getContext(), "partial", finalBlockFunction, 0);
186    BasicBlock * finalEmptyBlock = BasicBlock::Create(iBuilder->getContext(), "empty", finalBlockFunction, 0);
187    BasicBlock * exitBlock = BasicBlock::Create(iBuilder->getContext(), "exit", finalBlockFunction, 0);
188   
189    Value * emptyBlockCond = iBuilder->CreateICmpEQ(remainingBytes, ConstantInt::get(iBuilder->getInt64Ty(), 0));
190    iBuilder->CreateCondBr(emptyBlockCond, finalEmptyBlock, finalPartialBlock);
191    iBuilder->SetInsertPoint(finalPartialBlock);
192    iBuilder->CreateCall(doBlockFunction, {self, byteStreamBlock_ptr, basisBitsBlock_ptr});
193   
194    iBuilder->CreateBr(exitBlock);
195   
196    iBuilder->SetInsertPoint(finalEmptyBlock);
197    iBuilder->CreateStore(Constant::getNullValue(basisBitsBlock_ptr->getType()->getPointerElementType()), basisBitsBlock_ptr);
198    iBuilder->CreateBr(exitBlock);
199   
200    iBuilder->SetInsertPoint(exitBlock);
201    iBuilder->CreateRetVoid();
202
203    return theModule;
204}
205
206   
207}
Note: See TracBrowser for help on using the repository browser.