source: icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.cpp @ 5261

Last change on this file since 5261 was 5261, checked in by cameron, 9 months ago

Move responsibility for ProducedItemCount into doSegment unless overridden

File size: 8.1 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "s2p_kernel.h"
7#include <IR_Gen/idisa_builder.h>  // for IDISA_Builder
8#include <llvm/IR/Constant.h>      // for Constant
9#include <llvm/IR/Module.h>
10namespace llvm { class BasicBlock; }
11namespace llvm { class Function; }
12namespace llvm { class Value; }
13
14using namespace llvm;
15
16namespace kernel {
17
// Lane count used by s2p_step: the in-lane pack path is only selected when
// this equals 2 (and the bit block width is 256); the value is also passed as
// the first argument of the hsimd_pack*_in_lanes calls on that path.
constexpr int PACK_LANES = 1;
19
20void s2p_step(IDISA::IDISA_Builder * iBuilder, Value * s0, Value * s1, Value * hi_mask, unsigned shift, Value * &p0, Value * &p1) {
21    Value * t0 = nullptr;
22    Value * t1 = nullptr;
23    if ((iBuilder->getBitBlockWidth() == 256) && (PACK_LANES == 2)) {
24        Value * x0 = iBuilder->esimd_mergel(128, s0, s1);
25        Value * x1 = iBuilder->esimd_mergeh(128, s0, s1);
26        t0 = iBuilder->hsimd_packh_in_lanes(PACK_LANES, 16, x0, x1);
27        t1 = iBuilder->hsimd_packl_in_lanes(PACK_LANES, 16, x0, x1);
28    }
29    else {
30        t0 = iBuilder->hsimd_packh(16, s0, s1);
31        t1 = iBuilder->hsimd_packl(16, s0, s1);
32    }
33    p0 = iBuilder->simd_if(1, hi_mask, t0, iBuilder->simd_srli(16, t1, shift));
34    p1 = iBuilder->simd_if(1, hi_mask, iBuilder->simd_slli(16, t0, shift), t1);
35}
36
37void s2p(IDISA::IDISA_Builder * iBuilder, Value * input[], Value * output[]) {
38    Value * bit00224466[4];
39    Value * bit11335577[4];
40
41    for (unsigned i = 0; i < 4; i++) {
42        Value * s0 = input[2 * i];
43        Value * s1 = input[2 * i + 1];
44        s2p_step(iBuilder, s0, s1, iBuilder->simd_himask(2), 1, bit00224466[i], bit11335577[i]);
45    }
46    Value * bit00004444[2];
47    Value * bit22226666[2];
48    Value * bit11115555[2];
49    Value * bit33337777[2];
50    for (unsigned j = 0; j<2; j++) {
51        s2p_step(iBuilder, bit00224466[2*j], bit00224466[2*j+1],
52                 iBuilder->simd_himask(4), 2, bit00004444[j], bit22226666[j]);
53        s2p_step(iBuilder, bit11335577[2*j], bit11335577[2*j+1],
54                 iBuilder->simd_himask(4), 2, bit11115555[j], bit33337777[j]);
55    }
56    s2p_step(iBuilder, bit00004444[0], bit00004444[1], iBuilder->simd_himask(8), 4, output[0], output[4]);
57    s2p_step(iBuilder, bit11115555[0], bit11115555[1], iBuilder->simd_himask(8), 4, output[1], output[5]);
58    s2p_step(iBuilder, bit22226666[0], bit22226666[1], iBuilder->simd_himask(8), 4, output[2], output[6]);
59    s2p_step(iBuilder, bit33337777[0], bit33337777[1], iBuilder->simd_himask(8), 4, output[3], output[7]);
60}
61
62/* Alternative transposition model, but small field width packs are problematic. */
63#if 0
64void s2p_ideal(IDISA::IDISA_Builder * iBuilder, Value * input[], Value * output[]) {
65    Value * hi_nybble[4];
66    Value * lo_nybble[4];
67    for (unsigned i = 0; i<4; i++) {
68        Value * s0 = input[2*i];
69        Value * s1 = input[2*i+1];
70        hi_nybble[i] = iBuilder->hsimd_packh(8, s0, s1);
71        lo_nybble[i] = iBuilder->hsimd_packl(8, s0, s1);
72    }
73    Value * pair01[2];
74    Value * pair23[2];
75    Value * pair45[2];
76    Value * pair67[2];
77    for (unsigned i = 0; i<2; i++) {
78        pair01[i] = iBuilder->hsimd_packh(4, hi_nybble[2*i], hi_nybble[2*i+1]);
79        pair23[i] = iBuilder->hsimd_packl(4, hi_nybble[2*i], hi_nybble[2*i+1]);
80        pair45[i] = iBuilder->hsimd_packh(4, lo_nybble[2*i], lo_nybble[2*i+1]);
81        pair67[i] = iBuilder->hsimd_packl(4, lo_nybble[2*i], lo_nybble[2*i+1]);
82    }
83    output[0] = iBuilder->hsimd_packh(2, pair01[0], pair01[1]);
84    output[1] = iBuilder->hsimd_packl(2, pair01[0], pair01[1]);
85    output[2] = iBuilder->hsimd_packh(2, pair23[0], pair23[1]);
86    output[3] = iBuilder->hsimd_packl(2, pair23[0], pair23[1]);
87    output[4] = iBuilder->hsimd_packh(2, pair45[0], pair45[1]);
88    output[5] = iBuilder->hsimd_packl(2, pair45[0], pair45[1]);
89    output[6] = iBuilder->hsimd_packh(2, pair67[0], pair67[1]);
90    output[7] = iBuilder->hsimd_packl(2, pair67[0], pair67[1]);
91}
92#endif
93   
94   
95#if 0
96
97
98void generateS2P_16Kernel(Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder) {
99    kBuilder->addInputStream(16, "unit_pack");
100    for(unsigned i = 0; i < 16; i++) {
101            kBuilder->addOutputStream(1);
102    }
103    kBuilder->prepareFunction();
104
105    Value * ptr = kBuilder->getInputStream(0);
106
107    Value * lo[8];
108    Value * hi[8];
109    for (unsigned i = 0; i < 8; i++) {
110        Value * s0 = iBuilder->CreateBlockAlignedLoad(ptr, {iBuilder->getInt32(0), iBuilder->getInt32(2 * i)});
111        Value * s1 = iBuilder->CreateBlockAlignedLoad(ptr, {iBuilder->getInt32(0), iBuilder->getInt32(2 * i + 1)});
112        lo[i] = iBuilder->hsimd_packl(16, s0, s1);
113        hi[i] = iBuilder->hsimd_packh(16, s0, s1);
114    }
115
116    Value * output[16];
117    s2p(iBuilder, lo, output);
118    s2p(iBuilder, hi, output + 8);
119    for (unsigned j = 0; j < 16; j++) {
120        iBuilder->CreateBlockAlignedStore(output[j], kBuilder->getOutputStream(j));
121    }
122    kBuilder->finalize();
123}
124   
125#endif
126   
127void S2PKernel::generateFinalBlockMethod() const {
128    /* Prepare the s2p final block function:
129     assumption: if remaining bytes is greater than 0, it is safe to read a full block of bytes.
130     if remaining bytes is zero, no read should be performed (e.g. for mmapped buffer).
131     */
132    auto savePoint = iBuilder->saveIP();
133    Module * m = iBuilder->getModule();
134    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
135    Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
136    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "fb_entry", finalBlockFunction, 0));
137   
138    Value * self = getParameter(finalBlockFunction, "self");
139    Value * remainingBytes = getParameter(finalBlockFunction, "remainingBytes");
140   
141    BasicBlock * finalPartialBlock = BasicBlock::Create(iBuilder->getContext(), "partial", finalBlockFunction, 0);
142    BasicBlock * finalEmptyBlock = BasicBlock::Create(iBuilder->getContext(), "empty", finalBlockFunction, 0);
143    BasicBlock * exitBlock = BasicBlock::Create(iBuilder->getContext(), "exit", finalBlockFunction, 0);
144   
145    Value * emptyBlockCond = iBuilder->CreateICmpEQ(remainingBytes, iBuilder->getSize(0));
146    iBuilder->CreateCondBr(emptyBlockCond, finalEmptyBlock, finalPartialBlock);
147    iBuilder->SetInsertPoint(finalPartialBlock);
148    iBuilder->CreateCall(doBlockFunction, {self});
149   
150    iBuilder->CreateBr(exitBlock);
151   
152    iBuilder->SetInsertPoint(finalEmptyBlock);
153    Value * blockNo = getScalarField(self, blockNoScalar);
154    Value * basisBitsPtr = getStreamView(self, "basisBits", blockNo, iBuilder->getInt64(0));
155    iBuilder->CreateStore(Constant::getNullValue(basisBitsPtr->getType()->getPointerElementType()), basisBitsPtr);
156    iBuilder->CreateBr(exitBlock);
157   
158    iBuilder->SetInsertPoint(exitBlock);
159    iBuilder->CreateRetVoid();
160    iBuilder->restoreIP(savePoint);
161}
162   
163void S2PKernel::generateDoBlockLogic(Value * self, Value * blockNo) const {
164    Value * bytepack[8];
165    for (unsigned i = 0; i < 8; i++) {
166        Value * byteStream = getStream(self, "byteStream", blockNo, iBuilder->getInt32(0), iBuilder->getInt32(i));
167        bytepack[i] = iBuilder->CreateBlockAlignedLoad(byteStream);
168    }
169    Value * basisbits[8];
170    s2p(iBuilder, bytepack, basisbits);
171    for (unsigned i = 0; i < 8; ++i) {
172        Value * basisBits = getStream(self, "basisBits", blockNo, iBuilder->getInt32(i));
173        iBuilder->CreateBlockAlignedStore(basisbits[i], basisBits);
174    }
175}
176   
177void S2PKernel::generateDoBlockMethod() const {
178    auto savePoint = iBuilder->saveIP();
179
180    Function * doBlockFunction = iBuilder->getModule()->getFunction(mKernelName + doBlock_suffix);
181   
182    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction));
183   
184    Value * self = getParameter(doBlockFunction, "self");
185    Value * blockNo = getScalarField(self, blockNoScalar);
186   
187    generateDoBlockLogic(self, blockNo);
188
189    iBuilder->CreateRetVoid();
190    iBuilder->restoreIP(savePoint);
191}
192
193S2PKernel::S2PKernel(IDISA::IDISA_Builder * builder)
194: KernelBuilder(builder, "s2p", {Binding{builder->getStreamSetTy(1, 8), "byteStream"}}, {Binding{builder->getStreamSetTy(8, 1), "basisBits"}}, {}, {}, {}) {
195    setNoTerminateAttribute(true);
196    setDoBlockUpdatesProducedItemCountsAttribute(false);
197
198}
199
200}
Note: See TracBrowser for help on using the repository browser.