source: icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.cpp @ 5353

Last change on this file since 5353 was 5350, checked in by nmedfort, 3 years ago

First attempt at inlining all DoBlock and FinalBlock functions by using indirect jumps. Disabled for NVPTX until Linda can check whether they're supported by the LLVM NVPTX library.

File size: 6.6 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "s2p_kernel.h"
7#include <IR_Gen/idisa_builder.h>  // for IDISA_Builder
8#include <llvm/IR/Constant.h>      // for Constant
9#include <llvm/IR/Module.h>
10#include <llvm/Support/raw_ostream.h>
11namespace llvm { class BasicBlock; }
12namespace llvm { class Function; }
13namespace llvm { class Value; }
14
15using namespace llvm;
16
17namespace kernel {
18
19const int PACK_LANES = 1;
20
21void s2p_step(IDISA::IDISA_Builder * iBuilder, Value * s0, Value * s1, Value * hi_mask, unsigned shift, Value * &p0, Value * &p1) {
22    Value * t0 = nullptr;
23    Value * t1 = nullptr;
24    if ((iBuilder->getBitBlockWidth() == 256) && (PACK_LANES == 2)) {
25        Value * x0 = iBuilder->esimd_mergel(128, s0, s1);
26        Value * x1 = iBuilder->esimd_mergeh(128, s0, s1);
27        t0 = iBuilder->hsimd_packh_in_lanes(PACK_LANES, 16, x0, x1);
28        t1 = iBuilder->hsimd_packl_in_lanes(PACK_LANES, 16, x0, x1);
29    }
30    else {
31        t0 = iBuilder->hsimd_packh(16, s0, s1);
32        t1 = iBuilder->hsimd_packl(16, s0, s1);
33    }
34    p0 = iBuilder->simd_if(1, hi_mask, t0, iBuilder->simd_srli(16, t1, shift));
35    p1 = iBuilder->simd_if(1, hi_mask, iBuilder->simd_slli(16, t0, shift), t1);
36}
37
38void s2p(IDISA::IDISA_Builder * iBuilder, Value * input[], Value * output[]) {
39    Value * bit00224466[4];
40    Value * bit11335577[4];
41
42    for (unsigned i = 0; i < 4; i++) {
43        Value * s0 = input[2 * i];
44        Value * s1 = input[2 * i + 1];
45        s2p_step(iBuilder, s0, s1, iBuilder->simd_himask(2), 1, bit00224466[i], bit11335577[i]);
46    }
47    Value * bit00004444[2];
48    Value * bit22226666[2];
49    Value * bit11115555[2];
50    Value * bit33337777[2];
51    for (unsigned j = 0; j<2; j++) {
52        s2p_step(iBuilder, bit00224466[2*j], bit00224466[2*j+1],
53                 iBuilder->simd_himask(4), 2, bit00004444[j], bit22226666[j]);
54        s2p_step(iBuilder, bit11335577[2*j], bit11335577[2*j+1],
55                 iBuilder->simd_himask(4), 2, bit11115555[j], bit33337777[j]);
56    }
57    s2p_step(iBuilder, bit00004444[0], bit00004444[1], iBuilder->simd_himask(8), 4, output[0], output[4]);
58    s2p_step(iBuilder, bit11115555[0], bit11115555[1], iBuilder->simd_himask(8), 4, output[1], output[5]);
59    s2p_step(iBuilder, bit22226666[0], bit22226666[1], iBuilder->simd_himask(8), 4, output[2], output[6]);
60    s2p_step(iBuilder, bit33337777[0], bit33337777[1], iBuilder->simd_himask(8), 4, output[3], output[7]);
61}
62
63/* Alternative transposition model, but small field width packs are problematic. */
64#if 0
65void s2p_ideal(IDISA::IDISA_Builder * iBuilder, Value * input[], Value * output[]) {
66    Value * hi_nybble[4];
67    Value * lo_nybble[4];
68    for (unsigned i = 0; i<4; i++) {
69        Value * s0 = input[2*i];
70        Value * s1 = input[2*i+1];
71        hi_nybble[i] = iBuilder->hsimd_packh(8, s0, s1);
72        lo_nybble[i] = iBuilder->hsimd_packl(8, s0, s1);
73    }
74    Value * pair01[2];
75    Value * pair23[2];
76    Value * pair45[2];
77    Value * pair67[2];
78    for (unsigned i = 0; i<2; i++) {
79        pair01[i] = iBuilder->hsimd_packh(4, hi_nybble[2*i], hi_nybble[2*i+1]);
80        pair23[i] = iBuilder->hsimd_packl(4, hi_nybble[2*i], hi_nybble[2*i+1]);
81        pair45[i] = iBuilder->hsimd_packh(4, lo_nybble[2*i], lo_nybble[2*i+1]);
82        pair67[i] = iBuilder->hsimd_packl(4, lo_nybble[2*i], lo_nybble[2*i+1]);
83    }
84    output[0] = iBuilder->hsimd_packh(2, pair01[0], pair01[1]);
85    output[1] = iBuilder->hsimd_packl(2, pair01[0], pair01[1]);
86    output[2] = iBuilder->hsimd_packh(2, pair23[0], pair23[1]);
87    output[3] = iBuilder->hsimd_packl(2, pair23[0], pair23[1]);
88    output[4] = iBuilder->hsimd_packh(2, pair45[0], pair45[1]);
89    output[5] = iBuilder->hsimd_packl(2, pair45[0], pair45[1]);
90    output[6] = iBuilder->hsimd_packh(2, pair67[0], pair67[1]);
91    output[7] = iBuilder->hsimd_packl(2, pair67[0], pair67[1]);
92}
93#endif
94   
95#if 0
96void generateS2P_16Kernel(Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder) {
97    kBuilder->addInputStream(16, "unit_pack");
98    for(unsigned i = 0; i < 16; i++) {
99            kBuilder->addOutputStream(1);
100    }
101    kBuilder->prepareFunction();
102
103    Value * ptr = kBuilder->getInputStream(0);
104
105    Value * lo[8];
106    Value * hi[8];
107    for (unsigned i = 0; i < 8; i++) {
108        Value * s0 = iBuilder->CreateBlockAlignedLoad(ptr, {iBuilder->getInt32(0), iBuilder->getInt32(2 * i)});
109        Value * s1 = iBuilder->CreateBlockAlignedLoad(ptr, {iBuilder->getInt32(0), iBuilder->getInt32(2 * i + 1)});
110        lo[i] = iBuilder->hsimd_packl(16, s0, s1);
111        hi[i] = iBuilder->hsimd_packh(16, s0, s1);
112    }
113
114    Value * output[16];
115    s2p(iBuilder, lo, output);
116    s2p(iBuilder, hi, output + 8);
117    for (unsigned j = 0; j < 16; j++) {
118        iBuilder->CreateBlockAlignedStore(output[j], kBuilder->getOutputStream(j));
119    }
120    kBuilder->finalize();
121}   
122#endif
123   
124void S2PKernel::generateDoBlockMethod() {
125    Value * bytepack[8];
126    for (unsigned i = 0; i < 8; i++) {
127        bytepack[i] = loadInputStreamPack("byteStream", iBuilder->getInt32(0), iBuilder->getInt32(i));
128    }
129    Value * basisbits[8];
130    s2p(iBuilder, bytepack, basisbits);
131    for (unsigned i = 0; i < 8; ++i) {
132        storeOutputStreamBlock("basisBits", iBuilder->getInt32(i), basisbits[i]);
133    }
134}
135
136void S2PKernel::generateFinalBlockMethod(Value * remainingBytes) {
137    /* Prepare the s2p final block function:
138     assumption: if remaining bytes is greater than 0, it is safe to read a full block of bytes.
139     if remaining bytes is zero, no read should be performed (e.g. for mmapped buffer).
140     */
141   
142    BasicBlock * finalPartialBlock = CreateBasicBlock("partial");
143    BasicBlock * finalEmptyBlock = CreateBasicBlock("empty");
144    BasicBlock * exitBlock = CreateBasicBlock("exit");
145   
146    Value * emptyBlockCond = iBuilder->CreateICmpEQ(remainingBytes, iBuilder->getSize(0));
147    iBuilder->CreateCondBr(emptyBlockCond, finalEmptyBlock, finalPartialBlock);
148    iBuilder->SetInsertPoint(finalPartialBlock);
149    CreateDoBlockMethodCall();
150   
151    iBuilder->CreateBr(exitBlock);
152   
153    iBuilder->SetInsertPoint(finalEmptyBlock);
154
155    for (unsigned i = 0; i < 8; ++i) {
156        storeOutputStreamBlock("basisBits", iBuilder->getInt32(i), Constant::getNullValue(iBuilder->getBitBlockType()));
157    }
158
159    iBuilder->CreateBr(exitBlock);
160   
161    iBuilder->SetInsertPoint(exitBlock);
162}
163
164S2PKernel::S2PKernel(IDISA::IDISA_Builder * builder)
165: BlockOrientedKernel(builder, "s2p", {Binding{builder->getStreamSetTy(1, 8), "byteStream"}}, {Binding{builder->getStreamSetTy(8, 1), "basisBits"}}, {}, {}, {}) {
166    setNoTerminateAttribute(true);
167    setDoBlockUpdatesProducedItemCountsAttribute(false);
168}
169
170}
Note: See TracBrowser for help on using the repository browser.