source: icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.cpp @ 5440

Last change on this file since 5440 was 5440, checked in by nmedfort, 23 months ago

Large refactoring step. Removed IR generation code from Kernel (formerly KernelBuilder) and moved it into the new KernelBuilder class.

File size: 6.9 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "s2p_kernel.h"
7#include <kernels/kernel_builder.h>
8#include <llvm/Support/raw_ostream.h>
9
10using namespace llvm;
11
12namespace kernel {
13
// Lane count handed to the hsimd_pack{h,l}_in_lanes calls in s2p_step.
// s2p_step only takes that in-lane path when this constant equals 2 (and the
// bit block width is 256); with the current value of 1 it always uses the
// plain hsimd_packh / hsimd_packl path.
const int PACK_LANES = 1;
15
16void s2p_step(const std::unique_ptr<KernelBuilder> & iBuilder, Value * s0, Value * s1, Value * hi_mask, unsigned shift, Value * &p0, Value * &p1) {
17    Value * t0 = nullptr;
18    Value * t1 = nullptr;
19    if ((iBuilder->getBitBlockWidth() == 256) && (PACK_LANES == 2)) {
20        Value * x0 = iBuilder->esimd_mergel(128, s0, s1);
21        Value * x1 = iBuilder->esimd_mergeh(128, s0, s1);
22        t0 = iBuilder->hsimd_packh_in_lanes(PACK_LANES, 16, x0, x1);
23        t1 = iBuilder->hsimd_packl_in_lanes(PACK_LANES, 16, x0, x1);
24    } else {
25        t0 = iBuilder->hsimd_packh(16, s0, s1);
26        t1 = iBuilder->hsimd_packl(16, s0, s1);
27    }
28    p0 = iBuilder->simd_if(1, hi_mask, t0, iBuilder->simd_srli(16, t1, shift));
29    p1 = iBuilder->simd_if(1, hi_mask, iBuilder->simd_slli(16, t0, shift), t1);
30}
31
32void s2p(const std::unique_ptr<KernelBuilder> & iBuilder, Value * input[], Value * output[]) {
33    Value * bit00224466[4];
34    Value * bit11335577[4];
35
36    for (unsigned i = 0; i < 4; i++) {
37        Value * s0 = input[2 * i];
38        Value * s1 = input[2 * i + 1];
39        s2p_step(iBuilder, s0, s1, iBuilder->simd_himask(2), 1, bit00224466[i], bit11335577[i]);
40    }
41    Value * bit00004444[2];
42    Value * bit22226666[2];
43    Value * bit11115555[2];
44    Value * bit33337777[2];
45    for (unsigned j = 0; j<2; j++) {
46        s2p_step(iBuilder, bit00224466[2*j], bit00224466[2*j+1],
47                 iBuilder->simd_himask(4), 2, bit00004444[j], bit22226666[j]);
48        s2p_step(iBuilder, bit11335577[2*j], bit11335577[2*j+1],
49                 iBuilder->simd_himask(4), 2, bit11115555[j], bit33337777[j]);
50    }
51    s2p_step(iBuilder, bit00004444[0], bit00004444[1], iBuilder->simd_himask(8), 4, output[0], output[4]);
52    s2p_step(iBuilder, bit11115555[0], bit11115555[1], iBuilder->simd_himask(8), 4, output[1], output[5]);
53    s2p_step(iBuilder, bit22226666[0], bit22226666[1], iBuilder->simd_himask(8), 4, output[2], output[6]);
54    s2p_step(iBuilder, bit33337777[0], bit33337777[1], iBuilder->simd_himask(8), 4, output[3], output[7]);
55}
56
/* Alternative transposition model built purely from packh/packl operations at
 * field widths 8, 4 and 2. It is compiled out (#if 0) because packs at small
 * field widths are problematic. */
#if 0
// Same interface as s2p: reads input[0..7] and writes output[0..7].
void s2p_ideal(const std::unique_ptr<KernelBuilder> & iBuilder, Value * input[], Value * output[]) {
    // Stage 1: 8-bit-field packs of each consecutive input pair.
    Value * hi_nybble[4];
    Value * lo_nybble[4];
    for (unsigned i = 0; i<4; i++) {
        Value * s0 = input[2*i];
        Value * s1 = input[2*i+1];
        hi_nybble[i] = iBuilder->hsimd_packh(8, s0, s1);
        lo_nybble[i] = iBuilder->hsimd_packl(8, s0, s1);
    }
    // Stage 2: 4-bit-field packs of each consecutive nybble pair.
    Value * pair01[2];
    Value * pair23[2];
    Value * pair45[2];
    Value * pair67[2];
    for (unsigned i = 0; i<2; i++) {
        pair01[i] = iBuilder->hsimd_packh(4, hi_nybble[2*i], hi_nybble[2*i+1]);
        pair23[i] = iBuilder->hsimd_packl(4, hi_nybble[2*i], hi_nybble[2*i+1]);
        pair45[i] = iBuilder->hsimd_packh(4, lo_nybble[2*i], lo_nybble[2*i+1]);
        pair67[i] = iBuilder->hsimd_packl(4, lo_nybble[2*i], lo_nybble[2*i+1]);
    }
    // Stage 3: 2-bit-field packs produce the eight outputs.
    output[0] = iBuilder->hsimd_packh(2, pair01[0], pair01[1]);
    output[1] = iBuilder->hsimd_packl(2, pair01[0], pair01[1]);
    output[2] = iBuilder->hsimd_packh(2, pair23[0], pair23[1]);
    output[3] = iBuilder->hsimd_packl(2, pair23[0], pair23[1]);
    output[4] = iBuilder->hsimd_packh(2, pair45[0], pair45[1]);
    output[5] = iBuilder->hsimd_packl(2, pair45[0], pair45[1]);
    output[6] = iBuilder->hsimd_packh(2, pair67[0], pair67[1]);
    output[7] = iBuilder->hsimd_packl(2, pair67[0], pair67[1]);
}
#endif
88   
// Disabled (#if 0) generator for a 16-bit-unit variant of the transposition:
// one 16-bit input stream "unit_pack" in, sixteen 1-bit output streams out.
// NOTE(review): this uses Kernel methods (addInputStream, getInputStream, ...)
// that are not used anywhere else in this file; it presumably predates the
// current Kernel/KernelBuilder split and would need porting before re-enabling.
#if 0
void generateS2P_16Kernel(const std::unique_ptr<KernelBuilder> & iBuilder, Kernel * kBuilder) {
    kBuilder->addInputStream(16, "unit_pack");
    for(unsigned i = 0; i < 16; i++) {
            kBuilder->addOutputStream(1);
    }
    Value * ptr = kBuilder->getInputStream(0);

    // Load 16 input blocks pairwise and split each pair into its packl/packh
    // results at field width 16.
    Value * lo[8];
    Value * hi[8];
    for (unsigned i = 0; i < 8; i++) {
        Value * s0 = iBuilder->CreateBlockAlignedLoad(ptr, {iBuilder->getInt32(0), iBuilder->getInt32(2 * i)});
        Value * s1 = iBuilder->CreateBlockAlignedLoad(ptr, {iBuilder->getInt32(0), iBuilder->getInt32(2 * i + 1)});
        lo[i] = iBuilder->hsimd_packl(16, s0, s1);
        hi[i] = iBuilder->hsimd_packh(16, s0, s1);
    }

    // Transpose each group of eight with s2p: lo fills output[0..7],
    // hi fills output[8..15].
    Value * output[16];
    s2p(iBuilder, lo, output);
    s2p(iBuilder, hi, output + 8);
    for (unsigned j = 0; j < 16; j++) {
        iBuilder->CreateBlockAlignedStore(output[j], kBuilder->getOutputStream(j));
    }
}
#endif
114   
115void S2PKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
116    Value * bytepack[8];
117    for (unsigned i = 0; i < 8; i++) {
118        if (mAligned) {
119            bytepack[i] = iBuilder->loadInputStreamPack("byteStream", iBuilder->getInt32(0), iBuilder->getInt32(i));
120        } else {
121            Value * ptr = iBuilder->getInputStreamPackPtr("byteStream", iBuilder->getInt32(0), iBuilder->getInt32(i));
122            // CreateLoad defaults to aligned here, so we need to force the alignment to 1 byte.
123            bytepack[i] = iBuilder->CreateAlignedLoad(ptr, 1);
124        }
125    }
126    Value * basisbits[8];
127    s2p(iBuilder, bytepack, basisbits);
128    for (unsigned i = 0; i < 8; ++i) {
129        iBuilder->storeOutputStreamBlock("basisBits", iBuilder->getInt32(i), basisbits[i]);
130    }
131}
132
133void S2PKernel::generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder, Value * remainingBytes) {
134    /* Prepare the s2p final block function:
135     assumption: if remaining bytes is greater than 0, it is safe to read a full block of bytes.
136     if remaining bytes is zero, no read should be performed (e.g. for mmapped buffer).
137     */
138   
139    BasicBlock * finalPartialBlock = iBuilder->CreateBasicBlock("partial");
140    BasicBlock * finalEmptyBlock = iBuilder->CreateBasicBlock("empty");
141    BasicBlock * exitBlock = iBuilder->CreateBasicBlock("exit");
142   
143    Value * emptyBlockCond = iBuilder->CreateICmpEQ(remainingBytes, iBuilder->getSize(0));
144    iBuilder->CreateCondBr(emptyBlockCond, finalEmptyBlock, finalPartialBlock);
145    iBuilder->SetInsertPoint(finalPartialBlock);
146    CreateDoBlockMethodCall(iBuilder);
147   
148    iBuilder->CreateBr(exitBlock);
149   
150    iBuilder->SetInsertPoint(finalEmptyBlock);
151
152    for (unsigned i = 0; i < 8; ++i) {
153        iBuilder->storeOutputStreamBlock("basisBits", iBuilder->getInt32(i), Constant::getNullValue(iBuilder->getBitBlockType()));
154    }
155
156    iBuilder->CreateBr(exitBlock);
157   
158    iBuilder->SetInsertPoint(exitBlock);
159}
160
161S2PKernel::S2PKernel(const std::unique_ptr<KernelBuilder> & b, bool aligned)
162: BlockOrientedKernel(aligned ? "s2p" : "s2p_unaligned",
163    {Binding{b->getStreamSetTy(1, 8), "byteStream"}}, {Binding{b->getStreamSetTy(8, 1), "basisBits"}}, {}, {}, {}),
164  mAligned(aligned) {
165    setNoTerminateAttribute(true);
166}
167
168}
Note: See TracBrowser for help on using the repository browser.