source: icGREP/icgrep-devel/icgrep/kernels/s2p_kernel.cpp @ 4959

Last change on this file since 4959 was 4959, checked in by nmedfort, 4 years ago

Initial modifications to Pablo Compiler and Kernel Builder to support circular buffers for Lookahead.

File size: 3.0 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5#include "s2p_kernel.h"
6#include <kernels/kernel.h>
7#include <IDISA/idisa_builder.h>
8
// Lane count handed to hsimd_packh_in_lanes / hsimd_packl_in_lanes by s2p_step.
// s2p_step only takes that in-lanes path when this value is 2 (and the bit
// block width is 256), so with the value 1 the plain packh/packl path is used.
constexpr int PACK_LANES = 1;
10
11void s2p_step(IDISA::IDISA_Builder * iBuilder, Value * s0, Value * s1, Value * hi_mask, unsigned shift, Value * &p0, Value * &p1) {
12    Value * t0 = nullptr;
13    Value * t1 = nullptr;
14    if ((iBuilder->getBitBlockWidth() == 256) && (PACK_LANES == 2)) {
15        Value * x0 = iBuilder->esimd_mergel(128, s0, s1);
16        Value * x1 = iBuilder->esimd_mergeh(128, s0, s1);
17        t0 = iBuilder->hsimd_packh_in_lanes(PACK_LANES, 16, x0, x1);
18        t1 = iBuilder->hsimd_packl_in_lanes(PACK_LANES, 16, x0, x1);
19    }
20    else {
21        t0 = iBuilder->hsimd_packh(16, s0, s1);
22        t1 = iBuilder->hsimd_packl(16, s0, s1);
23    }
24    p0 = iBuilder->simd_if(1, hi_mask, t0, iBuilder->simd_srli(16, t1, shift));
25    p1 = iBuilder->simd_if(1, hi_mask, iBuilder->simd_slli(16, t0, shift), t1);
26}
27
28inline void s2p(IDISA::IDISA_Builder * iBuilder, Value * input, Value * output[]) {
29    Value * bit00224466[4];
30    Value * bit11335577[4];
31    for (unsigned i = 0; i<4; i++) {
32        Value * s0 = iBuilder->CreateBlockAlignedLoad(input, {iBuilder->getInt32(0), iBuilder->getInt32(2 * i)});
33        Value * s1 = iBuilder->CreateBlockAlignedLoad(input, {iBuilder->getInt32(0), iBuilder->getInt32(2 * i + 1)});
34        s2p_step(iBuilder, s0, s1, iBuilder->simd_himask(2), 1, bit00224466[i], bit11335577[i]);
35    }
36    Value * bit00004444[2];
37    Value * bit22226666[2];
38    Value * bit11115555[2];
39    Value * bit33337777[2];
40    for (unsigned j = 0; j<2; j++) {
41        s2p_step(iBuilder, bit00224466[2*j], bit00224466[2*j+1],
42                 iBuilder->simd_himask(4), 2, bit00004444[j], bit22226666[j]);
43        s2p_step(iBuilder, bit11335577[2*j], bit11335577[2*j+1],
44                 iBuilder->simd_himask(4), 2, bit11115555[j], bit33337777[j]);
45    }
46    s2p_step(iBuilder, bit00004444[0], bit00004444[1], iBuilder->simd_himask(8), 4, output[0], output[4]);
47    s2p_step(iBuilder, bit11115555[0], bit11115555[1], iBuilder->simd_himask(8), 4, output[1], output[5]);
48    s2p_step(iBuilder, bit22226666[0], bit22226666[1], iBuilder->simd_himask(8), 4, output[2], output[6]);
49    s2p_step(iBuilder, bit33337777[0], bit33337777[1], iBuilder->simd_himask(8), 4, output[3], output[7]);
50}
51
52void generateS2PKernel(Module *, IDISA::IDISA_Builder * iBuilder, KernelBuilder * kBuilder) {
53    kBuilder->addInputStream(8, "byte_pack");
54    for(unsigned i = 0; i < 8; ++i) {
55        kBuilder->addOutputStream(1);
56    }
57    kBuilder->prepareFunction();
58    for(unsigned i = 0; i < kBuilder->getSegmentBlocks(); ++i){
59        Value * output[8];
60        s2p(iBuilder, kBuilder->getInputStream(0), output);
61        for (unsigned j = 0; j < 8; ++j) {
62            iBuilder->CreateBlockAlignedStore(output[j], kBuilder->getOutputStream(j));
63        }
64        kBuilder->increment();
65    }
66    kBuilder->finalize();
67}
68
69
70
71
72
73
74
75
76
77
78
79
Note: See TracBrowser for help on using the repository browser.