source: icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.cpp @ 5292

Last change on this file since 5292 was 5292, checked in by nmedfort, 3 years ago

Removed 'function' and 'self' parameters from generateXXXMethod() functions.

File size: 9.0 KB
Line 
1#include "p2s_kernel.h"
2#include "IR_Gen/idisa_builder.h"  // for IDISA_Builder
3#include "llvm/IR/Constant.h"      // for Constant
4#include "llvm/IR/Constants.h"     // for ConstantInt
5#include "llvm/IR/DerivedTypes.h"  // for PointerType, VectorType
6#include "llvm/IR/Function.h"      // for Function, Function::arg_iterator
7#include <llvm/IR/Module.h>
8#include <kernels/streamset.h>
9namespace llvm { class Value; }
10
11using namespace llvm;
12
13namespace kernel{
14       
15void p2s_step(IDISA::IDISA_Builder * iBuilder, Value * p0, Value * p1, Value * hi_mask, unsigned shift, Value * &s1, Value * &s0) {
16    Value * t0 = iBuilder->simd_if(1, hi_mask, p0, iBuilder->simd_srli(16, p1, shift));
17    Value * t1 = iBuilder->simd_if(1, hi_mask, iBuilder->simd_slli(16, p0, shift), p1);
18    s1 = iBuilder->esimd_mergeh(8, t1, t0);
19    s0 = iBuilder->esimd_mergel(8, t1, t0);
20}
21
22inline void p2s(IDISA::IDISA_Builder * iBuilder, Value * p[], Value * s[]) {
23    Value * bit00004444[2];
24    Value * bit22226666[2];
25    Value * bit11115555[2];
26    Value * bit33337777[2];
27    p2s_step(iBuilder, p[0], p[4], iBuilder->simd_himask(8), 4, bit00004444[1], bit00004444[0]);
28    p2s_step(iBuilder, p[1], p[5], iBuilder->simd_himask(8), 4, bit11115555[1], bit11115555[0]);
29    p2s_step(iBuilder, p[2], p[6], iBuilder->simd_himask(8), 4, bit22226666[1], bit22226666[0]);
30    p2s_step(iBuilder, p[3], p[7], iBuilder->simd_himask(8), 4, bit33337777[1], bit33337777[0]);
31    Value * bit00224466[4];
32    Value * bit11335577[4];
33    for (unsigned j = 0; j<2; j++) {
34        p2s_step(iBuilder, bit00004444[j], bit22226666[j],iBuilder->simd_himask(4), 2, bit00224466[2*j+1], bit00224466[2*j]);
35        p2s_step(iBuilder, bit11115555[j], bit33337777[j],iBuilder->simd_himask(4), 2, bit11335577[2*j+1], bit11335577[2*j]);
36    }
37    for (unsigned j = 0; j<4; j++) {
38        p2s_step(iBuilder, bit00224466[j], bit11335577[j], iBuilder->simd_himask(2), 1, s[2*j+1], s[2*j]);
39    }
40}
41               
42void P2SKernel::generateDoBlockMethod(llvm::Value * blockNo) {
43    Value * p_bitblock[8];
44    for (unsigned i = 0; i < 8; i++) {
45        Value * ptr = getStream("basisBits", blockNo, iBuilder->getInt32(i));
46        p_bitblock[i] = iBuilder->CreateBlockAlignedLoad(ptr);
47    }
48    Value * s_bytepack[8];
49    p2s(iBuilder, p_bitblock, s_bytepack);
50    for (unsigned j = 0; j < 8; ++j) {
51        Value * ptr = getStream("byteStream", blockNo, iBuilder->getInt32(0), iBuilder->getInt32(j));
52        iBuilder->CreateBlockAlignedStore(s_bytepack[j], ptr);
53    }
54}
55
56P2SKernel::P2SKernel(IDISA::IDISA_Builder * iBuilder)
57: BlockOrientedKernel(iBuilder, "p2s",
58              {Binding{iBuilder->getStreamSetTy(8, 1), "basisBits"}},
59              {Binding{iBuilder->getStreamSetTy(1, 8), "byteStream"}},
60              {}, {}, {}) {
61
62}
63   
64
65void P2SKernelWithCompressedOutput::generateDoBlockMethod(llvm::Value * blockNo) {
66    PointerType * i8PtrTy = iBuilder->getInt8PtrTy();
67    IntegerType * i32 = iBuilder->getInt32Ty();
68    PointerType * bitBlockPtrTy = PointerType::get(iBuilder->getBitBlockType(), 0);
69
70    Value * basisBits[8];
71    for (unsigned i = 0; i < 8; i++) {
72        Value * basisBitsBlock_ptr = getStream("basisBits", blockNo, iBuilder->getInt32(i));
73        basisBits[i] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr);
74    }
75    Value * bytePack[8];
76    p2s(iBuilder, basisBits, bytePack);
77
78    unsigned units_per_register = iBuilder->getBitBlockWidth()/8;
79    Value * delCountBlock_ptr = getStream("deletionCounts", blockNo, iBuilder->getInt32(0));
80    Value * unit_counts = iBuilder->fwCast(units_per_register, iBuilder->CreateBlockAlignedLoad(delCountBlock_ptr));
81
82    Value * unitsGenerated = getProducedItemCount("byteStream"); // units generated to buffer
83    Value * output_ptr = getStreamView(i8PtrTy, "byteStream", blockNo, iBuilder->getInt32(0));
84    Value * offset = iBuilder->getInt32(0);
85    for (unsigned j = 0; j < 8; ++j) {
86        iBuilder->CreateAlignedStore(bytePack[j], iBuilder->CreateBitCast(iBuilder->CreateGEP(output_ptr, offset), bitBlockPtrTy), 1);
87        offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(j)), i32);
88    }
89    unitsGenerated = iBuilder->CreateAdd(unitsGenerated, iBuilder->CreateZExt(offset, iBuilder->getSizeTy()));
90    setProducedItemCount("byteStream", unitsGenerated);
91}
92   
93P2SKernelWithCompressedOutput::P2SKernelWithCompressedOutput(IDISA::IDISA_Builder * iBuilder)
94: BlockOrientedKernel(iBuilder, "p2s_compress",
95              {Binding{iBuilder->getStreamSetTy(8, 1), "basisBits"}, Binding{iBuilder->getStreamSetTy(1, 1), "deletionCounts"}},
96              {Binding{iBuilder->getStreamSetTy(1, 8), "byteStream"}},
97              {}, {}, {}) {
98    setDoBlockUpdatesProducedItemCountsAttribute(true);
99}
100   
101   
102
103void P2S16Kernel::generateDoBlockMethod(Value * blockNo) {
104    Value * hi_input[8];
105    for (unsigned j = 0; j < 8; ++j) {
106        Value * ptr = getStream("basisBits", blockNo, iBuilder->getInt32(0), iBuilder->getInt32(j));
107        hi_input[j] = iBuilder->CreateBlockAlignedLoad(ptr);
108    }
109    Value * hi_bytes[8];
110    p2s(iBuilder, hi_input, hi_bytes);   
111    Value * lo_input[8];
112    for (unsigned j = 0; j < 8; ++j) {
113        Value * ptr = getStream("basisBits", blockNo, iBuilder->getInt32(0), iBuilder->getInt32(j + 8));
114        lo_input[j] = iBuilder->CreateBlockAlignedLoad(ptr);
115    }
116    Value * lo_bytes[8];
117    p2s(iBuilder, lo_input, lo_bytes);   
118    for (unsigned j = 0; j < 8; ++j) {
119        Value * merge0 = iBuilder->bitCast(iBuilder->esimd_mergel(8, hi_bytes[j], lo_bytes[j]));
120        Value * merge1 = iBuilder->bitCast(iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
121        Value * ptr0 = getStream("i16Stream", blockNo, iBuilder->getInt32(2 * j));
122        iBuilder->CreateBlockAlignedStore(merge0, ptr0);
123        Value * ptr1 = getStream("i16Stream", blockNo, iBuilder->getInt32(2 * j + 1));
124        iBuilder->CreateBlockAlignedStore(merge1, ptr1);
125    }
126}
127   
128
129P2S16Kernel::P2S16Kernel(IDISA::IDISA_Builder * iBuilder)
130: BlockOrientedKernel(iBuilder, "p2s_16",
131              {Binding{iBuilder->getStreamSetTy(16, 1), "basisBits"}},
132              {Binding{iBuilder->getStreamSetTy(1, 16), "i16Stream"}},
133              {}, {}, {}) {
134
135}
136
137   
138void P2S16KernelWithCompressedOutput::generateDoBlockMethod(Value * blockNo) {
139    IntegerType * i32Ty = iBuilder->getInt32Ty();
140    PointerType * bitBlockPtrTy = iBuilder->getBitBlockType()->getPointerTo();
141    Value * hi_input[8];
142    for (unsigned j = 0; j < 8; ++j) {
143        Value * ptr = getStream("basisBits", blockNo, iBuilder->getInt32(j));
144        hi_input[j] = iBuilder->CreateBlockAlignedLoad(ptr);
145    }
146    Value * hi_bytes[8];
147    p2s(iBuilder, hi_input, hi_bytes);
148    Value * lo_input[8];
149    for (unsigned j = 0; j < 8; ++j) {
150        Value * ptr = getStream("basisBits", blockNo, iBuilder->getInt32(j + 8));
151        lo_input[j] = iBuilder->CreateBlockAlignedLoad(ptr);
152    }
153    Value * lo_bytes[8];
154    p2s(iBuilder, lo_input, lo_bytes);
155    Value * delCountBlock_ptr = getStream("deletionCounts", blockNo, iBuilder->getInt32(0));
156    Value * unit_counts = iBuilder->fwCast(iBuilder->getBitBlockWidth() / 16, iBuilder->CreateBlockAlignedLoad(delCountBlock_ptr));
157    PointerType * int16PtrTy = PointerType::get(iBuilder->getInt16Ty(), 0);
158    ConstantInt * stride = iBuilder->getSize(iBuilder->getStride());
159    Value * i16UnitsGenerated = getProducedItemCount("i16Stream"); // units generated to buffer
160    Value * i16BlockNo = iBuilder->CreateUDiv(i16UnitsGenerated, stride);
161    Value * u16_output_ptr = getStreamView(int16PtrTy, "i16Stream", i16BlockNo, iBuilder->CreateURem(i16UnitsGenerated, stride));
162    Value * offset = ConstantInt::get(i32Ty, 0);
163    for (unsigned j = 0; j < 8; ++j) {
164        Value * merge0 = iBuilder->bitCast(iBuilder->esimd_mergel(8, hi_bytes[j], lo_bytes[j]));
165        Value * merge1 = iBuilder->bitCast(iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
166        iBuilder->CreateAlignedStore(merge0, iBuilder->CreateBitCast(iBuilder->CreateGEP(u16_output_ptr, offset), bitBlockPtrTy), 1);
167        offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(2 * j)), i32Ty);
168        iBuilder->CreateAlignedStore(merge1, iBuilder->CreateBitCast(iBuilder->CreateGEP(u16_output_ptr, offset), bitBlockPtrTy), 1);
169        offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(2 * j + 1)), i32Ty);
170    }
171    i16UnitsGenerated = iBuilder->CreateAdd(i16UnitsGenerated, iBuilder->CreateZExt(offset, iBuilder->getSizeTy()));
172    setProducedItemCount("i16Stream", i16UnitsGenerated);
173}
174   
175P2S16KernelWithCompressedOutput::P2S16KernelWithCompressedOutput(IDISA::IDISA_Builder * iBuilder)
176: BlockOrientedKernel(iBuilder, "p2s_16_compress",
177              {Binding{iBuilder->getStreamSetTy(16, 1), "basisBits"}, Binding{iBuilder->getStreamSetTy(1, 1), "deletionCounts"}},
178              {Binding{iBuilder->getStreamSetTy(1, 16), "i16Stream"}},
179              {},
180              {},
181              {Binding{iBuilder->getSizeTy(), "unitsGenerated"}, Binding{iBuilder->getSizeTy(), "unitsWritten"}}) {
182    setDoBlockUpdatesProducedItemCountsAttribute(true);
183}
184   
185   
186}
Note: See TracBrowser for help on using the repository browser.