source: icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.cpp @ 5706

Last change on this file since 5706 was 5706, checked in by nmedfort, 21 months ago

First stage of MultiBlockKernel and pipeline restructuring

File size: 8.4 KB
Line 
1#include "p2s_kernel.h"
2#include <kernels/streamset.h>
3#include <kernels/kernel_builder.h>
4
5namespace llvm { class Value; }
6
7using namespace llvm;
8using namespace parabix;
9
10namespace kernel{
11       
12void p2s_step(const std::unique_ptr<KernelBuilder> & iBuilder, Value * p0, Value * p1, Value * hi_mask, unsigned shift, Value * &s1, Value * &s0) {
13    Value * t0 = iBuilder->simd_if(1, hi_mask, p0, iBuilder->simd_srli(16, p1, shift));
14    Value * t1 = iBuilder->simd_if(1, hi_mask, iBuilder->simd_slli(16, p0, shift), p1);
15    s1 = iBuilder->esimd_mergeh(8, t1, t0);
16    s0 = iBuilder->esimd_mergel(8, t1, t0);
17}
18
19inline void p2s(const std::unique_ptr<KernelBuilder> & iBuilder, Value * p[], Value * s[]) {
20    Value * bit00004444[2];
21    Value * bit22226666[2];
22    Value * bit11115555[2];
23    Value * bit33337777[2];
24    p2s_step(iBuilder, p[0], p[4], iBuilder->simd_himask(8), 4, bit00004444[1], bit00004444[0]);
25    p2s_step(iBuilder, p[1], p[5], iBuilder->simd_himask(8), 4, bit11115555[1], bit11115555[0]);
26    p2s_step(iBuilder, p[2], p[6], iBuilder->simd_himask(8), 4, bit22226666[1], bit22226666[0]);
27    p2s_step(iBuilder, p[3], p[7], iBuilder->simd_himask(8), 4, bit33337777[1], bit33337777[0]);
28    Value * bit00224466[4];
29    Value * bit11335577[4];
30    for (unsigned j = 0; j<2; j++) {
31        p2s_step(iBuilder, bit00004444[j], bit22226666[j],iBuilder->simd_himask(4), 2, bit00224466[2*j+1], bit00224466[2*j]);
32        p2s_step(iBuilder, bit11115555[j], bit33337777[j],iBuilder->simd_himask(4), 2, bit11335577[2*j+1], bit11335577[2*j]);
33    }
34    for (unsigned j = 0; j<4; j++) {
35        p2s_step(iBuilder, bit00224466[j], bit11335577[j], iBuilder->simd_himask(2), 1, s[2*j+1], s[2*j]);
36    }
37}
38               
39void P2SKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
40    Value * p_bitblock[8];
41    for (unsigned i = 0; i < 8; i++) {
42        p_bitblock[i] = iBuilder->loadInputStreamBlock("basisBits", iBuilder->getInt32(i));
43    }
44    Value * s_bytepack[8];
45    p2s(iBuilder, p_bitblock, s_bytepack);
46    for (unsigned j = 0; j < 8; ++j) {
47        iBuilder->storeOutputStreamPack("byteStream", iBuilder->getInt32(0), iBuilder->getInt32(j), s_bytepack[j]);
48    }
49}
50
51void P2SKernelWithCompressedOutput::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
52    IntegerType * i32 = iBuilder->getInt32Ty();
53    PointerType * bitBlockPtrTy = PointerType::get(iBuilder->getBitBlockType(), 0);
54
55    Value * basisBits[8];
56    for (unsigned i = 0; i < 8; i++) {
57        basisBits[i] = iBuilder->loadInputStreamBlock("basisBits", iBuilder->getInt32(i));
58    }
59    Value * bytePack[8];
60    p2s(iBuilder, basisBits, bytePack);
61
62    unsigned units_per_register = iBuilder->getBitBlockWidth()/8;
63    Value * delCountBlock_ptr = iBuilder->getInputStreamBlockPtr("deletionCounts", iBuilder->getInt32(0));
64    Value * unit_counts = iBuilder->fwCast(units_per_register, iBuilder->CreateBlockAlignedLoad(delCountBlock_ptr));
65
66    Value * output_ptr = iBuilder->getOutputStreamBlockPtr("byteStream", iBuilder->getInt32(0));
67    output_ptr = iBuilder->CreatePointerCast(output_ptr, iBuilder->getInt8PtrTy());
68    Value * offset = iBuilder->getInt32(0);
69    for (unsigned j = 0; j < 8; ++j) {
70        iBuilder->CreateStore(bytePack[j], iBuilder->CreateBitCast(iBuilder->CreateGEP(output_ptr, offset), bitBlockPtrTy));
71        offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(j)), i32);
72    }
73
74    Value * unitsGenerated = iBuilder->getProducedItemCount("byteStream"); // units generated to buffer
75    unitsGenerated = iBuilder->CreateAdd(unitsGenerated, iBuilder->CreateZExt(offset, iBuilder->getSizeTy()));
76    iBuilder->setProducedItemCount("byteStream", unitsGenerated);
77}
78
79void P2S16Kernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
80    Value * hi_input[8];
81    for (unsigned j = 0; j < 8; ++j) {
82        hi_input[j] = iBuilder->loadInputStreamBlock("basisBits", iBuilder->getInt32(j));
83    }
84    Value * hi_bytes[8];
85    p2s(iBuilder, hi_input, hi_bytes);   
86    Value * lo_input[8];
87    for (unsigned j = 0; j < 8; ++j) {
88        lo_input[j] = iBuilder->loadInputStreamBlock("basisBits", iBuilder->getInt32(j + 8));
89    }
90    Value * lo_bytes[8];
91    p2s(iBuilder, lo_input, lo_bytes);   
92    for (unsigned j = 0; j < 8; ++j) {
93        Value * merge0 = iBuilder->bitCast(iBuilder->esimd_mergel(8, hi_bytes[j], lo_bytes[j]));
94        Value * merge1 = iBuilder->bitCast(iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
95        iBuilder->storeOutputStreamPack("i16Stream", iBuilder->getInt32(0), iBuilder->getInt32(2 * j), merge0);
96        iBuilder->storeOutputStreamPack("i16Stream", iBuilder->getInt32(0), iBuilder->getInt32(2 * j + 1), merge1);
97    }
98}
99       
100void P2S16KernelWithCompressedOutput::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
101    IntegerType * i32Ty = iBuilder->getInt32Ty();
102    PointerType * int16PtrTy = iBuilder->getInt16Ty()->getPointerTo();
103    PointerType * bitBlockPtrTy = iBuilder->getBitBlockType()->getPointerTo();
104    ConstantInt * blockMask = iBuilder->getSize(iBuilder->getBitBlockWidth() - 1);
105
106    Value * hi_input[8];
107    for (unsigned j = 0; j < 8; ++j) {
108        hi_input[j] = iBuilder->loadInputStreamBlock("basisBits", iBuilder->getInt32(j));
109    }
110    Value * hi_bytes[8];
111    p2s(iBuilder, hi_input, hi_bytes);
112
113    Value * lo_input[8];
114    for (unsigned j = 0; j < 8; ++j) {
115        lo_input[j] = iBuilder->loadInputStreamBlock("basisBits", iBuilder->getInt32(j + 8));
116    }
117    Value * lo_bytes[8];
118    p2s(iBuilder, lo_input, lo_bytes);
119
120    Value * delCount = iBuilder->loadInputStreamBlock("deletionCounts", iBuilder->getInt32(0));
121    Value * unitCounts = iBuilder->fwCast(iBuilder->getBitBlockWidth() / 16, delCount);
122    Value * outputPtr = iBuilder->getOutputStreamBlockPtr("i16Stream", iBuilder->getInt32(0));
123    outputPtr = iBuilder->CreatePointerCast(outputPtr, int16PtrTy);
124    Value * i16UnitsGenerated = iBuilder->getProducedItemCount("i16Stream"); // units generated to buffer
125    outputPtr = iBuilder->CreateGEP(outputPtr, iBuilder->CreateAnd(i16UnitsGenerated, blockMask));
126
127    Value * offset = ConstantInt::get(i32Ty, 0);
128
129    for (unsigned j = 0; j < 8; ++j) {
130        Value * merge0 = iBuilder->bitCast(iBuilder->esimd_mergel(8, hi_bytes[j], lo_bytes[j]));
131        iBuilder->CreateAlignedStore(merge0, iBuilder->CreateBitCast(iBuilder->CreateGEP(outputPtr, offset), bitBlockPtrTy), 1);
132        offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unitCounts, iBuilder->getInt32(2 * j)), i32Ty);
133
134        Value * merge1 = iBuilder->bitCast(iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
135        iBuilder->CreateAlignedStore(merge1, iBuilder->CreateBitCast(iBuilder->CreateGEP(outputPtr, offset), bitBlockPtrTy), 1);
136        offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unitCounts, iBuilder->getInt32(2 * j + 1)), i32Ty);
137    }
138
139    Value * i16UnitsFinal = iBuilder->CreateAdd(i16UnitsGenerated, iBuilder->CreateZExt(offset, iBuilder->getSizeTy()));
140    iBuilder->setProducedItemCount("i16Stream", i16UnitsFinal);
141}
142
143P2SKernel::P2SKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder)
144: BlockOrientedKernel("p2s",
145              {Binding{iBuilder->getStreamSetTy(8, 1), "basisBits"}},
146              {Binding{iBuilder->getStreamSetTy(1, 8), "byteStream"}},
147              {}, {}, {}) {
148}
149
150P2SKernelWithCompressedOutput::P2SKernelWithCompressedOutput(const std::unique_ptr<kernel::KernelBuilder> & iBuilder)
151: BlockOrientedKernel("p2s_compress",
152              {Binding{iBuilder->getStreamSetTy(8, 1), "basisBits"}, Binding{iBuilder->getStreamSetTy(1, 1), "deletionCounts"}},
153              {Binding{iBuilder->getStreamSetTy(1, 8), "byteStream", BoundedRate(0, 1)}},
154              {}, {}, {}) {
155}
156
157P2S16Kernel::P2S16Kernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder)
158: BlockOrientedKernel("p2s_16",
159              {Binding{iBuilder->getStreamSetTy(16, 1), "basisBits"}},
160              {Binding{iBuilder->getStreamSetTy(1, 16), "i16Stream"}},
161              {}, {}, {}) {
162}
163
164
165P2S16KernelWithCompressedOutput::P2S16KernelWithCompressedOutput(const std::unique_ptr<kernel::KernelBuilder> & b)
166: BlockOrientedKernel("p2s_16_compress",
167              {Binding{b->getStreamSetTy(16, 1), "basisBits"}, Binding{b->getStreamSetTy(1, 1), "deletionCounts"}},
168              {Binding{b->getStreamSetTy(1, 16), "i16Stream", BoundedRate(0, 1)}},
169              {},
170              {},
171              {}) {
172
173}
174   
175   
176}
Note: See TracBrowser for help on using the repository browser.