source: icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.cpp @ 5317

Last change on this file since 5317 was 5317, checked in by cameron, 3 years ago

Simplify stream set access; better naming of access functions

File size: 9.4 KB
Line 
1#include "p2s_kernel.h"
2#include "IR_Gen/idisa_builder.h"  // for IDISA_Builder
3#include "llvm/IR/Constant.h"      // for Constant
4#include "llvm/IR/Constants.h"     // for ConstantInt
5#include "llvm/IR/DerivedTypes.h"  // for PointerType, VectorType
6#include "llvm/IR/Function.h"      // for Function, Function::arg_iterator
7#include <llvm/IR/Module.h>
8#include <kernels/streamset.h>
9namespace llvm { class Value; }
10
11using namespace llvm;
12using namespace parabix;
13
14namespace kernel{
15       
16void p2s_step(IDISA::IDISA_Builder * iBuilder, Value * p0, Value * p1, Value * hi_mask, unsigned shift, Value * &s1, Value * &s0) {
17    Value * t0 = iBuilder->simd_if(1, hi_mask, p0, iBuilder->simd_srli(16, p1, shift));
18    Value * t1 = iBuilder->simd_if(1, hi_mask, iBuilder->simd_slli(16, p0, shift), p1);
19    s1 = iBuilder->esimd_mergeh(8, t1, t0);
20    s0 = iBuilder->esimd_mergel(8, t1, t0);
21}
22
23inline void p2s(IDISA::IDISA_Builder * iBuilder, Value * p[], Value * s[]) {
24    Value * bit00004444[2];
25    Value * bit22226666[2];
26    Value * bit11115555[2];
27    Value * bit33337777[2];
28    p2s_step(iBuilder, p[0], p[4], iBuilder->simd_himask(8), 4, bit00004444[1], bit00004444[0]);
29    p2s_step(iBuilder, p[1], p[5], iBuilder->simd_himask(8), 4, bit11115555[1], bit11115555[0]);
30    p2s_step(iBuilder, p[2], p[6], iBuilder->simd_himask(8), 4, bit22226666[1], bit22226666[0]);
31    p2s_step(iBuilder, p[3], p[7], iBuilder->simd_himask(8), 4, bit33337777[1], bit33337777[0]);
32    Value * bit00224466[4];
33    Value * bit11335577[4];
34    for (unsigned j = 0; j<2; j++) {
35        p2s_step(iBuilder, bit00004444[j], bit22226666[j],iBuilder->simd_himask(4), 2, bit00224466[2*j+1], bit00224466[2*j]);
36        p2s_step(iBuilder, bit11115555[j], bit33337777[j],iBuilder->simd_himask(4), 2, bit11335577[2*j+1], bit11335577[2*j]);
37    }
38    for (unsigned j = 0; j<4; j++) {
39        p2s_step(iBuilder, bit00224466[j], bit11335577[j], iBuilder->simd_himask(2), 1, s[2*j+1], s[2*j]);
40    }
41}
42               
43void P2SKernel::generateDoBlockMethod() {
44    Value * p_bitblock[8];
45    for (unsigned i = 0; i < 8; i++) {
46        p_bitblock[i] = loadInputStreamBlock("basisBits", iBuilder->getInt32(i));
47    }
48    Value * s_bytepack[8];
49    p2s(iBuilder, p_bitblock, s_bytepack);
50    for (unsigned j = 0; j < 8; ++j) {
51        storeOutputStreamPack("byteStream", iBuilder->getInt32(0), iBuilder->getInt32(j), s_bytepack[j]);
52    }
53}
54
55P2SKernel::P2SKernel(IDISA::IDISA_Builder * iBuilder)
56: BlockOrientedKernel(iBuilder, "p2s",
57              {Binding{iBuilder->getStreamSetTy(8, 1), "basisBits"}},
58              {Binding{iBuilder->getStreamSetTy(1, 8), "byteStream"}},
59              {}, {}, {}) {
60
61}
62
63
64void P2SKernelWithCompressedOutput::generateDoBlockMethod() {
65    IntegerType * i32 = iBuilder->getInt32Ty();
66    PointerType * bitBlockPtrTy = PointerType::get(iBuilder->getBitBlockType(), 0);
67
68    Value * basisBits[8];
69    for (unsigned i = 0; i < 8; i++) {
70        basisBits[i] = loadInputStreamBlock("basisBits", iBuilder->getInt32(i));
71    }
72    Value * bytePack[8];
73    p2s(iBuilder, basisBits, bytePack);
74
75    unsigned units_per_register = iBuilder->getBitBlockWidth()/8;
76    Value * delCountBlock_ptr = getInputStreamBlockPtr("deletionCounts", iBuilder->getInt32(0));
77    Value * unit_counts = iBuilder->fwCast(units_per_register, iBuilder->CreateBlockAlignedLoad(delCountBlock_ptr));
78
79    Value * output_ptr = getOutputStreamBlockPtr("byteStream", iBuilder->getInt32(0));
80    output_ptr = iBuilder->CreatePointerCast(output_ptr, iBuilder->getInt8PtrTy());
81    Value * offset = iBuilder->getInt32(0);
82    for (unsigned j = 0; j < 8; ++j) {
83        iBuilder->CreateStore(bytePack[j], iBuilder->CreateBitCast(iBuilder->CreateGEP(output_ptr, offset), bitBlockPtrTy));
84        offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(j)), i32);
85    }
86
87    Value * unitsGenerated = getProducedItemCount("byteStream"); // units generated to buffer
88    unitsGenerated = iBuilder->CreateAdd(unitsGenerated, iBuilder->CreateZExt(offset, iBuilder->getSizeTy()));
89    setProducedItemCount("byteStream", unitsGenerated);
90}
91   
92P2SKernelWithCompressedOutput::P2SKernelWithCompressedOutput(IDISA::IDISA_Builder * iBuilder)
93: BlockOrientedKernel(iBuilder, "p2s_compress",
94              {Binding{iBuilder->getStreamSetTy(8, 1), "basisBits"}, Binding{iBuilder->getStreamSetTy(1, 1), "deletionCounts"}},
95              {Binding{iBuilder->getStreamSetTy(1, 8), "byteStream"}},
96              {}, {}, {}) {
97    setDoBlockUpdatesProducedItemCountsAttribute(true);
98}
99   
100   
101
102void P2S16Kernel::generateDoBlockMethod() {
103    Value * hi_input[8];
104    for (unsigned j = 0; j < 8; ++j) {
105        hi_input[j] = loadInputStreamBlock("basisBits", iBuilder->getInt32(j));
106    }
107    Value * hi_bytes[8];
108    p2s(iBuilder, hi_input, hi_bytes);   
109    Value * lo_input[8];
110    for (unsigned j = 0; j < 8; ++j) {
111        lo_input[j] = loadInputStreamBlock("basisBits", iBuilder->getInt32(j + 8));
112    }
113    Value * lo_bytes[8];
114    p2s(iBuilder, lo_input, lo_bytes);   
115    for (unsigned j = 0; j < 8; ++j) {
116        Value * merge0 = iBuilder->bitCast(iBuilder->esimd_mergel(8, hi_bytes[j], lo_bytes[j]));
117        Value * merge1 = iBuilder->bitCast(iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
118        storeOutputStreamPack("i16Stream", iBuilder->getInt32(0), iBuilder->getInt32(2 * j), merge0);
119        storeOutputStreamPack("i16Stream", iBuilder->getInt32(0), iBuilder->getInt32(2 * j + 1), merge1);
120    }
121}
122   
123
124P2S16Kernel::P2S16Kernel(IDISA::IDISA_Builder * iBuilder)
125: BlockOrientedKernel(iBuilder, "p2s_16",
126              {Binding{iBuilder->getStreamSetTy(16, 1), "basisBits"}},
127              {Binding{iBuilder->getStreamSetTy(1, 16), "i16Stream"}},
128              {}, {}, {}) {
129
130}
131
132   
133void P2S16KernelWithCompressedOutput::generateDoBlockMethod() {
134    IntegerType * i32Ty = iBuilder->getInt32Ty();
135    PointerType * int16PtrTy = iBuilder->getInt16Ty()->getPointerTo();
136    PointerType * bitBlockPtrTy = iBuilder->getBitBlockType()->getPointerTo();
137    ConstantInt * stride = iBuilder->getSize(iBuilder->getStride());
138
139    Value * hi_input[8];
140    for (unsigned j = 0; j < 8; ++j) {
141        hi_input[j] = loadInputStreamBlock("basisBits", iBuilder->getInt32(j));
142    }
143    Value * hi_bytes[8];
144    p2s(iBuilder, hi_input, hi_bytes);
145
146    Value * lo_input[8];
147    for (unsigned j = 0; j < 8; ++j) {
148        lo_input[j] = loadInputStreamBlock("basisBits", iBuilder->getInt32(j + 8));
149    }
150    Value * lo_bytes[8];
151    p2s(iBuilder, lo_input, lo_bytes);
152
153    Value * delCountBlock_ptr = getInputStreamBlockPtr("deletionCounts", iBuilder->getInt32(0));
154    Value * unit_counts = iBuilder->fwCast(iBuilder->getBitBlockWidth() / 16, iBuilder->CreateBlockAlignedLoad(delCountBlock_ptr));
155
156
157    Value * u16_output_ptr = getOutputStreamBlockPtr("i16Stream", iBuilder->getInt32(0));
158    u16_output_ptr = iBuilder->CreatePointerCast(u16_output_ptr, int16PtrTy);
159    Value * i16UnitsGenerated = getProducedItemCount("i16Stream"); // units generated to buffer
160    u16_output_ptr = iBuilder->CreateGEP(u16_output_ptr, iBuilder->CreateURem(i16UnitsGenerated, stride));
161
162    Value * offset = ConstantInt::get(i32Ty, 0);
163
164    for (unsigned j = 0; j < 8; ++j) {
165        Value * merge0 = iBuilder->bitCast(iBuilder->esimd_mergel(8, hi_bytes[j], lo_bytes[j]));
166        iBuilder->CreateAlignedStore(merge0, iBuilder->CreateBitCast(iBuilder->CreateGEP(u16_output_ptr, offset), bitBlockPtrTy), 1);
167        offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(2 * j)), i32Ty);
168
169        Value * merge1 = iBuilder->bitCast(iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
170        iBuilder->CreateAlignedStore(merge1, iBuilder->CreateBitCast(iBuilder->CreateGEP(u16_output_ptr, offset), bitBlockPtrTy), 1);
171        offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(2 * j + 1)), i32Ty);
172    }   
173    Value * i16UnitsFinal = iBuilder->CreateAdd(i16UnitsGenerated, iBuilder->CreateZExt(offset, iBuilder->getSizeTy()));
174    setProducedItemCount("i16Stream", i16UnitsFinal);
175    const auto b  = getOutputStreamSetBuffer("i16Stream");
176
177    if (auto cb = dyn_cast<CircularCopybackBuffer>(b)) {
178        BasicBlock * copyBack = CreateBasicBlock("copyBack");
179        BasicBlock * p2sCompressDone = CreateBasicBlock("p2sCompressDone");
180       
181        // Check for overflow into the buffer overflow area and copy data back if so.
182        Value * accessible = cb->getLinearlyAccessibleItems(i16UnitsGenerated);
183        offset = iBuilder->CreateZExt(offset, iBuilder->getSizeTy());
184        Value * wraparound = iBuilder->CreateICmpULT(accessible, offset);
185        iBuilder->CreateCondBr(wraparound, copyBack, p2sCompressDone);
186       
187        iBuilder->SetInsertPoint(copyBack);
188        Value * copyItems = iBuilder->CreateSub(offset, accessible);
189        cb->createCopyBack(getStreamSetBufferPtr("i16Stream"), copyItems);
190        iBuilder->CreateBr(p2sCompressDone);
191        iBuilder->SetInsertPoint(p2sCompressDone);
192    }
193}
194   
195P2S16KernelWithCompressedOutput::P2S16KernelWithCompressedOutput(IDISA::IDISA_Builder * b)
196: BlockOrientedKernel(b, "p2s_16_compress",
197              {Binding{b->getStreamSetTy(16, 1), "basisBits"}, Binding{b->getStreamSetTy(1, 1), "deletionCounts"}},
198              {Binding{b->getStreamSetTy(1, 16), "i16Stream", b->getStride()}},
199              {},
200              {},
201              {Binding{b->getSizeTy(), "unitsGenerated"}, Binding{b->getSizeTy(), "unitsWritten"}}) {
202    setDoBlockUpdatesProducedItemCountsAttribute(true);
203}
204   
205   
206}
Note: See TracBrowser for help on using the repository browser.