source: icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.cpp @ 5508

Last change on this file since 5508 was 5440, checked in by nmedfort, 2 years ago

Large refactoring step. Removed IR generation code from Kernel (formerly KernelBuilder) and moved it into the new KernelBuilder class.

File size: 8.7 KB
RevLine 
[4987]1#include "p2s_kernel.h"
[5436]2//#include "llvm/IR/Constant.h"      // for Constant
3//#include "llvm/IR/Constants.h"     // for ConstantInt
4//#include "llvm/IR/DerivedTypes.h"  // for PointerType, VectorType
5//#include "llvm/IR/Function.h"      // for Function, Function::arg_iterator
6//#include <llvm/IR/Module.h>
[5267]7#include <kernels/streamset.h>
[5436]8#include <kernels/kernel_builder.h>
9
[5267]10namespace llvm { class Value; }
[4987]11
[5260]12using namespace llvm;
[5303]13using namespace parabix;
[5040]14
[4987]15namespace kernel{
16       
[5440]17void p2s_step(const std::unique_ptr<KernelBuilder> & iBuilder, Value * p0, Value * p1, Value * hi_mask, unsigned shift, Value * &s1, Value * &s0) {
[4987]18    Value * t0 = iBuilder->simd_if(1, hi_mask, p0, iBuilder->simd_srli(16, p1, shift));
19    Value * t1 = iBuilder->simd_if(1, hi_mask, iBuilder->simd_slli(16, p0, shift), p1);
20    s1 = iBuilder->esimd_mergeh(8, t1, t0);
21    s0 = iBuilder->esimd_mergel(8, t1, t0);
22}
23
[5440]24inline void p2s(const std::unique_ptr<KernelBuilder> & iBuilder, Value * p[], Value * s[]) {
[4987]25    Value * bit00004444[2];
26    Value * bit22226666[2];
27    Value * bit11115555[2];
28    Value * bit33337777[2];
29    p2s_step(iBuilder, p[0], p[4], iBuilder->simd_himask(8), 4, bit00004444[1], bit00004444[0]);
30    p2s_step(iBuilder, p[1], p[5], iBuilder->simd_himask(8), 4, bit11115555[1], bit11115555[0]);
31    p2s_step(iBuilder, p[2], p[6], iBuilder->simd_himask(8), 4, bit22226666[1], bit22226666[0]);
32    p2s_step(iBuilder, p[3], p[7], iBuilder->simd_himask(8), 4, bit33337777[1], bit33337777[0]);
33    Value * bit00224466[4];
34    Value * bit11335577[4];
35    for (unsigned j = 0; j<2; j++) {
36        p2s_step(iBuilder, bit00004444[j], bit22226666[j],iBuilder->simd_himask(4), 2, bit00224466[2*j+1], bit00224466[2*j]);
37        p2s_step(iBuilder, bit11115555[j], bit33337777[j],iBuilder->simd_himask(4), 2, bit11335577[2*j+1], bit11335577[2*j]);
38    }
39    for (unsigned j = 0; j<4; j++) {
40        p2s_step(iBuilder, bit00224466[j], bit11335577[j], iBuilder->simd_himask(2), 1, s[2*j+1], s[2*j]);
41    }
42}
43               
[5440]44void P2SKernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
[5071]45    Value * p_bitblock[8];
46    for (unsigned i = 0; i < 8; i++) {
[5440]47        p_bitblock[i] = iBuilder->loadInputStreamBlock("basisBits", iBuilder->getInt32(i));
[4987]48    }
[5071]49    Value * s_bytepack[8];
50    p2s(iBuilder, p_bitblock, s_bytepack);
[4987]51    for (unsigned j = 0; j < 8; ++j) {
[5440]52        iBuilder->storeOutputStreamPack("byteStream", iBuilder->getInt32(0), iBuilder->getInt32(j), s_bytepack[j]);
[4987]53    }
54}
[5260]55
[5440]56void P2SKernelWithCompressedOutput::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
[5285]57    IntegerType * i32 = iBuilder->getInt32Ty();
58    PointerType * bitBlockPtrTy = PointerType::get(iBuilder->getBitBlockType(), 0);
[5260]59
[5246]60    Value * basisBits[8];
[5079]61    for (unsigned i = 0; i < 8; i++) {
[5440]62        basisBits[i] = iBuilder->loadInputStreamBlock("basisBits", iBuilder->getInt32(i));
[5079]63    }
[5246]64    Value * bytePack[8];
65    p2s(iBuilder, basisBits, bytePack);
[5260]66
[5079]67    unsigned units_per_register = iBuilder->getBitBlockWidth()/8;
[5440]68    Value * delCountBlock_ptr = iBuilder->getInputStreamBlockPtr("deletionCounts", iBuilder->getInt32(0));
[5260]69    Value * unit_counts = iBuilder->fwCast(units_per_register, iBuilder->CreateBlockAlignedLoad(delCountBlock_ptr));
70
[5440]71    Value * output_ptr = iBuilder->getOutputStreamBlockPtr("byteStream", iBuilder->getInt32(0));
[5307]72    output_ptr = iBuilder->CreatePointerCast(output_ptr, iBuilder->getInt8PtrTy());
[5260]73    Value * offset = iBuilder->getInt32(0);
[5079]74    for (unsigned j = 0; j < 8; ++j) {
[5297]75        iBuilder->CreateStore(bytePack[j], iBuilder->CreateBitCast(iBuilder->CreateGEP(output_ptr, offset), bitBlockPtrTy));
[5079]76        offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(j)), i32);
77    }
[5307]78
[5440]79    Value * unitsGenerated = iBuilder->getProducedItemCount("byteStream"); // units generated to buffer
[5261]80    unitsGenerated = iBuilder->CreateAdd(unitsGenerated, iBuilder->CreateZExt(offset, iBuilder->getSizeTy()));
[5440]81    iBuilder->setProducedItemCount("byteStream", unitsGenerated);
[5079]82}
[5261]83
[5440]84void P2S16Kernel::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
[5006]85    Value * hi_input[8];
86    for (unsigned j = 0; j < 8; ++j) {
[5440]87        hi_input[j] = iBuilder->loadInputStreamBlock("basisBits", iBuilder->getInt32(j));
[5006]88    }
89    Value * hi_bytes[8];
[5285]90    p2s(iBuilder, hi_input, hi_bytes);   
[5006]91    Value * lo_input[8];
92    for (unsigned j = 0; j < 8; ++j) {
[5440]93        lo_input[j] = iBuilder->loadInputStreamBlock("basisBits", iBuilder->getInt32(j + 8));
[5006]94    }
95    Value * lo_bytes[8];
[5285]96    p2s(iBuilder, lo_input, lo_bytes);   
[5006]97    for (unsigned j = 0; j < 8; ++j) {
[5007]98        Value * merge0 = iBuilder->bitCast(iBuilder->esimd_mergel(8, hi_bytes[j], lo_bytes[j]));
99        Value * merge1 = iBuilder->bitCast(iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
[5440]100        iBuilder->storeOutputStreamPack("i16Stream", iBuilder->getInt32(0), iBuilder->getInt32(2 * j), merge0);
101        iBuilder->storeOutputStreamPack("i16Stream", iBuilder->getInt32(0), iBuilder->getInt32(2 * j + 1), merge1);
[5006]102    }
[4987]103}
[5347]104       
[5440]105void P2S16KernelWithCompressedOutput::generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & iBuilder) {
[5285]106    IntegerType * i32Ty = iBuilder->getInt32Ty();
[5307]107    PointerType * int16PtrTy = iBuilder->getInt16Ty()->getPointerTo();
[5285]108    PointerType * bitBlockPtrTy = iBuilder->getBitBlockType()->getPointerTo();
[5307]109    ConstantInt * stride = iBuilder->getSize(iBuilder->getStride());
[5297]110
[5009]111    Value * hi_input[8];
112    for (unsigned j = 0; j < 8; ++j) {
[5440]113        hi_input[j] = iBuilder->loadInputStreamBlock("basisBits", iBuilder->getInt32(j));
[5009]114    }
115    Value * hi_bytes[8];
116    p2s(iBuilder, hi_input, hi_bytes);
[5297]117
[5009]118    Value * lo_input[8];
119    for (unsigned j = 0; j < 8; ++j) {
[5440]120        lo_input[j] = iBuilder->loadInputStreamBlock("basisBits", iBuilder->getInt32(j + 8));
[5009]121    }
122    Value * lo_bytes[8];
123    p2s(iBuilder, lo_input, lo_bytes);
[5297]124
[5440]125    Value * delCountBlock_ptr = iBuilder->getInputStreamBlockPtr("deletionCounts", iBuilder->getInt32(0));
[5260]126    Value * unit_counts = iBuilder->fwCast(iBuilder->getBitBlockWidth() / 16, iBuilder->CreateBlockAlignedLoad(delCountBlock_ptr));
[5297]127
[5307]128
[5440]129    Value * u16_output_ptr = iBuilder->getOutputStreamBlockPtr("i16Stream", iBuilder->getInt32(0));
[5307]130    u16_output_ptr = iBuilder->CreatePointerCast(u16_output_ptr, int16PtrTy);
[5440]131    Value * i16UnitsGenerated = iBuilder->getProducedItemCount("i16Stream"); // units generated to buffer
[5307]132    u16_output_ptr = iBuilder->CreateGEP(u16_output_ptr, iBuilder->CreateURem(i16UnitsGenerated, stride));
[5297]133
[5267]134    Value * offset = ConstantInt::get(i32Ty, 0);
[5297]135
[5009]136    for (unsigned j = 0; j < 8; ++j) {
137        Value * merge0 = iBuilder->bitCast(iBuilder->esimd_mergel(8, hi_bytes[j], lo_bytes[j]));
[5071]138        iBuilder->CreateAlignedStore(merge0, iBuilder->CreateBitCast(iBuilder->CreateGEP(u16_output_ptr, offset), bitBlockPtrTy), 1);
[5267]139        offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(2 * j)), i32Ty);
[5297]140
141        Value * merge1 = iBuilder->bitCast(iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
[5071]142        iBuilder->CreateAlignedStore(merge1, iBuilder->CreateBitCast(iBuilder->CreateGEP(u16_output_ptr, offset), bitBlockPtrTy), 1);
[5267]143        offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(2 * j + 1)), i32Ty);
[5307]144    }   
[5303]145    Value * i16UnitsFinal = iBuilder->CreateAdd(i16UnitsGenerated, iBuilder->CreateZExt(offset, iBuilder->getSizeTy()));
[5440]146    iBuilder->setProducedItemCount("i16Stream", i16UnitsFinal);
[5006]147}
[5347]148
[5436]149P2SKernel::P2SKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder)
[5435]150: BlockOrientedKernel("p2s",
[5347]151              {Binding{iBuilder->getStreamSetTy(8, 1), "basisBits"}},
152              {Binding{iBuilder->getStreamSetTy(1, 8), "byteStream"}},
153              {}, {}, {}) {
154}
155
[5436]156P2SKernelWithCompressedOutput::P2SKernelWithCompressedOutput(const std::unique_ptr<kernel::KernelBuilder> & iBuilder)
[5435]157: BlockOrientedKernel("p2s_compress",
[5347]158              {Binding{iBuilder->getStreamSetTy(8, 1), "basisBits"}, Binding{iBuilder->getStreamSetTy(1, 1), "deletionCounts"}},
159                      {Binding{iBuilder->getStreamSetTy(1, 8), "byteStream", MaxRatio(1)}},
160              {}, {}, {}) {
161}
162
[5436]163P2S16Kernel::P2S16Kernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder)
[5435]164: BlockOrientedKernel("p2s_16",
[5347]165              {Binding{iBuilder->getStreamSetTy(16, 1), "basisBits"}},
166              {Binding{iBuilder->getStreamSetTy(1, 16), "i16Stream"}},
167              {}, {}, {}) {
168}
169
170
[5436]171P2S16KernelWithCompressedOutput::P2S16KernelWithCompressedOutput(const std::unique_ptr<kernel::KernelBuilder> & b)
[5435]172: BlockOrientedKernel("p2s_16_compress",
[5307]173              {Binding{b->getStreamSetTy(16, 1), "basisBits"}, Binding{b->getStreamSetTy(1, 1), "deletionCounts"}},
[5328]174              {Binding{b->getStreamSetTy(1, 16), "i16Stream", MaxRatio(1)}},
[5283]175              {},
176              {},
[5325]177              {}) {
[5347]178
[5109]179}
[5261]180   
181   
182}
Note: See TracBrowser for help on using the repository browser.