source: icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.cpp @ 5238

Last change on this file since 5238 was 5238, checked in by cameron, 2 years ago

IR_Gen subdirectory for all IR generation utility functions

File size: 10.8 KB
RevLine 
[4987]1#include "p2s_kernel.h"
2#include "kernels/kernel.h"
[5238]3#include "IR_Gen/idisa_builder.h"
[5009]4#include <llvm/IR/Type.h>
5#include <iostream>
[5040]6#include <stdint.h>
7#include <llvm/Support/FileSystem.h>
8#include <llvm/Support/raw_ostream.h>
[4987]9
[5040]10
11
[4987]12namespace kernel{
13       
14void p2s_step(IDISA::IDISA_Builder * iBuilder, Value * p0, Value * p1, Value * hi_mask, unsigned shift, Value * &s1, Value * &s0) {
15    Value * t0 = iBuilder->simd_if(1, hi_mask, p0, iBuilder->simd_srli(16, p1, shift));
16    Value * t1 = iBuilder->simd_if(1, hi_mask, iBuilder->simd_slli(16, p0, shift), p1);
17    s1 = iBuilder->esimd_mergeh(8, t1, t0);
18    s0 = iBuilder->esimd_mergel(8, t1, t0);
19}
20
[4988]21inline void p2s(IDISA::IDISA_Builder * iBuilder, Value * p[], Value * s[]) {
[4987]22    Value * bit00004444[2];
23    Value * bit22226666[2];
24    Value * bit11115555[2];
25    Value * bit33337777[2];
26    p2s_step(iBuilder, p[0], p[4], iBuilder->simd_himask(8), 4, bit00004444[1], bit00004444[0]);
27    p2s_step(iBuilder, p[1], p[5], iBuilder->simd_himask(8), 4, bit11115555[1], bit11115555[0]);
28    p2s_step(iBuilder, p[2], p[6], iBuilder->simd_himask(8), 4, bit22226666[1], bit22226666[0]);
29    p2s_step(iBuilder, p[3], p[7], iBuilder->simd_himask(8), 4, bit33337777[1], bit33337777[0]);
30
31    Value * bit00224466[4];
32    Value * bit11335577[4];
33    for (unsigned j = 0; j<2; j++) {
34        p2s_step(iBuilder, bit00004444[j], bit22226666[j],iBuilder->simd_himask(4), 2, bit00224466[2*j+1], bit00224466[2*j]);
35        p2s_step(iBuilder, bit11115555[j], bit33337777[j],iBuilder->simd_himask(4), 2, bit11335577[2*j+1], bit11335577[2*j]);
36    }
37    for (unsigned j = 0; j<4; j++) {
38        p2s_step(iBuilder, bit00224466[j], bit11335577[j], iBuilder->simd_himask(2), 1, s[2*j+1], s[2*j]);
39    }
40}
41               
[5074]42void p2sKernel::generateDoBlockMethod() {
[5202]43    auto savePoint = iBuilder->saveIP();
[5071]44    Module * m = iBuilder->getModule();
[5074]45   
[5071]46    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
47   
48    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
49   
[5097]50    Value * self = getParameter(doBlockFunction, "self");
51    Value * blockNo = getScalarField(self, blockNoScalar);
[5104]52    Value * basisBitsBlock_ptr = getStreamSetBlockPtr(self, "basisBits", blockNo);
53    Value * byteStreamBlock_ptr = getStreamSetBlockPtr(self, "byteStream", blockNo);
[5097]54
[5071]55    Value * p_bitblock[8];
56    for (unsigned i = 0; i < 8; i++) {
57        p_bitblock[i] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(i)});
[4987]58    }
[5071]59    Value * s_bytepack[8];
60    p2s(iBuilder, p_bitblock, s_bytepack);
[4987]61    for (unsigned j = 0; j < 8; ++j) {
[5071]62        iBuilder->CreateBlockAlignedStore(s_bytepack[j], byteStreamBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(j)});
[4987]63    }
[5071]64    iBuilder->CreateRetVoid();
65    iBuilder->restoreIP(savePoint);
[4987]66}
[5071]67       
[5079]68void p2sKernel_withCompressedOutput::generateDoBlockMethod() {
[5202]69    auto savePoint = iBuilder->saveIP();
[5079]70    Module * m = iBuilder->getModule();
71    Type * i8PtrTy = iBuilder->getInt8PtrTy(); 
72    Type * i32 = iBuilder->getIntNTy(32); 
73    Type * bitBlockPtrTy = llvm::PointerType::get(iBuilder->getBitBlockType(), 0); 
74   
75    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
76   
77    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
[5097]78    Value * self = getParameter(doBlockFunction, "self");
79    Value * blockNo = getScalarField(self, blockNoScalar);
[5104]80    Value * basisBitsBlock_ptr = getStreamSetBlockPtr(self, "basisBits", blockNo);
81    Value * delCountBlock_ptr = getStreamSetBlockPtr(self, "deletionCounts", blockNo);
82    Value * byteStreamBlock_ptr = getStreamSetBlockPtr(self, "byteStream", blockNo);
83   
[5079]84    Value * p_bitblock[8];
85    for (unsigned i = 0; i < 8; i++) {
86        p_bitblock[i] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(i)});
87    }
88    Value * s_bytepack[8];
89    p2s(iBuilder, p_bitblock, s_bytepack);
90   
91    unsigned units_per_register = iBuilder->getBitBlockWidth()/8;
92   
93    Value * unit_counts = iBuilder->fwCast(units_per_register, iBuilder->CreateBlockAlignedLoad(delCountBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
94   
95    Value * output_ptr = iBuilder->CreateBitCast(byteStreamBlock_ptr, i8PtrTy);
96    Value * offset = ConstantInt::get(i32, 0);
97   
98    for (unsigned j = 0; j < 8; ++j) {
99        iBuilder->CreateAlignedStore(s_bytepack[j], iBuilder->CreateBitCast(iBuilder->CreateGEP(output_ptr, offset), bitBlockPtrTy), 1);
100        offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(j)), i32);
101    }
[5111]102    iBuilder->CreateRetVoid();
[5079]103    iBuilder->restoreIP(savePoint);
104}
[5217]105
[5074]106void p2s_16Kernel::generateDoBlockMethod() {
[5202]107    auto savePoint = iBuilder->saveIP();
[5071]108    Module * m = iBuilder->getModule();
[5074]109   
[5071]110    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
111   
112    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
[5097]113    Value * self = getParameter(doBlockFunction, "self");
114    Value * blockNo = getScalarField(self, blockNoScalar);
[5104]115    Value * basisBitsBlock_ptr = getStreamSetBlockPtr(self, "basisBits", blockNo);
116    Value * i16StreamBlock_ptr = getStreamSetBlockPtr(self, "i16Stream", blockNo);
[5071]117   
[5006]118    Value * hi_input[8];
119    for (unsigned j = 0; j < 8; ++j) {
[5229]120        hi_input[j] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(j)});
[5006]121    }
122    Value * hi_bytes[8];
123    p2s(iBuilder, hi_input, hi_bytes);
124   
125    Value * lo_input[8];
126    for (unsigned j = 0; j < 8; ++j) {
[5229]127        lo_input[j] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0), iBuilder->getInt32(j+8)});
[5006]128    }
129    Value * lo_bytes[8];
130    p2s(iBuilder, lo_input, lo_bytes);
131   
132    for (unsigned j = 0; j < 8; ++j) {
[5007]133        Value * merge0 = iBuilder->bitCast(iBuilder->esimd_mergel(8, hi_bytes[j], lo_bytes[j]));
134        Value * merge1 = iBuilder->bitCast(iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
[5217]135        // iBuilder->getInt32(0),
136        iBuilder->CreateBlockAlignedStore(merge0, i16StreamBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(2*j)});
137        iBuilder->CreateBlockAlignedStore(merge1, i16StreamBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(2*j+1)});
[5006]138    }
[5071]139    iBuilder->CreateRetVoid();
140    iBuilder->restoreIP(savePoint);
[4987]141}
[5079]142
143void p2s_16Kernel_withCompressedOutput::generateDoBlockMethod() {
[5202]144    auto savePoint = iBuilder->saveIP();
[5071]145    Module * m = iBuilder->getModule();
[5217]146    Type * i32 = iBuilder->getIntNTy(32);
147    Type * bitBlockPtrTy = llvm::PointerType::get(iBuilder->getBitBlockType(), 0);
[5109]148
[5071]149    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
[5217]150
[5071]151    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
[5185]152    Constant * stride = ConstantInt::get(iBuilder->getSizeTy(), iBuilder->getStride());
153
[5097]154    Value * self = getParameter(doBlockFunction, "self");
155    Value * blockNo = getScalarField(self, blockNoScalar);
[5104]156    Value * basisBitsBlock_ptr = getStreamSetBlockPtr(self, "basisBits", blockNo);
157    Value * delCountBlock_ptr = getStreamSetBlockPtr(self, "deletionCounts", blockNo);
[5185]158    Value * i16UnitsGenerated = getProducedItemCount(self); // units generated to buffer
159    Value * i16BlockNo = iBuilder->CreateUDiv(i16UnitsGenerated, stride);
[5217]160
[5185]161    Value * i16StreamBase_ptr = iBuilder->CreateBitCast(getStreamSetBlockPtr(self, "i16Stream", i16BlockNo), PointerType::get(iBuilder->getInt16Ty(), 0));
[5217]162
[5185]163    Value * u16_output_ptr = iBuilder->CreateGEP(i16StreamBase_ptr, iBuilder->CreateURem(i16UnitsGenerated, stride));
[5109]164
[5217]165
[5009]166    Value * hi_input[8];
167    for (unsigned j = 0; j < 8; ++j) {
[5071]168        hi_input[j] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(j)});
[5009]169    }
170    Value * hi_bytes[8];
171    p2s(iBuilder, hi_input, hi_bytes);
[5217]172
[5009]173    Value * lo_input[8];
174    for (unsigned j = 0; j < 8; ++j) {
[5071]175        lo_input[j] = iBuilder->CreateBlockAlignedLoad(basisBitsBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(j+8)});
[5009]176    }
177    Value * lo_bytes[8];
178    p2s(iBuilder, lo_input, lo_bytes);
[5217]179
180    const auto UTF_16_units_per_register = iBuilder->getBitBlockWidth() / 16;
181
[5071]182    Value * unit_counts = iBuilder->fwCast(UTF_16_units_per_register, iBuilder->CreateBlockAlignedLoad(delCountBlock_ptr, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
[5217]183
[5076]184    Value * offset = ConstantInt::get(i32, 0);
[5217]185
[5009]186    for (unsigned j = 0; j < 8; ++j) {
187        Value * merge0 = iBuilder->bitCast(iBuilder->esimd_mergel(8, hi_bytes[j], lo_bytes[j]));
188        Value * merge1 = iBuilder->bitCast(iBuilder->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
[5071]189        iBuilder->CreateAlignedStore(merge0, iBuilder->CreateBitCast(iBuilder->CreateGEP(u16_output_ptr, offset), bitBlockPtrTy), 1);
[5076]190        offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(2*j)), i32);
[5071]191        iBuilder->CreateAlignedStore(merge1, iBuilder->CreateBitCast(iBuilder->CreateGEP(u16_output_ptr, offset), bitBlockPtrTy), 1);
[5076]192        offset = iBuilder->CreateZExt(iBuilder->CreateExtractElement(unit_counts, iBuilder->getInt32(2*j+1)), i32);
[5009]193    }
[5217]194
[5109]195    i16UnitsGenerated = iBuilder->CreateAdd(i16UnitsGenerated, iBuilder->CreateZExt(offset, iBuilder->getSizeTy()));
[5185]196    setProducedItemCount(self, i16UnitsGenerated);
[5109]197    iBuilder->CreateRetVoid();
[5071]198    iBuilder->restoreIP(savePoint);
[5006]199}
[5109]200
201void p2s_16Kernel_withCompressedOutput::generateFinalBlockMethod() {
[5202]202    auto savePoint = iBuilder->saveIP();
[5109]203    Module * m = iBuilder->getModule();
204    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
205    Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
206    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "fb_entry", finalBlockFunction, 0));
207    // Final Block arguments: self, remaining, then the standard DoBlock args.
208    Function::arg_iterator args = finalBlockFunction->arg_begin();
209    Value * self = &*(args++);
210    /* Skip "remaining" arg */ args++;
211    std::vector<Value *> doBlockArgs = {self};
212    while (args != finalBlockFunction->arg_end()){
213        doBlockArgs.push_back(&*args++);
214    }
[5185]215    Value * i16UnitsGenerated = getProducedItemCount(self); // units generated to buffer
216
[5109]217    iBuilder->CreateCall(doBlockFunction, doBlockArgs);
[5185]218    i16UnitsGenerated = getProducedItemCount(self); // units generated to buffer
219    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
[5202]220        Value * ssStructPtr = getStreamSetStructPtr(self, mStreamSetOutputs[i].name);
[5185]221        Value * producerPosPtr = mStreamSetOutputBuffers[i]->getProducerPosPtr(ssStructPtr);
[5192]222        iBuilder->CreateAtomicStoreRelease(i16UnitsGenerated, producerPosPtr);
[5193]223    }
[5109]224    iBuilder->CreateRetVoid();
225    iBuilder->restoreIP(savePoint);
[5009]226}
[5109]227   
228   
229}
Note: See TracBrowser for help on using the repository browser.