source: icGREP/icgrep-devel/icgrep/kernels/deletion.cpp @ 5261

Last change on this file since 5261 was 5261, checked in by cameron, 9 months ago

Move responsibility for ProducedItemCount into doSegment unless overridden

File size: 6.2 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "deletion.h"
7#include <IR_Gen/idisa_builder.h>
8#include <llvm/IR/Value.h>
9#include <llvm/IR/Module.h>
10
11using namespace llvm;
12
13namespace kernel {
14
15inline std::vector<Value *> parallel_prefix_deletion_masks(IDISA::IDISA_Builder * iBuilder, const unsigned fw, Value * del_mask) {
16    Value * m = iBuilder->simd_not(del_mask);
17    Value * mk = iBuilder->simd_slli(fw, del_mask, 1);
18    std::vector<Value *> move_masks;
19    for (unsigned shift = 1; shift < fw; shift *= 2) {
20        Value * mp = mk;
21        for (unsigned lookright = 1; lookright < fw; lookright *= 2) {
22            mp = iBuilder->simd_xor(mp, iBuilder->simd_slli(fw, mp, lookright));
23        }
24        Value * mv = iBuilder->simd_and(mp, m);
25        m = iBuilder->simd_or(iBuilder->simd_xor(m, mv), iBuilder->simd_srli(fw, mv, shift));
26        mk = iBuilder->simd_and(mk, iBuilder->simd_not(mp));
27        move_masks.push_back(mv);
28    }
29    return move_masks;
30}
31
32inline Value * apply_parallel_prefix_deletion(IDISA::IDISA_Builder * iBuilder, const unsigned fw, Value * del_mask, const std::vector<Value *> & mv, Value * strm) {
33    Value * s = iBuilder->simd_and(strm, iBuilder->simd_not(del_mask));
34    for (unsigned i = 0; i < mv.size(); i++) {
35        unsigned shift = 1 << i;
36        Value * t = iBuilder->simd_and(s, mv[i]);
37        s = iBuilder->simd_or(iBuilder->simd_xor(s, t), iBuilder->simd_srli(fw, t, shift));
38    }
39    return s;
40}
41
42inline Value * partial_sum_popcount(IDISA::IDISA_Builder * iBuilder, const unsigned fw, Value * mask) {
43    Value * field = iBuilder->simd_popcount(fw, mask);
44    const auto count = iBuilder->getBitBlockWidth() / fw;
45    for (unsigned move = 1; move < count; move *= 2) {
46        field = iBuilder->simd_add(fw, field, iBuilder->mvmd_slli(fw, field, move));
47    }
48    return field;
49}
50
51// Apply deletion to a set of stream_count input streams to produce a set of output streams.
52// Kernel inputs: stream_count data streams plus one del_mask stream
53// Outputs: the deleted streams, plus a partial sum popcount
54
55void DeletionKernel::generateDoBlockMethod() const {
56
57    auto savePoint = iBuilder->saveIP();
58    Module * m = iBuilder->getModule();
59
60    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
61
62    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
63
64    Value * self = getParameter(doBlockFunction, "self");
65
66    Value * blockNo = getScalarField(self, blockNoScalar);
67
68    Value * delMaskPtr = getStream(self, "delMaskSet", blockNo, iBuilder->getInt32(0));
69    Value * delMask = iBuilder->CreateBlockAlignedLoad(delMaskPtr);
70
71    std::vector<Value *> move_masks = parallel_prefix_deletion_masks(iBuilder, mDeletionFieldWidth, delMask);
72
73    for (unsigned j = 0; j < mStreamCount; ++j) {
74        Value * inputStreamPtr = getStream(self, "inputStreamSet", blockNo, iBuilder->getInt32(j));
75        Value * input = iBuilder->CreateBlockAlignedLoad(inputStreamPtr);
76
77        Value * output = apply_parallel_prefix_deletion(iBuilder, mDeletionFieldWidth, delMask, move_masks, input);
78
79        Value * outputStreamPtr = getStream(self, "outputStreamSet", blockNo, iBuilder->getInt32(j));
80        iBuilder->CreateBlockAlignedStore(output, outputStreamPtr);
81    }
82    Value * delCount = partial_sum_popcount(iBuilder, mDeletionFieldWidth, iBuilder->simd_not(delMask));
83    Value * delCountPtr = getStream(self, "deletionCounts", blockNo, iBuilder->getInt32(0));
84    iBuilder->CreateBlockAlignedStore(iBuilder->bitCast(delCount), delCountPtr);
85    /* Stream deletion has only been applied within fields; the actual number of data items
86     * has not yet changed.   */
87    Value * produced = getProducedItemCount(self, "outputStreamSet");
88    produced = iBuilder->CreateAdd(produced, iBuilder->getSize(iBuilder->getStride()));
89    setProducedItemCount(self, "outputStreamSet", produced);
90    setProducedItemCount(self, "deletionCounts", produced);
91    iBuilder->CreateRetVoid();
92    iBuilder->restoreIP(savePoint);
93}
94
95void DeletionKernel::generateFinalBlockMethod() const {
96    auto savePoint = iBuilder->saveIP();
97    Module * m = iBuilder->getModule();
98
99    unsigned blockSize = iBuilder->getBitBlockWidth();
100    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
101    Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
102
103    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", finalBlockFunction, 0));
104    Value * remainingBytes = getParameter(finalBlockFunction, "remainingBytes");
105    Value * self = getParameter(finalBlockFunction, "self");
106    Value * blockNo = getScalarField(self, blockNoScalar);
107    Value * remaining = iBuilder->CreateZExt(remainingBytes, iBuilder->getIntNTy(blockSize));
108    Value * EOF_del = iBuilder->bitCast(iBuilder->CreateShl(Constant::getAllOnesValue(iBuilder->getIntNTy(blockSize)), remaining));
109    Value * const delmaskPtr = getStream(self, "delMaskSet", blockNo, iBuilder->getInt32(0));
110    Value * const delmaskVal = iBuilder->CreateBlockAlignedLoad(delmaskPtr);
111    iBuilder->CreateBlockAlignedStore(iBuilder->CreateOr(EOF_del, delmaskVal), delmaskPtr);
112    iBuilder->CreateCall(doBlockFunction, {self});
113    /* Adjust the produced item count */
114    Value * produced = getProducedItemCount(self, "outputStreamSet");
115    produced = iBuilder->CreateSub(produced, iBuilder->getSize(iBuilder->getStride()));
116    produced =  iBuilder->CreateAdd(produced, remainingBytes);
117    setProducedItemCount(self, "outputStreamSet", produced);
118    setProducedItemCount(self, "deletionCounts", produced);
119
120    iBuilder->CreateRetVoid();
121    iBuilder->restoreIP(savePoint);
122}
123
124DeletionKernel::DeletionKernel(IDISA::IDISA_Builder * iBuilder, unsigned fw, unsigned streamCount) :
125KernelBuilder(iBuilder, "del",
126              {Binding{iBuilder->getStreamSetTy(streamCount), "inputStreamSet"},
127               Binding{iBuilder->getStreamSetTy(), "delMaskSet"}},
128              {Binding{iBuilder->getStreamSetTy(streamCount), "outputStreamSet"},
129               Binding{iBuilder->getStreamSetTy(), "deletionCounts"}},
130              {}, {}, {}),
131mDeletionFieldWidth(fw),
132mStreamCount(streamCount) {
133    mDoBlockUpdatesProducedItemCountsAttribute = true;
134}
135
136}
Note: See TracBrowser for help on using the repository browser.