source: icGREP/icgrep-devel/icgrep/kernels/deletion.cpp @ 5247

Last change on this file since 5247 was 5247, checked in by cameron, 2 years ago

Separate processedItemCounts and producedItemCounts for each stream set

File size: 5.8 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include <kernels/kernel.h>
7#include <kernels/deletion.h>
8#include <IR_Gen/idisa_builder.h>
9#include <llvm/IR/Value.h>
10
11std::vector<Value *> parallel_prefix_deletion_masks(IDISA::IDISA_Builder * iBuilder, unsigned fw, Value * del_mask) {
12    Value * m = iBuilder->simd_not(del_mask);
13    Value * mk = iBuilder->simd_slli(fw, del_mask, 1);
14    std::vector<Value *> move_masks;
15    for (unsigned shift = 1; shift < fw; shift *= 2) {
16        Value * mp = mk;
17        for (unsigned lookright = 1; lookright < fw; lookright *= 2) {
18            mp = iBuilder->simd_xor(mp, iBuilder->simd_slli(fw, mp, lookright));
19        }
20        Value * mv = iBuilder->simd_and(mp, m);
21        m = iBuilder->simd_or(iBuilder->simd_xor(m, mv), iBuilder->simd_srli(fw, mv, shift));
22        mk = iBuilder->simd_and(mk, iBuilder->simd_not(mp));
23        move_masks.push_back(mv);
24    }
25    return move_masks;
26}
27
28Value * apply_parallel_prefix_deletion(IDISA::IDISA_Builder * iBuilder, unsigned fw, Value * del_mask, std::vector<Value *> mv, Value * strm) {
29    Value * s = iBuilder->simd_and(strm, iBuilder->simd_not(del_mask));
30    for (unsigned i = 0; i < mv.size(); i++) {
31        unsigned shift = 1 << i;
32        Value * t = iBuilder->simd_and(s, mv[i]);
33        s = iBuilder->simd_or(iBuilder->simd_xor(s, t), iBuilder->simd_srli(fw, t, shift));
34    }
35    return s;
36}
37
38Value * partial_sum_popcount(IDISA::IDISA_Builder * iBuilder, unsigned fw, Value * mask) {
39    Value * per_field = iBuilder->simd_popcount(fw, mask);
40    for (unsigned move = 1; move < iBuilder->getBitBlockWidth()/fw; move *= 2) {
41        per_field = iBuilder->simd_add(fw, per_field, iBuilder->mvmd_slli(fw, per_field, move));
42    }
43    return per_field;
44}
45
46// Apply deletion to a set of stream_count input streams to produce a set of output streams.
47// Kernel inputs: stream_count data streams plus one del_mask stream
48// Outputs: the deleted streams, plus a partial sum popcount
49
50namespace kernel {
51
52void DeletionKernel::generateDoBlockMethod() const {
53    auto savePoint = iBuilder->saveIP();
54    Module * m = iBuilder->getModule();
55
56    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
57
58    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", doBlockFunction, 0));
59
60    Value * self = getParameter(doBlockFunction, "self");
61
62    Value * blockNo = getScalarField(self, blockNoScalar);
63
64    Value * inputStreamPtr = getStreamSetBlockPtr(self, "inputStreamSet", blockNo);
65
66    Value * delMaskPtr = getStreamSetBlockPtr(self, "delMaskSet", blockNo);
67
68    Value * outputStreamPtr = getStreamSetBlockPtr(self, "outputStreamSet", blockNo);
69
70    Value * delCountPtr = getStreamSetBlockPtr(self, "deletionCounts", blockNo);
71
72    Value * delMask = iBuilder->CreateBlockAlignedLoad(delMaskPtr, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
73
74    std::vector<Value *> move_masks = parallel_prefix_deletion_masks(iBuilder, mDeletionFieldWidth, delMask);
75
76    for (unsigned j = 0; j < mStreamCount; ++j) {
77        Value * input = iBuilder->CreateBlockAlignedLoad(inputStreamPtr, {iBuilder->getInt32(0), iBuilder->getInt32(j)});
78        Value * output = apply_parallel_prefix_deletion(iBuilder, mDeletionFieldWidth, delMask, move_masks, input);
79        iBuilder->CreateBlockAlignedStore(output, outputStreamPtr, {iBuilder->getInt32(0), iBuilder->getInt32(j)});
80    }
81    Value * counts = partial_sum_popcount(iBuilder, mDeletionFieldWidth, iBuilder->simd_not(delMask));
82    iBuilder->CreateBlockAlignedStore(iBuilder->bitCast(counts), delCountPtr, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
83    /* Stream deletion has only been applied within fields; the actual number of data items
84     * has not yet changed.   */
85    Value * produced = getProducedItemCount(self, "outputStreamSet");
86    produced = iBuilder->CreateAdd(produced, iBuilder->getSize(iBuilder->getStride()));
87    setProducedItemCount(self, "outputStreamSet", produced);
88    setProducedItemCount(self, "deletionCounts", produced);
89    iBuilder->CreateRetVoid();
90    iBuilder->restoreIP(savePoint);
91}
92
93void DeletionKernel::generateFinalBlockMethod() const {
94    auto savePoint = iBuilder->saveIP();
95    Module * m = iBuilder->getModule();
96
97    unsigned blockSize = iBuilder->getBitBlockWidth();
98    Function * doBlockFunction = m->getFunction(mKernelName + doBlock_suffix);
99    Function * finalBlockFunction = m->getFunction(mKernelName + finalBlock_suffix);
100
101    iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "entry", finalBlockFunction, 0));
102    Value * remainingBytes = getParameter(finalBlockFunction, "remainingBytes");
103    Value * self = getParameter(finalBlockFunction, "self");
104    Value * blockNo = getScalarField(self, blockNoScalar);
105    Value * delMaskBlock = getStreamSetBlockPtr(self, "delMaskSet", blockNo);
106    Value * remaining = iBuilder->CreateZExt(remainingBytes, iBuilder->getIntNTy(blockSize));
107    Value * EOF_del = iBuilder->bitCast(iBuilder->CreateShl(Constant::getAllOnesValue(iBuilder->getIntNTy(blockSize)), remaining));
108    Value * const delmaskPtr = iBuilder->CreateGEP(delMaskBlock, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
109    Value * const delmaskVal = iBuilder->CreateBlockAlignedLoad(delmaskPtr);
110    iBuilder->CreateBlockAlignedStore(iBuilder->CreateOr(EOF_del, delmaskVal), delmaskPtr);
111    iBuilder->CreateCall(doBlockFunction, {self});
112    /* Adjust the produced item count */
113    Value * produced = getProducedItemCount(self, "outputStreamSet");
114    produced = iBuilder->CreateSub(produced, iBuilder->getSize(iBuilder->getStride()));
115    produced =  iBuilder->CreateAdd(produced, remainingBytes);
116    setProducedItemCount(self, "outputStreamSet", produced);
117    setProducedItemCount(self, "deletionCounts", produced);
118
119    iBuilder->CreateRetVoid();
120    iBuilder->restoreIP(savePoint);
121}
122
123}
Note: See TracBrowser for help on using the repository browser.