source: icGREP/icgrep-devel/icgrep/kernels/deletion.cpp @ 5285

Last change on this file since 5285 was 5285, checked in by nmedfort, 2 years ago

Start of work to simplify kernel writing. Removed generateDoBlockLogic method.

File size: 5.3 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "deletion.h"
7#include <IR_Gen/idisa_builder.h>
8#include <llvm/IR/Value.h>
9#include <llvm/IR/Module.h>
10
11using namespace llvm;
12
13namespace kernel {
14
15inline std::vector<Value *> parallel_prefix_deletion_masks(IDISA::IDISA_Builder * iBuilder, const unsigned fw, Value * del_mask) {
16    Value * m = iBuilder->simd_not(del_mask);
17    Value * mk = iBuilder->simd_slli(fw, del_mask, 1);
18    std::vector<Value *> move_masks;
19    for (unsigned shift = 1; shift < fw; shift *= 2) {
20        Value * mp = mk;
21        for (unsigned lookright = 1; lookright < fw; lookright *= 2) {
22            mp = iBuilder->simd_xor(mp, iBuilder->simd_slli(fw, mp, lookright));
23        }
24        Value * mv = iBuilder->simd_and(mp, m);
25        m = iBuilder->simd_or(iBuilder->simd_xor(m, mv), iBuilder->simd_srli(fw, mv, shift));
26        mk = iBuilder->simd_and(mk, iBuilder->simd_not(mp));
27        move_masks.push_back(mv);
28    }
29    return move_masks;
30}
31
32inline Value * apply_parallel_prefix_deletion(IDISA::IDISA_Builder * iBuilder, const unsigned fw, Value * del_mask, const std::vector<Value *> & mv, Value * strm) {
33    Value * s = iBuilder->simd_and(strm, iBuilder->simd_not(del_mask));
34    for (unsigned i = 0; i < mv.size(); i++) {
35        unsigned shift = 1 << i;
36        Value * t = iBuilder->simd_and(s, mv[i]);
37        s = iBuilder->simd_or(iBuilder->simd_xor(s, t), iBuilder->simd_srli(fw, t, shift));
38    }
39    return s;
40}
41
42inline Value * partial_sum_popcount(IDISA::IDISA_Builder * iBuilder, const unsigned fw, Value * mask) {
43    Value * field = iBuilder->simd_popcount(fw, mask);
44    const auto count = iBuilder->getBitBlockWidth() / fw;
45    for (unsigned move = 1; move < count; move *= 2) {
46        field = iBuilder->simd_add(fw, field, iBuilder->mvmd_slli(fw, field, move));
47    }
48    return field;
49}
50
51// Apply deletion to a set of stream_count input streams to produce a set of output streams.
52// Kernel inputs: stream_count data streams plus one del_mask stream
53// Outputs: the deleted streams, plus a partial sum popcount
54
55void DeletionKernel::generateDoBlockMethod(Function * function, Value * self, Value * blockNo) const {
56    Value * delMaskPtr = getStream(self, "delMaskSet", blockNo, iBuilder->getInt32(0));
57    Value * delMask = iBuilder->CreateBlockAlignedLoad(delMaskPtr);
58    std::vector<Value *> move_masks = parallel_prefix_deletion_masks(iBuilder, mDeletionFieldWidth, delMask);
59    for (unsigned j = 0; j < mStreamCount; ++j) {
60        Value * inputStreamPtr = getStream(self, "inputStreamSet", blockNo, iBuilder->getInt32(j));
61        Value * input = iBuilder->CreateBlockAlignedLoad(inputStreamPtr);
62        Value * output = apply_parallel_prefix_deletion(iBuilder, mDeletionFieldWidth, delMask, move_masks, input);
63        Value * outputStreamPtr = getStream(self, "outputStreamSet", blockNo, iBuilder->getInt32(j));
64        iBuilder->CreateBlockAlignedStore(output, outputStreamPtr);
65    }
66    Value * delCount = partial_sum_popcount(iBuilder, mDeletionFieldWidth, iBuilder->simd_not(delMask));
67    Value * delCountPtr = getStream(self, "deletionCounts", blockNo, iBuilder->getInt32(0));
68    iBuilder->CreateBlockAlignedStore(iBuilder->bitCast(delCount), delCountPtr);
69    // Stream deletion has only been applied within fields; the actual number of data items has not yet changed.
70    Value * produced = getProducedItemCount(self, "outputStreamSet");
71    produced = iBuilder->CreateAdd(produced, iBuilder->getSize(iBuilder->getStride()));
72    setProducedItemCount(self, "outputStreamSet", produced);
73    setProducedItemCount(self, "deletionCounts", produced);
74}
75
76void DeletionKernel::generateFinalBlockMethod(Function * function, Value * self, Value * remainingBytes, Value * blockNo) const {
77    IntegerType * vecTy = iBuilder->getIntNTy(iBuilder->getBitBlockWidth());
78    Value * remaining = iBuilder->CreateZExt(remainingBytes, vecTy);
79    Value * EOF_del = iBuilder->bitCast(iBuilder->CreateShl(Constant::getAllOnesValue(vecTy), remaining));
80    Value * const delmaskPtr = getStream(self, "delMaskSet", blockNo, iBuilder->getInt32(0));
81    Value * const delmaskVal = iBuilder->CreateBlockAlignedLoad(delmaskPtr);
82    iBuilder->CreateBlockAlignedStore(iBuilder->CreateOr(EOF_del, delmaskVal), delmaskPtr);
83    iBuilder->CreateCall(getDoBlockFunction(), {self});
84    // Adjust the produced item count
85    Value * produced = getProducedItemCount(self, "outputStreamSet");
86    produced = iBuilder->CreateSub(produced, iBuilder->getSize(iBuilder->getStride()));
87    produced =  iBuilder->CreateAdd(produced, remainingBytes);
88    setProducedItemCount(self, "outputStreamSet", produced);
89    setProducedItemCount(self, "deletionCounts", produced);
90}
91
92DeletionKernel::DeletionKernel(IDISA::IDISA_Builder * iBuilder, unsigned fw, unsigned streamCount)
93: BlockOrientedKernel(iBuilder, "del",
94              {Binding{iBuilder->getStreamSetTy(streamCount), "inputStreamSet"},
95               Binding{iBuilder->getStreamSetTy(), "delMaskSet"}},
96              {Binding{iBuilder->getStreamSetTy(streamCount), "outputStreamSet"},
97               Binding{iBuilder->getStreamSetTy(), "deletionCounts"}},
98              {}, {}, {})
99, mDeletionFieldWidth(fw)
100, mStreamCount(streamCount) {
101    mDoBlockUpdatesProducedItemCountsAttribute = true;
102}
103
104}
Note: See TracBrowser for help on using the repository browser.