source: icGREP/icgrep-devel/icgrep/kernels/deletion.cpp @ 5313

Last change on this file since 5313 was 5313, checked in by cameron, 3 years ago

DeleteByPEXTkernel

File size: 9.5 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "deletion.h"
7#include <IR_Gen/idisa_builder.h>
8#include <llvm/IR/Value.h>
9#include <llvm/IR/Module.h>
10#include <llvm/Support/raw_ostream.h>
11
12using namespace llvm;
13
14namespace kernel {
15
16inline std::vector<Value *> parallel_prefix_deletion_masks(IDISA::IDISA_Builder * iBuilder, const unsigned fw, Value * del_mask) {
17    Value * m = iBuilder->simd_not(del_mask);
18    Value * mk = iBuilder->simd_slli(fw, del_mask, 1);
19    std::vector<Value *> move_masks;
20    for (unsigned shift = 1; shift < fw; shift *= 2) {
21        Value * mp = mk;
22        for (unsigned lookright = 1; lookright < fw; lookright *= 2) {
23            mp = iBuilder->simd_xor(mp, iBuilder->simd_slli(fw, mp, lookright));
24        }
25        Value * mv = iBuilder->simd_and(mp, m);
26        m = iBuilder->simd_or(iBuilder->simd_xor(m, mv), iBuilder->simd_srli(fw, mv, shift));
27        mk = iBuilder->simd_and(mk, iBuilder->simd_not(mp));
28        move_masks.push_back(mv);
29    }
30    return move_masks;
31}
32
33inline Value * apply_parallel_prefix_deletion(IDISA::IDISA_Builder * iBuilder, const unsigned fw, Value * del_mask, const std::vector<Value *> & mv, Value * strm) {
34    Value * s = iBuilder->simd_and(strm, iBuilder->simd_not(del_mask));
35    for (unsigned i = 0; i < mv.size(); i++) {
36        unsigned shift = 1 << i;
37        Value * t = iBuilder->simd_and(s, mv[i]);
38        s = iBuilder->simd_or(iBuilder->simd_xor(s, t), iBuilder->simd_srli(fw, t, shift));
39    }
40    return s;
41}
42
43inline Value * partial_sum_popcount(IDISA::IDISA_Builder * iBuilder, const unsigned fw, Value * mask) {
44    Value * field = iBuilder->simd_popcount(fw, mask);
45    const auto count = iBuilder->getBitBlockWidth() / fw;
46    for (unsigned move = 1; move < count; move *= 2) {
47        field = iBuilder->simd_add(fw, field, iBuilder->mvmd_slli(fw, field, move));
48    }
49    return field;
50}
51
52// Apply deletion to a set of stream_count input streams to produce a set of output streams.
53// Kernel inputs: stream_count data streams plus one del_mask stream
54// Outputs: the deleted streams, plus a partial sum popcount
55
56void DeletionKernel::generateDoBlockMethod() {
57    Value * delMaskPtr = getInputStream("delMaskSet", iBuilder->getInt32(0));
58    Value * delMask = iBuilder->CreateBlockAlignedLoad(delMaskPtr);
59    const auto move_masks = parallel_prefix_deletion_masks(iBuilder, mDeletionFieldWidth, delMask);
60    for (unsigned j = 0; j < mStreamCount; ++j) {
61        Value * inputStreamPtr = getInputStream("inputStreamSet", iBuilder->getInt32(j));
62        Value * input = iBuilder->CreateBlockAlignedLoad(inputStreamPtr);
63        Value * output = apply_parallel_prefix_deletion(iBuilder, mDeletionFieldWidth, delMask, move_masks, input);
64        Value * outputStreamPtr = getOutputStream("outputStreamSet", iBuilder->getInt32(j));
65        iBuilder->CreateBlockAlignedStore(output, outputStreamPtr);
66    }
67    Value * delCount = partial_sum_popcount(iBuilder, mDeletionFieldWidth, iBuilder->simd_not(delMask));
68    Value * delCountPtr = getOutputStream("deletionCounts", iBuilder->getInt32(0));
69    iBuilder->CreateBlockAlignedStore(iBuilder->bitCast(delCount), delCountPtr);
70}
71
72void DeletionKernel::generateFinalBlockMethod(Value * remainingBytes) {
73    IntegerType * vecTy = iBuilder->getIntNTy(iBuilder->getBitBlockWidth());
74    Value * remaining = iBuilder->CreateZExt(remainingBytes, vecTy);
75    Value * EOF_del = iBuilder->bitCast(iBuilder->CreateShl(Constant::getAllOnesValue(vecTy), remaining));
76    Value * const delmaskPtr = getInputStream("delMaskSet", iBuilder->getInt32(0));
77    Value * delMask = iBuilder->CreateOr(EOF_del, iBuilder->CreateBlockAlignedLoad(delmaskPtr));
78    const auto move_masks = parallel_prefix_deletion_masks(iBuilder, mDeletionFieldWidth, delMask);
79    for (unsigned j = 0; j < mStreamCount; ++j) {
80        Value * inputStreamPtr = getInputStream("inputStreamSet", iBuilder->getInt32(j));
81        Value * input = iBuilder->CreateBlockAlignedLoad(inputStreamPtr);
82        Value * output = apply_parallel_prefix_deletion(iBuilder, mDeletionFieldWidth, delMask, move_masks, input);
83        Value * outputStreamPtr = getOutputStream("outputStreamSet", iBuilder->getInt32(j));
84        iBuilder->CreateBlockAlignedStore(output, outputStreamPtr);
85    }
86    Value * delCount = partial_sum_popcount(iBuilder, mDeletionFieldWidth, iBuilder->simd_not(delMask));
87    Value * delCountPtr = getOutputStream("deletionCounts", iBuilder->getInt32(0));
88    iBuilder->CreateBlockAlignedStore(iBuilder->bitCast(delCount), delCountPtr);
89}
90
91DeletionKernel::DeletionKernel(IDISA::IDISA_Builder * iBuilder, unsigned fw, unsigned streamCount)
92: BlockOrientedKernel(iBuilder, "del",
93              {Binding{iBuilder->getStreamSetTy(streamCount), "inputStreamSet"},
94               Binding{iBuilder->getStreamSetTy(), "delMaskSet"}},
95              {Binding{iBuilder->getStreamSetTy(streamCount), "outputStreamSet"},
96               Binding{iBuilder->getStreamSetTy(), "deletionCounts"}},
97              {}, {}, {})
98, mDeletionFieldWidth(fw)
99, mStreamCount(streamCount) {
100    mDoBlockUpdatesProducedItemCountsAttribute = false;
101}
102
103   
// Width in bits of the fields handed to the PEXT intrinsic; the code below
// provides intrinsics for 64 and 32 only.
constexpr unsigned PEXT_width = 64;
105
106inline std::vector<Value *> get_PEXT_masks(IDISA::IDISA_Builder * iBuilder, Value * del_mask) {
107    Value * m = iBuilder->fwCast(PEXT_width, iBuilder->simd_not(del_mask));
108    std::vector<Value *> masks;
109    for (unsigned i = 0; i < iBuilder->getBitBlockWidth()/PEXT_width; i++) {
110        masks.push_back(iBuilder->CreateExtractElement(m, i));
111    }
112    return masks;
113}
114
115inline Value * apply_PEXT_deletion(IDISA::IDISA_Builder * iBuilder, const std::vector<Value *> & masks, Value * strm) {
116    Value * PEXT_func = nullptr;
117    if (PEXT_width == 64) PEXT_func = Intrinsic::getDeclaration(iBuilder->getModule(), Intrinsic::x86_bmi_pext_64);
118    else if (PEXT_width == 32) PEXT_func = Intrinsic::getDeclaration(iBuilder->getModule(), Intrinsic::x86_bmi_pext_32);
119    Value * v = iBuilder->fwCast(PEXT_width, strm);
120    Value * output = Constant::getNullValue(v->getType());
121    for (unsigned i = 0; i < iBuilder->getBitBlockWidth()/PEXT_width; i++) {
122        Value * field = iBuilder->CreateExtractElement(v, i);
123        Value * compressed = iBuilder->CreateCall(PEXT_func, {field, masks[i]});
124        output = iBuilder->CreateInsertElement(output, compressed, i);
125    }
126    return output;
127}
128
129// Apply deletion to a set of stream_count input streams to produce a set of output streams.
130// Kernel inputs: stream_count data streams plus one del_mask stream
131// Outputs: the deleted streams, plus a partial sum popcount
132
133void DeleteByPEXTkernel::generateDoBlockMethod() {
134    Value * delMaskPtr = getInputStream("delMaskSet", iBuilder->getInt32(0));
135    Value * delMask = iBuilder->CreateBlockAlignedLoad(delMaskPtr);
136    const auto masks = get_PEXT_masks(iBuilder, delMask);
137    for (unsigned j = 0; j < mStreamCount; ++j) {
138        Value * inputStreamPtr = getInputStream("inputStreamSet", iBuilder->getInt32(j));
139        Value * input = iBuilder->CreateBlockAlignedLoad(inputStreamPtr);
140        Value * output = apply_PEXT_deletion(iBuilder, masks, input);
141        Value * outputStreamPtr = getOutputStream("outputStreamSet", iBuilder->getInt32(j));
142        iBuilder->CreateBlockAlignedStore(iBuilder->bitCast(output), outputStreamPtr);
143    }
144    Value * delCount = partial_sum_popcount(iBuilder, mDelCountFieldWidth, apply_PEXT_deletion(iBuilder, masks, iBuilder->simd_not(delMask)));
145    Value * delCountPtr = getOutputStream("deletionCounts", iBuilder->getInt32(0));
146    iBuilder->CreateBlockAlignedStore(iBuilder->bitCast(delCount), delCountPtr);
147}
148
149void DeleteByPEXTkernel::generateFinalBlockMethod(Value * remainingBytes) {
150    IntegerType * vecTy = iBuilder->getIntNTy(iBuilder->getBitBlockWidth());
151    Value * remaining = iBuilder->CreateZExt(remainingBytes, vecTy);
152    Value * EOF_del = iBuilder->bitCast(iBuilder->CreateShl(Constant::getAllOnesValue(vecTy), remaining));
153    Value * const delmaskPtr = getInputStream("delMaskSet", iBuilder->getInt32(0));
154    Value * delMask = iBuilder->CreateOr(EOF_del, iBuilder->CreateBlockAlignedLoad(delmaskPtr));
155    const auto masks = get_PEXT_masks(iBuilder, delMask);
156    for (unsigned j = 0; j < mStreamCount; ++j) {
157        Value * inputStreamPtr = getInputStream("inputStreamSet", iBuilder->getInt32(j));
158        Value * input = iBuilder->CreateBlockAlignedLoad(inputStreamPtr);
159        Value * output = apply_PEXT_deletion(iBuilder, masks, input);
160        Value * outputStreamPtr = getOutputStream("outputStreamSet", iBuilder->getInt32(j));
161        iBuilder->CreateBlockAlignedStore(iBuilder->bitCast(output), outputStreamPtr);
162    }
163    Value * delCount = partial_sum_popcount(iBuilder, mDelCountFieldWidth, apply_PEXT_deletion(iBuilder, masks, iBuilder->simd_not(delMask)));
164    Value * delCountPtr = getOutputStream("deletionCounts", iBuilder->getInt32(0));
165    iBuilder->CreateBlockAlignedStore(iBuilder->bitCast(delCount), delCountPtr);
166}
167
168DeleteByPEXTkernel::DeleteByPEXTkernel(IDISA::IDISA_Builder * iBuilder, unsigned fw, unsigned streamCount)
169: BlockOrientedKernel(iBuilder, "PEXTdel",
170                      {Binding{iBuilder->getStreamSetTy(streamCount), "inputStreamSet"},
171                          Binding{iBuilder->getStreamSetTy(), "delMaskSet"}},
172                      {Binding{iBuilder->getStreamSetTy(streamCount), "outputStreamSet"},
173                          Binding{iBuilder->getStreamSetTy(), "deletionCounts"}},
174                      {}, {}, {})
175, mDelCountFieldWidth(fw)
176, mStreamCount(streamCount) {
177    mDoBlockUpdatesProducedItemCountsAttribute = false;
178}
179
180}
Note: See TracBrowser for help on using the repository browser.