source: icGREP/icgrep-devel/icgrep/kernels/block_kernel.cpp @ 6184

Last change on this file since 6184 was 6184, checked in by nmedfort, 12 months ago

Initial version of PipelineKernel? + revised StreamSet? model.

File size: 18.5 KB
Line 
1/*
2 *  Copyright (c) 2018 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "kernel.h"
7#include <toolchain/toolchain.h>
8#include <kernels/streamset.h>
9#include <kernels/kernel_builder.h>
10#include <llvm/IR/Constants.h>
11#include <llvm/IR/Function.h>
12#include <llvm/IR/Instructions.h>
13#include <llvm/IR/MDBuilder.h>
14#include <llvm/IR/Module.h>
15#include <llvm/Support/raw_ostream.h>
16#if LLVM_VERSION_INTEGER < LLVM_VERSION_CODE(4, 0, 0)
17#include <llvm/Bitcode/ReaderWriter.h>
18#else
19#include <llvm/Bitcode/BitcodeWriter.h>
20#endif
21#include <llvm/Transforms/Utils/Local.h>
22#include <llvm/Support/Debug.h>
23#include <boost/graph/adjacency_list.hpp>
24#include <util/extended_boost_graph_containers.h>
25#include <sstream>
26#include <functional>
27
28using namespace llvm;
29using namespace boost;
30using boost::container::flat_set;
31
32namespace kernel {
33
34using AttrId = Attribute::KindId;
35using RateId = ProcessingRate::KindId;
36
37// TODO: Break the BlockOrientedKernel into two classes, one with an explicit DoFinal block and another that
38// calls the DoBlock method with optional preamble and postamble hooks. By doing so, we can remove the indirect
39// branches (or function calls) from the following kernel and simplify the cognitive load for the kernel
40// programmer. This is less general than the current method but no evidence that being able to reenter the
41// DoBlock method multiple times from the DoFinal block would ever be useful.
42
43// Can we eliminate some of the kernel state (e.g., EOF) by having a general preamble that can create a stack-
44// allocated struct?
45
46const auto DO_BLOCK_SUFFIX = "_DoBlock";
47const auto FINAL_BLOCK_SUFFIX = "_FinalBlock";
48
49/** ------------------------------------------------------------------------------------------------------------- *
50 * @brief generateMultiBlockLogic
51 ** ------------------------------------------------------------------------------------------------------------- */
52void BlockOrientedKernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & b, Value * const numOfBlocks) {
53
54    if (LLVM_UNLIKELY(mStride != b->getBitBlockWidth())) {
55        report_fatal_error(getName() + ": the Stride (" + std::to_string(mStride) + ") of BlockOrientedKernel "
56                           "equal to the BitBlockWidth (" + std::to_string(b->getBitBlockWidth()) + ")");
57    }
58
59    BasicBlock * const entryBlock = b->GetInsertBlock();
60    mStrideLoopBody = b->CreateBasicBlock(getName() + "_strideLoopBody");
61    BasicBlock * const incrementCountableItems = b->CreateBasicBlock(getName() + "_incrementCountableItems");
62    BasicBlock * const stridesDone = b->CreateBasicBlock(getName() + "_stridesDone");
63    BasicBlock * const doFinalBlock = b->CreateBasicBlock(getName() + "_doFinalBlock");
64    BasicBlock * const segmentDone = b->CreateBasicBlock(getName() + "_segmentDone");
65
66    ConstantInt * const ZERO = b->getSize(0);
67
68    b->CreateUnlikelyCondBr(mIsFinal, doFinalBlock, mStrideLoopBody);
69
70    /// BLOCK BODY
71
72    b->SetInsertPoint(mStrideLoopBody);
73    if (b->supportsIndirectBr()) {
74        Value * const baseTarget = BlockAddress::get(segmentDone);
75        mStrideLoopTarget = b->CreatePHI(baseTarget->getType(), 2, "strideTarget");
76        mStrideLoopTarget->addIncoming(baseTarget, entryBlock);
77    }
78    mStrideBlockIndex = b->CreatePHI(b->getSizeTy(), 2);
79    mStrideBlockIndex->addIncoming(ZERO, entryBlock);
80
81    /// GENERATE DO BLOCK METHOD
82
83    writeDoBlockMethod(b);
84
85    Value * const nextStrideBlockIndex = b->CreateAdd(mStrideBlockIndex, b->getSize(1));
86    Value * noMore = b->CreateICmpEQ(nextStrideBlockIndex, numOfBlocks);
87    if (hasAttribute(AttrId::CanTerminateEarly) ||  hasAttribute(AttrId::MustExplicitlyTerminate)) {
88        noMore = b->CreateOr(noMore, b->getTerminationSignal());
89    }
90    b->CreateUnlikelyCondBr(noMore, stridesDone, incrementCountableItems);
91
92    b->SetInsertPoint(incrementCountableItems);
93    incrementCountableItemCounts(b);
94    BasicBlock * const bodyEnd = b->GetInsertBlock();
95    if (mStrideLoopTarget) {
96        mStrideLoopTarget->addIncoming(mStrideLoopTarget, bodyEnd);
97    }
98    mStrideBlockIndex->addIncoming(nextStrideBlockIndex, bodyEnd);
99
100    b->CreateBr(mStrideLoopBody);
101
102    stridesDone->moveAfter(bodyEnd);
103
104    /// STRIDE DONE
105
106    b->SetInsertPoint(stridesDone);
107
108    // Now conditionally perform the final block processing depending on the doFinal parameter.
109    if (mStrideLoopTarget) {
110        mStrideLoopBranch = b->CreateIndirectBr(mStrideLoopTarget, 3);
111        mStrideLoopBranch->addDestination(doFinalBlock);
112        mStrideLoopBranch->addDestination(segmentDone);
113    } else {
114        b->CreateUnlikelyCondBr(mIsFinal, doFinalBlock, segmentDone);
115    }
116
117    doFinalBlock->moveAfter(stridesDone);
118
119    /// DO FINAL BLOCK
120
121    b->SetInsertPoint(doFinalBlock);
122    writeFinalBlockMethod(b, getRemainingItems(b));
123    b->CreateBr(segmentDone);
124
125    segmentDone->moveAfter(b->GetInsertBlock());
126
127    b->SetInsertPoint(segmentDone);
128
129    // Update the branch prediction metadata to indicate that the likely target will be segmentDone
130    if (mStrideLoopTarget) {
131        MDBuilder mdb(b->getContext());
132        const auto destinations = mStrideLoopBranch->getNumDestinations();
133        uint32_t weights[destinations];
134        for (unsigned i = 0; i < destinations; ++i) {
135            weights[i] = (mStrideLoopBranch->getDestination(i) == segmentDone) ? 100 : 1;
136        }
137        ArrayRef<uint32_t> bw(weights, destinations);
138        mStrideLoopBranch->setMetadata(LLVMContext::MD_prof, mdb.createBranchWeights(bw));
139    }
140
141}
142
143/** ------------------------------------------------------------------------------------------------------------- *
144 * @brief incrementCountableItemCounts
145 ** ------------------------------------------------------------------------------------------------------------- */
146void BlockOrientedKernel::incrementCountableItemCounts(const std::unique_ptr<KernelBuilder> & b) {
147
148    // Update the processed item counts
149    for (unsigned i = 0; i < mInputStreamSets.size(); ++i) {
150        const Binding & input = mInputStreamSets[i];
151        if (isCountable(input)) {
152            const ProcessingRate & rate = input.getRate();
153            Value * offset = nullptr;
154            if (rate.isFixed()) {
155                offset = b->getSize(ceiling(getUpperBound(input) * getStride()));
156            } else { // if (rate.isPopCount() || rate.isNegatedPopCount())
157                offset = getPopCountRateItemCount(b, rate, mStrideBlockIndex);
158            }
159            Value * const initial = b->getNonDeferredProcessedItemCount(input);
160            Value * const processed = b->CreateAdd(initial, offset);
161            b->setNonDeferredProcessedItemCount(input, processed);
162        }
163    }
164
165    // Update the produced item counts
166    for (unsigned i = 0; i < mOutputStreamSets.size(); ++i) {
167        const Binding & output = mOutputStreamSets[i];
168        if (isCountable(output)) {
169            const ProcessingRate & rate = output.getRate();
170            Value * offset = nullptr;
171            if (rate.isFixed()) {
172                offset = b->getSize(ceiling(getUpperBound(output) * getStride()));
173            } else { // if (rate.isPopCount() || rate.isNegatedPopCount())
174                offset = getPopCountRateItemCount(b, rate, mStrideBlockIndex);
175            }
176            Value * const initial = b->getNonDeferredProducedItemCount(output);
177            Value * const produced = b->CreateAdd(initial, offset);
178            b->setNonDeferredProducedItemCount(output, produced);
179        }
180    }
181}
182/** ------------------------------------------------------------------------------------------------------------- *
183 * @brief getRemainingItems
184 ** ------------------------------------------------------------------------------------------------------------- */
185Value * BlockOrientedKernel::getRemainingItems(const std::unique_ptr<KernelBuilder> & b) {
186    Value * remainingItems = nullptr;
187    const auto count = mInputStreamSets.size();
188    if (count == 1) {
189        return mAccessibleInputItems[0];
190    } else {
191        for (unsigned i = 0; i < count; i++) {
192            if (mInputStreamSets[i].isPrincipal()) {
193                return mAccessibleInputItems[i];
194            }
195        }
196        for (unsigned i = 0; i < count; ++i) {
197            const ProcessingRate & r = mInputStreamSets[i].getRate();
198            if (r.isFixed()) {
199                Value * ic = b->CreateCeilUDiv2(mAccessibleInputItems[i], r.getRate());
200                if (remainingItems) {
201                    remainingItems = b->CreateUMin(remainingItems, ic);
202                } else {
203                    remainingItems = ic;
204                }
205            }
206        }
207    }
208    return remainingItems;
209}
210
211/** ------------------------------------------------------------------------------------------------------------- *
212 * @brief writeDoBlockMethod
213 ** ------------------------------------------------------------------------------------------------------------- */
214inline void BlockOrientedKernel::writeDoBlockMethod(const std::unique_ptr<KernelBuilder> & b) {
215
216    Value * const self = getHandle();
217    Function * const cp = mCurrentMethod;
218    auto ip = b->saveIP();
219    std::vector<Value *> availableItemCount(0);
220
221    /// Check if the do block method is called and create the function if necessary
222    if (!b->supportsIndirectBr()) {
223
224        std::vector<Type *> params;
225        params.reserve(1 + mAccessibleInputItems.size());
226        params.push_back(self->getType());
227        for (Value * avail : mAccessibleInputItems) {
228            params.push_back(avail->getType());
229        }
230
231        FunctionType * const type = FunctionType::get(b->getVoidTy(), params, false);
232        mCurrentMethod = Function::Create(type, GlobalValue::InternalLinkage, getName() + DO_BLOCK_SUFFIX, b->getModule());
233        mCurrentMethod->setCallingConv(CallingConv::C);
234        mCurrentMethod->setDoesNotThrow();
235        auto args = mCurrentMethod->arg_begin();
236        args->setName("self");
237        setHandle(b, &*args);
238        availableItemCount.reserve(mAccessibleInputItems.size());
239        while (++args != mCurrentMethod->arg_end()) {
240            availableItemCount.push_back(&*args);
241        }
242        assert (availableItemCount.size() == mAccessibleInputItems.size());
243        mAccessibleInputItems.swap(availableItemCount);
244        b->SetInsertPoint(BasicBlock::Create(b->getContext(), "entry", mCurrentMethod));
245    }
246
247    generateDoBlockMethod(b); // must be implemented by the BlockOrientedKernelBuilder subtype
248
249    if (!b->supportsIndirectBr()) {
250        // Restore the DoSegment function state then call the DoBlock method
251        b->CreateRetVoid();
252        mDoBlockMethod = mCurrentMethod;
253        b->restoreIP(ip);
254        setHandle(b, self);
255        mCurrentMethod = cp;
256        mAccessibleInputItems.swap(availableItemCount);
257        CreateDoBlockMethodCall(b);
258    }
259
260}
261
262/** ------------------------------------------------------------------------------------------------------------- *
263 * @brief writeFinalBlockMethod
264 ** ------------------------------------------------------------------------------------------------------------- */
265inline void BlockOrientedKernel::writeFinalBlockMethod(const std::unique_ptr<KernelBuilder> & b, Value * remainingItems) {
266
267    Value * const self = getHandle();
268    Function * const cp = mCurrentMethod;
269    Value * const remainingItemCount = remainingItems;
270    auto ip = b->saveIP();
271    std::vector<Value *> availableItemCount(0);
272
273    if (!b->supportsIndirectBr()) {
274        std::vector<Type *> params;
275        params.reserve(2 + mAccessibleInputItems.size());
276        params.push_back(self->getType());
277        params.push_back(b->getSizeTy());
278        for (Value * avail : mAccessibleInputItems) {
279            params.push_back(avail->getType());
280        }
281        FunctionType * const type = FunctionType::get(b->getVoidTy(), params, false);
282        mCurrentMethod = Function::Create(type, GlobalValue::InternalLinkage, getName() + FINAL_BLOCK_SUFFIX, b->getModule());
283        mCurrentMethod->setCallingConv(CallingConv::C);
284        mCurrentMethod->setDoesNotThrow();
285        auto args = mCurrentMethod->arg_begin();
286        args->setName("self");
287        setHandle(b, &*args);
288        remainingItems = &*(++args);
289        remainingItems->setName("remainingItems");
290        availableItemCount.reserve(mAccessibleInputItems.size());
291        while (++args != mCurrentMethod->arg_end()) {
292            availableItemCount.push_back(&*args);
293        }
294        assert (availableItemCount.size() == mAccessibleInputItems.size());
295        mAccessibleInputItems.swap(availableItemCount);
296        b->SetInsertPoint(BasicBlock::Create(b->getContext(), "entry", mCurrentMethod));
297    }
298
299    generateFinalBlockMethod(b, remainingItems); // may be implemented by the BlockOrientedKernel subtype
300
301    if (!b->supportsIndirectBr()) {
302        b->CreateRetVoid();
303        b->restoreIP(ip);
304        setHandle(b, self);
305        mAccessibleInputItems.swap(availableItemCount);
306        // Restore the DoSegment function state then call the DoFinal method
307        std::vector<Value *> args;
308        args.reserve(2 + mAccessibleInputItems.size());
309        args.push_back(self);
310        args.push_back(remainingItemCount);
311        args.insert(args.end(), mAccessibleInputItems.begin(), mAccessibleInputItems.end());
312        b->CreateCall(mCurrentMethod, args);
313        mCurrentMethod = cp;
314    }
315
316}
317
318/** ------------------------------------------------------------------------------------------------------------- *
319 * @brief generateFinalBlockMethod
320 ** ------------------------------------------------------------------------------------------------------------- */
321void BlockOrientedKernel::generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & b, Value * /* remainingItems */) {
322    //  The default finalBlock method simply dispatches to the doBlock routine.
323    CreateDoBlockMethodCall(b);
324}
325
326/** ------------------------------------------------------------------------------------------------------------- *
327 * @brief CreateDoBlockMethodCall
328 ** ------------------------------------------------------------------------------------------------------------- */
329void BlockOrientedKernel::CreateDoBlockMethodCall(const std::unique_ptr<KernelBuilder> & b) {
330    if (b->supportsIndirectBr()) {
331        BasicBlock * const bb = b->CreateBasicBlock("resume");
332        mStrideLoopBranch->addDestination(bb);
333        BasicBlock * const current = b->GetInsertBlock();
334        mStrideLoopTarget->addIncoming(BlockAddress::get(bb), current);
335        mStrideBlockIndex->addIncoming(b->getSize(0), current);
336        b->CreateBr(mStrideLoopBody);
337        bb->moveAfter(current);
338        b->SetInsertPoint(bb);
339    } else {
340        std::vector<Value *> args;
341        args.reserve(1 + mAccessibleInputItems.size());
342        args.push_back(getHandle());
343        args.insert(args.end(), mAccessibleInputItems.begin(), mAccessibleInputItems.end());
344        b->CreateCall(mDoBlockMethod, args);
345    }
346}
347
348/** ------------------------------------------------------------------------------------------------------------- *
349 * @brief makePopCountRateGraph
350 *
351 * Returns a graph with a vertex for each input port and a directed edge annotated with the appropriate RateId
352 ** ------------------------------------------------------------------------------------------------------------- */
353
354using PopCountRateGraph = adjacency_list<hash_setS, vecS, directedS>;
355using Key = std::reference_wrapper<const std::string>;
356using PopCountRateMap = boost::container::flat_map<Key, PopCountRateGraph::vertex_descriptor, std::less<std::string>>;
357
358#define POP_COUNT 0
359#define NEGATED_POP_COUNT 1
360
361void checkPopCount(const Binding & binding, PopCountRateGraph & G, const PopCountRateMap & M) {
362    const ProcessingRate & rate = binding.getRate();
363    if (LLVM_UNLIKELY(rate.isPopCount() || rate.isNegatedPopCount())) {
364        const auto f = M.find(rate.getReference());
365        assert ("pop count rate cannot refer to an output stream" && f != M.end());
366        const auto targetType = (rate.getKind() == RateId::PopCount) ? POP_COUNT : NEGATED_POP_COUNT;
367        add_edge(f->second, targetType, G);
368    }
369}
370
371inline PopCountRateGraph makePopCountRateGraph(const Bindings & inputStreamSets, const Bindings & outputStreamSets) {
372    const auto n = inputStreamSets.size();
373    PopCountRateGraph G(n);
374    PopCountRateMap M;
375    M.reserve(n);
376    for (unsigned i = 0; i < n; ++i) {
377        M.emplace(inputStreamSets[i].getName(), i);
378        checkPopCount(inputStreamSets[i], G, M);
379    }
380    const auto m = outputStreamSets.size();
381    for (unsigned i = 0; i < m; ++i) {
382        checkPopCount(outputStreamSets[i], G, M);
383    }
384    return G;
385}
386
387/** ------------------------------------------------------------------------------------------------------------- *
388 * @brief annotateInputBindingsWithPopCountArrayAttributes
389 ** ------------------------------------------------------------------------------------------------------------- */
390void annotateInputBindingsWithPopCountArrayAttributes(Bindings & inputStreamSets, const Bindings & outputStreamSets) {
391    if (LLVM_LIKELY(inputStreamSets.size() > 1)) {
392        const auto G = makePopCountRateGraph(inputStreamSets, outputStreamSets);
393        for (const auto e : make_iterator_range(edges(G))) {
394            const auto i = source(e, G);
395            assert (i < inputStreamSets.size());
396            Binding & input = inputStreamSets[i];
397            switch (target(e, G)) {
398                case POP_COUNT:
399                    input.addAttribute(RequiresPopCountArray());
400                    break;
401                case NEGATED_POP_COUNT:
402                    input.addAttribute(RequiresNegatedPopCountArray());
403                    break;
404                default: llvm_unreachable("unhandled pop count type!");
405            }
406        }
407    }
408}
409
410
411// CONSTRUCTOR
412BlockOrientedKernel::BlockOrientedKernel(std::string && kernelName,
413                                         Bindings && stream_inputs,
414                                         Bindings && stream_outputs,
415                                         Bindings && scalar_parameters,
416                                         Bindings && scalar_outputs,
417                                         Bindings && internal_scalars)
418: MultiBlockKernel(std::move(kernelName),
419                   std::move(stream_inputs),
420                   std::move(stream_outputs),
421                   std::move(scalar_parameters),
422                   std::move(scalar_outputs),
423                   std::move(internal_scalars))
424, mDoBlockMethod(nullptr)
425, mStrideLoopBody(nullptr)
426, mStrideLoopBranch(nullptr)
427, mStrideLoopTarget(nullptr)
428, mStrideBlockIndex(nullptr) {
429    annotateInputBindingsWithPopCountArrayAttributes(mInputStreamSets, mOutputStreamSets);
430}
431
432
433}
Note: See TracBrowser for help on using the repository browser.