source: icGREP/icgrep-devel/icgrep/kernels/kernel.h @ 5755

Last change on this file since 5755 was 5755, checked in by nmedfort, 16 months ago

Bug fixes and simplified MultiBlockKernel? logic

File size: 20.9 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#ifndef KERNEL_H
7#define KERNEL_H
8
9#include "interface.h"
10#include <boost/container/flat_map.hpp>
11
12namespace llvm { class BasicBlock; }
13namespace llvm { class Constant; }
14namespace llvm { class Function; }
15namespace llvm { class IntegerType; }
16namespace llvm { class IndirectBrInst; }
17namespace llvm { class PHINode; }
18namespace llvm { class LoadInst; }
19namespace llvm { class Type; }
20namespace llvm { class Value; }
21namespace parabix { class StreamSetBuffer; }
22
23namespace kernel {
24   
25class KernelBuilder;
26
27class Kernel : public KernelInterface {
28    friend class KernelBuilder;
29protected:
30
31    static const std::string DO_BLOCK_SUFFIX;
32    static const std::string FINAL_BLOCK_SUFFIX;
33    static const std::string MULTI_BLOCK_SUFFIX;
34    static const std::string LOGICAL_SEGMENT_NO_SCALAR;
35    static const std::string PROCESSED_ITEM_COUNT_SUFFIX;
36    static const std::string CONSUMED_ITEM_COUNT_SUFFIX;
37    static const std::string PRODUCED_ITEM_COUNT_SUFFIX;
38    static const std::string TERMINATION_SIGNAL;
39    static const std::string BUFFER_PTR_SUFFIX;
40    static const std::string CONSUMER_SUFFIX;
41    static const std::string CYCLECOUNT_SCALAR;
42
43public:
44   
45    enum class Port { Input, Output };
46    using StreamPort = std::pair<Port, unsigned>;
47    using StreamMap = boost::container::flat_map<std::string, StreamPort>;
48    using KernelFieldMap = boost::container::flat_map<std::string, unsigned>;
49    using StreamSetBuffers = std::vector<parabix::StreamSetBuffer *>;
50
51    // Kernel Signatures and Module IDs
52    //
53    // A kernel signature uniquely identifies a kernel and its full functionality.
54    // In the event that a particular kernel instance is to be generated and compiled
55    // to produce object code, and we have a cached kernel object code instance with
56    // the same signature and targetting the same IDISA architecture, then the cached
57    // object code may safely be used to avoid recompilation.
58    //
59    // A kernel signature is a byte string of arbitrary length.
60    //
61    // Kernel developers should take responsibility for designing appropriate signature
62    // mechanisms that are short, inexpensive to compute and guarantee uniqueness
63    // based on the semantics of the kernel.
64    //
65    // If no other mechanism is available, the default makeSignature() method uses the
66    // full LLVM IR (before optimization) of the kernel instance.
67    //
68    // A kernel Module ID is short string that is used as a name for a particular kernel
69    // instance.  Kernel Module IDs are used to look up and retrieve cached kernel
70    // instances and so should be highly likely to uniquely identify a kernel instance.
71    //
72    // The ideal case is that a kernel Module ID serves as a full kernel signature thus
73    // guaranteeing uniqueness.  In this case, hasSignature() should return false.
74    //
75
76    //
77    // Kernel builder subtypes define their logic of kernel construction
78    // in terms of 3 virtual methods for
79    // (a) preparing the Kernel state data structure
80    // (c) defining the logic of the finalBlock function.
81    //
82    // Note: the kernel state data structure must only be finalized after
83    // all scalar fields have been added.   If there are no fields to
84    // be added, the default method for preparing kernel state may be used.
85
86
87    std::string makeSignature(const std::unique_ptr<KernelBuilder> & idb) override;
88
89    // Can the module ID itself serve as the unique signature?
90    virtual bool hasSignature() const { return true; }
91
92    bool isCachable() const override { return false; }
93
94    void bindPorts(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs);
95
96    llvm::Module * setModule(llvm::Module * const module);
97
98    llvm::Module * makeModule(const std::unique_ptr<kernel::KernelBuilder> & idb);
99
100    llvm::Module * getModule() const {
101        return mModule;
102    }
103
104    void generateKernel(const std::unique_ptr<kernel::KernelBuilder> & idb);
105   
106    llvm::Value * createInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) final;
107
108    void initializeInstance(const std::unique_ptr<KernelBuilder> & idb) final;
109
110    void finalizeInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) final;
111
112    bool hasNoTerminateAttribute() const {
113        return mNoTerminateAttribute;
114    }
115
116    StreamPort getStreamPort(const std::string & name) const;
117
118    const Binding & getBinding(const std::string & name) const;
119
120    const StreamSetBuffers & getStreamSetInputBuffers() const {
121        return mStreamSetInputBuffers;
122    }
123
124    const parabix::StreamSetBuffer * getStreamSetInputBuffer(const unsigned i) const {
125        assert (i < mStreamSetInputBuffers.size());
126        assert (mStreamSetInputBuffers[i]);
127        return mStreamSetInputBuffers[i];
128    }
129
130    const parabix::StreamSetBuffer * getInputStreamSetBuffer(const std::string & name) const {
131        const auto port = getStreamPort(name);
132        assert (port.first == Port::Input);
133        return getStreamSetInputBuffer(port.second);
134    }
135
136    const StreamSetBuffers & getStreamSetOutputBuffers() const {
137        return mStreamSetOutputBuffers;
138    }
139
140    const Binding & getStreamInput(const unsigned i) const {
141        return KernelInterface::getStreamInput(i);
142    }
143
144    const Binding & getStreamInput(const std::string & name) const {
145        const auto port = getStreamPort(name);
146        assert (port.first == Port::Input);
147        return KernelInterface::getStreamInput(port.second);
148    }
149
150    const parabix::StreamSetBuffer * getStreamSetOutputBuffer(const unsigned i) const {
151        assert (i < mStreamSetOutputBuffers.size());
152        assert (mStreamSetOutputBuffers[i]);
153        return mStreamSetOutputBuffers[i];
154    }
155
156    const parabix::StreamSetBuffer * getOutputStreamSetBuffer(const std::string & name) const {
157        const auto port = getStreamPort(name);
158        assert (port.first == Port::Output);
159        return getStreamSetOutputBuffer(port.second);
160    }
161
162    const Binding & getStreamOutput(const unsigned i) const {
163        return KernelInterface::getStreamOutput(i);
164    }
165
166    const Binding & getStreamOutput(const std::string & name) const {
167        const auto port = getStreamPort(name);
168        assert (port.first == Port::Output);
169        return KernelInterface::getStreamOutput(port.second);
170    }
171   
172    // Kernels typically perform block-at-a-time processing, but some kernels may require
173    // a different stride.   In the case of multiblock kernels, the stride attribute
174    // determines the number of minimum number of items that will be provided to the kernel
175    // on each doMultiBlock call.
176    //
177   
178    unsigned getStride() const { return mStride; }
179   
180    virtual ~Kernel() = 0;
181
182    void prepareKernel(const std::unique_ptr<KernelBuilder> & idb);
183
184    void prepareCachedKernel(const std::unique_ptr<KernelBuilder> & idb);
185
186    std::string getCacheName(const std::unique_ptr<KernelBuilder> & idb) const;
187
188protected:
189
190    virtual void addInternalKernelProperties(const std::unique_ptr<KernelBuilder> & idb) { }
191
192    void getDoSegmentFunctionArguments(const std::vector<llvm::Value *> & availItems) const;
193
194    // Constructor
195    Kernel(std::string && kernelName, Bindings && stream_inputs,
196          Bindings && stream_outputs,
197          Bindings && scalar_parameters,
198          Bindings && scalar_outputs,
199          Bindings && internal_scalars);
200
201    void setNoTerminateAttribute(const bool noTerminate = true) {
202        mNoTerminateAttribute = noTerminate;
203    }
204
205    llvm::Value * getPrincipalItemCount() const {
206        return mAvailablePrincipalItemCount;
207    }
208
209    unsigned getScalarIndex(const std::string & name) const;
210
211    void prepareStreamSetNameMap();
212   
213    void linkExternalMethods(const std::unique_ptr<kernel::KernelBuilder> &) override { }
214
215    virtual void generateInitializeMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) { }
216   
217    virtual void generateKernelMethod(const std::unique_ptr<KernelBuilder> & iBuilder) = 0;
218
219    virtual void generateFinalizeMethod(const std::unique_ptr<KernelBuilder> & iBuilder) { }
220
221    // Add an additional scalar field to the KernelState struct.
222    // Must occur before any call to addKernelDeclarations or createKernelModule.
223    unsigned addScalar(llvm::Type * type, const std::string & name);
224
225    unsigned addUnnamedScalar(llvm::Type * type);
226
227    void callGenerateInitializeMethod(const std::unique_ptr<KernelBuilder> & idb);
228
229    void callGenerateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & idb);
230
231    void callGenerateFinalizeMethod(const std::unique_ptr<KernelBuilder> & idb);
232
233    const parabix::StreamSetBuffer * getAnyStreamSetBuffer(const std::string & name) const {
234        unsigned index; Port port;
235        std::tie(port, index) = getStreamPort(name);
236        if (port == Port::Input) {
237            assert (index < mStreamSetInputBuffers.size());
238            assert (mStreamSetInputBuffers[index]);
239            return mStreamSetInputBuffers[index];
240        } else {
241            assert (index < mStreamSetOutputBuffers.size());
242            assert (mStreamSetOutputBuffers[index]);
243            return mStreamSetOutputBuffers[index];
244        }
245    }
246
247    void setStride(unsigned stride) { mStride = stride; }
248
249private:
250
251    void addBaseKernelProperties(const std::unique_ptr<KernelBuilder> & idb);
252
253    llvm::Value * getStreamSetInputAddress(const std::string & name) const {
254        const Kernel::StreamPort p = getStreamPort(name);
255        assert (p.first == Port::Input);
256        return mStreamSetInputBaseAddress[p.second];
257    }
258
259    llvm::Value * getStreamSetOutputAddress(const std::string & name) const {
260        const Kernel::StreamPort p = getStreamPort(name);
261        assert (p.first == Port::Output);
262        return mStreamSetOutputBaseAddress[p.second];
263    }
264
265    llvm::Value * getAvailableItemCount(const unsigned i) const {
266        return mAvailableItemCount[i];
267    }
268
269    void normalizeStreamProcessingRates();
270
271    bool normalizeRelativeToFixedProcessingRate(const ProcessingRate & base, ProcessingRate & toUpdate);
272
273protected:
274
275    llvm::Function *                    mCurrentMethod;
276    llvm::Value *                       mAvailablePrincipalItemCount;
277    bool                                mNoTerminateAttribute;
278    bool                                mIsGenerated;
279    unsigned                            mStride;
280    llvm::Value *                       mIsFinal;
281    llvm::Value *                       mOutputScalarResult;
282    std::vector<llvm::Value *>          mAvailableItemCount;
283
284    KernelFieldMap                      mKernelFieldMap;
285    std::vector<llvm::Type *>           mKernelFields;
286
287    StreamMap                           mStreamMap;
288
289    StreamSetBuffers                    mStreamSetInputBuffers;
290    std::vector<llvm::Value *>          mStreamSetInputBaseAddress;
291    StreamSetBuffers                    mStreamSetOutputBuffers;
292    std::vector<llvm::Value *>          mStreamSetOutputBaseAddress;
293};
294
295using Kernels = std::vector<Kernel *>;
296
297class SegmentOrientedKernel : public Kernel {
298protected:
299
300    SegmentOrientedKernel(std::string && kernelName,
301                          Bindings && stream_inputs,
302                          Bindings && stream_outputs,
303                          Bindings && scalar_parameters,
304                          Bindings && scalar_outputs,
305                          Bindings && internal_scalars);
306protected:
307
308    void generateKernelMethod(const std::unique_ptr<KernelBuilder> & b) final;
309
310    virtual void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & b) = 0;
311
312};
313
314/*
315The Multi-Block Kernel Builder
316------------------------------
317
318The Multi-Block Kernel Builder is designed to simplify the programming of
319efficient kernels with possibly variable and/or nonaligned output, subject to
320exact or MaxRatio processing constraints.   The following restrictions apply.
321
322#.  The input consists of one or more stream sets, the first of which is
323    known as the principal input stream set.
324
325#.  If there is more than one input stream set, the additional stream sets
326    are first classified as having either a derived processing rate or
327    a variable processing rate.   Stream sets with a derived processing rate
328    have a processing rate defined with respect to the input stream set of one
329    of the following types:  FixedRate, Add1 or RoundUp.    Note that stream sets
330    declared without a processing rate attribute have the FixedRate(1) attribute
331    by default and therefore satisfy this constraint.  All other processing rate
332    types are classified as variable rate.
333
334#.  All output stream sets must be declared with processing rate attributes
335    of one of the following types:
336    *  FixedRate, Add1, Roundup, or MaxRatio with respect to the principal input stream set.
337    *  FixedRate with respect to some other output stream set.
338
339    When using the Multi-Block Kernel Builder to program a new type of kernel,
340    the programmer must implement the generateDoMultiBlockMethod for normal
341    multi-block processing according to the requirements below, as well as
342    providing for special final block processing, if necessary.
343
344#.  The doMultiBlockMethod will be called with the following parameters:
345    * the number of items of the principal input stream to process (itemsToDo),
346    * additional items available parameters for each additional input stream set
347      that is classified as a variable rate stream set
348    * pointers to linear contiguous buffer areas for each of the input stream sets, and
349    * pointers to linear contiguous output buffer areas for each of the output stream sets.
350
351    Notes:
352    * if the kernel has a Lookahead dependency declared on any input stream set, then
353      there will be two buffer pointers for that stream set, one for accessing stream set
354      items without lookahead and one for accessing the items with lookahead.   
355    * pointers are to the beginning of the block corresponding to the
356      processedItemCount or producedItemCount of the given stream set.
357    * the base type of each pointer is the StreamSetBlockType of that streamset
358
359#.  The Multi-Block Kernel Builder will arrange that these input parameters may be
360    processed under the following simplifying assumptions.
361    * the number of itemsToDo will either be an exact multiple of the kernel stride,
362      or, for processing the final block, a value less than the kernel stride
363    * the input buffer of the principal stream set and all input buffers of stream sets
364      with derived processing rates will be safe to access and have data available in
365      accord with their processing rates based on the given number of itemsToDo
366      of the principal input stream set; no further bounds checking is needed. 
367    * input buffers of stream sets with MaxRatio attributes will be safe to access,
368      but will only have valid data as specified by the available items parameter for
369      that stream set.
370    * the kernel programmer is responsible for safe access and bounds checking for any
371      input stream set classified as Unknown rate.   No temporary buffers are used
372      for such stream sets.
373    * all output buffers will be safe to access and have space available
374      for the given maximum output generation rates based on the given number
375      of itemsToDo of the principal input stream set; no further bounds checking
376      is needed.
377    * for final block processing, all input buffers will be extended to be safely
378      treated as containing data corresponding to a full block of the principal
379      input stream set, with the actual data in each buffer padded with null values
380      beyond the end of input.  Similarly, all output buffers will contain space
381      sufficient for the maximum output that can be generated for a full block of
382      input processing.
383    * input and output pointers will be typed to allow convenient and logical access
384      to corresponding streams based on their declared stream set type and processing rate.
385    * for any input pointer p, a GEP instruction with a single int32 index i
386      will produce a pointer to the buffer position corresponding to the ith block of the
387      input stream set, relative to the initial block based on the processedItemCount.
388    * for any output stream set declared with a Fixed or Add1 processing rate with respect
389      to the principal input stream set, a GEP instruction with a single int32 index i
390      will produce a pointer to the buffer position corresponding to the ith block of the
391      stream set, relative to the initial block based on the producedItemCount.
392
393#.  Upon completion of multi-block processing, the Multi-Block Kernel Builder will arrange that
394    processed and produced item counts are updated for all stream sets that have exact
395    processing rate attributes.   Programmers are responsible for updating the counts
396    of any stream set declared with a variable attribute (MaxRatio or Unknown).
397
398#.  An important caveat is that buffer areas may change arbitrarily between
399    calls to the doMultiBlockMethod.   In no case should a kernel store a
400    buffer pointer in its internal state.   Furthermore a kernel must not make
401    any assumptions about the accessibility of stream set data outside of the
402    processing range outside of the block boundaries associated with the given itemsToDo.
403*/
404
405class MultiBlockKernel : public Kernel {
406protected:
407
408    MultiBlockKernel(std::string && kernelName,
409                     Bindings && stream_inputs,
410                     Bindings && stream_outputs,
411                     Bindings && scalar_parameters,
412                     Bindings && scalar_outputs,
413                     Bindings && internal_scalars);
414
415    // Each multi-block kernel subtype must provide its own logic for handling
416    // doMultiBlock calls, subject to the requirements laid out above.
417    // The generateMultiBlockLogic must be written to generate this logic, given
418    // a created but empty function.  Upon entry to generateMultiBlockLogic,
419    // the builder insertion point will be set to the entry block; upone
420    // exit the RetVoid instruction will be added to complete the method.
421    //
422    virtual llvm::Value * generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & b, llvm::Value * const numOfStrides) = 0;
423
424private:
425
426    // Given a kernel subtype with an appropriate interface, the generateDoSegment
427    // method of the multi-block kernel builder makes all the necessary arrangements
428    // to translate doSegment calls into a minimal sequence of doMultiBlock calls.
429    void generateKernelMethod(const std::unique_ptr<KernelBuilder> & b) final;
430
431    unsigned getItemAlignment(const Binding & binding) const;
432
433    ProcessingRate::RateValue getLowerBound(const ProcessingRate &rate) const;
434
435    ProcessingRate::RateValue getUpperBound(const ProcessingRate & rate) const;
436
437    bool isTransitivelyUnknownRate(const ProcessingRate & rate) const;
438
439    llvm::Value * getStrideSize(const std::unique_ptr<KernelBuilder> & b, const ProcessingRate & rate);
440
441    bool requiresCopyBack(const ProcessingRate & rate) const;
442
443    void reviseFinalProducedItemCounts(const std::unique_ptr<KernelBuilder> & b);
444
445protected:
446
447    std::vector<llvm::Value *>      mInitialAvailableItemCount;
448    std::vector<llvm::Value *>      mInitialProcessedItemCount;
449    std::vector<llvm::Value *>      mInitialProducedItemCount;
450
451};
452
453
454class BlockOrientedKernel : public MultiBlockKernel {
455protected:
456
457    void CreateDoBlockMethodCall(const std::unique_ptr<KernelBuilder> & b);
458
459    // Each kernel builder subtype must provide its own logic for generating
460    // doBlock calls.
461    virtual void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & b) = 0;
462
463    // Each kernel builder subtypre must also specify the logic for processing the
464    // final block of stream data, if there is any special processing required
465    // beyond simply calling the doBlock function.   In the case that the final block
466    // processing may be trivially implemented by dispatching to the doBlock method
467    // without additional preparation, the default generateFinalBlockMethod need
468    // not be overridden.
469
470    virtual void generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & b, llvm::Value * remainingItems);
471
472    BlockOrientedKernel(std::string && kernelName,
473                        Bindings && stream_inputs,
474                        Bindings && stream_outputs,
475                        Bindings && scalar_parameters,
476                        Bindings && scalar_outputs,
477                        Bindings && internal_scalars);
478
479private:
480
481    llvm::Value * generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & b, llvm::Value * const numOfStrides) final;
482
483    void writeDoBlockMethod(const std::unique_ptr<KernelBuilder> & b);
484
485    void writeFinalBlockMethod(const std::unique_ptr<KernelBuilder> & b, llvm::Value * remainingItems);
486
487    llvm::Value * getRemainingItems(const std::unique_ptr<KernelBuilder> & b);
488
489private:
490
491    llvm::Function *            mDoBlockMethod;
492    llvm::BasicBlock *          mStrideLoopBody;
493    llvm::IndirectBrInst *      mStrideLoopBranch;
494    llvm::PHINode *             mStrideLoopTarget;
495    llvm::PHINode *             mStrideBlockIndex;
496};
497
498}
499
500#endif
Note: See TracBrowser for help on using the repository browser.