source: icGREP/icgrep-devel/icgrep/kernels/kernel.h @ 5772

Last change on this file since 5772 was 5757, checked in by nmedfort, 19 months ago

Bug fixes + more assertions to prevent similar errors.

File size: 20.9 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#ifndef KERNEL_H
7#define KERNEL_H
8
9#include "interface.h"
10#include <boost/container/flat_map.hpp>
11
// Forward declarations of the LLVM types named in this header.
namespace llvm {
class BasicBlock;
class Constant;
class Function;
class IntegerType;
class IndirectBrInst;
class PHINode;
class LoadInst;
class Type;
class Value;
}

// Forward declaration of the stream set buffer type held by kernel ports.
namespace parabix {
class StreamSetBuffer;
}
22
23namespace kernel {
24   
25class KernelBuilder;
26
27class Kernel : public KernelInterface {
28    friend class KernelBuilder;
29protected:
30
31    static const std::string DO_BLOCK_SUFFIX;
32    static const std::string FINAL_BLOCK_SUFFIX;
33    static const std::string MULTI_BLOCK_SUFFIX;
34    static const std::string LOGICAL_SEGMENT_NO_SCALAR;
35    static const std::string PROCESSED_ITEM_COUNT_SUFFIX;
36    static const std::string CONSUMED_ITEM_COUNT_SUFFIX;
37    static const std::string PRODUCED_ITEM_COUNT_SUFFIX;
38    static const std::string TERMINATION_SIGNAL;
39    static const std::string BUFFER_PTR_SUFFIX;
40    static const std::string CONSUMER_SUFFIX;
41    static const std::string CYCLECOUNT_SCALAR;
42
43public:
44   
45    enum class Port { Input, Output };
46    using StreamPort = std::pair<Port, unsigned>;
47    using StreamMap = boost::container::flat_map<std::string, StreamPort>;
48    using KernelFieldMap = boost::container::flat_map<std::string, unsigned>;
49    using StreamSetBuffers = std::vector<parabix::StreamSetBuffer *>;
50
51    // Kernel Signatures and Module IDs
52    //
53    // A kernel signature uniquely identifies a kernel and its full functionality.
54    // In the event that a particular kernel instance is to be generated and compiled
55    // to produce object code, and we have a cached kernel object code instance with
56    // the same signature and targetting the same IDISA architecture, then the cached
57    // object code may safely be used to avoid recompilation.
58    //
59    // A kernel signature is a byte string of arbitrary length.
60    //
61    // Kernel developers should take responsibility for designing appropriate signature
62    // mechanisms that are short, inexpensive to compute and guarantee uniqueness
63    // based on the semantics of the kernel.
64    //
65    // If no other mechanism is available, the default makeSignature() method uses the
66    // full LLVM IR (before optimization) of the kernel instance.
67    //
68    // A kernel Module ID is short string that is used as a name for a particular kernel
69    // instance.  Kernel Module IDs are used to look up and retrieve cached kernel
70    // instances and so should be highly likely to uniquely identify a kernel instance.
71    //
72    // The ideal case is that a kernel Module ID serves as a full kernel signature thus
73    // guaranteeing uniqueness.  In this case, hasSignature() should return false.
74    //
75
76    //
77    // Kernel builder subtypes define their logic of kernel construction
78    // in terms of 3 virtual methods for
79    // (a) preparing the Kernel state data structure
80    // (c) defining the logic of the finalBlock function.
81    //
82    // Note: the kernel state data structure must only be finalized after
83    // all scalar fields have been added.   If there are no fields to
84    // be added, the default method for preparing kernel state may be used.
85
86
87    std::string makeSignature(const std::unique_ptr<KernelBuilder> & idb) override;
88
89    // Can the module ID itself serve as the unique signature?
90    virtual bool hasSignature() const { return true; }
91
92    bool isCachable() const override { return false; }
93
94    void bindPorts(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs);
95
96    llvm::Module * setModule(llvm::Module * const module);
97
98    llvm::Module * makeModule(const std::unique_ptr<kernel::KernelBuilder> & idb);
99
100    llvm::Module * getModule() const {
101        return mModule;
102    }
103
104    void generateKernel(const std::unique_ptr<kernel::KernelBuilder> & idb);
105   
106    llvm::Value * createInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) final;
107
108    void initializeInstance(const std::unique_ptr<KernelBuilder> & idb) final;
109
110    void finalizeInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) final;
111
112    bool hasNoTerminateAttribute() const {
113        return mNoTerminateAttribute;
114    }
115
116    StreamPort getStreamPort(const std::string & name) const;
117
118    const Binding & getBinding(const std::string & name) const;
119
120    ProcessingRate::RateValue getLowerBound(const ProcessingRate &rate) const;
121
122    ProcessingRate::RateValue getUpperBound(const ProcessingRate & rate) const;
123
124    const StreamSetBuffers & getStreamSetInputBuffers() const {
125        return mStreamSetInputBuffers;
126    }
127
128    const parabix::StreamSetBuffer * getStreamSetInputBuffer(const unsigned i) const {
129        assert (i < mStreamSetInputBuffers.size());
130        assert (mStreamSetInputBuffers[i]);
131        return mStreamSetInputBuffers[i];
132    }
133
134    const parabix::StreamSetBuffer * getInputStreamSetBuffer(const std::string & name) const {
135        const auto port = getStreamPort(name);
136        assert (port.first == Port::Input);
137        return getStreamSetInputBuffer(port.second);
138    }
139
140    const StreamSetBuffers & getStreamSetOutputBuffers() const {
141        return mStreamSetOutputBuffers;
142    }
143
144    const Binding & getStreamInput(const unsigned i) const {
145        return KernelInterface::getStreamInput(i);
146    }
147
148    const Binding & getStreamInput(const std::string & name) const {
149        const auto port = getStreamPort(name);
150        assert (port.first == Port::Input);
151        return KernelInterface::getStreamInput(port.second);
152    }
153
154    const parabix::StreamSetBuffer * getStreamSetOutputBuffer(const unsigned i) const {
155        assert (i < mStreamSetOutputBuffers.size());
156        assert (mStreamSetOutputBuffers[i]);
157        return mStreamSetOutputBuffers[i];
158    }
159
160    const parabix::StreamSetBuffer * getOutputStreamSetBuffer(const std::string & name) const {
161        const auto port = getStreamPort(name);
162        assert (port.first == Port::Output);
163        return getStreamSetOutputBuffer(port.second);
164    }
165
166    const Binding & getStreamOutput(const unsigned i) const {
167        return KernelInterface::getStreamOutput(i);
168    }
169
170    const Binding & getStreamOutput(const std::string & name) const {
171        const auto port = getStreamPort(name);
172        assert (port.first == Port::Output);
173        return KernelInterface::getStreamOutput(port.second);
174    }
175   
176    // Kernels typically perform block-at-a-time processing, but some kernels may require
177    // a different stride.   In the case of multiblock kernels, the stride attribute
178    // determines the number of minimum number of items that will be provided to the kernel
179    // on each doMultiBlock call.
180    //
181   
182    unsigned getStride() const { return mStride; }
183   
184    virtual ~Kernel() = 0;
185
186    void prepareKernel(const std::unique_ptr<KernelBuilder> & idb);
187
188    void prepareCachedKernel(const std::unique_ptr<KernelBuilder> & idb);
189
190    std::string getCacheName(const std::unique_ptr<KernelBuilder> & idb) const;
191
192protected:
193
194    virtual void addInternalKernelProperties(const std::unique_ptr<KernelBuilder> & idb) { }
195
196    void getDoSegmentFunctionArguments(const std::vector<llvm::Value *> & availItems) const;
197
198    // Constructor
199    Kernel(std::string && kernelName, Bindings && stream_inputs,
200          Bindings && stream_outputs,
201          Bindings && scalar_parameters,
202          Bindings && scalar_outputs,
203          Bindings && internal_scalars);
204
205    void setNoTerminateAttribute(const bool noTerminate = true) {
206        mNoTerminateAttribute = noTerminate;
207    }
208
209    llvm::Value * getPrincipalItemCount() const {
210        return mAvailablePrincipalItemCount;
211    }
212
213    unsigned getScalarIndex(const std::string & name) const;
214
215    void prepareStreamSetNameMap();
216   
217    void linkExternalMethods(const std::unique_ptr<kernel::KernelBuilder> &) override { }
218
219    virtual void generateInitializeMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) { }
220   
221    virtual void generateKernelMethod(const std::unique_ptr<KernelBuilder> & iBuilder) = 0;
222
223    virtual void generateFinalizeMethod(const std::unique_ptr<KernelBuilder> & iBuilder) { }
224
225    // Add an additional scalar field to the KernelState struct.
226    // Must occur before any call to addKernelDeclarations or createKernelModule.
227    unsigned addScalar(llvm::Type * type, const std::string & name);
228
229    unsigned addUnnamedScalar(llvm::Type * type);
230
231    void callGenerateInitializeMethod(const std::unique_ptr<KernelBuilder> & idb);
232
233    void callGenerateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & idb);
234
235    void callGenerateFinalizeMethod(const std::unique_ptr<KernelBuilder> & idb);
236
237    const parabix::StreamSetBuffer * getAnyStreamSetBuffer(const std::string & name) const {
238        unsigned index; Port port;
239        std::tie(port, index) = getStreamPort(name);
240        if (port == Port::Input) {
241            assert (index < mStreamSetInputBuffers.size());
242            assert (mStreamSetInputBuffers[index]);
243            return mStreamSetInputBuffers[index];
244        } else {
245            assert (index < mStreamSetOutputBuffers.size());
246            assert (mStreamSetOutputBuffers[index]);
247            return mStreamSetOutputBuffers[index];
248        }
249    }
250
251    void setStride(unsigned stride) { mStride = stride; }
252
253private:
254
255    void addBaseKernelProperties(const std::unique_ptr<KernelBuilder> & idb);
256
257    llvm::Value * getStreamSetInputAddress(const std::string & name) const {
258        const Kernel::StreamPort p = getStreamPort(name);
259        assert (p.first == Port::Input);
260        return mStreamSetInputBaseAddress[p.second];
261    }
262
263    llvm::Value * getStreamSetOutputAddress(const std::string & name) const {
264        const Kernel::StreamPort p = getStreamPort(name);
265        assert (p.first == Port::Output);
266        return mStreamSetOutputBaseAddress[p.second];
267    }
268
269    llvm::Value * getAvailableItemCount(const unsigned i) const {
270        return mAvailableItemCount[i];
271    }
272
273    void normalizeStreamProcessingRates();
274
275    bool normalizeRelativeToFixedProcessingRate(const ProcessingRate & base, ProcessingRate & toUpdate);
276
277protected:
278
279    llvm::Function *                    mCurrentMethod;
280    llvm::Value *                       mAvailablePrincipalItemCount;
281    bool                                mNoTerminateAttribute;
282    bool                                mIsGenerated;
283    unsigned                            mStride;
284    llvm::Value *                       mIsFinal;
285    llvm::Value *                       mOutputScalarResult;
286    std::vector<llvm::Value *>          mAvailableItemCount;
287
288    KernelFieldMap                      mKernelFieldMap;
289    std::vector<llvm::Type *>           mKernelFields;
290
291    StreamMap                           mStreamMap;
292
293    StreamSetBuffers                    mStreamSetInputBuffers;
294    std::vector<llvm::Value *>          mStreamSetInputBaseAddress;
295    StreamSetBuffers                    mStreamSetOutputBuffers;
296    std::vector<llvm::Value *>          mStreamSetOutputBaseAddress;
297};
298
299using Kernels = std::vector<Kernel *>;
300
301class SegmentOrientedKernel : public Kernel {
302protected:
303
304    SegmentOrientedKernel(std::string && kernelName,
305                          Bindings && stream_inputs,
306                          Bindings && stream_outputs,
307                          Bindings && scalar_parameters,
308                          Bindings && scalar_outputs,
309                          Bindings && internal_scalars);
310protected:
311
312    void generateKernelMethod(const std::unique_ptr<KernelBuilder> & b) final;
313
314    virtual void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & b) = 0;
315
316};
317
318/*
319The Multi-Block Kernel Builder
320------------------------------
321
322The Multi-Block Kernel Builder is designed to simplify the programming of
323efficient kernels with possibly variable and/or nonaligned output, subject to
324exact or MaxRatio processing constraints.   The following restrictions apply.
325
326#.  The input consists of one or more stream sets, the first of which is
327    known as the principal input stream set.
328
329#.  If there is more than one input stream set, the additional stream sets
330    are first classified as having either a derived processing rate or
331    a variable processing rate.   Stream sets with a derived processing rate
332    have a processing rate defined with respect to the input stream set of one
333    of the following types:  FixedRate, Add1 or RoundUp.    Note that stream sets
334    declared without a processing rate attribute have the FixedRate(1) attribute
335    by default and therefore satisfy this constraint.  All other processing rate
336    types are classified as variable rate.
337
338#.  All output stream sets must be declared with processing rate attributes
339    of one of the following types:
340    *  FixedRate, Add1, RoundUp, or MaxRatio with respect to the principal input stream set.
341    *  FixedRate with respect to some other output stream set.
342
343    When using the Multi-Block Kernel Builder to program a new type of kernel,
344    the programmer must implement the generateMultiBlockLogic method for normal
345    multi-block processing according to the requirements below, as well as
346    providing for special final block processing, if necessary.
347
348#.  The doMultiBlockMethod will be called with the following parameters:
349    * the number of items of the principal input stream to process (itemsToDo),
350    * additional items available parameters for each additional input stream set
351      that is classified as a variable rate stream set
352    * pointers to linear contiguous buffer areas for each of the input stream sets, and
353    * pointers to linear contiguous output buffer areas for each of the output stream sets.
354
355    Notes:
356    * if the kernel has a Lookahead dependency declared on any input stream set, then
357      there will be two buffer pointers for that stream set, one for accessing stream set
358      items without lookahead and one for accessing the items with lookahead.   
359    * pointers are to the beginning of the block corresponding to the
360      processedItemCount or producedItemCount of the given stream set.
361    * the base type of each pointer is the StreamSetBlockType of that streamset
362
363#.  The Multi-Block Kernel Builder will arrange that these input parameters may be
364    processed under the following simplifying assumptions.
365    * the number of itemsToDo will either be an exact multiple of the kernel stride,
366      or, for processing the final block, a value less than the kernel stride
367    * the input buffer of the principal stream set and all input buffers of stream sets
368      with derived processing rates will be safe to access and have data available in
369      accord with their processing rates based on the given number of itemsToDo
370      of the principal input stream set; no further bounds checking is needed. 
371    * input buffers of stream sets with MaxRatio attributes will be safe to access,
372      but will only have valid data as specified by the available items parameter for
373      that stream set.
374    * the kernel programmer is responsible for safe access and bounds checking for any
375      input stream set classified as Unknown rate.   No temporary buffers are used
376      for such stream sets.
377    * all output buffers will be safe to access and have space available
378      for the given maximum output generation rates based on the given number
379      of itemsToDo of the principal input stream set; no further bounds checking
380      is needed.
381    * for final block processing, all input buffers will be extended to be safely
382      treated as containing data corresponding to a full block of the principal
383      input stream set, with the actual data in each buffer padded with null values
384      beyond the end of input.  Similarly, all output buffers will contain space
385      sufficient for the maximum output that can be generated for a full block of
386      input processing.
387    * input and output pointers will be typed to allow convenient and logical access
388      to corresponding streams based on their declared stream set type and processing rate.
389    * for any input pointer p, a GEP instruction with a single int32 index i
390      will produce a pointer to the buffer position corresponding to the ith block of the
391      input stream set, relative to the initial block based on the processedItemCount.
392    * for any output stream set declared with a Fixed or Add1 processing rate with respect
393      to the principal input stream set, a GEP instruction with a single int32 index i
394      will produce a pointer to the buffer position corresponding to the ith block of the
395      stream set, relative to the initial block based on the producedItemCount.
396
397#.  Upon completion of multi-block processing, the Multi-Block Kernel Builder will arrange that
398    processed and produced item counts are updated for all stream sets that have exact
399    processing rate attributes.   Programmers are responsible for updating the counts
400    of any stream set declared with a variable attribute (MaxRatio or Unknown).
401
402#.  An important caveat is that buffer areas may change arbitrarily between
403    calls to the doMultiBlockMethod.   In no case should a kernel store a
404    buffer pointer in its internal state.   Furthermore, a kernel must not make
405    any assumptions about the accessibility of stream set data outside of the
406    processing range delimited by the block boundaries associated with the given itemsToDo.
407*/
408
409class MultiBlockKernel : public Kernel {
410protected:
411
412    MultiBlockKernel(std::string && kernelName,
413                     Bindings && stream_inputs,
414                     Bindings && stream_outputs,
415                     Bindings && scalar_parameters,
416                     Bindings && scalar_outputs,
417                     Bindings && internal_scalars);
418
419    // Each multi-block kernel subtype must provide its own logic for handling
420    // doMultiBlock calls, subject to the requirements laid out above.
421    // The generateMultiBlockLogic must be written to generate this logic, given
422    // a created but empty function.  Upon entry to generateMultiBlockLogic,
423    // the builder insertion point will be set to the entry block; upone
424    // exit the RetVoid instruction will be added to complete the method.
425    //
426    virtual llvm::Value * generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & b, llvm::Value * const numOfStrides) = 0;
427
428private:
429
430    // Given a kernel subtype with an appropriate interface, the generateDoSegment
431    // method of the multi-block kernel builder makes all the necessary arrangements
432    // to translate doSegment calls into a minimal sequence of doMultiBlock calls.
433    void generateKernelMethod(const std::unique_ptr<KernelBuilder> & b) final;
434
435    unsigned getItemAlignment(const Binding & binding) const;
436
437    bool isTransitivelyUnknownRate(const ProcessingRate & rate) const;
438
439    llvm::Value * getStrideSize(const std::unique_ptr<KernelBuilder> & b, const ProcessingRate & rate);
440
441    bool requiresCopyBack(const ProcessingRate & rate) const;
442
443    void reviseFinalProducedItemCounts(const std::unique_ptr<KernelBuilder> & b);
444
445protected:
446
447    std::vector<llvm::Value *>      mInitialAvailableItemCount;
448    std::vector<llvm::Value *>      mInitialProcessedItemCount;
449    std::vector<llvm::Value *>      mInitialProducedItemCount;
450
451};
452
453
454class BlockOrientedKernel : public MultiBlockKernel {
455protected:
456
457    void CreateDoBlockMethodCall(const std::unique_ptr<KernelBuilder> & b);
458
459    // Each kernel builder subtype must provide its own logic for generating
460    // doBlock calls.
461    virtual void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & b) = 0;
462
463    // Each kernel builder subtypre must also specify the logic for processing the
464    // final block of stream data, if there is any special processing required
465    // beyond simply calling the doBlock function.   In the case that the final block
466    // processing may be trivially implemented by dispatching to the doBlock method
467    // without additional preparation, the default generateFinalBlockMethod need
468    // not be overridden.
469
470    virtual void generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & b, llvm::Value * remainingItems);
471
472    BlockOrientedKernel(std::string && kernelName,
473                        Bindings && stream_inputs,
474                        Bindings && stream_outputs,
475                        Bindings && scalar_parameters,
476                        Bindings && scalar_outputs,
477                        Bindings && internal_scalars);
478
479private:
480
481    llvm::Value * generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & b, llvm::Value * const numOfStrides) final;
482
483    void writeDoBlockMethod(const std::unique_ptr<KernelBuilder> & b);
484
485    void writeFinalBlockMethod(const std::unique_ptr<KernelBuilder> & b, llvm::Value * remainingItems);
486
487    llvm::Value * getRemainingItems(const std::unique_ptr<KernelBuilder> & b);
488
489private:
490
491    llvm::Function *            mDoBlockMethod;
492    llvm::BasicBlock *          mStrideLoopBody;
493    llvm::IndirectBrInst *      mStrideLoopBranch;
494    llvm::PHINode *             mStrideLoopTarget;
495    llvm::PHINode *             mStrideBlockIndex;
496};
497
498}
499
500#endif
Note: See TracBrowser for help on using the repository browser.