source: icGREP/icgrep-devel/icgrep/kernels/kernel.h @ 5706

Last change on this file since 5706 was 5706, checked in by nmedfort, 23 months ago

First stage of MultiBlockKernel? and pipeline restructuring

File size: 19.5 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#ifndef KERNEL_H
7#define KERNEL_H
8
9#include "interface.h"
10#include <boost/container/flat_map.hpp>
11
12namespace llvm { class BasicBlock; }
13namespace llvm { class Function; }
14namespace llvm { class IntegerType; }
15namespace llvm { class IndirectBrInst; }
16namespace llvm { class PHINode; }
17namespace llvm { class LoadInst; }
18namespace llvm { class Type; }
19namespace llvm { class Value; }
20namespace parabix { class StreamSetBuffer; }
21
22namespace kernel {
23   
24class KernelBuilder;
25
26class Kernel : public KernelInterface {
27    friend class KernelBuilder;
28public:
29    enum class Port { Input, Output };
30
31    using StreamPort = std::pair<Port, unsigned>;
32
33protected:
34
35    using KernelMap = boost::container::flat_map<std::string, unsigned>;
36    using StreamMap = boost::container::flat_map<std::string, StreamPort>;
37    using StreamSetBuffers = std::vector<parabix::StreamSetBuffer *>;
38    using Kernels = std::vector<Kernel *>;
39
40    static const std::string DO_BLOCK_SUFFIX;
41    static const std::string FINAL_BLOCK_SUFFIX;
42    static const std::string MULTI_BLOCK_SUFFIX;
43    static const std::string LOGICAL_SEGMENT_NO_SCALAR;
44    static const std::string PROCESSED_ITEM_COUNT_SUFFIX;
45    static const std::string CONSUMED_ITEM_COUNT_SUFFIX;
46    static const std::string PRODUCED_ITEM_COUNT_SUFFIX;
47    static const std::string TERMINATION_SIGNAL;
48    static const std::string BUFFER_PTR_SUFFIX;
49    static const std::string CONSUMER_SUFFIX;
50    static const std::string CYCLECOUNT_SCALAR;
51
52public:
53   
54    // Kernel Signatures and Module IDs
55    //
56    // A kernel signature uniquely identifies a kernel and its full functionality.
57    // In the event that a particular kernel instance is to be generated and compiled
58    // to produce object code, and we have a cached kernel object code instance with
59    // the same signature and targetting the same IDISA architecture, then the cached
60    // object code may safely be used to avoid recompilation.
61    //
62    // A kernel signature is a byte string of arbitrary length.
63    //
64    // Kernel developers should take responsibility for designing appropriate signature
65    // mechanisms that are short, inexpensive to compute and guarantee uniqueness
66    // based on the semantics of the kernel. 
67    //
68    // If no other mechanism is available, the default makeSignature() method uses the
69    // full LLVM IR (before optimization) of the kernel instance.
70    //
71    // A kernel Module ID is short string that is used as a name for a particular kernel
72    // instance.  Kernel Module IDs are used to look up and retrieve cached kernel
73    // instances and so should be highly likely to uniquely identify a kernel instance.
74    //
75    // The ideal case is that a kernel Module ID serves as a full kernel signature thus
76    // guaranteeing uniqueness.  In this case, hasSignature() should return false.
77    //
78
79    //
80    // Kernel builder subtypes define their logic of kernel construction
81    // in terms of 3 virtual methods for
82    // (a) preparing the Kernel state data structure
83    // (c) defining the logic of the finalBlock function.
84    //
85    // Note: the kernel state data structure must only be finalized after
86    // all scalar fields have been added.   If there are no fields to
87    // be added, the default method for preparing kernel state may be used.
88
89       
90    bool isCachable() const override { return false; }
91
92    std::string makeSignature(const std::unique_ptr<KernelBuilder> & idb) override;
93
94    // Can the module ID itself serve as the unique signature?
95    virtual bool hasSignature() const { return true; }
96
97    // Create a module stub for the kernel, populated only with its Module ID.     
98    //
99
100    void bindPorts(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs);
101
102    StreamPort getStreamPort(const std::string & name) const;
103
104    llvm::Module * setModule(llvm::Module * const module);
105
106    llvm::Module * makeModule(const std::unique_ptr<kernel::KernelBuilder> & idb);
107
108    llvm::Module * getModule() const {
109        return mModule;
110    }
111
112    void generateKernel(const std::unique_ptr<kernel::KernelBuilder> & idb);
113   
114    llvm::Value * createInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) final;
115
116    void initializeInstance(const std::unique_ptr<KernelBuilder> & idb) final;
117
118    void finalizeInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) final;
119
120    bool hasNoTerminateAttribute() const {
121        return mNoTerminateAttribute;
122    }
123
124    const StreamSetBuffers & getStreamSetInputBuffers() const {
125        return mStreamSetInputBuffers;
126    }
127
128    const parabix::StreamSetBuffer * getStreamSetInputBuffer(const unsigned i) const {
129        return mStreamSetInputBuffers[i];
130    }
131
132    const StreamSetBuffers & getStreamSetOutputBuffers() const {
133        return mStreamSetOutputBuffers;
134    }
135
136    const parabix::StreamSetBuffer * getStreamSetOutputBuffer(const unsigned i) const {
137        return mStreamSetOutputBuffers[i];
138    }
139   
140    // Kernels typically perform block-at-a-time processing, but some kernels may require
141    // a different stride.   In the case of multiblock kernels, the stride attribute
142    // determines the number of minimum number of items that will be provided to the kernel
143    // on each doMultiBlock call.
144    //
145   
146    unsigned getKernelStride() const { return mStride; }
147   
148    virtual ~Kernel() = 0;
149
150    void prepareKernel(const std::unique_ptr<KernelBuilder> & idb);
151
152    void prepareCachedKernel(const std::unique_ptr<KernelBuilder> & idb);
153
154    std::string getCacheName(const std::unique_ptr<KernelBuilder> & idb) const;
155
156protected:
157
158    void setKernelStride(unsigned stride) { mStride = stride; }
159
160    virtual void addInternalKernelProperties(const std::unique_ptr<KernelBuilder> & idb) { }
161
162    void getDoSegmentFunctionArguments(const std::vector<llvm::Value *> & availItems) const;
163
164    // Constructor
165    Kernel(std::string && kernelName,
166                  std::vector<Binding> && stream_inputs,
167                  std::vector<Binding> && stream_outputs,
168                  std::vector<Binding> && scalar_parameters,
169                  std::vector<Binding> && scalar_outputs,
170                  std::vector<Binding> && internal_scalars);
171
172    void setNoTerminateAttribute(const bool noTerminate = true) {
173        mNoTerminateAttribute = noTerminate;
174    }
175
176    llvm::Value * getPrincipleItemCount() const {
177        return mAvailablePrincipleItemCount;
178    }
179
180    unsigned getScalarIndex(const std::string & name) const;
181
182    void prepareStreamSetNameMap();
183   
184    void linkExternalMethods(const std::unique_ptr<kernel::KernelBuilder> &) override { }
185
186    virtual void generateInitializeMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) { }
187   
188    virtual void generateKernelMethod(const std::unique_ptr<KernelBuilder> & iBuilder) = 0;
189
190    virtual void generateFinalizeMethod(const std::unique_ptr<KernelBuilder> & iBuilder) { }
191
192    // Add an additional scalar field to the KernelState struct.
193    // Must occur before any call to addKernelDeclarations or createKernelModule.
194    unsigned addScalar(llvm::Type * type, const std::string & name);
195
196    unsigned addUnnamedScalar(llvm::Type * type);
197
198    void callGenerateInitializeMethod(const std::unique_ptr<KernelBuilder> & idb);
199
200    void callGenerateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & idb);
201
202    void callGenerateFinalizeMethod(const std::unique_ptr<KernelBuilder> & idb);
203
204
205    std::pair<unsigned, unsigned> getStreamRate(const Port p, const unsigned i) const;
206
207    const parabix::StreamSetBuffer * getInputStreamSetBuffer(const std::string & name) const {
208        const auto port = getStreamPort(name);
209        assert (port.first == Port::Input);
210        assert (port.second < mStreamSetInputBuffers.size());
211        assert (mStreamSetInputBuffers[port.second]);
212        return mStreamSetInputBuffers[port.second];
213    }
214
215    const parabix::StreamSetBuffer * getOutputStreamSetBuffer(const std::string & name) const {
216        const auto port = getStreamPort(name);
217        assert (port.first == Port::Output);
218        assert (port.second < mStreamSetOutputBuffers.size());
219        assert (mStreamSetOutputBuffers[port.second]);
220        return mStreamSetOutputBuffers[port.second];
221    }
222
223    const parabix::StreamSetBuffer * getAnyStreamSetBuffer(const std::string & name) const {
224        unsigned index; Port port;
225        std::tie(port, index) = getStreamPort(name);
226        if (port == Port::Input) {
227            assert (index < mStreamSetInputBuffers.size());
228            assert (mStreamSetInputBuffers[index]);
229            return mStreamSetInputBuffers[index];
230        } else {
231            assert (index < mStreamSetOutputBuffers.size());
232            assert (mStreamSetOutputBuffers[index]);
233            return mStreamSetOutputBuffers[index];
234        }
235    }
236
237    llvm::Value * getStreamSetInputBufferPtr(const unsigned i) const {
238        return mStreamSetInputBufferPtr[i];
239    }
240
241    llvm::Value * getStreamSetOutputBufferPtr(const unsigned i) const {
242        return mStreamSetOutputBufferPtr[i];
243    }
244
245private:
246
247    void addBaseKernelProperties(const std::unique_ptr<KernelBuilder> & idb);
248
249    llvm::Value * getAvailableItemCount(const unsigned i) const {
250        return mAvailableItemCount[i];
251    }
252
253protected:
254
255    llvm::Function *                    mCurrentMethod;
256    llvm::Value *                       mAvailablePrincipleItemCount;
257    bool                                mNoTerminateAttribute;
258    bool                                mIsGenerated;
259    unsigned                            mStride;
260    llvm::Value *                       mIsFinal;
261    llvm::Value *                       mOutputScalarResult;
262
263
264    std::vector<llvm::Value *>          mAvailableItemCount;
265
266    std::vector<llvm::Type *>           mKernelFields;
267    KernelMap                           mKernelMap;
268    StreamMap                           mStreamMap;
269    StreamSetBuffers                    mStreamSetInputBuffers;
270    std::vector<llvm::Value *>          mStreamSetInputBufferPtr;
271    StreamSetBuffers                    mStreamSetOutputBuffers;
272    std::vector<llvm::Value *>          mStreamSetOutputBufferPtr;
273
274};
275
276class SegmentOrientedKernel : public Kernel {
277protected:
278
279    SegmentOrientedKernel(std::string && kernelName,
280                          std::vector<Binding> && stream_inputs,
281                          std::vector<Binding> && stream_outputs,
282                          std::vector<Binding> && scalar_parameters,
283                          std::vector<Binding> && scalar_outputs,
284                          std::vector<Binding> && internal_scalars);
285protected:
286
287    void generateKernelMethod(const std::unique_ptr<KernelBuilder> & b) final;
288
289    virtual void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & kb) = 0;
290
291};
292
293/*
294The Multi-Block Kernel Builder
295------------------------------
296
297The Multi-Block Kernel Builder is designed to simplify the programming of
298efficient kernels with possibly variable and/or nonaligned output, subject to
299exact or MaxRatio processing constraints.   The following restrictions apply.
300
301#.  The input consists of one or more stream sets, the first of which is
302    known as the principal input stream set.
303
304#.  If there is more than one input stream set, the additional stream sets
305    are first classified as having either a derived processing rate or
306    a variable processing rate.   Stream sets with a derived processing rate
307    have a processing rate defined with respect to the principal input stream set
308    of one of the following types:  FixedRate, Add1 or RoundUp.    Note that stream sets
309    declared without a processing rate attribute have the FixedRate(1) attribute
310    by default and therefore satisfy this constraint.  All other processing rate
311    types are classified as variable rate.
312
313#.  All output stream sets must be declared with processing rate attributes
314    of one of the following types:
315    *  FixedRate, Add1, RoundUp, or MaxRatio with respect to the principal input stream set.
316    *  FixedRate with respect to some other output stream set.
317
318    When using the Multi-Block Kernel Builder to program a new type of kernel,
319    the programmer must implement the generateMultiBlockLogic method for normal
320    multi-block processing according to the requirements below, as well as
321    providing for special final block processing, if necessary.
322
323#.  The doMultiBlockMethod will be called with the following parameters:
324    * the number of items of the principal input stream to process (itemsToDo),
325    * additional items available parameters for each additional input stream set
326      that is classified as a variable rate stream set
327    * pointers to linear contiguous buffer areas for each of the input stream sets, and
328    * pointers to linear contiguous output buffer areas for each of the output stream sets.
329
330    Notes:
331    * if the kernel has a Lookahead dependency declared on any input stream set, then
332      there will be two buffer pointers for that stream set, one for accessing stream set
333      items without lookahead and one for accessing the items with lookahead.   
334    * pointers are to the beginning of the block corresponding to the
335      processedItemCount or producedItemCount of the given stream set.
336    * the base type of each pointer is the StreamSetBlockType of that streamset
337
338#.  The Multi-Block Kernel Builder will arrange that these input parameters may be
339    processed under the following simplifying assumptions.
340    * the number of itemsToDo will either be an exact multiple of the kernel stride,
341      or, for processing the final block, a value less than the kernel stride
342    * the input buffer of the principal stream set and all input buffers of stream sets
343      with derived processing rates will be safe to access and have data available in
344      accord with their processing rates based on the given number of itemsToDo
345      of the principal input stream set; no further bounds checking is needed. 
346    * input buffers of stream sets with MaxRatio attributes will be safe to access,
347      but will only have valid data as specified by the available items parameter for
348      that stream set.
349    * the kernel programmer is responsible for safe access and bounds checking for any
350      input stream set classified as Unknown rate.   No temporary buffers are used
351      for such stream sets.
352    * all output buffers will be safe to access and have space available
353      for the given maximum output generation rates based on the given number
354      of itemsToDo of the principal input stream set; no further bounds checking
355      is needed.
356    * for final block processing, all input buffers will be extended to be safely
357      treated as containing data corresponding to a full block of the principal
358      input stream set, with the actual data in each buffer padded with null values
359      beyond the end of input.  Similarly, all output buffers will contain space
360      sufficient for the maximum output that can be generated for a full block of
361      input processing.
362    * input and output pointers will be typed to allow convenient and logical access
363      to corresponding streams based on their declared stream set type and processing rate.
364    * for any input pointer p, a GEP instruction with a single int32 index i
365      will produce a pointer to the buffer position corresponding to the ith block of the
366      input stream set, relative to the initial block based on the processedItemCount.
367    * for any output stream set declared with a Fixed or Add1 processing rate with respect
368      to the principal input stream set, a GEP instruction with a single int32 index i
369      will produce a pointer to the buffer position corresponding to the ith block of the
370      stream set, relative to the initial block based on the producedItemCount.
371
372#.  Upon completion of multi-block processing, the Multi-Block Kernel Builder will arrange that
373    processed and produced item counts are updated for all stream sets that have exact
374    processing rate attributes.   Programmers are responsible for updating the counts
375    of any stream set declared with a variable attribute (MaxRatio or Unknown).
376
377#.  An important caveat is that buffer areas may change arbitrarily between
378    calls to the doMultiBlockMethod.   In no case should a kernel store a
379    buffer pointer in its internal state.   Furthermore a kernel must not make
380    any assumptions about the accessibility of stream set data outside of the
381    processing range, i.e. outside of the block boundaries associated with the given itemsToDo.
382*/
383
384class MultiBlockKernel : public Kernel {
385protected:
386
387    MultiBlockKernel(std::string && kernelName,
388                     std::vector<Binding> && stream_inputs,
389                     std::vector<Binding> && stream_outputs,
390                     std::vector<Binding> && scalar_parameters,
391                     std::vector<Binding> && scalar_outputs,
392                     std::vector<Binding> && internal_scalars);
393
394    // Each multi-block kernel subtype must provide its own logic for handling
395    // doMultiBlock calls, subject to the requirements laid out above.
396    // The generateMultiBlockLogic must be written to generate this logic, given
397    // a created but empty function.  Upon entry to generateMultiBlockLogic,
398    // the builder insertion point will be set to the entry block; upone
399    // exit the RetVoid instruction will be added to complete the method.
400    //
401    virtual void generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & idb, llvm::Value * const numOfStrides) = 0;
402
403private:
404
405    // Given a kernel subtype with an appropriate interface, the generateDoSegment
406    // method of the multi-block kernel builder makes all the necessary arrangements
407    // to translate doSegment calls into a minimal sequence of doMultiBlock calls.
408    void generateKernelMethod(const std::unique_ptr<KernelBuilder> & kb) final;
409
410    bool requiresCopyBack(const ProcessingRate & rate) const;
411
412};
413
414
415class BlockOrientedKernel : public MultiBlockKernel {
416protected:
417
418    void CreateDoBlockMethodCall(const std::unique_ptr<KernelBuilder> & idb);
419
420    // Each kernel builder subtype must provide its own logic for generating
421    // doBlock calls.
422    virtual void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & idb) = 0;
423
424    // Each kernel builder subtypre must also specify the logic for processing the
425    // final block of stream data, if there is any special processing required
426    // beyond simply calling the doBlock function.   In the case that the final block
427    // processing may be trivially implemented by dispatching to the doBlock method
428    // without additional preparation, the default generateFinalBlockMethod need
429    // not be overridden.
430
431    virtual void generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & idb, llvm::Value * remainingItems);
432
433    void generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & idb, llvm::Value * const numOfStrides) final;
434
435    BlockOrientedKernel(std::string && kernelName,
436                        std::vector<Binding> && stream_inputs,
437                        std::vector<Binding> && stream_outputs,
438                        std::vector<Binding> && scalar_parameters,
439                        std::vector<Binding> && scalar_outputs,
440                        std::vector<Binding> && internal_scalars);
441
442private:
443
444    void writeDoBlockMethod(const std::unique_ptr<KernelBuilder> & idb);
445
446    void writeFinalBlockMethod(const std::unique_ptr<KernelBuilder> & idb, llvm::Value * remainingItems);
447
448private:
449
450    llvm::Function *        mDoBlockMethod;
451    llvm::BasicBlock *      mStrideLoopBody;
452    llvm::IndirectBrInst *  mStrideLoopBranch;
453    llvm::PHINode *         mStrideLoopTarget;
454};
455
456}
457
458#endif
Note: See TracBrowser for help on using the repository browser.