source: icGREP/icgrep-devel/icgrep/kernels/kernel.h @ 5615

Last change on this file since 5615 was 5615, checked in by cameron, 2 years ago

Automatic expansion of dynamic buffers in pipeline

File size: 18.6 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#ifndef KERNEL_H
7#define KERNEL_H
8
9#include "interface.h"
10#include <boost/container/flat_map.hpp>
11#include <llvm/IR/Constants.h>
12
13namespace llvm { class Function; }
14namespace llvm { class IntegerType; }
15namespace llvm { class IndirectBrInst; }
16namespace llvm { class PHINode; }
17namespace llvm { class LoadInst; }
18namespace llvm { class Type; }
19namespace llvm { class Value; }
20namespace parabix { class StreamSetBuffer; }
21
22namespace kernel {
23   
24class KernelBuilder;
25
26class Kernel : public KernelInterface {
27    friend class KernelBuilder;
28public:
29    enum class Port { Input, Output };
30protected:
31    using KernelMap = boost::container::flat_map<std::string, unsigned>;
32    using StreamPort = std::pair<Port, unsigned>;
33    using StreamMap = boost::container::flat_map<std::string, StreamPort>;
34    using StreamSetBuffers = std::vector<parabix::StreamSetBuffer *>;
35    using Kernels = std::vector<Kernel *>;
36
37    static const std::string DO_BLOCK_SUFFIX;
38    static const std::string FINAL_BLOCK_SUFFIX;
39    static const std::string MULTI_BLOCK_SUFFIX;
40    static const std::string LOGICAL_SEGMENT_NO_SCALAR;
41    static const std::string PROCESSED_ITEM_COUNT_SUFFIX;
42    static const std::string CONSUMED_ITEM_COUNT_SUFFIX;
43    static const std::string PRODUCED_ITEM_COUNT_SUFFIX;
44    static const std::string TERMINATION_SIGNAL;
45    static const std::string BUFFER_PTR_SUFFIX;
46    static const std::string CONSUMER_SUFFIX;
47public:
48    static const std::string CYCLECOUNT_SCALAR;
49
50public:
51   
52    // Kernel Signatures and Module IDs
53    //
54    // A kernel signature uniquely identifies a kernel and its full functionality.
55    // In the event that a particular kernel instance is to be generated and compiled
56    // to produce object code, and we have a cached kernel object code instance with
57    // the same signature and targetting the same IDISA architecture, then the cached
58    // object code may safely be used to avoid recompilation.
59    //
60    // A kernel signature is a byte string of arbitrary length.
61    //
62    // Kernel developers should take responsibility for designing appropriate signature
63    // mechanisms that are short, inexpensive to compute and guarantee uniqueness
64    // based on the semantics of the kernel. 
65    //
66    // If no other mechanism is available, the default makeSignature() method uses the
67    // full LLVM IR (before optimization) of the kernel instance.
68    //
69    // A kernel Module ID is short string that is used as a name for a particular kernel
70    // instance.  Kernel Module IDs are used to look up and retrieve cached kernel
71    // instances and so should be highly likely to uniquely identify a kernel instance.
72    //
73    // The ideal case is that a kernel Module ID serves as a full kernel signature thus
74    // guaranteeing uniqueness.  In this case, hasSignature() should return false.
75    //
76       
77    bool isCachable() const override { return false; }
78
79    std::string makeSignature(const std::unique_ptr<KernelBuilder> & idb) override;
80
81    // Can the module ID itself serve as the unique signature?
82    virtual bool hasSignature() const { return true; }
83
84    // Create a module stub for the kernel, populated only with its Module ID.     
85    //
86
87    void bindPorts(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs);
88
89    StreamPort getStreamPort(const std::string & name) const;
90   
91    llvm::Module * makeModule(const std::unique_ptr<KernelBuilder> & idb);
92
93    llvm::Module * setModule(const std::unique_ptr<KernelBuilder> & idb, llvm::Module * const module);
94
95    llvm::Module * getModule() const {
96        return mModule;
97    }
98
99    void generateKernel(const std::unique_ptr<kernel::KernelBuilder> & idb);
100   
101    llvm::Value * createInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) final;
102
103    void initializeInstance(const std::unique_ptr<KernelBuilder> & idb) final;
104
105    void finalizeInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) final;
106
107    bool hasNoTerminateAttribute() const {
108        return mNoTerminateAttribute;
109    }
110
111    const StreamSetBuffers & getStreamSetInputBuffers() const {
112        return mStreamSetInputBuffers;
113    }
114
115    const parabix::StreamSetBuffer * getStreamSetInputBuffer(const unsigned i) const {
116        return mStreamSetInputBuffers[i];
117    }
118
119    const StreamSetBuffers & getStreamSetOutputBuffers() const {
120        return mStreamSetOutputBuffers;
121    }
122
123    const parabix::StreamSetBuffer * getStreamSetOutputBuffer(const unsigned i) const {
124        return mStreamSetOutputBuffers[i];
125    }
126   
127    // Kernels typically perform block-at-a-time processing, but some kernels may require
128    // a different stride.   In the case of multiblock kernels, the stride attribute
129    // determines the number of minimum number of items that will be provided to the kernel
130    // on each doMultiBlock call.
131    //
132   
133    unsigned getKernelStride() const { return mStride;}
134   
135    void setKernelStride(unsigned stride) {mStride = stride;}
136   
137    virtual ~Kernel() = 0;
138
139protected:
140
141    // Constructor
142    Kernel(std::string && kernelName,
143                  std::vector<Binding> && stream_inputs,
144                  std::vector<Binding> && stream_outputs,
145                  std::vector<Binding> && scalar_parameters,
146                  std::vector<Binding> && scalar_outputs,
147                  std::vector<Binding> && internal_scalars);
148
149    //
150    // Kernel builder subtypes define their logic of kernel construction
151    // in terms of 3 virtual methods for
152    // (a) preparing the Kernel state data structure
153    // (c) defining the logic of the finalBlock function.
154    //
155    // Note: the kernel state data structure must only be finalized after
156    // all scalar fields have been added.   If there are no fields to
157    // be added, the default method for preparing kernel state may be used.
158
159    void setNoTerminateAttribute(const bool noTerminate = true) {
160        mNoTerminateAttribute = noTerminate;
161    }
162
163    unsigned getScalarIndex(const std::string & name) const;
164
165    void prepareStreamSetNameMap();
166   
167    void processingRateAnalysis();
168
169    void linkExternalMethods(const std::unique_ptr<kernel::KernelBuilder> &) override { }
170
171    virtual void prepareKernel(const std::unique_ptr<KernelBuilder> & idb);
172
173    virtual void generateInitializeMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) { }
174   
175    virtual void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & iBuilder) = 0;
176
177    virtual void generateFinalizeMethod(const std::unique_ptr<KernelBuilder> & iBuilder) { }
178
179    // Add an additional scalar field to the KernelState struct.
180    // Must occur before any call to addKernelDeclarations or createKernelModule.
181    unsigned addScalar(llvm::Type * type, const std::string & name);
182
183    unsigned addUnnamedScalar(llvm::Type * type);
184
185    llvm::Value * getIsFinal() const {
186        return mIsFinal;
187    }
188
189    void callGenerateInitializeMethod(const std::unique_ptr<KernelBuilder> & idb);
190
191    void callGenerateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & idb);
192
193    void callGenerateFinalizeMethod(const std::unique_ptr<KernelBuilder> & idb);
194
195    const parabix::StreamSetBuffer * getInputStreamSetBuffer(const std::string & name) const {
196        const auto port = getStreamPort(name);
197        assert (port.first == Port::Input);
198        assert (port.second < mStreamSetInputBuffers.size());
199        assert (mStreamSetInputBuffers[port.second]);
200        return mStreamSetInputBuffers[port.second];
201    }
202
203    const parabix::StreamSetBuffer * getOutputStreamSetBuffer(const std::string & name) const {
204        const auto port = getStreamPort(name);
205        assert (port.first == Port::Output);
206        assert (port.second < mStreamSetOutputBuffers.size());
207        assert (mStreamSetOutputBuffers[port.second]);
208        return mStreamSetOutputBuffers[port.second];
209    }
210
211    const parabix::StreamSetBuffer * getAnyStreamSetBuffer(const std::string & name) const {
212        unsigned index; Port port;
213        std::tie(port, index) = getStreamPort(name);
214        if (port == Port::Input) {
215            assert (index < mStreamSetInputBuffers.size());
216            assert (mStreamSetInputBuffers[index]);
217            return mStreamSetInputBuffers[index];
218        } else {
219            assert (index < mStreamSetOutputBuffers.size());
220            assert (mStreamSetOutputBuffers[index]);
221            return mStreamSetOutputBuffers[index];
222        }
223    }
224
225private:
226
227    llvm::Value * getAvailableItemCount(const unsigned i) const {
228        return mAvailableItemCount[i];
229    }
230
231protected:
232
233    llvm::Function *                    mCurrentMethod;
234    bool                                mNoTerminateAttribute;
235    bool                                mIsGenerated;
236
237    llvm::Value *                       mIsFinal;
238    std::vector<llvm::Value *>          mAvailableItemCount;
239    llvm::Value *                       mOutputScalarResult;
240
241    std::vector<llvm::Type *>           mKernelFields;
242    KernelMap                           mKernelMap;
243    StreamMap                           mStreamMap;
244    StreamSetBuffers                    mStreamSetInputBuffers;
245    StreamSetBuffers                    mStreamSetOutputBuffers;
246    unsigned                            mStride;
247    std::vector<unsigned>               mItemsPerStride;
248    std::vector<unsigned>               mIsDerived;
249
250};
251
252class SegmentOrientedKernel : public Kernel {
253protected:
254
255    SegmentOrientedKernel(std::string && kernelName,
256                          std::vector<Binding> && stream_inputs,
257                          std::vector<Binding> && stream_outputs,
258                          std::vector<Binding> && scalar_parameters,
259                          std::vector<Binding> && scalar_outputs,
260                          std::vector<Binding> && internal_scalars);
261
262};
263
264class BlockOrientedKernel : public Kernel {
265protected:
266
267    void CreateDoBlockMethodCall(const std::unique_ptr<KernelBuilder> & idb);
268
269    // Each kernel builder subtype must provide its own logic for generating
270    // doBlock calls.
271    virtual void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & idb) = 0;
272
273    // Each kernel builder subtypre must also specify the logic for processing the
274    // final block of stream data, if there is any special processing required
275    // beyond simply calling the doBlock function.   In the case that the final block
276    // processing may be trivially implemented by dispatching to the doBlock method
277    // without additional preparation, the default generateFinalBlockMethod need
278    // not be overridden.
279
280    virtual void generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & idb, llvm::Value * remainingItems);
281
282    void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & idb) final;
283
284    BlockOrientedKernel(std::string && kernelName,
285                        std::vector<Binding> && stream_inputs,
286                        std::vector<Binding> && stream_outputs,
287                        std::vector<Binding> && scalar_parameters,
288                        std::vector<Binding> && scalar_outputs,
289                        std::vector<Binding> && internal_scalars);
290
291private:
292
293    void writeDoBlockMethod(const std::unique_ptr<KernelBuilder> & idb);
294
295    void writeFinalBlockMethod(const std::unique_ptr<KernelBuilder> & idb, llvm::Value * remainingItems);
296
297private:
298
299    llvm::Function *        mDoBlockMethod;
300    llvm::BasicBlock *      mStrideLoopBody;
301    llvm::IndirectBrInst *  mStrideLoopBranch;
302    llvm::PHINode *         mStrideLoopTarget;
303};
304
305/*
306The Multi-Block Kernel Builder
307------------------------------
308
309The Multi-Block Kernel Builder is designed to simplify the programming of
310efficient kernels with possibly variable and/or nonaligned output, subject to
311exact or MaxRatio processing constraints.   The following restrictions apply.
312
313#.  The input consists of one or more stream sets, the first of which is
314    known as the principal input stream set.
315
316#.  If there is more than one input stream set, the additional stream sets
317    are first classified as having either a derived processing rate or
318    a variable processing rate.   Stream sets with a derived processing rate
319    have a processing rate defined with respect to the input stream set of one
320    of the following types:  FixedRate, Add1 or RoundUp.    Note that stream sets
321    declared without a processing rate attribute have the FixedRate(1) attribute
322    by default and therefore satisfy this constraint.  All other processing rate
323    types are classified as variable rate.
324
325#.  All output stream sets must be declared with processing rate attributes
326    of one of the following types:
327    *  FixedRate, Add1, RoundUp, or MaxRatio with respect to the principal input stream set.
328    *  FixedRate with respect to some other output stream set.
329
330    When using the Multi-Block Kernel Builder to program a new type of kernel,
331    the programmer must implement the generateMultiBlockLogic method for normal
332    multi-block processing according to the requirements below, as well as
333    providing for special final block processing, if necessary.
334
335#.  The doMultiBlockMethod will be called with the following parameters:
336    * the number of items of the principal input stream to process (itemsToDo),
337    * additional items available parameters for each additional input stream set
338      that is classified as a variable rate stream set
339    * pointers to linear contiguous buffer areas for each of the input stream sets, and
340    * pointers to linear contiguous output buffer areas for each of the output stream sets.
341
342    Notes:
343    * if the kernel has a Lookahead dependency declared on any input stream set, then
344      there will be two buffer pointers for that stream set, one for accessing stream set
345      items without lookahead and one for accessing the items with lookahead.   
346    * pointers are to the beginning of the block corresponding to the
347      processedItemCount or producedItemCount of the given stream set.
348    * the base type of each pointer is the StreamSetBlockType of that streamset
349
350#.  The Multi-Block Kernel Builder will arrange that these input parameters may be
351    processed under the following simplifying assumptions.
352    * the number of itemsToDo will either be an exact multiple of the kernel stride,
353      or, for processing the final block, a value less than the kernel stride
354    * the input buffer of the principal stream set and all input buffers of stream sets
355      with derived processing rates will be safe to access and have data available in
356      accord with their processing rates based on the given number of itemsToDo
357      of the principal input stream set; no further bounds checking is needed. 
358    * input buffers of stream sets with MaxRatio attributes will be safe to access,
359      but will only have valid data as specified by the available items parameter for
360      that stream set.
361    * the kernel programmer is responsible for safe access and bounds checking for any
362      input stream set classified as Unknown rate.   No temporary buffers are used
363      for such stream sets.
364    * all output buffers will be safe to access and have space available
365      for the given maximum output generation rates based on the given number
366      of itemsToDo of the principal input stream set; no further bounds checking
367      is needed.
368    * for final block processing, all input buffers will be extended to be safely
369      treated as containing data corresponding to a full block of the principal
370      input stream set, with the actual data in each buffer padded with null values
371      beyond the end of input.  Similarly, all output buffers will contain space
372      sufficient for the maximum output that can be generated for a full block of
373      input processing.
374    * input and output pointers will be typed to allow convenient and logical access
375      to corresponding streams based on their declared stream set type and processing rate.
376    * for any input pointer p, a GEP instruction with a single int32 index i
377      will produce a pointer to the buffer position corresponding to the ith block of the
378      input stream set, relative to the initial block based on the processedItemCount.
379    * for any output stream set declared with a Fixed or Add1 processing rate with respect
380      to the principal input stream set, a GEP instruction with a single int32 index i
381      will produce a pointer to the buffer position corresponding to the ith block of the
382      stream set, relative to the initial block based on the producedItemCount.
383
384#.  Upon completion of multi-block processing, the Multi-Block Kernel Builder will arrange that
385    processed and produced item counts are updated for all stream sets that have exact
386    processing rate attributes.   Programmers are responsible for updating the counts
387    of any stream set declared with a variable attribute (MaxRatio or Unknown).
388
389#.  An important caveat is that buffer areas may change arbitrarily between
390    calls to the doMultiBlockMethod.   In no case should a kernel store a
391    buffer pointer in its internal state.   Furthermore a kernel must not make
392    any assumptions about the accessibility of stream set data outside of the
393    processing range defined by the block boundaries associated with the given itemsToDo.
394*/
395
396class MultiBlockKernel : public Kernel {
397protected:
398
399    MultiBlockKernel(std::string && kernelName,
400                     std::vector<Binding> && stream_inputs,
401                     std::vector<Binding> && stream_outputs,
402                     std::vector<Binding> && scalar_parameters,
403                     std::vector<Binding> && scalar_outputs,
404                     std::vector<Binding> && internal_scalars);
405
406    // Each multi-block kernel subtype must provide its own logic for handling
407    // doMultiBlock calls, subject to the requirements laid out above.
408    // The generateMultiBlockLogic must be written to generate this logic, given
409    // a created but empty function.  Upon entry to generateMultiBlockLogic,
410    // the builder insertion point will be set to the entry block; upone
411    // exit the RetVoid instruction will be added to complete the method.
412    //
413    virtual void generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & idb) = 0;
414   
415private:
416    // Given a kernel subtype with an appropriate interface, the generateDoSegment
417    // method of the multi-block kernel builder makes all the necessary arrangements
418    // to translate doSegment calls into a minimal sequence of doMultiBlock calls.
419    void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & kb) final;
420
421};
422
423void applyOutputBufferExpansions(const std::unique_ptr<KernelBuilder> & kb,
424                                 std::vector<llvm::Value *> inputAvailable,
425                                 llvm::Value * doFinal);
426   
427   
428
429}
430#endif
Note: See TracBrowser for help on using the repository browser.