source: icGREP/icgrep-devel/icgrep/kernels/kernel.h @ 5501

Last change on this file since 5501 was 5497, checked in by cameron, 23 months ago

Fix for read_source kernel; stride attribute for multiblock kernels

File size: 17.9 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#ifndef KERNEL_H
7#define KERNEL_H
8
9#include "interface.h"
10#include <boost/container/flat_map.hpp>
11#include <llvm/IR/Constants.h>
12
// Forward declarations for types that this header only names through
// pointers or references, so the full definitions are not required here.
// BasicBlock and Module are used further down in this header as well
// (BlockOrientedKernel::mStrideLoopBody, Kernel::getModule) and are declared
// here so the header does not depend on them arriving via another include.
namespace llvm { class BasicBlock; }
namespace llvm { class Function; }
namespace llvm { class IntegerType; }
namespace llvm { class IndirectBrInst; }
namespace llvm { class Module; }
namespace llvm { class PHINode; }
namespace llvm { class LoadInst; }
namespace llvm { class Type; }
namespace llvm { class Value; }
namespace parabix { class StreamSetBuffer; }
21
22namespace kernel {
23   
// Forward declaration; every kernel method below receives the builder only
// through a const std::unique_ptr<KernelBuilder> reference.
class KernelBuilder;
25
26class Kernel : public KernelInterface {
27    friend class KernelBuilder;
28protected:
29    using KernelMap = boost::container::flat_map<std::string, unsigned>;
30    enum class Port { Input, Output };
31    using StreamPort = std::pair<Port, unsigned>;
32    using StreamMap = boost::container::flat_map<std::string, StreamPort>;
33    using StreamSetBuffers = std::vector<parabix::StreamSetBuffer *>;
34    using Kernels = std::vector<Kernel *>;
35
36    static const std::string DO_BLOCK_SUFFIX;
37    static const std::string FINAL_BLOCK_SUFFIX;
38    static const std::string MULTI_BLOCK_SUFFIX;
39    static const std::string LOGICAL_SEGMENT_NO_SCALAR;
40    static const std::string PROCESSED_ITEM_COUNT_SUFFIX;
41    static const std::string CONSUMED_ITEM_COUNT_SUFFIX;
42    static const std::string PRODUCED_ITEM_COUNT_SUFFIX;
43    static const std::string TERMINATION_SIGNAL;
44    static const std::string BUFFER_PTR_SUFFIX;
45    static const std::string CONSUMER_SUFFIX;
46public:
47    static const std::string CYCLECOUNT_SCALAR;
48
49public:
50   
51    // Kernel Signatures and Module IDs
52    //
53    // A kernel signature uniquely identifies a kernel and its full functionality.
54    // In the event that a particular kernel instance is to be generated and compiled
55    // to produce object code, and we have a cached kernel object code instance with
56    // the same signature and targetting the same IDISA architecture, then the cached
57    // object code may safely be used to avoid recompilation.
58    //
59    // A kernel signature is a byte string of arbitrary length.
60    //
61    // Kernel developers should take responsibility for designing appropriate signature
62    // mechanisms that are short, inexpensive to compute and guarantee uniqueness
63    // based on the semantics of the kernel. 
64    //
65    // If no other mechanism is available, the default makeSignature() method uses the
66    // full LLVM IR (before optimization) of the kernel instance.
67    //
68    // A kernel Module ID is short string that is used as a name for a particular kernel
69    // instance.  Kernel Module IDs are used to look up and retrieve cached kernel
70    // instances and so should be highly likely to uniquely identify a kernel instance.
71    //
72    // The ideal case is that a kernel Module ID serves as a full kernel signature thus
73    // guaranteeing uniqueness.  In this case, hasSignature() should return false.
74    //
75       
76    bool isCachable() const override { return false; }
77
78    std::string makeSignature(const std::unique_ptr<KernelBuilder> & idb) override;
79
80    // Can the module ID itself serve as the unique signature?
81    virtual bool hasSignature() const { return true; }
82
83    // Create a module stub for the kernel, populated only with its Module ID.     
84    //
85
86    void bindPorts(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs);
87
88    llvm::Module * makeModule(const std::unique_ptr<KernelBuilder> & idb);
89
90    llvm::Module * setModule(const std::unique_ptr<KernelBuilder> & idb, llvm::Module * const module);
91
92    llvm::Module * getModule() const {
93        return mModule;
94    }
95
96    void generateKernel(const std::unique_ptr<kernel::KernelBuilder> & idb);
97   
98    llvm::Value * createInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) final;
99
100    void initializeInstance(const std::unique_ptr<KernelBuilder> & idb) final;
101
102    void finalizeInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) final;
103
104    bool hasNoTerminateAttribute() const {
105        return mNoTerminateAttribute;
106    }
107
108    const StreamSetBuffers & getStreamSetInputBuffers() const {
109        return mStreamSetInputBuffers;
110    }
111
112    const parabix::StreamSetBuffer * getStreamSetInputBuffer(const unsigned i) const {
113        return mStreamSetInputBuffers[i];
114    }
115
116    const StreamSetBuffers & getStreamSetOutputBuffers() const {
117        return mStreamSetOutputBuffers;
118    }
119
120    const parabix::StreamSetBuffer * getStreamSetOutputBuffer(const unsigned i) const {
121        return mStreamSetOutputBuffers[i];
122    }
123   
124    virtual ~Kernel() = 0;
125
126protected:
127
128    // Constructor
129    Kernel(std::string && kernelName,
130                  std::vector<Binding> && stream_inputs,
131                  std::vector<Binding> && stream_outputs,
132                  std::vector<Binding> && scalar_parameters,
133                  std::vector<Binding> && scalar_outputs,
134                  std::vector<Binding> && internal_scalars);
135
136    //
137    // Kernel builder subtypes define their logic of kernel construction
138    // in terms of 3 virtual methods for
139    // (a) preparing the Kernel state data structure
140    // (b) defining the logic of the doBlock function, and
141    // (c) defining the logic of the finalBlock function.
142    //
143    // Note: the kernel state data structure must only be finalized after
144    // all scalar fields have been added.   If there are no fields to
145    // be added, the default method for preparing kernel state may be used.
146
147    void setNoTerminateAttribute(const bool noTerminate = true) {
148        mNoTerminateAttribute = noTerminate;
149    }
150
151    unsigned getScalarIndex(const std::string & name) const;
152
153    void prepareStreamSetNameMap();
154
155    void linkExternalMethods(const std::unique_ptr<kernel::KernelBuilder> &) override { }
156
157    virtual void prepareKernel(const std::unique_ptr<KernelBuilder> & idb);
158
159    virtual void generateInitializeMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) { }
160   
161    virtual void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & iBuilder) = 0;
162
163    virtual void generateFinalizeMethod(const std::unique_ptr<KernelBuilder> & iBuilder) { }
164
165    // Add an additional scalar field to the KernelState struct.
166    // Must occur before any call to addKernelDeclarations or createKernelModule.
167    unsigned addScalar(llvm::Type * type, const std::string & name);
168
169    unsigned addUnnamedScalar(llvm::Type * type);
170
171    llvm::Value * getIsFinal() const {
172        return mIsFinal;
173    }
174
175    void callGenerateInitializeMethod(const std::unique_ptr<KernelBuilder> & idb);
176
177    void callGenerateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & idb);
178
179    void callGenerateFinalizeMethod(const std::unique_ptr<KernelBuilder> & idb);
180
181    StreamPort getStreamPort(const std::string & name) const;
182
183    const parabix::StreamSetBuffer * getInputStreamSetBuffer(const std::string & name) const {
184        const auto port = getStreamPort(name);
185        assert (port.first == Port::Input);
186        assert (port.second < mStreamSetInputBuffers.size());
187        assert (mStreamSetInputBuffers[port.second]);
188        return mStreamSetInputBuffers[port.second];
189    }
190
191    const parabix::StreamSetBuffer * getOutputStreamSetBuffer(const std::string & name) const {
192        const auto port = getStreamPort(name);
193        assert (port.first == Port::Output);
194        assert (port.second < mStreamSetOutputBuffers.size());
195        assert (mStreamSetOutputBuffers[port.second]);
196        return mStreamSetOutputBuffers[port.second];
197    }
198
199    const parabix::StreamSetBuffer * getAnyStreamSetBuffer(const std::string & name) const {
200        unsigned index; Port port;
201        std::tie(port, index) = getStreamPort(name);
202        if (port == Port::Input) {
203            assert (index < mStreamSetInputBuffers.size());
204            assert (mStreamSetInputBuffers[index]);
205            return mStreamSetInputBuffers[index];
206        } else {
207            assert (index < mStreamSetOutputBuffers.size());
208            assert (mStreamSetOutputBuffers[index]);
209            return mStreamSetOutputBuffers[index];
210        }
211    }
212
213private:
214
215    llvm::Value * getAvailableItemCount(const unsigned i) const {
216        return mAvailableItemCount[i];
217    }
218
219protected:
220
221    llvm::Function *                    mCurrentMethod;
222    bool                                mNoTerminateAttribute;
223    bool                                mIsGenerated;
224
225    llvm::Value *                       mIsFinal;
226    std::vector<llvm::Value *>          mAvailableItemCount;
227    llvm::Value *                       mOutputScalarResult;
228
229    std::vector<llvm::Type *>           mKernelFields;
230    KernelMap                           mKernelMap;
231    StreamMap                           mStreamMap;
232    StreamSetBuffers                    mStreamSetInputBuffers;
233    StreamSetBuffers                    mStreamSetOutputBuffers;
234
235};
236
237class SegmentOrientedKernel : public Kernel {
238protected:
239
240    SegmentOrientedKernel(std::string && kernelName,
241                          std::vector<Binding> && stream_inputs,
242                          std::vector<Binding> && stream_outputs,
243                          std::vector<Binding> && scalar_parameters,
244                          std::vector<Binding> && scalar_outputs,
245                          std::vector<Binding> && internal_scalars);
246
247};
248
249class BlockOrientedKernel : public Kernel {
250protected:
251
252    void CreateDoBlockMethodCall(const std::unique_ptr<KernelBuilder> & idb);
253
254    // Each kernel builder subtype must provide its own logic for generating
255    // doBlock calls.
256    virtual void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & idb) = 0;
257
258    // Each kernel builder subtypre must also specify the logic for processing the
259    // final block of stream data, if there is any special processing required
260    // beyond simply calling the doBlock function.   In the case that the final block
261    // processing may be trivially implemented by dispatching to the doBlock method
262    // without additional preparation, the default generateFinalBlockMethod need
263    // not be overridden.
264
265    virtual void generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & idb, llvm::Value * remainingItems);
266
267    void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & idb) final;
268
269    BlockOrientedKernel(std::string && kernelName,
270                        std::vector<Binding> && stream_inputs,
271                        std::vector<Binding> && stream_outputs,
272                        std::vector<Binding> && scalar_parameters,
273                        std::vector<Binding> && scalar_outputs,
274                        std::vector<Binding> && internal_scalars);
275
276private:
277
278    void writeDoBlockMethod(const std::unique_ptr<KernelBuilder> & idb);
279
280    void writeFinalBlockMethod(const std::unique_ptr<KernelBuilder> & idb, llvm::Value * remainingItems);
281
282private:
283
284    llvm::Function *        mDoBlockMethod;
285    llvm::BasicBlock *      mStrideLoopBody;
286    llvm::IndirectBrInst *  mStrideLoopBranch;
287    llvm::PHINode *         mStrideLoopTarget;
288};
289
/*
The Multi-Block Kernel Builder
------------------------------

The Multi-Block Kernel Builder is designed to simplify the programming of
efficient kernels with possibly variable and/or nonaligned output, subject to
exact or MaxRatio processing constraints.   The following restrictions apply.

#.  The input consists of one or more stream sets, the first of which is
    known as the principal input stream set.

#.  If there is more than one input stream set, the additional stream sets
    are first classified as having either a derived processing rate or
    a variable processing rate.   Stream sets with a derived processing rate
    have a processing rate defined with respect to the input stream set of one
    of the following types:  FixedRate, Add1 or RoundUp.    Note that stream sets
    declared without a processing rate attribute have the FixedRate(1) attribute
    by default and therefore satisfy this constraint.  All other processing rate
    types are classified as variable rate.

#.  All output stream sets must be declared with processing rate attributes
    of one of the following types:
    *  FixedRate, Add1, RoundUp, or MaxRatio with respect to the principal input stream set.
    *  FixedRate with respect to some other output stream set.

    When using the Multi-Block Kernel Builder to program a new type of kernel,
    the programmer must implement the generateDoMultiBlockMethod for normal
    multi-block processing according to the requirements below, as well as
    providing for special final block processing, if necessary.

#.  The doMultiBlockMethod will be called with the following parameters:
    * the number of items of the principal input stream to process (itemsToDo),
    * additional items-available parameters for each additional input stream set
      that is classified as a variable rate stream set,
    * pointers to linear contiguous buffer areas for each of the input stream sets, and
    * pointers to linear contiguous output buffer areas for each of the output stream sets.

    Notes:
    * if the kernel has a Lookahead dependency declared on any input stream set, then
      there will be two buffer pointers for that stream set, one for accessing stream set
      items without lookahead and one for accessing the items with lookahead.
    * pointers are to the address of the first unprocessed item of the first stream
      of the stream set.
    * the base type of each pointer is the StreamSetBlockType of that stream set.

#.  The Multi-Block Kernel Builder will arrange that these input parameters may be
    processed under the following simplifying assumptions.
    * the number of itemsToDo will either be an exact multiple of the kernel stride,
      or, for processing the final block, a value less than the kernel stride
    * the input buffer of the principal stream set and all input buffers of stream sets
      with derived processing rates will be safe to access and have data available in
      accord with their processing rates based on the given number of itemsToDo
      of the principal input stream set; no further bounds checking is needed.
    * the kernel programmer is responsible for safe access and bounds checking for any
      input stream set classified as variable rate.
    * all output buffers will be safe to access and have space available
      for the given maximum output generation rates based on the given number
      of itemsToDo of the principal input stream set; no further bounds checking
      is needed.
    * for final block processing, all input buffers will be extended to be safely
      treated as containing data corresponding to a full block of the principal
      input stream set, with the actual data in each buffer padded with null values
      beyond the end of input.  Similarly, all output buffers will contain space
      sufficient for the maximum output that can be generated for a full block of
      input processing.
    * input and output pointers will be typed to allow convenient and logical access
      to corresponding streams based on their declared stream set type and processing rate.
    * for any input pointer p, a GEP instruction with a single int32 index i
      will produce a pointer to the buffer position corresponding to the ith block of the
      input stream set.
    * for any output stream set declared with a FixedRate or Add1 processing rate with respect
      to the principal input stream set, a GEP instruction with a single int32 index i
      will produce a pointer to the buffer position corresponding to the ith block of the
      stream set.

#.  Upon completion of multi-block processing, the Multi-Block Kernel Builder will arrange that
    processed and produced item counts are updated for all stream sets that have exact
    processing rate attributes.   Programmers are responsible for updating the producedItemCount
    of any stream set declared with a variable attribute (MaxRatio).

#.  An important caveat is that buffer areas may change arbitrarily between
    calls to the doMultiBlockMethod.   In no case should a kernel store a
    buffer pointer in its internal state.   Furthermore, a kernel must not make
    any assumptions about the accessibility of stream set data outside of the
    processing range, i.e., outside of the block boundaries associated with the given itemsToDo.
*/
376
377class MultiBlockKernel : public Kernel {
378protected:
379
380    MultiBlockKernel(std::string && kernelName,
381                     std::vector<Binding> && stream_inputs,
382                     std::vector<Binding> && stream_outputs,
383                     std::vector<Binding> && scalar_parameters,
384                     std::vector<Binding> && scalar_outputs,
385                     std::vector<Binding> && internal_scalars);
386
387    // Each multi-block kernel subtype must provide its own logic for handling
388    // doMultiBlock calls, subject to the requirements laid out above.
389    // The generateMultiBlockLogic must be written to generate this logic, given
390    // a created but empty function.  Upon entry to generateMultiBlockLogic,
391    // the builder insertion point will be set to the entry block; upone
392    // exit the RetVoid instruction will be added to complete the method.
393    //
394    virtual void generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & idb) = 0;
395   
396    // Kernels typically perform block-at-a-time processing, but some kernels may require
397    // a different stride.   In the case of multiblock kernels, the stride attribute
398    // determines the number of minimum number of items that will be provided to the kernel
399    // on each doMultiBlock call.
400    //
401   
402    unsigned getKernelStride() const { return mStride;}
403       
404    void setKernelStride(unsigned stride) {mStride = stride;}
405       
406       
407
408private:
409    size_t                            mStride;
410
411
412    // Given a kernel subtype with an appropriate interface, the generateDoSegment
413    // method of the multi-block kernel builder makes all the necessary arrangements
414    // to translate doSegment calls into a minimal sequence of doMultiBlock calls.
415    void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & kb) final;
416
417};
418
419
420}
421#endif
Note: See TracBrowser for help on using the repository browser.