source: icGREP/icgrep-devel/icgrep/kernels/kernel.h @ 5630

Last change on this file since 5630 was 5630, checked in by nmedfort, 22 months ago

Partial check-in for avoidance of compiling Pablo/LLVM code to determine the Kernel struct type when using a cached object. Inactive RE alternation minimization check in.

File size: 18.8 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#ifndef KERNEL_H
7#define KERNEL_H
8
9#include "interface.h"
10#include <boost/container/flat_map.hpp>
11#include <llvm/IR/Constants.h>
12
13namespace llvm { class Function; }
14namespace llvm { class IntegerType; }
15namespace llvm { class IndirectBrInst; }
16namespace llvm { class PHINode; }
17namespace llvm { class LoadInst; }
18namespace llvm { class Type; }
19namespace llvm { class Value; }
20namespace parabix { class StreamSetBuffer; }
21
22namespace kernel {
23   
24class KernelBuilder;
25
26class Kernel : public KernelInterface {
27    friend class KernelBuilder;
28public:
29    enum class Port { Input, Output };
30protected:
31    using KernelMap = boost::container::flat_map<std::string, unsigned>;
32    using StreamPort = std::pair<Port, unsigned>;
33    using StreamMap = boost::container::flat_map<std::string, StreamPort>;
34    using StreamSetBuffers = std::vector<parabix::StreamSetBuffer *>;
35    using Kernels = std::vector<Kernel *>;
36
37    static const std::string DO_BLOCK_SUFFIX;
38    static const std::string FINAL_BLOCK_SUFFIX;
39    static const std::string MULTI_BLOCK_SUFFIX;
40    static const std::string LOGICAL_SEGMENT_NO_SCALAR;
41    static const std::string PROCESSED_ITEM_COUNT_SUFFIX;
42    static const std::string CONSUMED_ITEM_COUNT_SUFFIX;
43    static const std::string PRODUCED_ITEM_COUNT_SUFFIX;
44    static const std::string TERMINATION_SIGNAL;
45    static const std::string BUFFER_PTR_SUFFIX;
46    static const std::string CONSUMER_SUFFIX;
47public:
48    static const std::string CYCLECOUNT_SCALAR;
49
50public:
51   
52    // Kernel Signatures and Module IDs
53    //
54    // A kernel signature uniquely identifies a kernel and its full functionality.
55    // In the event that a particular kernel instance is to be generated and compiled
56    // to produce object code, and we have a cached kernel object code instance with
57    // the same signature and targetting the same IDISA architecture, then the cached
58    // object code may safely be used to avoid recompilation.
59    //
60    // A kernel signature is a byte string of arbitrary length.
61    //
62    // Kernel developers should take responsibility for designing appropriate signature
63    // mechanisms that are short, inexpensive to compute and guarantee uniqueness
64    // based on the semantics of the kernel. 
65    //
66    // If no other mechanism is available, the default makeSignature() method uses the
67    // full LLVM IR (before optimization) of the kernel instance.
68    //
69    // A kernel Module ID is short string that is used as a name for a particular kernel
70    // instance.  Kernel Module IDs are used to look up and retrieve cached kernel
71    // instances and so should be highly likely to uniquely identify a kernel instance.
72    //
73    // The ideal case is that a kernel Module ID serves as a full kernel signature thus
74    // guaranteeing uniqueness.  In this case, hasSignature() should return false.
75    //
76       
77    bool isCachable() const override { return false; }
78
79    std::string makeSignature(const std::unique_ptr<KernelBuilder> & idb) override;
80
81    // Can the module ID itself serve as the unique signature?
82    virtual bool hasSignature() const { return true; }
83
84    // Create a module stub for the kernel, populated only with its Module ID.     
85    //
86
87    void bindPorts(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs);
88
89    StreamPort getStreamPort(const std::string & name) const;
90
91    llvm::Module * setModule(llvm::Module * const module);
92
93    llvm::Module * makeModule(const std::unique_ptr<kernel::KernelBuilder> & idb);
94
95    llvm::Module * getModule() const {
96        return mModule;
97    }
98
99    void generateKernel(const std::unique_ptr<kernel::KernelBuilder> & idb);
100   
101    llvm::Value * createInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) final;
102
103    void initializeInstance(const std::unique_ptr<KernelBuilder> & idb) final;
104
105    void finalizeInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) final;
106
107    bool hasNoTerminateAttribute() const {
108        return mNoTerminateAttribute;
109    }
110
111    const StreamSetBuffers & getStreamSetInputBuffers() const {
112        return mStreamSetInputBuffers;
113    }
114
115    const parabix::StreamSetBuffer * getStreamSetInputBuffer(const unsigned i) const {
116        return mStreamSetInputBuffers[i];
117    }
118
119    const StreamSetBuffers & getStreamSetOutputBuffers() const {
120        return mStreamSetOutputBuffers;
121    }
122
123    const parabix::StreamSetBuffer * getStreamSetOutputBuffer(const unsigned i) const {
124        return mStreamSetOutputBuffers[i];
125    }
126   
127    // Kernels typically perform block-at-a-time processing, but some kernels may require
128    // a different stride.   In the case of multiblock kernels, the stride attribute
129    // determines the number of minimum number of items that will be provided to the kernel
130    // on each doMultiBlock call.
131    //
132   
133    unsigned getKernelStride() const { return mStride;}
134   
135    void setKernelStride(unsigned stride) {mStride = stride;}
136   
137    virtual ~Kernel() = 0;
138
139    void prepareKernel(const std::unique_ptr<KernelBuilder> & idb);
140
141    void prepareCachedKernel(const std::unique_ptr<KernelBuilder> & idb);
142
143    std::string getCacheName(const std::unique_ptr<KernelBuilder> & idb) const;
144
145protected:
146
147    virtual void addInternalKernelProperties(const std::unique_ptr<KernelBuilder> & idb) { }
148
149    // Constructor
150    Kernel(std::string && kernelName,
151                  std::vector<Binding> && stream_inputs,
152                  std::vector<Binding> && stream_outputs,
153                  std::vector<Binding> && scalar_parameters,
154                  std::vector<Binding> && scalar_outputs,
155                  std::vector<Binding> && internal_scalars);
156
157    //
158    // Kernel builder subtypes define their logic of kernel construction
159    // in terms of 3 virtual methods for
160    // (a) preparing the Kernel state data structure
161    // (c) defining the logic of the finalBlock function.
162    //
163    // Note: the kernel state data structure must only be finalized after
164    // all scalar fields have been added.   If there are no fields to
165    // be added, the default method for preparing kernel state may be used.
166
167    void setNoTerminateAttribute(const bool noTerminate = true) {
168        mNoTerminateAttribute = noTerminate;
169    }
170
171    unsigned getScalarIndex(const std::string & name) const;
172
173    void prepareStreamSetNameMap();
174   
175    void processingRateAnalysis();
176
177    void linkExternalMethods(const std::unique_ptr<kernel::KernelBuilder> &) override { }
178
179    virtual void generateInitializeMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) { }
180   
181    virtual void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & iBuilder) = 0;
182
183    virtual void generateFinalizeMethod(const std::unique_ptr<KernelBuilder> & iBuilder) { }
184
185    // Add an additional scalar field to the KernelState struct.
186    // Must occur before any call to addKernelDeclarations or createKernelModule.
187    unsigned addScalar(llvm::Type * type, const std::string & name);
188
189    unsigned addUnnamedScalar(llvm::Type * type);
190
191    llvm::Value * getIsFinal() const {
192        return mIsFinal;
193    }
194
195    void callGenerateInitializeMethod(const std::unique_ptr<KernelBuilder> & idb);
196
197    void callGenerateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & idb);
198
199    void callGenerateFinalizeMethod(const std::unique_ptr<KernelBuilder> & idb);
200
201    const parabix::StreamSetBuffer * getInputStreamSetBuffer(const std::string & name) const {
202        const auto port = getStreamPort(name);
203        assert (port.first == Port::Input);
204        assert (port.second < mStreamSetInputBuffers.size());
205        assert (mStreamSetInputBuffers[port.second]);
206        return mStreamSetInputBuffers[port.second];
207    }
208
209    const parabix::StreamSetBuffer * getOutputStreamSetBuffer(const std::string & name) const {
210        const auto port = getStreamPort(name);
211        assert (port.first == Port::Output);
212        assert (port.second < mStreamSetOutputBuffers.size());
213        assert (mStreamSetOutputBuffers[port.second]);
214        return mStreamSetOutputBuffers[port.second];
215    }
216
217    const parabix::StreamSetBuffer * getAnyStreamSetBuffer(const std::string & name) const {
218        unsigned index; Port port;
219        std::tie(port, index) = getStreamPort(name);
220        if (port == Port::Input) {
221            assert (index < mStreamSetInputBuffers.size());
222            assert (mStreamSetInputBuffers[index]);
223            return mStreamSetInputBuffers[index];
224        } else {
225            assert (index < mStreamSetOutputBuffers.size());
226            assert (mStreamSetOutputBuffers[index]);
227            return mStreamSetOutputBuffers[index];
228        }
229    }
230
231private:
232
233    llvm::Value * getAvailableItemCount(const unsigned i) const {
234        return mAvailableItemCount[i];
235    }
236
237protected:
238
239    llvm::Function *                    mCurrentMethod;
240    bool                                mNoTerminateAttribute;
241    bool                                mIsGenerated;
242
243    llvm::Value *                       mIsFinal;
244    std::vector<llvm::Value *>          mAvailableItemCount;
245    llvm::Value *                       mOutputScalarResult;
246
247    std::vector<llvm::Type *>           mKernelFields;
248    KernelMap                           mKernelMap;
249    StreamMap                           mStreamMap;
250    StreamSetBuffers                    mStreamSetInputBuffers;
251    StreamSetBuffers                    mStreamSetOutputBuffers;
252    unsigned                            mStride;
253    std::vector<unsigned>               mItemsPerStride;
254    std::vector<unsigned>               mIsDerived;
255
256};
257
258class SegmentOrientedKernel : public Kernel {
259protected:
260
261    SegmentOrientedKernel(std::string && kernelName,
262                          std::vector<Binding> && stream_inputs,
263                          std::vector<Binding> && stream_outputs,
264                          std::vector<Binding> && scalar_parameters,
265                          std::vector<Binding> && scalar_outputs,
266                          std::vector<Binding> && internal_scalars);
267
268};
269
270class BlockOrientedKernel : public Kernel {
271protected:
272
273    void CreateDoBlockMethodCall(const std::unique_ptr<KernelBuilder> & idb);
274
275    // Each kernel builder subtype must provide its own logic for generating
276    // doBlock calls.
277    virtual void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & idb) = 0;
278
279    // Each kernel builder subtypre must also specify the logic for processing the
280    // final block of stream data, if there is any special processing required
281    // beyond simply calling the doBlock function.   In the case that the final block
282    // processing may be trivially implemented by dispatching to the doBlock method
283    // without additional preparation, the default generateFinalBlockMethod need
284    // not be overridden.
285
286    virtual void generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & idb, llvm::Value * remainingItems);
287
288    void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & idb) final;
289
290    BlockOrientedKernel(std::string && kernelName,
291                        std::vector<Binding> && stream_inputs,
292                        std::vector<Binding> && stream_outputs,
293                        std::vector<Binding> && scalar_parameters,
294                        std::vector<Binding> && scalar_outputs,
295                        std::vector<Binding> && internal_scalars);
296
297private:
298
299    void writeDoBlockMethod(const std::unique_ptr<KernelBuilder> & idb);
300
301    void writeFinalBlockMethod(const std::unique_ptr<KernelBuilder> & idb, llvm::Value * remainingItems);
302
303private:
304
305    llvm::Function *        mDoBlockMethod;
306    llvm::BasicBlock *      mStrideLoopBody;
307    llvm::IndirectBrInst *  mStrideLoopBranch;
308    llvm::PHINode *         mStrideLoopTarget;
309};
310
311/*
312The Multi-Block Kernel Builder
313------------------------------
314
315The Multi-Block Kernel Builder is designed to simplify the programming of
316efficient kernels with possibly variable and/or nonaligned output, subject to
317exact or MaxRatio processing constraints.   The following restrictions apply.
318
319#.  The input consists of one or more stream sets, the first of which is
320    known as the principal input stream set.
321
322#.  If there is more than one input stream set, the additional stream sets
323    are first classified as having either a derived processing rate or
324    a variable processing rate.   Stream sets with a derived processing rate
325    have a processing rate defined with respect to the input stream set of one
326    of the following types:  FixedRate, Add1 or RoundUp.    Note that stream sets
327    declared without a processing rate attribute have the FixedRate(1) attribute
328    by default and therefore satisfy this constraint.  All other processing rate
329    types are classified as variable rate.
330
331#.  All output stream sets must be declared with processing rate attributes
332    of one of the following types:
333    *  FixedRate, Add1, RoundUp, or MaxRatio with respect to the principal input stream set.
334    *  FixedRate with respect to some other output stream set.
335
336    When using the Multi-Block Kernel Builder to program a new type of kernel,
337    the programmer must implement the generateMultiBlockLogic method for normal
338    multi-block processing according to the requirements below, as well as
339    providing for special final block processing, if necessary.
340
341#.  The doMultiBlockMethod will be called with the following parameters:
342    * the number of items of the principal input stream to process (itemsToDo),
343    * additional items available parameters for each additional input stream set
344      that is classified as a variable rate stream set
345    * pointers to linear contiguous buffer areas for each of the input stream sets, and
346    * pointers to linear contiguous output buffer areas for each of the output stream sets.
347
348    Notes:
349    * if the kernel has a Lookahead dependency declared on any input stream set, then
350      there will be two buffer pointers for that stream set, one for accessing stream set
351      items without lookahead and one for accessing the items with lookahead.   
352    * pointers are to the beginning of the block corresponding to the
353      processedItemCount or producedItemCount of the given stream set.
354    * the base type of each pointer is the StreamSetBlockType of that streamset
355
356#.  The Multi-Block Kernel Builder will arrange that these input parameters may be
357    processed under the following simplifying assumptions.
358    * the number of itemsToDo will either be an exact multiple of the kernel stride,
359      or, for processing the final block, a value less than the kernel stride
360    * the input buffer of the principal stream set and all input buffers of stream sets
361      with derived processing rates will be safe to access and have data available in
362      accord with their processing rates based on the given number of itemsToDo
363      of the principal input stream set; no further bounds checking is needed. 
364    * input buffers of stream sets with MaxRatio attributes will be safe to access,
365      but will only have valid data as specified by the available items parameter for
366      that stream set.
367    * the kernel programmer is responsible for safe access and bounds checking for any
368      input stream set classified as Unknown rate.   No temporary buffers are used
369      for such stream sets.
370    * all output buffers will be safe to access and have space available
371      for the given maximum output generation rates based on the given number
372      of itemsToDo of the principal input stream set; no further bounds checking
373      is needed.
374    * for final block processing, all input buffers will be extended to be safely
375      treated as containing data corresponding to a full block of the principal
376      input stream set, with the actual data in each buffer padded with null values
377      beyond the end of input.  Similarly, all output buffers will contain space
378      sufficient for the maximum output that can be generated for a full block of
379      input processing.
380    * input and output pointers will be typed to allow convenient and logical access
381      to corresponding streams based on their declared stream set type and processing rate.
382    * for any input pointer p, a GEP instruction with a single int32 index i
383      will produce a pointer to the buffer position corresponding to the ith block of the
384      input stream set, relative to the initial block based on the processedItemCount.
385    * for any output stream set declared with a Fixed or Add1 processing rate with respect
386      to the principal input stream set, a GEP instruction with a single int32 index i
387      will produce a pointer to the buffer position corresponding to the ith block of the
388      stream set, relative to the initial block based on the producedItemCount.
389
390#.  Upon completion of multi-block processing, the Multi-Block Kernel Builder will arrange that
391    processed and produced item counts are updated for all stream sets that have exact
392    processing rate attributes.   Programmers are responsible for updating the counts
393    of any stream set declared with a variable attribute (MaxRatio or Unknown).
394
395#.  An important caveat is that buffer areas may change arbitrarily between
396    calls to the doMultiBlockMethod.   In no case should a kernel store a
397    buffer pointer in its internal state.   Furthermore a kernel must not make
398    any assumptions about the accessibility of stream set data outside of the
399    processing range, i.e., beyond the block boundaries associated with the given itemsToDo.
400*/
401
402class MultiBlockKernel : public Kernel {
403protected:
404
405    MultiBlockKernel(std::string && kernelName,
406                     std::vector<Binding> && stream_inputs,
407                     std::vector<Binding> && stream_outputs,
408                     std::vector<Binding> && scalar_parameters,
409                     std::vector<Binding> && scalar_outputs,
410                     std::vector<Binding> && internal_scalars);
411
412    // Each multi-block kernel subtype must provide its own logic for handling
413    // doMultiBlock calls, subject to the requirements laid out above.
414    // The generateMultiBlockLogic must be written to generate this logic, given
415    // a created but empty function.  Upon entry to generateMultiBlockLogic,
416    // the builder insertion point will be set to the entry block; upone
417    // exit the RetVoid instruction will be added to complete the method.
418    //
419    virtual void generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & idb) = 0;
420   
421private:
422    // Given a kernel subtype with an appropriate interface, the generateDoSegment
423    // method of the multi-block kernel builder makes all the necessary arrangements
424    // to translate doSegment calls into a minimal sequence of doMultiBlock calls.
425    void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & kb) final;
426
427};
428
429void applyOutputBufferExpansions(const std::unique_ptr<KernelBuilder> & kb,
430                                 std::vector<llvm::Value *> inputAvailable,
431                                 llvm::Value * doFinal);
432   
433   
434
435}
436#endif
Note: See TracBrowser for help on using the repository browser.