source: icGREP/icgrep-devel/icgrep/kernels/kernel.h @ 5611

Last change on this file since 5611 was 5599, checked in by cameron, 23 months ago

Bug fixes for multiblock kernel/radix64

File size: 18.4 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#ifndef KERNEL_H
7#define KERNEL_H
8
9#include "interface.h"
10#include <boost/container/flat_map.hpp>
11#include <llvm/IR/Constants.h>
12
13namespace llvm { class Function; }
14namespace llvm { class IntegerType; }
15namespace llvm { class IndirectBrInst; }
16namespace llvm { class PHINode; }
17namespace llvm { class LoadInst; }
18namespace llvm { class Type; }
19namespace llvm { class Value; }
20namespace parabix { class StreamSetBuffer; }
21
22namespace kernel {
23   
24class KernelBuilder;
25
26class Kernel : public KernelInterface {
27    friend class KernelBuilder;
28protected:
29    using KernelMap = boost::container::flat_map<std::string, unsigned>;
30    enum class Port { Input, Output };
31    using StreamPort = std::pair<Port, unsigned>;
32    using StreamMap = boost::container::flat_map<std::string, StreamPort>;
33    using StreamSetBuffers = std::vector<parabix::StreamSetBuffer *>;
34    using Kernels = std::vector<Kernel *>;
35
36    static const std::string DO_BLOCK_SUFFIX;
37    static const std::string FINAL_BLOCK_SUFFIX;
38    static const std::string MULTI_BLOCK_SUFFIX;
39    static const std::string LOGICAL_SEGMENT_NO_SCALAR;
40    static const std::string PROCESSED_ITEM_COUNT_SUFFIX;
41    static const std::string CONSUMED_ITEM_COUNT_SUFFIX;
42    static const std::string PRODUCED_ITEM_COUNT_SUFFIX;
43    static const std::string TERMINATION_SIGNAL;
44    static const std::string BUFFER_PTR_SUFFIX;
45    static const std::string CONSUMER_SUFFIX;
46public:
47    static const std::string CYCLECOUNT_SCALAR;
48
49public:
50   
51    // Kernel Signatures and Module IDs
52    //
53    // A kernel signature uniquely identifies a kernel and its full functionality.
54    // In the event that a particular kernel instance is to be generated and compiled
55    // to produce object code, and we have a cached kernel object code instance with
56    // the same signature and targetting the same IDISA architecture, then the cached
57    // object code may safely be used to avoid recompilation.
58    //
59    // A kernel signature is a byte string of arbitrary length.
60    //
61    // Kernel developers should take responsibility for designing appropriate signature
62    // mechanisms that are short, inexpensive to compute and guarantee uniqueness
63    // based on the semantics of the kernel. 
64    //
65    // If no other mechanism is available, the default makeSignature() method uses the
66    // full LLVM IR (before optimization) of the kernel instance.
67    //
68    // A kernel Module ID is short string that is used as a name for a particular kernel
69    // instance.  Kernel Module IDs are used to look up and retrieve cached kernel
70    // instances and so should be highly likely to uniquely identify a kernel instance.
71    //
72    // The ideal case is that a kernel Module ID serves as a full kernel signature thus
73    // guaranteeing uniqueness.  In this case, hasSignature() should return false.
74    //
75       
76    bool isCachable() const override { return false; }
77
78    std::string makeSignature(const std::unique_ptr<KernelBuilder> & idb) override;
79
80    // Can the module ID itself serve as the unique signature?
81    virtual bool hasSignature() const { return true; }
82
83    // Create a module stub for the kernel, populated only with its Module ID.     
84    //
85
86    void bindPorts(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs);
87
88    llvm::Module * makeModule(const std::unique_ptr<KernelBuilder> & idb);
89
90    llvm::Module * setModule(const std::unique_ptr<KernelBuilder> & idb, llvm::Module * const module);
91
92    llvm::Module * getModule() const {
93        return mModule;
94    }
95
96    void generateKernel(const std::unique_ptr<kernel::KernelBuilder> & idb);
97   
98    llvm::Value * createInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) final;
99
100    void initializeInstance(const std::unique_ptr<KernelBuilder> & idb) final;
101
102    void finalizeInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) final;
103
104    bool hasNoTerminateAttribute() const {
105        return mNoTerminateAttribute;
106    }
107
108    const StreamSetBuffers & getStreamSetInputBuffers() const {
109        return mStreamSetInputBuffers;
110    }
111
112    const parabix::StreamSetBuffer * getStreamSetInputBuffer(const unsigned i) const {
113        return mStreamSetInputBuffers[i];
114    }
115
116    const StreamSetBuffers & getStreamSetOutputBuffers() const {
117        return mStreamSetOutputBuffers;
118    }
119
120    const parabix::StreamSetBuffer * getStreamSetOutputBuffer(const unsigned i) const {
121        return mStreamSetOutputBuffers[i];
122    }
123   
124    // Kernels typically perform block-at-a-time processing, but some kernels may require
125    // a different stride.   In the case of multiblock kernels, the stride attribute
126    // determines the number of minimum number of items that will be provided to the kernel
127    // on each doMultiBlock call.
128    //
129   
130    unsigned getKernelStride() const { return mStride;}
131   
132    void setKernelStride(unsigned stride) {mStride = stride;}
133   
134    virtual ~Kernel() = 0;
135
136protected:
137
138    // Constructor
139    Kernel(std::string && kernelName,
140                  std::vector<Binding> && stream_inputs,
141                  std::vector<Binding> && stream_outputs,
142                  std::vector<Binding> && scalar_parameters,
143                  std::vector<Binding> && scalar_outputs,
144                  std::vector<Binding> && internal_scalars);
145
146    //
147    // Kernel builder subtypes define their logic of kernel construction
148    // in terms of 3 virtual methods for
149    // (a) preparing the Kernel state data structure
150    // (c) defining the logic of the finalBlock function.
151    //
152    // Note: the kernel state data structure must only be finalized after
153    // all scalar fields have been added.   If there are no fields to
154    // be added, the default method for preparing kernel state may be used.
155
156    void setNoTerminateAttribute(const bool noTerminate = true) {
157        mNoTerminateAttribute = noTerminate;
158    }
159
160    unsigned getScalarIndex(const std::string & name) const;
161
162    void prepareStreamSetNameMap();
163   
164    void processingRateAnalysis();
165
166    void linkExternalMethods(const std::unique_ptr<kernel::KernelBuilder> &) override { }
167
168    virtual void prepareKernel(const std::unique_ptr<KernelBuilder> & idb);
169
170    virtual void generateInitializeMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) { }
171   
172    virtual void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & iBuilder) = 0;
173
174    virtual void generateFinalizeMethod(const std::unique_ptr<KernelBuilder> & iBuilder) { }
175
176    // Add an additional scalar field to the KernelState struct.
177    // Must occur before any call to addKernelDeclarations or createKernelModule.
178    unsigned addScalar(llvm::Type * type, const std::string & name);
179
180    unsigned addUnnamedScalar(llvm::Type * type);
181
182    llvm::Value * getIsFinal() const {
183        return mIsFinal;
184    }
185
186    void callGenerateInitializeMethod(const std::unique_ptr<KernelBuilder> & idb);
187
188    void callGenerateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & idb);
189
190    void callGenerateFinalizeMethod(const std::unique_ptr<KernelBuilder> & idb);
191
192    StreamPort getStreamPort(const std::string & name) const;
193
194    const parabix::StreamSetBuffer * getInputStreamSetBuffer(const std::string & name) const {
195        const auto port = getStreamPort(name);
196        assert (port.first == Port::Input);
197        assert (port.second < mStreamSetInputBuffers.size());
198        assert (mStreamSetInputBuffers[port.second]);
199        return mStreamSetInputBuffers[port.second];
200    }
201
202    const parabix::StreamSetBuffer * getOutputStreamSetBuffer(const std::string & name) const {
203        const auto port = getStreamPort(name);
204        assert (port.first == Port::Output);
205        assert (port.second < mStreamSetOutputBuffers.size());
206        assert (mStreamSetOutputBuffers[port.second]);
207        return mStreamSetOutputBuffers[port.second];
208    }
209
210    const parabix::StreamSetBuffer * getAnyStreamSetBuffer(const std::string & name) const {
211        unsigned index; Port port;
212        std::tie(port, index) = getStreamPort(name);
213        if (port == Port::Input) {
214            assert (index < mStreamSetInputBuffers.size());
215            assert (mStreamSetInputBuffers[index]);
216            return mStreamSetInputBuffers[index];
217        } else {
218            assert (index < mStreamSetOutputBuffers.size());
219            assert (mStreamSetOutputBuffers[index]);
220            return mStreamSetOutputBuffers[index];
221        }
222    }
223
224private:
225
226    llvm::Value * getAvailableItemCount(const unsigned i) const {
227        return mAvailableItemCount[i];
228    }
229
230protected:
231
232    llvm::Function *                    mCurrentMethod;
233    bool                                mNoTerminateAttribute;
234    bool                                mIsGenerated;
235
236    llvm::Value *                       mIsFinal;
237    std::vector<llvm::Value *>          mAvailableItemCount;
238    llvm::Value *                       mOutputScalarResult;
239
240    std::vector<llvm::Type *>           mKernelFields;
241    KernelMap                           mKernelMap;
242    StreamMap                           mStreamMap;
243    StreamSetBuffers                    mStreamSetInputBuffers;
244    StreamSetBuffers                    mStreamSetOutputBuffers;
245    unsigned                            mStride;
246    std::vector<unsigned>               mItemsPerStride;
247    std::vector<unsigned>               mIsDerived;
248
249};
250
251class SegmentOrientedKernel : public Kernel {
252protected:
253
254    SegmentOrientedKernel(std::string && kernelName,
255                          std::vector<Binding> && stream_inputs,
256                          std::vector<Binding> && stream_outputs,
257                          std::vector<Binding> && scalar_parameters,
258                          std::vector<Binding> && scalar_outputs,
259                          std::vector<Binding> && internal_scalars);
260
261};
262
263class BlockOrientedKernel : public Kernel {
264protected:
265
266    void CreateDoBlockMethodCall(const std::unique_ptr<KernelBuilder> & idb);
267
268    // Each kernel builder subtype must provide its own logic for generating
269    // doBlock calls.
270    virtual void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & idb) = 0;
271
272    // Each kernel builder subtypre must also specify the logic for processing the
273    // final block of stream data, if there is any special processing required
274    // beyond simply calling the doBlock function.   In the case that the final block
275    // processing may be trivially implemented by dispatching to the doBlock method
276    // without additional preparation, the default generateFinalBlockMethod need
277    // not be overridden.
278
279    virtual void generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & idb, llvm::Value * remainingItems);
280
281    void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & idb) final;
282
283    BlockOrientedKernel(std::string && kernelName,
284                        std::vector<Binding> && stream_inputs,
285                        std::vector<Binding> && stream_outputs,
286                        std::vector<Binding> && scalar_parameters,
287                        std::vector<Binding> && scalar_outputs,
288                        std::vector<Binding> && internal_scalars);
289
290private:
291
292    void writeDoBlockMethod(const std::unique_ptr<KernelBuilder> & idb);
293
294    void writeFinalBlockMethod(const std::unique_ptr<KernelBuilder> & idb, llvm::Value * remainingItems);
295
296private:
297
298    llvm::Function *        mDoBlockMethod;
299    llvm::BasicBlock *      mStrideLoopBody;
300    llvm::IndirectBrInst *  mStrideLoopBranch;
301    llvm::PHINode *         mStrideLoopTarget;
302};
303
304/*
305The Multi-Block Kernel Builder
306------------------------------
307
308The Multi-Block Kernel Builder is designed to simplify the programming of
309efficient kernels with possibly variable and/or nonaligned output, subject to
310exact or MaxRatio processing constraints.   The following restrictions apply.
311
312#.  The input consists of one or more stream sets, the first of which is
313    known as the principal input stream set.
314
315#.  If there is more than one input stream set, the additional stream sets
316    are first classified as having either a derived processing rate or
317    a variable processing rate.   Stream sets with a derived processing rate
318    have a processing rate defined with respect to the input stream set of one
319    of the following types:  FixedRate, Add1 or RoundUp.    Note that stream sets
320    declared without a processing rate attribute have the FixedRate(1) attribute
321    by default and therefore satisfy this constraint.  All other processing rate
322    types are classified as variable rate.
323
324#.  All output stream sets must be declared with processing rate attributes
325    of one of the following types:
326    *  FixedRate, Add1, RoundUp, or MaxRatio with respect to the principal input stream set.
327    *  FixedRate with respect to some other output stream set.
328
329    When using the Multi-Block Kernel Builder to program a new type of kernel,
330    the programmer must implement the generateMultiBlockLogic method for normal
331    multi-block processing according to the requirements below, as well as
332    providing for special final block processing, if necessary.
333
334#.  The doMultiBlockMethod will be called with the following parameters:
335    * the number of items of the principal input stream to process (itemsToDo),
336    * additional items available parameters for each additional input stream set
337      that is classified as a variable rate stream set
338    * pointers to linear contiguous buffer areas for each of the input stream sets, and
339    * pointers to linear contiguous output buffer areas for each of the output stream sets.
340
341    Notes:
342    * if the kernel has a Lookahead dependency declared on any input stream set, then
343      there will be two buffer pointers for that stream set, one for accessing stream set
344      items without lookahead and one for accessing the items with lookahead.   
345    * pointers are to the beginning of the block corresponding to the
346      processedItemCount or producedItemCount of the given stream set.
347    * the base type of each pointer is the StreamSetBlockType of that streamset
348
349#.  The Multi-Block Kernel Builder will arrange that these input parameters may be
350    processed under the following simplifying assumptions.
351    * the number of itemsToDo will either be an exact multiple of the kernel stride,
352      or, for processing the final block, a value less than the kernel stride
353    * the input buffer of the principal stream set and all input buffers of stream sets
354      with derived processing rates will be safe to access and have data available in
355      accord with their processing rates based on the given number of itemsToDo
356      of the principal input stream set; no further bounds checking is needed. 
357    * input buffers of stream sets with MaxRatio attributes will be safe to access,
358      but will only have valid data as specified by the available items parameter for
359      that stream set.
360    * the kernel programmer is responsible for safe access and bounds checking for any
361      input stream set classified as Unknown rate.   No temporary buffers are used
362      for such stream sets.
363    * all output buffers will be safe to access and have space available
364      for the given maximum output generation rates based on the given number
365      of itemsToDo of the principal input stream set; no further bounds checking
366      is needed.
367    * for final block processing, all input buffers will be extended to be safely
368      treated as containing data corresponding to a full block of the principal
369      input stream set, with the actual data in each buffer padded with null values
370      beyond the end of input.  Similarly, all output buffers will contain space
371      sufficient for the maximum output that can be generated for a full block of
372      input processing.
373    * input and output pointers will be typed to allow convenient and logical access
374      to corresponding streams based on their declared stream set type and processing rate.
375    * for any input pointer p, a GEP instruction with a single int32 index i
376      will produce a pointer to the buffer position corresponding to the ith block of the
377      input stream set, relative to the initial block based on the processedItemCount.
378    * for any output stream set declared with a Fixed or Add1 processing rate with respect
379      to the principal input stream set, a GEP instruction with a single int32 index i
380      will produce a pointer to the buffer position corresponding to the ith block of the
381      stream set, relative to the initial block based on the producedItemCount.
382
383#.  Upon completion of multi-block processing, the Multi-Block Kernel Builder will arrange that
384    processed and produced item counts are updated for all stream sets that have exact
385    processing rate attributes.   Programmers are responsible for updating the counts
386    of any stream set declared with a variable attribute (MaxRatio or Unknown).
387
388#.  An important caveat is that buffer areas may change arbitrarily between
389    calls to the doMultiBlockMethod.   In no case should a kernel store a
390    buffer pointer in its internal state.   Furthermore a kernel must not make
391    any assumptions about the accessibility of stream set data outside of the
392    processing range delimited by the block boundaries associated with the given itemsToDo.
393*/
394
395class MultiBlockKernel : public Kernel {
396protected:
397
398    MultiBlockKernel(std::string && kernelName,
399                     std::vector<Binding> && stream_inputs,
400                     std::vector<Binding> && stream_outputs,
401                     std::vector<Binding> && scalar_parameters,
402                     std::vector<Binding> && scalar_outputs,
403                     std::vector<Binding> && internal_scalars);
404
405    // Each multi-block kernel subtype must provide its own logic for handling
406    // doMultiBlock calls, subject to the requirements laid out above.
407    // The generateMultiBlockLogic must be written to generate this logic, given
408    // a created but empty function.  Upon entry to generateMultiBlockLogic,
409    // the builder insertion point will be set to the entry block; upone
410    // exit the RetVoid instruction will be added to complete the method.
411    //
412    virtual void generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & idb) = 0;
413   
414private:
415    // Given a kernel subtype with an appropriate interface, the generateDoSegment
416    // method of the multi-block kernel builder makes all the necessary arrangements
417    // to translate doSegment calls into a minimal sequence of doMultiBlock calls.
418    void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & kb) final;
419
420};
421
422
423}
424#endif
Note: See TracBrowser for help on using the repository browser.