source: icGREP/icgrep-devel/icgrep/kernels/kernel.h @ 5454

Last change on this file since 5454 was 5454, checked in by nmedfort, 2 years ago

Bug fix check-in for DumpTrace, compilation of DoBlock / DoFinalBlock functions. Pablo CodeMotionPass optimized and enabled by default.

File size: 16.4 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#ifndef KERNEL_H
7#define KERNEL_H
8
9#include "interface.h"
10#include <boost/container/flat_map.hpp>
11#include <IR_Gen/idisa_builder.h>
12#include <toolchain/pipeline.h>
13#include <llvm/IR/Constants.h>
14
15namespace llvm { class Function; }
16namespace llvm { class IntegerType; }
17namespace llvm { class LoadInst; }
18namespace llvm { class Type; }
19namespace llvm { class Value; }
20namespace parabix { class StreamSetBuffer; }
21
22namespace kernel {
23   
24class KernelBuilder;
25
26class Kernel : public KernelInterface {
27    friend class KernelBuilder;
28protected:
29    using KernelMap = boost::container::flat_map<std::string, unsigned>;
30    enum class Port { Input, Output };
31    using StreamPort = std::pair<Port, unsigned>;
32    using StreamMap = boost::container::flat_map<std::string, StreamPort>;
33    using StreamSetBuffers = std::vector<parabix::StreamSetBuffer *>;
34    using Kernels = std::vector<Kernel *>;
35
36    static const std::string DO_BLOCK_SUFFIX;
37    static const std::string FINAL_BLOCK_SUFFIX;
38    static const std::string MULTI_BLOCK_SUFFIX;
39    static const std::string LOGICAL_SEGMENT_NO_SCALAR;
40    static const std::string PROCESSED_ITEM_COUNT_SUFFIX;
41    static const std::string CONSUMED_ITEM_COUNT_SUFFIX;
42    static const std::string PRODUCED_ITEM_COUNT_SUFFIX;
43    static const std::string TERMINATION_SIGNAL;
44    static const std::string BUFFER_PTR_SUFFIX;
45    static const std::string CONSUMER_SUFFIX;
46
47public:
48   
49    // Kernel Signatures and Module IDs
50    //
51    // A kernel signature uniquely identifies a kernel and its full functionality.
52    // In the event that a particular kernel instance is to be generated and compiled
53    // to produce object code, and we have a cached kernel object code instance with
54    // the same signature and targetting the same IDISA architecture, then the cached
55    // object code may safely be used to avoid recompilation.
56    //
57    // A kernel signature is a byte string of arbitrary length.
58    //
59    // Kernel developers should take responsibility for designing appropriate signature
60    // mechanisms that are short, inexpensive to compute and guarantee uniqueness
61    // based on the semantics of the kernel. 
62    //
63    // If no other mechanism is available, the default generateKernelSignature() method
64    // uses the full LLVM IR (before optimization) of the kernel instance.
65    //
66    // A kernel Module ID is short string that is used as a name for a particular kernel
67    // instance.  Kernel Module IDs are used to look up and retrieve cached kernel instances
68    // and so should be highly likely to uniquely identify a kernel instance.
69    //
70    // The ideal case is that a kernel Module ID serves as a full kernel signature thus
71    // guaranteeing uniqueness.  In this case, the moduleIDisUnique() method
72    // should return true.
73    //
74       
75    bool isCachable() const override { return false; }
76
77    std::string makeSignature(const std::unique_ptr<KernelBuilder> & idb) override;
78
79    // Can the module ID itself serve as the unique signature?
80    virtual bool moduleIDisSignature() const { return false; }
81
82    // Create a module stub for the kernel, populated only with its Module ID.     
83    //
84
85    void bindPorts(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs);
86
87    llvm::Module * makeModule(const std::unique_ptr<KernelBuilder> & idb);
88
89    llvm::Module * setModule(const std::unique_ptr<KernelBuilder> & idb, llvm::Module * const module);
90
91    llvm::Module * getModule() const {
92        return mModule;
93    }
94
95    void generateKernel(const std::unique_ptr<kernel::KernelBuilder> & idb);
96   
97    llvm::Value * createInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) final;
98
99    void initializeInstance(const std::unique_ptr<KernelBuilder> & idb) final;
100
101    void finalizeInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) final;
102
103    bool hasNoTerminateAttribute() const {
104        return mNoTerminateAttribute;
105    }
106
107    const StreamSetBuffers & getStreamSetInputBuffers() const {
108        return mStreamSetInputBuffers;
109    }
110
111    const parabix::StreamSetBuffer * getStreamSetInputBuffer(const unsigned i) const {
112        return mStreamSetInputBuffers[i];
113    }
114
115    const StreamSetBuffers & getStreamSetOutputBuffers() const {
116        return mStreamSetOutputBuffers;
117    }
118
119    const parabix::StreamSetBuffer * getStreamSetOutputBuffer(const unsigned i) const {
120        return mStreamSetOutputBuffers[i];
121    }
122
123    virtual ~Kernel() = 0;
124
125protected:
126
127    // Constructor
128    Kernel(std::string && kernelName,
129                  std::vector<Binding> && stream_inputs,
130                  std::vector<Binding> && stream_outputs,
131                  std::vector<Binding> && scalar_parameters,
132                  std::vector<Binding> && scalar_outputs,
133                  std::vector<Binding> && internal_scalars);
134
135    //
136    // Kernel builder subtypes define their logic of kernel construction
137    // in terms of 3 virtual methods for
138    // (a) preparing the Kernel state data structure
139    // (b) defining the logic of the doBlock function, and
140    // (c) defining the logic of the finalBlock function.
141    //
142    // Note: the kernel state data structure must only be finalized after
143    // all scalar fields have been added.   If there are no fields to
144    // be added, the default method for preparing kernel state may be used.
145
146    void setNoTerminateAttribute(const bool noTerminate = true) {
147        mNoTerminateAttribute = noTerminate;
148    }
149
150    unsigned getScalarIndex(const std::string & name) const;
151
152    void prepareStreamSetNameMap();
153
154    void linkExternalMethods(const std::unique_ptr<kernel::KernelBuilder> &) override { }
155
156    virtual void prepareKernel(const std::unique_ptr<KernelBuilder> & idb);
157
158    virtual void generateInitializeMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) { }
159   
160    virtual void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & iBuilder) = 0;
161
162    virtual void generateFinalizeMethod(const std::unique_ptr<KernelBuilder> & iBuilder) { }
163
164    // Add an additional scalar field to the KernelState struct.
165    // Must occur before any call to addKernelDeclarations or createKernelModule.
166    unsigned addScalar(llvm::Type * type, const std::string & name);
167
168    unsigned addUnnamedScalar(llvm::Type * type);
169
170    llvm::Value * getIsFinal() const {
171        return mIsFinal;
172    }
173
174    void callGenerateInitializeMethod(const std::unique_ptr<KernelBuilder> & idb);
175
176    void callGenerateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & idb);
177
178    void callGenerateFinalizeMethod(const std::unique_ptr<KernelBuilder> & idb);
179
180    StreamPort getStreamPort(const std::string & name) const;
181
182    const parabix::StreamSetBuffer * getInputStreamSetBuffer(const std::string & name) const {
183        const auto port = getStreamPort(name);
184        assert (port.first == Port::Input);
185        assert (port.second < mStreamSetInputBuffers.size());
186        assert (mStreamSetInputBuffers[port.second]);
187        return mStreamSetInputBuffers[port.second];
188    }
189
190    const parabix::StreamSetBuffer * getOutputStreamSetBuffer(const std::string & name) const {
191        const auto port = getStreamPort(name);
192        assert (port.first == Port::Output);
193        assert (port.second < mStreamSetOutputBuffers.size());
194        assert (mStreamSetOutputBuffers[port.second]);
195        return mStreamSetOutputBuffers[port.second];
196    }
197
198    const parabix::StreamSetBuffer * getAnyStreamSetBuffer(const std::string & name) const {
199        unsigned index; Port port;
200        std::tie(port, index) = getStreamPort(name);
201        if (port == Port::Input) {
202            assert (index < mStreamSetInputBuffers.size());
203            assert (mStreamSetInputBuffers[index]);
204            return mStreamSetInputBuffers[index];
205        } else {
206            assert (index < mStreamSetOutputBuffers.size());
207            assert (mStreamSetOutputBuffers[index]);
208            return mStreamSetOutputBuffers[index];
209        }
210    }
211
212private:
213
214    llvm::Value * getAvailableItemCount(const unsigned i) const {
215        return mAvailableItemCount[i];
216    }
217
218protected:
219
220    llvm::Function *                    mCurrentMethod;
221    bool                                mNoTerminateAttribute;
222    bool                                mIsGenerated;
223
224    llvm::Value *                       mIsFinal;
225    std::vector<llvm::Value *>          mAvailableItemCount;
226    llvm::Value *                       mOutputScalarResult;
227
228    std::vector<llvm::Type *>           mKernelFields;
229    KernelMap                           mKernelMap;
230    StreamMap                           mStreamMap;
231    StreamSetBuffers                    mStreamSetInputBuffers;
232    StreamSetBuffers                    mStreamSetOutputBuffers;
233
234};
235
236class SegmentOrientedKernel : public Kernel {
237protected:
238
239    SegmentOrientedKernel(std::string && kernelName,
240                          std::vector<Binding> && stream_inputs,
241                          std::vector<Binding> && stream_outputs,
242                          std::vector<Binding> && scalar_parameters,
243                          std::vector<Binding> && scalar_outputs,
244                          std::vector<Binding> && internal_scalars);
245
246};
247
248class BlockOrientedKernel : public Kernel {
249protected:
250
251    void CreateDoBlockMethodCall(const std::unique_ptr<KernelBuilder> & idb);
252
253    // Each kernel builder subtype must provide its own logic for generating
254    // doBlock calls.
255    virtual void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & idb) = 0;
256
257    // Each kernel builder subtypre must also specify the logic for processing the
258    // final block of stream data, if there is any special processing required
259    // beyond simply calling the doBlock function.   In the case that the final block
260    // processing may be trivially implemented by dispatching to the doBlock method
261    // without additional preparation, the default generateFinalBlockMethod need
262    // not be overridden.
263
264    virtual void generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & idb, llvm::Value * remainingItems);
265
266    void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & idb) final;
267
268    BlockOrientedKernel(std::string && kernelName,
269                        std::vector<Binding> && stream_inputs,
270                        std::vector<Binding> && stream_outputs,
271                        std::vector<Binding> && scalar_parameters,
272                        std::vector<Binding> && scalar_outputs,
273                        std::vector<Binding> && internal_scalars);
274
275private:
276
277    void writeDoBlockMethod(const std::unique_ptr<KernelBuilder> & idb);
278
279    void writeFinalBlockMethod(const std::unique_ptr<KernelBuilder> & idb, llvm::Value * remainingItems);
280
281private:
282
283    llvm::Function *        mDoBlockMethod;
284    llvm::BasicBlock *      mStrideLoopBody;
285    llvm::IndirectBrInst *  mStrideLoopBranch;
286    llvm::PHINode *         mStrideLoopTarget;
287};
288
289/*
290The Multi-Block Kernel Builder
291------------------------------
292
293The Multi-Block Kernel Builder is designed to simplify the programming of
294efficient kernels with possibly variable and/or nonaligned output, subject to
295exact or MaxRatio processing constraints.   The following restrictions apply.
296
297#.  The input consists of one or more stream sets, the first of which is
298    known as the principal input stream set.
299
300#.  If there is more than one input stream set, the additional stream sets must
301    have a processing rate defined with respect to the input stream set of one
302    of the following types:  FixedRate, Add1 or RoundUp.    Note that stream sets
303    declared without a processing rate attribute have the FixedRate(1) attribute
304    by default and therefore satisfy this constraint.
305
306#.  All output stream sets must be declared with processing rate attributes
307    of one of the following types:
308    *  FixedRate, Add1, RoundUp, or MaxRatio with respect to the principal input stream set.
309    *  FixedRate with respect to some other output stream set.
310
311    When using the Multi-Block Kernel Builder to program a new type of kernel,
312    the programmer must implement the generateMultiBlockLogic method for normal
313    multi-block processing according to the requirements below, as well as
314    providing for special final block processing, if necessary.
315
316#.  The doMultiBlockMethod will be called with the following parameters:
317    * the number of items of the principal input stream to process (itemsToDo),
318    * pointers to linear contiguous buffer areas for each of the input stream sets, and
319    * pointers to linear contiguous output buffer areas for each of the output stream sets.
320    * pointers are to the address of the first item of the first stream of the stream set.
321
322#.  The Multi-Block Kernel Builder will arrange that these input parameters may be
323    processed under the following simplifying assumptions.
324    * the number of itemsToDo will either be an exact multiple of the BlockSize,
325      or, for processing the final block, a value less than BlockSize
326    * all input buffers will be safe to access and have data available in
327      accord with their processing rates based on the given number of itemsToDo
328      of the principal input stream set; no further bounds checking is needed.
329    * all output buffers will be safe to access and have space available
330      for the given maximum output generation rates based on the given number
331      of itemsToDo of the principal input stream set; no further bounds checking
332      is needed.
333    * for final block processing, all input buffers will be extended to be safely
334      treated as containing data corresponding to a full block of the principal
335      input stream set, with the actual data in each buffer padded with null values
336      beyond the end of input.  Similarly, all output buffers will contain space
337      sufficient for the maximum output that can be generated for a full block of
338      input processing.
339    * input and output pointers will be typed to allow convenient and logical access
340      to corresponding streams based on their declared stream set type and processing rate.
341    * for any input pointer p, a GEP instruction with a single int32 index i
342      will produce a pointer to the buffer position corresponding to the ith block of the
343      principal input stream set.
344    * for any output stream set declared with a Fixed or Add1 processing rate with respect
345      to the principal input stream set, a GEP instruction with a single int32 index i
346      will produce a pointer to the buffer position corresponding to the ith block of the
347      principal input stream set.
348
349#.  Upon completion of multi-block processing, the Multi-Block Kernel Builder will arrange that
350    processed and produced item counts are updated for all stream sets that have exact
351    processing rate attributes.   Programmers are responsible for updating the producedItemCount
352    of any stream set declared with a variable attribute (MaxRatio).
353
354#.  An important caveat is that buffer areas may change arbitrarily between
355    calls to the doMultiBlockMethod.   In no case should a kernel store a
356    buffer pointer in its internal state.   Furthermore a kernel must not make
357    any assumptions about the accessibility of stream set data outside of the
358    processing range, i.e. beyond the block boundaries associated with the given itemsToDo.
359*/
360
361class MultiBlockKernel : public Kernel {
362protected:
363
364    MultiBlockKernel(std::string && kernelName,
365                     std::vector<Binding> && stream_inputs,
366                     std::vector<Binding> && stream_outputs,
367                     std::vector<Binding> && scalar_parameters,
368                     std::vector<Binding> && scalar_outputs,
369                     std::vector<Binding> && internal_scalars);
370
371    // Each multi-block kernel subtype must provide its own logic for handling
372    // doMultiBlock calls, subject to the requirements laid out above.
373    // The generateMultiBlockLogic must be written to generate this logic, given
374    // a created but empty function.  Upon entry to generateMultiBlockLogic,
375    // the builder insertion point will be set to the entry block; upone
376    // exit the RetVoid instruction will be added to complete the method.
377    //
378    virtual void generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & idb) = 0;
379
380private:
381
382    // Given a kernel subtype with an appropriate interface, the generateDoSegment
383    // method of the multi-block kernel builder makes all the necessary arrangements
384    // to translate doSegment calls into a minimal sequence of doMultiBlock calls.
385    void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & kb) final;
386
387};
388
389
390}
391#endif
Note: See TracBrowser for help on using the repository browser.