source: icGREP/icgrep-devel/icgrep/kernels/kernel.h @ 5441

Last change on this file since 5441 was 5441, checked in by cameron, 2 years ago

Reimplement StdoutKernel? using MultiBlockKernel?

File size: 16.3 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#ifndef KERNEL_H
7#define KERNEL_H
8
9#include "interface.h"
10#include <boost/container/flat_map.hpp>
11#include <IR_Gen/idisa_builder.h>
12#include <toolchain/pipeline.h>
13#include <llvm/IR/Constants.h>
14
// Forward declarations of the external types this header only refers to by
// pointer.  Every llvm type named below in this header is listed here
// so the header does not depend on what other includes happen to pull in.
namespace llvm {
class BasicBlock;
class Function;
class IndirectBrInst;
class IntegerType;
class LoadInst;
class Module;
class PHINode;
class Type;
class Value;
}
namespace parabix { class StreamSetBuffer; }
21
22namespace kernel {
23   
24class KernelBuilder;
25
26class Kernel : public KernelInterface {
27    friend class KernelBuilder;
28protected:
29    using KernelMap = boost::container::flat_map<std::string, unsigned>;
30    enum class Port { Input, Output };
31    using StreamPort = std::pair<Port, unsigned>;
32    using StreamMap = boost::container::flat_map<std::string, StreamPort>;
33    using StreamSetBuffers = std::vector<parabix::StreamSetBuffer *>;
34    using Kernels = std::vector<Kernel *>;
35
36    static const std::string DO_BLOCK_SUFFIX;
37    static const std::string FINAL_BLOCK_SUFFIX;
38    static const std::string MULTI_BLOCK_SUFFIX;
39    static const std::string LOGICAL_SEGMENT_NO_SCALAR;
40    static const std::string PROCESSED_ITEM_COUNT_SUFFIX;
41    static const std::string CONSUMED_ITEM_COUNT_SUFFIX;
42    static const std::string PRODUCED_ITEM_COUNT_SUFFIX;
43    static const std::string TERMINATION_SIGNAL;
44    static const std::string BUFFER_PTR_SUFFIX;
45    static const std::string CONSUMER_SUFFIX;
46
47public:
48   
49    // Kernel Signatures and Module IDs
50    //
51    // A kernel signature uniquely identifies a kernel and its full functionality.
52    // In the event that a particular kernel instance is to be generated and compiled
53    // to produce object code, and we have a cached kernel object code instance with
54    // the same signature and targetting the same IDISA architecture, then the cached
55    // object code may safely be used to avoid recompilation.
56    //
57    // A kernel signature is a byte string of arbitrary length.
58    //
59    // Kernel developers should take responsibility for designing appropriate signature
60    // mechanisms that are short, inexpensive to compute and guarantee uniqueness
61    // based on the semantics of the kernel. 
62    //
63    // If no other mechanism is available, the default generateKernelSignature() method
64    // uses the full LLVM IR (before optimization) of the kernel instance.
65    //
66    // A kernel Module ID is short string that is used as a name for a particular kernel
67    // instance.  Kernel Module IDs are used to look up and retrieve cached kernel instances
68    // and so should be highly likely to uniquely identify a kernel instance.
69    //
70    // The ideal case is that a kernel Module ID serves as a full kernel signature thus
71    // guaranteeing uniqueness.  In this case, the moduleIDisUnique() method
72    // should return true.
73    //
74       
75    bool isCachable() const override { return false; }
76
77    std::string makeSignature(const std::unique_ptr<KernelBuilder> & idb) override;
78
79    // Can the module ID itself serve as the unique signature?
80    virtual bool moduleIDisSignature() const { return false; }
81
82    // Create a module stub for the kernel, populated only with its Module ID.     
83    //
84
85    void createKernelStub(const std::unique_ptr<KernelBuilder> & idb, const StreamSetBuffers & inputs, const StreamSetBuffers & outputs);
86
87    void createKernelStub(const std::unique_ptr<KernelBuilder> & idb, const StreamSetBuffers & inputs, const StreamSetBuffers & outputs, llvm::Module * const kernelModule);
88
89    llvm::Module * getModule() const {
90        return mModule;
91    }
92
93    void generateKernel(const std::unique_ptr<kernel::KernelBuilder> & idb);
94   
95    llvm::Value * createInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) final;
96
97    void initializeInstance(const std::unique_ptr<KernelBuilder> & idb) final;
98
99    void finalizeInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) final;
100
101    bool hasNoTerminateAttribute() const {
102        return mNoTerminateAttribute;
103    }
104
105    const StreamSetBuffers & getStreamSetInputBuffers() const {
106        return mStreamSetInputBuffers;
107    }
108
109    const parabix::StreamSetBuffer * getStreamSetInputBuffer(const unsigned i) const {
110        return mStreamSetInputBuffers[i];
111    }
112
113    const StreamSetBuffers & getStreamSetOutputBuffers() const {
114        return mStreamSetOutputBuffers;
115    }
116
117    const parabix::StreamSetBuffer * getStreamSetOutputBuffer(const unsigned i) const {
118        return mStreamSetOutputBuffers[i];
119    }
120
121    virtual ~Kernel() = 0;
122
123protected:
124
125    // Constructor
126    Kernel(std::string && kernelName,
127                  std::vector<Binding> && stream_inputs,
128                  std::vector<Binding> && stream_outputs,
129                  std::vector<Binding> && scalar_parameters,
130                  std::vector<Binding> && scalar_outputs,
131                  std::vector<Binding> && internal_scalars);
132
133    //
134    // Kernel builder subtypes define their logic of kernel construction
135    // in terms of 3 virtual methods for
136    // (a) preparing the Kernel state data structure
137    // (b) defining the logic of the doBlock function, and
138    // (c) defining the logic of the finalBlock function.
139    //
140    // Note: the kernel state data structure must only be finalized after
141    // all scalar fields have been added.   If there are no fields to
142    // be added, the default method for preparing kernel state may be used.
143
144    void setNoTerminateAttribute(const bool noTerminate = true) {
145        mNoTerminateAttribute = noTerminate;
146    }
147
148    unsigned getScalarIndex(const std::string & name) const;
149
150    void prepareStreamSetNameMap();
151
152    void linkExternalMethods(const std::unique_ptr<kernel::KernelBuilder> &) override { }
153
154    virtual void prepareKernel(const std::unique_ptr<KernelBuilder> & idb);
155
156    virtual void generateInitializeMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) { }
157   
158    virtual void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & iBuilder) = 0;
159
160    virtual void generateFinalizeMethod(const std::unique_ptr<KernelBuilder> & iBuilder) { }
161
162    // Add an additional scalar field to the KernelState struct.
163    // Must occur before any call to addKernelDeclarations or createKernelModule.
164    unsigned addScalar(llvm::Type * type, const std::string & name);
165
166    unsigned addUnnamedScalar(llvm::Type * type);
167
168    llvm::Value * getIsFinal() const {
169        return mIsFinal;
170    }
171
172    void callGenerateInitializeMethod(const std::unique_ptr<KernelBuilder> & idb);
173
174    void callGenerateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & idb);
175
176    void callGenerateFinalizeMethod(const std::unique_ptr<KernelBuilder> & idb);
177
178    StreamPort getStreamPort(const std::string & name) const;
179
180    const parabix::StreamSetBuffer * getInputStreamSetBuffer(const std::string & name) const {
181        const auto port = getStreamPort(name);
182        assert (port.first == Port::Input);
183        assert (port.second < mStreamSetInputBuffers.size());
184        return mStreamSetInputBuffers[port.second];
185    }
186
187    const parabix::StreamSetBuffer * getOutputStreamSetBuffer(const std::string & name) const {
188        const auto port = getStreamPort(name);
189        assert (port.first == Port::Output);
190        assert (port.second < mStreamSetOutputBuffers.size());
191        return mStreamSetOutputBuffers[port.second];
192    }
193
194    const parabix::StreamSetBuffer * getAnyStreamSetBuffer(const std::string & name) const {
195        unsigned index; Port port;
196        std::tie(port, index) = getStreamPort(name);
197        if (port == Port::Input) {
198            assert (index < mStreamSetInputBuffers.size());
199            return mStreamSetInputBuffers[index];
200        } else {
201            assert (index < mStreamSetOutputBuffers.size());
202            return mStreamSetOutputBuffers[index];
203        }
204    }
205
206private:
207
208    llvm::Value * getAvailableItemCount(const unsigned i) const {
209        return mAvailableItemCount[i];
210    }
211
212protected:
213
214    llvm::Function *                    mCurrentMethod;
215    bool                                mNoTerminateAttribute;
216    bool                                mIsGenerated;
217
218    llvm::Value *                       mIsFinal;
219    std::vector<llvm::Value *>          mAvailableItemCount;
220    llvm::Value *                       mOutputScalarResult;
221
222
223    std::vector<llvm::Type *>           mKernelFields;
224    KernelMap                           mKernelMap;
225    StreamMap                           mStreamMap;
226    StreamSetBuffers                    mStreamSetInputBuffers;
227    StreamSetBuffers                    mStreamSetOutputBuffers;
228
229};
230
231class SegmentOrientedKernel : public Kernel {
232protected:
233
234    SegmentOrientedKernel(std::string && kernelName,
235                          std::vector<Binding> && stream_inputs,
236                          std::vector<Binding> && stream_outputs,
237                          std::vector<Binding> && scalar_parameters,
238                          std::vector<Binding> && scalar_outputs,
239                          std::vector<Binding> && internal_scalars);
240
241};
242
243class BlockOrientedKernel : public Kernel {
244protected:
245
246    void CreateDoBlockMethodCall(const std::unique_ptr<KernelBuilder> & idb);
247
248    // Each kernel builder subtype must provide its own logic for generating
249    // doBlock calls.
250    virtual void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & idb) = 0;
251
252    // Each kernel builder subtypre must also specify the logic for processing the
253    // final block of stream data, if there is any special processing required
254    // beyond simply calling the doBlock function.   In the case that the final block
255    // processing may be trivially implemented by dispatching to the doBlock method
256    // without additional preparation, the default generateFinalBlockMethod need
257    // not be overridden.
258
259    virtual void generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & idb, llvm::Value * remainingItems);
260
261    void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & idb) final;
262
263    BlockOrientedKernel(std::string && kernelName,
264                        std::vector<Binding> && stream_inputs,
265                        std::vector<Binding> && stream_outputs,
266                        std::vector<Binding> && scalar_parameters,
267                        std::vector<Binding> && scalar_outputs,
268                        std::vector<Binding> && internal_scalars);
269
270private:
271
272    void writeDoBlockMethod(const std::unique_ptr<KernelBuilder> & idb);
273
274    void writeFinalBlockMethod(const std::unique_ptr<KernelBuilder> & idb, llvm::Value * remainingItems);
275
276private:
277
278    llvm::Function *        mDoBlockMethod;
279    llvm::BasicBlock *      mStrideLoopBody;
280    llvm::IndirectBrInst *  mStrideLoopBranch;
281    llvm::PHINode *         mStrideLoopTarget;
282};
283
284/*
285The Multi-Block Kernel Builder
286------------------------------
287
288The Multi-Block Kernel Builder is designed to simplify the programming of
289efficient kernels with possibly variable and/or nonaligned output, subject to
290exact or MaxRatio processing constraints.   The following restrictions apply.
291
292#.  The input consists of one or more stream sets, the first of which is
293    known as the principal input stream set.
294
#.  If there is more than one input stream set, the additional stream sets must
    have a processing rate defined with respect to the principal input stream set
    of one of the following types:  FixedRate, Add1 or RoundUp.    Note that stream sets
    declared without a processing rate attribute have the FixedRate(1) attribute
    by default and therefore satisfy this constraint.
300
#.  All output stream sets must be declared with processing rate attributes
    of one of the following types:
    *  FixedRate, Add1, RoundUp, or MaxRatio with respect to the principal input stream set.
    *  FixedRate with respect to some other output stream set.
305
    When using the Multi-Block Kernel Builder to program a new type of kernel,
    the programmer must implement the generateMultiBlockLogic method for normal
    multi-block processing according to the requirements below, as well as
    providing for special final block processing, if necessary.
310
311#.  The doMultiBlockMethod will be called with the following parameters:
312    * the number of items of the principal input stream to process (itemsToDo),
313    * pointers to linear contiguous buffer areas for each of the input stream sets, and
314    * pointers to linear contiguous output buffer areas for each of the output stream sets.
315    * pointers are to the address of the first item of the first stream of the stream set.
316
317#.  The Multi-Block Kernel Builder will arrange that these input parameters may be
318    processed under the following simplifying assumptions.
319    * the number of itemsToDo will either be an exact multiple of the BlockSize,
320      or, for processing the final block, a value less than BlockSize
321    * all input buffers will be safe to access and have data available in
322      accord with their processing rates based on the given number of itemsToDo
323      of the principal input stream set; no further bounds checking is needed.
    * all output buffers will be safe to access and have space available
      for the given maximum output generation rates based on the given number
      of itemsToDo of the principal input stream set; no further bounds checking
      is needed.
328    * for final block processing, all input buffers will be extended to be safely
329      treated as containing data corresponding to a full block of the principal
330      input stream set, with the actual data in each buffer padded with null values
331      beyond the end of input.  Similarly, all output buffers will contain space
332      sufficient for the maximum output that can be generated for a full block of
333      input processing.
334    * input and output pointers will be typed to allow convenient and logical access
335      to corresponding streams based on their declared stream set type and processing rate.
336    * for any input pointer p, a GEP instruction with a single int32 index i
337      will produce a pointer to the buffer position corresponding to the ith block of the
338      principal input stream set.
339    * for any output stream set declared with a Fixed or Add1 processing rate with respect
340      to the principal input stream set, a GEP instruction with a single int32 index i
341      will produce a pointer to the buffer position corresponding to the ith block of the
342      principal input stream set.
343
344#.  Upon completion of multi-block processing, the Multi-Block Kernel Builder will arrange that
345    processed and produced item counts are updated for all stream sets that have exact
346    processing rate attributes.   Programmers are responsible for updating the producedItemCount
347    of any stream set declared with a variable attribute (MaxRatio).
348
#.  An important caveat is that buffer areas may change arbitrarily between
    calls to the doMultiBlockMethod.   In no case should a kernel store a
    buffer pointer in its internal state.   Furthermore, a kernel must not make
    any assumptions about the accessibility of stream set data outside of the
    processing range defined by the block boundaries associated with the given itemsToDo.
354*/
355
356class MultiBlockKernel : public Kernel {
357protected:
358
359    MultiBlockKernel(std::string && kernelName,
360                     std::vector<Binding> && stream_inputs,
361                     std::vector<Binding> && stream_outputs,
362                     std::vector<Binding> && scalar_parameters,
363                     std::vector<Binding> && scalar_outputs,
364                     std::vector<Binding> && internal_scalars);
365
366    // Each multi-block kernel subtype must provide its own logic for handling
367    // doMultiBlock calls, subject to the requirements laid out above.
368    // The generateMultiBlockLogic must be written to generate this logic, given
369    // a created but empty function.  Upon entry to generateMultiBlockLogic,
370    // the builder insertion point will be set to the entry block; upone
371    // exit the RetVoid instruction will be added to complete the method.
372    //
373    virtual void generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & idb) = 0;
374
375private:
376
377    // Given a kernel subtype with an appropriate interface, the generateDoSegment
378    // method of the multi-block kernel builder makes all the necessary arrangements
379    // to translate doSegment calls into a minimal sequence of doMultiBlock calls.
380    void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & idb) final;
381
382};
383
384
385}
386#endif
Note: See TracBrowser for help on using the repository browser.