source: icGREP/icgrep-devel/icgrep/kernels/kernel.h @ 5439

Last change on this file since 5439 was 5439, checked in by cameron, 2 years ago

Multiblock Kernels: initial check-in

File size: 20.3 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#ifndef KERNEL_H
7#define KERNEL_H
8
9#include "interface.h"
10#include <boost/container/flat_map.hpp>
11#include <IR_Gen/idisa_builder.h>
12#include <toolchain/pipeline.h>
13#include <llvm/IR/Constants.h>
14
15namespace llvm { class Function; }
16namespace llvm { class IntegerType; }
17namespace llvm { class LoadInst; }
18namespace llvm { class Type; }
19namespace llvm { class Value; }
20namespace parabix { class StreamSetBuffer; }
21
22namespace kernel {
23   
24class KernelBuilder;
25
26class Kernel : public KernelInterface {
27protected:
28    using KernelMap = boost::container::flat_map<std::string, unsigned>;
29    enum class Port { Input, Output };
30    using StreamPort = std::pair<Port, unsigned>;
31    using StreamMap = boost::container::flat_map<std::string, StreamPort>;
32    using StreamSetBuffers = std::vector<parabix::StreamSetBuffer *>;
33    using Kernels = std::vector<Kernel *>;
34
35    friend class KernelBuilder;
36    friend void ::generateSegmentParallelPipeline(const std::unique_ptr<kernel::KernelBuilder> &, const Kernels &);
37    friend void ::generatePipelineLoop(const std::unique_ptr<kernel::KernelBuilder> &, const Kernels &);
38    friend void ::generateParallelPipeline(const std::unique_ptr<kernel::KernelBuilder> &, const Kernels &);
39
40    static const std::string DO_BLOCK_SUFFIX;
41    static const std::string FINAL_BLOCK_SUFFIX;
42    static const std::string MULTI_BLOCK_SUFFIX;
43    static const std::string LOGICAL_SEGMENT_NO_SCALAR;
44    static const std::string PROCESSED_ITEM_COUNT_SUFFIX;
45    static const std::string CONSUMED_ITEM_COUNT_SUFFIX;
46    static const std::string PRODUCED_ITEM_COUNT_SUFFIX;
47    static const std::string TERMINATION_SIGNAL;
48    static const std::string BUFFER_PTR_SUFFIX;
49    static const std::string CONSUMER_SUFFIX;
50
51public:
52   
53    // Kernel Signatures and Module IDs
54    //
55    // A kernel signature uniquely identifies a kernel and its full functionality.
56    // In the event that a particular kernel instance is to be generated and compiled
57    // to produce object code, and we have a cached kernel object code instance with
58    // the same signature and targetting the same IDISA architecture, then the cached
59    // object code may safely be used to avoid recompilation.
60    //
61    // A kernel signature is a byte string of arbitrary length.
62    //
63    // Kernel developers should take responsibility for designing appropriate signature
64    // mechanisms that are short, inexpensive to compute and guarantee uniqueness
65    // based on the semantics of the kernel. 
66    //
67    // If no other mechanism is available, the default generateKernelSignature() method
68    // uses the full LLVM IR (before optimization) of the kernel instance.
69    //
70    // A kernel Module ID is short string that is used as a name for a particular kernel
71    // instance.  Kernel Module IDs are used to look up and retrieve cached kernel instances
72    // and so should be highly likely to uniquely identify a kernel instance.
73    //
74    // The ideal case is that a kernel Module ID serves as a full kernel signature thus
75    // guaranteeing uniqueness.  In this case, the moduleIDisUnique() method
76    // should return true.
77    //
78       
79    bool isCachable() const override { return false; }
80
81    std::string makeSignature() override;
82
83    // Can the module ID itself serve as the unique signature?
84    virtual bool moduleIDisSignature() const { return false; }
85
86    // Create a module stub for the kernel, populated only with its Module ID.     
87    //
88
89    void createKernelStub(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs);
90
91    void createKernelStub(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs, llvm::Module * const kernelModule);
92
93    llvm::Module * getModule() const {
94        return mModule;
95    }
96
97    // Generate the Kernel to the current module (iBuilder->getModule()).
98    void generateKernel();
99   
100    llvm::Value * createInstance() final;
101
102    void initializeInstance() final;
103
104    void finalizeInstance() final;
105
106    llvm::Value * getProducedItemCount(const std::string & name, llvm::Value * doFinal = nullptr) const final;
107
108    void setProducedItemCount(const std::string & name, llvm::Value * value) const final;
109
110    llvm::Value * getProcessedItemCount(const std::string & name) const final;
111
112    void setProcessedItemCount(const std::string & name, llvm::Value * value) const final;
113
114    llvm::Value * getConsumedItemCount(const std::string & name) const final;
115
116    void setConsumedItemCount(const std::string & name, llvm::Value * value) const final;
117
118    llvm::Value * getTerminationSignal() const final;
119
120    void setTerminationSignal() const final;
121
122    // Get the value of a scalar field for the current instance.
123    llvm::Value * getScalarFieldPtr(llvm::Value * index) const;
124
125    llvm::Value * getScalarFieldPtr(const std::string & fieldName) const;
126
127    llvm::Value * getScalarField(const std::string & fieldName) const;
128
129    // Set the value of a scalar field for the current instance.
130    void setScalarField(const std::string & fieldName, llvm::Value * value) const;
131
132    // Synchronization actions for executing a kernel for a particular logical segment.
133    //
134    // Before the segment is processed, acquireLogicalSegmentNo must be used to load
135    // the segment number of the kernel state to ensure that the previous segment is
136    // complete (by checking that the acquired segment number is equal to the desired segment
137    // number).
138    // After all segment processing actions for the kernel are complete, and any necessary
139    // data has been extracted from the kernel for further pipeline processing, the
140    // segment number must be incremented and stored using releaseLogicalSegmentNo.
141    llvm::LoadInst * acquireLogicalSegmentNo() const;
142
143    void releaseLogicalSegmentNo(llvm::Value * nextSegNo) const;
144
145    bool hasNoTerminateAttribute() const {
146        return mNoTerminateAttribute;
147    }
148
149    const StreamSetBuffers & getStreamSetInputBuffers() const {
150        return mStreamSetInputBuffers;
151    }
152
153    const parabix::StreamSetBuffer * getStreamSetInputBuffer(const unsigned i) const {
154        return mStreamSetInputBuffers[i];
155    }
156
157    const StreamSetBuffers & getStreamSetOutputBuffers() const {
158        return mStreamSetOutputBuffers;
159    }
160
161    const parabix::StreamSetBuffer * getStreamSetOutputBuffer(const unsigned i) const {
162        return mStreamSetOutputBuffers[i];
163    }
164
165    llvm::CallInst * createDoSegmentCall(const std::vector<llvm::Value *> & args) const;
166
167    llvm::Value * getAccumulator(const std::string & accumName) const;
168
169    virtual ~Kernel() = 0;
170
171protected:
172
173    // Constructor
174    Kernel(std::string && kernelName,
175                  std::vector<Binding> && stream_inputs,
176                  std::vector<Binding> && stream_outputs,
177                  std::vector<Binding> && scalar_parameters,
178                  std::vector<Binding> && scalar_outputs,
179                  std::vector<Binding> && internal_scalars);
180
181    //
182    // Kernel builder subtypes define their logic of kernel construction
183    // in terms of 3 virtual methods for
184    // (a) preparing the Kernel state data structure
185    // (b) defining the logic of the doBlock function, and
186    // (c) defining the logic of the finalBlock function.
187    //
188    // Note: the kernel state data structure must only be finalized after
189    // all scalar fields have been added.   If there are no fields to
190    // be added, the default method for preparing kernel state may be used.
191
192    void setNoTerminateAttribute(const bool noTerminate = true) {
193        mNoTerminateAttribute = noTerminate;
194    }
195
196    void prepareStreamSetNameMap();
197
198    void linkExternalMethods() override { }
199
200    virtual void prepareKernel();
201
202    virtual void generateInitializeMethod() { }
203   
204    virtual void generateDoSegmentMethod() = 0;
205
206    virtual void generateFinalizeMethod() { }
207
208    // Add an additional scalar field to the KernelState struct.
209    // Must occur before any call to addKernelDeclarations or createKernelModule.
210    unsigned addScalar(llvm::Type * type, const std::string & name);
211
212    unsigned addUnnamedScalar(llvm::Type * type);
213
214    // Run-time access of Kernel State and parameters of methods for
215    // use in implementing kernels.
216   
217    // Get the index of a named scalar field within the kernel state struct.
218    unsigned getScalarIndex(const std::string & name) const;
219
220    llvm::Value * getInputStreamBlockPtr(const std::string & name, llvm::Value * streamIndex) const;
221
222    llvm::Value * loadInputStreamBlock(const std::string & name, llvm::Value * streamIndex) const;
223   
224    llvm::Value * getInputStreamPackPtr(const std::string & name, llvm::Value * streamIndex, llvm::Value * packIndex) const;
225   
226    llvm::Value * loadInputStreamPack(const std::string & name, llvm::Value * streamIndex, llvm::Value * packIndex) const;
227   
228    llvm::Value * getInputStreamSetCount(const std::string & name) const;
229
230    llvm::Value * getOutputStreamBlockPtr(const std::string & name, llvm::Value * streamIndex) const;
231   
232    void storeOutputStreamBlock(const std::string & name, llvm::Value * streamIndex, llvm::Value * toStore) const;
233   
234    llvm::Value * getOutputStreamPackPtr(const std::string & name, llvm::Value * streamIndex, llvm::Value * packIndex) const;
235   
236    void storeOutputStreamPack(const std::string & name, llvm::Value * streamIndex, llvm::Value * packIndex, llvm::Value * toStore) const;
237
238    llvm::Value * getOutputStreamSetCount(const std::string & name) const;
239
240    llvm::Value * getAdjustedInputStreamBlockPtr(llvm::Value * blockAdjustment, const std::string & name, llvm::Value * streamIndex) const;
241
242    llvm::Value * getRawInputPointer(const std::string & name, llvm::Value * streamIndex, llvm::Value * absolutePosition) const;
243
244    llvm::Value * getRawOutputPointer(const std::string & name, llvm::Value * streamIndex, llvm::Value * absolutePosition) const;
245
246    llvm::Value * getBaseAddress(const std::string & name) const;
247
248    void setBaseAddress(const std::string & name, llvm::Value * addr) const;
249
250    llvm::Value * getBufferedSize(const std::string & name) const;
251
252    void setBufferedSize(const std::string & name, llvm::Value * size) const;
253
254    void reserveBytes(const std::string & name, llvm::Value * requested) const;
255
256    llvm::Value * getAvailableItemCount(const std::string & name) const;
257
258    llvm::Value * getLinearlyAccessibleItems(const std::string & name, llvm::Value * fromPosition) const;
259
260    llvm::BasicBlock * CreateWaitForConsumers() const;
261
262    llvm::BasicBlock * CreateBasicBlock(std::string && name) const;
263
264    llvm::Value * getStreamSetBufferPtr(const std::string & name) const;
265
266    llvm::Value * getIsFinal() const {
267        return mIsFinal;
268    }
269
270    void callGenerateInitializeMethod();
271
272    void callGenerateDoSegmentMethod();
273
274    void callGenerateFinalizeMethod();
275
276    StreamPort getStreamPort(const std::string & name) const;
277
278    const parabix::StreamSetBuffer * getInputStreamSetBuffer(const std::string & name) const {
279        const auto port = getStreamPort(name);
280        assert (port.first == Port::Input);
281        assert (port.second < mStreamSetInputBuffers.size());
282        return mStreamSetInputBuffers[port.second];
283    }
284
285    const parabix::StreamSetBuffer * getOutputStreamSetBuffer(const std::string & name) const {
286        const auto port = getStreamPort(name);
287        assert (port.first == Port::Output);
288        assert (port.second < mStreamSetOutputBuffers.size());
289        return mStreamSetOutputBuffers[port.second];
290    }
291
292    const parabix::StreamSetBuffer * getAnyStreamSetBuffer(const std::string & name) const {
293        unsigned index; Port port;
294        std::tie(port, index) = getStreamPort(name);
295        if (port == Port::Input) {
296            assert (index < mStreamSetInputBuffers.size());
297            return mStreamSetInputBuffers[index];
298        } else {
299            assert (index < mStreamSetOutputBuffers.size());
300            return mStreamSetOutputBuffers[index];
301        }
302    }
303
304private:
305
306    llvm::Value * getConsumerLock(const std::string & name) const;
307
308    void setConsumerLock(const std::string & name, llvm::Value * value) const;
309
310    llvm::Value * computeBlockIndex(const std::vector<Binding> & binding, const std::string & name, llvm::Value * itemCount) const;
311
312protected:
313
314    llvm::Function *                    mCurrentMethod;
315    bool                                mNoTerminateAttribute;
316    bool                                mIsGenerated;
317
318    llvm::Value *                       mIsFinal;
319    std::vector<llvm::Value *>          mAvailableItemCount;
320    llvm::Value *                       mOutputScalarResult;
321
322
323    std::vector<llvm::Type *>           mKernelFields;
324    KernelMap                           mKernelMap;
325    StreamMap                           mStreamMap;
326    StreamSetBuffers                    mStreamSetInputBuffers;
327    StreamSetBuffers                    mStreamSetOutputBuffers;
328
329};
330
331class SegmentOrientedKernel : public Kernel {
332protected:
333
334    SegmentOrientedKernel(std::string && kernelName,
335                          std::vector<Binding> && stream_inputs,
336                          std::vector<Binding> && stream_outputs,
337                          std::vector<Binding> && scalar_parameters,
338                          std::vector<Binding> && scalar_outputs,
339                          std::vector<Binding> && internal_scalars);
340
341};
342
343class BlockOrientedKernel : public Kernel {
344protected:
345
346    void CreateDoBlockMethodCall();
347
348    // Each kernel builder subtype must provide its own logic for generating
349    // doBlock calls.
350    virtual void generateDoBlockMethod() = 0;
351
352    // Each kernel builder subtypre must also specify the logic for processing the
353    // final block of stream data, if there is any special processing required
354    // beyond simply calling the doBlock function.   In the case that the final block
355    // processing may be trivially implemented by dispatching to the doBlock method
356    // without additional preparation, the default generateFinalBlockMethod need
357    // not be overridden.
358
359    virtual void generateFinalBlockMethod(llvm::Value * remainingItems);
360
361    void generateDoSegmentMethod() override final;
362
363    BlockOrientedKernel(std::string && kernelName,
364                        std::vector<Binding> && stream_inputs,
365                        std::vector<Binding> && stream_outputs,
366                        std::vector<Binding> && scalar_parameters,
367                        std::vector<Binding> && scalar_outputs,
368                        std::vector<Binding> && internal_scalars);
369
370private:
371
372    virtual bool useIndirectBr() const;
373
374    void writeDoBlockMethod();
375
376    void writeFinalBlockMethod(llvm::Value * remainingItems);
377
378private:
379
380    llvm::Function *        mDoBlockMethod;
381    llvm::BasicBlock *      mStrideLoopBody;
382    llvm::IndirectBrInst *  mStrideLoopBranch;
383    llvm::PHINode *         mStrideLoopTarget;
384};
385
386/*   
387The Multi-Block Kernel Builder
388------------------------------
389
390The Multi-Block Kernel Builder is designed to simplify the programming of
391efficient kernels with possibly variable and/or nonaligned output, subject to
392exact or MaxRatio processing constraints.   The following restrictions apply.
393   
394#.  The input consists of one or more stream sets, the first of which is
395    known as the principal input stream set. 
396   
397#.  If there is more than one input stream set, the additional stream sets must
398    have a processing rate defined with respect to the input stream set of one
399    of the following types:  FixedRate, Add1 or RoundUp.    Note that stream sets
400    declared without a processing rate attribute have the FixedRate(1) attribute
401    by default and therefore satisfy this constraint.
402   
403#.  All output stream sets must be declared with processing rate attributes
404    of one of the following types:
405    *  FixedRate, Add1, Roundup, or MaxRatio with respect to the principal input stream set.
406    *  FixedRate with respect to some other output stream set.
407   
408    When using the Multi-Block Kernel Builder to program a new type of kernel,
409    the programmer must implement the generateDoMultiBlockMethod for normal
410    multi-block processing according to the requirements below, as well as
411    providing for special final block processing, if necessary.
412           
413#.  The doMultiBlockMethod will be called with the following parameters:
414    * the number of items of the principal input stream to process (itemsToDo),
415    * pointers to linear contiguous buffer areas for each of the input stream sets, and
416    * pointers to linear contiguous output buffer areas for each of the output stream sets.
417    * pointers are to the address of the first item of the first stream of the stream set.
418
419#.  The Multi-Block Kernel Builder will arrange that these input parameters may be
420    processed under the following simplifying assumptions.
421    * the number of itemsToDo will either be an exact multiple of the BlockSize,
422      or, for processing the final block, a value less than BlockSize
423    * all input buffers will be safe to access and have data available in
424      accord with their processing rates based on the given number of itemsToDo
425      of the principal input stream set; no further bounds checking is needed.
426    * all output buffers will be safe to access and have space available
427      for the given maximum output generation rates based on the given number
428      of blocksToDo of the principal input stream set; no further bounds checking
429      is needed.
430    * for final block processing, all input buffers will be extended to be safely
431      treated as containing data corresponding to a full block of the principal
432      input stream set, with the actual data in each buffer padded with null values
433      beyond the end of input.  Similarly, all output buffers will contain space
434      sufficient for the maximum output that can be generated for a full block of
435      input processing.
436    * input and output pointers will be typed to allow convenient and logical access
437      to corresponding streams based on their declared stream set type and processing rate.
438    * for any input pointer p, a GEP instruction with a single int32 index i
439      will produce a pointer to the buffer position corresponding to the ith block of the
440      principal input stream set. 
441    * for any output stream set declared with a Fixed or Add1 processing rate with respect
442      to the principal input stream set, a GEP instruction with a single int32 index i
443      will produce a pointer to the buffer position corresponding to the ith block of the
444      principal input stream set.
445                   
446#.  Upon completion of multi-block processing, the Multi-Block Kernel Builder will arrange that
447    processed and produced item counts are updated for all stream sets that have exact
448    processing rate attributes.   Programmers are responsible for updating the producedItemCount
449    of any stream set declared with a variable attribute (MaxRatio).
450                           
451#.  An important caveat is that buffer areas may change arbitrarily between
452    calls to the doMultiBlockMethod.   In no case should a kernel store a
453    buffer pointer in its internal state.   Furthermore a kernel must not make
454    any assumptions about the accessibility of stream set data outside of the
455    processing range outside of the block boundaries associated with the given itemsToDo.
456*/
457
458class MultiBlockKernel : public Kernel {
459protected:
460
461    MultiBlockKernel(std::string && kernelName,
462                     std::vector<Binding> && stream_inputs,
463                     std::vector<Binding> && stream_outputs,
464                     std::vector<Binding> && scalar_parameters,
465                     std::vector<Binding> && scalar_outputs,
466                     std::vector<Binding> && internal_scalars);
467
468    // Each multi-block kernel subtype must provide its own logic for handling
469    // doMultiBlock calls, subject to the requirements laid out above.
470    // The generateMultiBlockLogic must be written to generate this logic, given
471    // a created but empty function.  Upon entry to generateMultiBlockLogic,
472    // the builder insertion point will be set to the entry block; upone
473    // exit the RetVoid instruction will be added to complete the method.
474    //
475    virtual void generateMultiBlockLogic () = 0;
476
477    // Given a kernel subtype with an appropriate interface, the generateDoSegment
478    // method of the multi-block kernel builder makes all the necessary arrangements
479    // to translate doSegment calls into a minimal sequence of doMultiBlock calls.
480    void generateDoSegmentMethod() override final;
481};
482   
483   
484}
485#endif
Note: See TracBrowser for help on using the repository browser.