source: icGREP/icgrep-devel/icgrep/kernels/kernel.h @ 6135

Last change on this file since 6135 was 6047, checked in by nmedfort, 11 months ago

Major refactoring of buffer types. Static buffers replace Circular and CircularCopyback?. External buffers unify Source/External?.

File size: 21.8 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#ifndef KERNEL_H
7#define KERNEL_H
8
9#include "interface.h"
10#include <boost/container/flat_map.hpp>
11
12namespace llvm { class AllocaInst; }
13namespace llvm { class BasicBlock; }
14namespace llvm { class Constant; }
15namespace llvm { class Function; }
16namespace llvm { class IntegerType; }
17namespace llvm { class IndirectBrInst; }
18namespace llvm { class PHINode; }
19namespace llvm { class LoadInst; }
20namespace llvm { class Type; }
21namespace llvm { class Value; }
22namespace parabix { class StreamSetBuffer; }
23
24namespace kernel {
25   
26class KernelBuilder;
27
28class Kernel : public KernelInterface {
29    friend class KernelBuilder;
30    friend class PipelineGenerator;
31public:
32   
33    enum class Port { Input, Output };
34    using StreamPort = std::pair<Port, unsigned>;
35    using StreamMap = boost::container::flat_map<std::string, StreamPort>;
36    using KernelFieldMap = boost::container::flat_map<std::string, unsigned>;
37    using StreamSetBuffers = std::vector<parabix::StreamSetBuffer *>;
38
39    // Kernel Signatures and Module IDs
40    //
41    // A kernel signature uniquely identifies a kernel and its full functionality.
42    // In the event that a particular kernel instance is to be generated and compiled
43    // to produce object code, and we have a cached kernel object code instance with
44    // the same signature and targetting the same IDISA architecture, then the cached
45    // object code may safely be used to avoid recompilation.
46    //
47    // A kernel signature is a byte string of arbitrary length.
48    //
49    // Kernel developers should take responsibility for designing appropriate signature
50    // mechanisms that are short, inexpensive to compute and guarantee uniqueness
51    // based on the semantics of the kernel.
52    //
53    // If no other mechanism is available, the default makeSignature() method uses the
54    // full LLVM IR (before optimization) of the kernel instance.
55    //
56    // A kernel Module ID is short string that is used as a name for a particular kernel
57    // instance.  Kernel Module IDs are used to look up and retrieve cached kernel
58    // instances and so should be highly likely to uniquely identify a kernel instance.
59    //
60    // The ideal case is that a kernel Module ID serves as a full kernel signature thus
61    // guaranteeing uniqueness.  In this case, hasSignature() should return false.
62    //
63
64    //
65    // Kernel builder subtypes define their logic of kernel construction
66    // in terms of 3 virtual methods for
67    // (a) preparing the Kernel state data structure
68    // (c) defining the logic of the finalBlock function.
69    //
70    // Note: the kernel state data structure must only be finalized after
71    // all scalar fields have been added.   If there are no fields to
72    // be added, the default method for preparing kernel state may be used.
73
74
75    std::string makeSignature(const std::unique_ptr<KernelBuilder> & idb) override;
76
77    // Can the module ID itself serve as the unique signature?
78    virtual bool hasSignature() const { return true; }
79
80    bool isCachable() const override { return false; }
81
82    void bindPorts(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs);
83
84    llvm::Module * setModule(llvm::Module * const module);
85
86    llvm::Module * makeModule(const std::unique_ptr<kernel::KernelBuilder> & idb);
87
88    llvm::Module * getModule() const {
89        return mModule;
90    }
91
92    void generateKernel(const std::unique_ptr<kernel::KernelBuilder> & idb);
93   
94    llvm::Value * createInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) final;
95
96    void initializeInstance(const std::unique_ptr<KernelBuilder> & idb) final;
97
98    void finalizeInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) final;
99
100    StreamPort getStreamPort(const std::string & name) const;
101
102    const Binding & getBinding(const std::string & name) const;
103
104    ProcessingRate::RateValue getLowerBound(const ProcessingRate &rate) const;
105
106    ProcessingRate::RateValue getUpperBound(const ProcessingRate & rate) const;
107
108    bool requiresCopyBack(const Binding & binding) const;
109
110    bool strideOffsetIsTriviallyCalculable(const Binding & binding) const;
111
112    bool permitsNonLinearAccess(const Binding & binding) const;
113
114    bool requiresLinearAccess(const Binding & binding) const;
115
116    bool anyBindingRequiresLinearSpace() const;
117
118    const StreamSetBuffers & getStreamSetInputBuffers() const {
119        return mStreamSetInputBuffers;
120    }
121
122    const parabix::StreamSetBuffer * getStreamSetInputBuffer(const unsigned i) const {
123        assert (i < mStreamSetInputBuffers.size());
124        assert (mStreamSetInputBuffers[i]);
125        return mStreamSetInputBuffers[i];
126    }
127
128    const parabix::StreamSetBuffer * getInputStreamSetBuffer(const std::string & name) const {
129        const auto port = getStreamPort(name);
130        assert (port.first == Port::Input);
131        return getStreamSetInputBuffer(port.second);
132    }
133
134    const StreamSetBuffers & getStreamSetOutputBuffers() const {
135        return mStreamSetOutputBuffers;
136    }
137
138    const Binding & getStreamInput(const unsigned i) const {
139        return KernelInterface::getStreamInput(i);
140    }
141
142    const Binding & getStreamInput(const std::string & name) const {
143        const auto port = getStreamPort(name);
144        assert (port.first == Port::Input);
145        return KernelInterface::getStreamInput(port.second);
146    }
147
148    const Binding & getStreamInput(const parabix::StreamSetBuffer * const buffer) const {
149        for (unsigned i = 0; i < mStreamSetInputBuffers.size(); ++i) {
150            if (mStreamSetInputBuffers[i] == buffer) {
151                return getStreamInput(i);
152            }
153        }
154        throw std::runtime_error("no output binding found given buffer");
155    }
156
157    const parabix::StreamSetBuffer * getStreamSetOutputBuffer(const unsigned i) const {
158        assert (i < mStreamSetOutputBuffers.size());
159        assert (mStreamSetOutputBuffers[i]);
160        return mStreamSetOutputBuffers[i];
161    }
162
163    const parabix::StreamSetBuffer * getOutputStreamSetBuffer(const std::string & name) const {
164        const auto port = getStreamPort(name);
165        assert (port.first == Port::Output);
166        return getStreamSetOutputBuffer(port.second);
167    }
168
169    const Binding & getStreamOutput(const unsigned i) const {
170        return KernelInterface::getStreamOutput(i);
171    }
172
173    const Binding & getStreamOutput(const parabix::StreamSetBuffer * const buffer) const {
174        for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
175            if (mStreamSetOutputBuffers[i] == buffer) {
176                return getStreamOutput(i);
177            }
178        }
179        throw std::runtime_error("no output binding found given buffer");
180    }
181
182    const Binding & getStreamOutput(const std::string & name) const {
183        const auto port = getStreamPort(name);
184        assert (port.first == Port::Output);
185        return KernelInterface::getStreamOutput(port.second);
186    }
187   
188    // Kernels typically perform block-at-a-time processing, but some kernels may require
189    // a different stride.   In the case of multiblock kernels, the stride attribute
190    // determines the number of minimum number of items that will be provided to the kernel
191    // on each doMultiBlock call.
192    //
193   
194    unsigned getStride() const { return mStride; }
195   
196    virtual ~Kernel() = 0;
197
198    void prepareKernel(const std::unique_ptr<KernelBuilder> & b);
199
200    void prepareCachedKernel(const std::unique_ptr<KernelBuilder> & b);
201
202    std::string getCacheName(const std::unique_ptr<KernelBuilder> & b) const;
203
204    bool mayTerminateEarly() const {
205        return hasAttribute(kernel::Attribute::KindId::CanTerminateEarly);
206    }
207
208    bool mustExplicitlyTerminate() const {
209        return hasAttribute(kernel::Attribute::KindId::MustExplicitlyTerminate);
210    }
211
212protected:
213
214    virtual void addInternalKernelProperties(const std::unique_ptr<KernelBuilder> &) { }
215
216    void getDoSegmentFunctionArguments(const std::vector<llvm::Value *> & availItems) const;
217
218    // Constructor
219    Kernel(std::string && kernelName, Bindings && stream_inputs,
220          Bindings && stream_outputs,
221          Bindings && scalar_parameters,
222          Bindings && scalar_outputs,
223          Bindings && internal_scalars);
224
225    unsigned getScalarIndex(const std::string & name) const;
226
227    void linkExternalMethods(const std::unique_ptr<kernel::KernelBuilder> &) override { }
228
229    virtual void generateInitializeMethod(const std::unique_ptr<kernel::KernelBuilder> &) { }
230   
231    virtual void generateKernelMethod(const std::unique_ptr<KernelBuilder> &) = 0;
232
233    virtual void generateFinalizeMethod(const std::unique_ptr<KernelBuilder> &) { }
234
235    // Add an additional scalar field to the KernelState struct.
236    // Must occur before any call to addKernelDeclarations or createKernelModule.
237    unsigned addScalar(llvm::Type * type, const std::string & name);
238
239    unsigned addUnnamedScalar(llvm::Type * type);
240
241    void callGenerateInitializeMethod(const std::unique_ptr<KernelBuilder> & b);
242
243    void callGenerateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & b);
244
245    void callGenerateFinalizeMethod(const std::unique_ptr<KernelBuilder> & b);
246
247    const parabix::StreamSetBuffer * getAnyStreamSetBuffer(const std::string & name) const {
248        unsigned index; Port port;
249        std::tie(port, index) = getStreamPort(name);
250        if (port == Port::Input) {
251            return getStreamSetInputBuffer(index);
252        } else {
253            return getStreamSetOutputBuffer(index);
254        }
255    }
256
257    void setStride(unsigned stride) { mStride = stride; }
258
259    bool treatUnsafeKernelOperationsAsErrors() const { return mTreatUnsafeKernelOperationsAsErrors; }
260
261    bool mustClearOverflowPriorToCopyback(const Binding & binding) const;
262
263private:
264
265    void addBaseKernelProperties(const std::unique_ptr<KernelBuilder> & b);
266
267    llvm::Value * getAvailableItemCount(const unsigned i) const {
268        assert (i < mAvailableItemCount.size());
269        return mAvailableItemCount[i];
270    }
271
272    void verifyStreamSetDefinitions() const;
273
274protected:
275
276    llvm::Function *                    mCurrentMethod; 
277    unsigned                            mStride;
278    bool                                mTreatUnsafeKernelOperationsAsErrors;
279    llvm::Value *                       mIsFinal;
280    llvm::Value *                       mOutputScalarResult;
281    mutable bool                        mIsGenerated;
282
283    std::vector<llvm::Value *>          mAvailableItemCount;
284
285    KernelFieldMap                      mKernelFieldMap;
286    std::vector<llvm::Type *>           mKernelFields;
287
288    StreamMap                           mStreamMap;
289
290    StreamSetBuffers                    mStreamSetInputBuffers;
291    StreamSetBuffers                    mStreamSetOutputBuffers;
292};
293
294using Kernels = std::vector<Kernel *>;
295
296class SegmentOrientedKernel : public Kernel {
297protected:
298
299    SegmentOrientedKernel(std::string && kernelName,
300                          Bindings && stream_inputs,
301                          Bindings && stream_outputs,
302                          Bindings && scalar_parameters,
303                          Bindings && scalar_outputs,
304                          Bindings && internal_scalars);
305protected:
306
307    void generateKernelMethod(const std::unique_ptr<KernelBuilder> & b) final;
308
309    virtual void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & b) = 0;
310
311};
312
313/*
314The Multi-Block Kernel Builder
315------------------------------
316
317The Multi-Block Kernel Builder is designed to simplify the programming of
318efficient kernels with possibly variable and/or nonaligned output, subject to
319exact or MaxRatio processing constraints.   The following restrictions apply.
320
321#.  The input consists of one or more stream sets, the first of which is
322    known as the principal input stream set.
323
324#.  If there is more than one input stream set, the additional stream sets
325    are first classified as having either a derived processing rate or
326    a variable processing rate.   Stream sets with a derived processing rate
327    have a processing rate defined with respect to the input stream set of one
328    of the following types:  FixedRate, Add1 or RoundUp.    Note that stream sets
329    declared without a processing rate attribute have the FixedRate(1) attribute
330    by default and therefore satisfy this constraint.  All other processing rate
331    types are classified as variable rate.
332
333#.  All output stream sets must be declared with processing rate attributes
334    of one of the following types:
335    *  FixedRate, Add1, Roundup, or MaxRatio with respect to the principal input stream set.
336    *  FixedRate with respect to some other output stream set.
337
338    When using the Multi-Block Kernel Builder to program a new type of kernel,
339    the programmer must implement the generateDoMultiBlockMethod for normal
340    multi-block processing according to the requirements below, as well as
341    providing for special final block processing, if necessary.
342
343#.  The doMultiBlockMethod will be called with the following parameters:
344    * the number of items of the principal input stream to process (itemsToDo),
345    * additional items available parameters for each additional input stream set
346      that is classified as a variable rate stream set
347    * pointers to linear contiguous buffer areas for each of the input stream sets, and
348    * pointers to linear contiguous output buffer areas for each of the output stream sets.
349
350    Notes:
351    * if the kernel has a Lookahead dependency declared on any input stream set, then
352      there will be two buffer pointers for that stream set, one for accessing stream set
353      items without lookahead and one for accessing the items with lookahead.   
354    * pointers are to the beginning of the block corresponding to the
355      processedItemCount or producedItemCount of the given stream set.
356    * the base type of each pointer is the StreamSetBlockType of that streamset
357
358#.  The Multi-Block Kernel Builder will arrange that these input parameters may be
359    processed under the following simplifying assumptions.
360    * the number of itemsToDo will either be an exact multiple of the kernel stride,
361      or, for processing the final block, a value less than the kernel stride
362    * the input buffer of the principal stream set and all input buffers of stream sets
363      with derived processing rates will be safe to access and have data available in
364      accord with their processing rates based on the given number of itemsToDo
365      of the principal input stream set; no further bounds checking is needed. 
366    * input buffers of stream sets with MaxRatio attributes will be safe to access,
367      but will only have valid data as specified by the available items parameter for
368      that stream set.
369    * the kernel programmer is responsible for safe access and bounds checking for any
370      input stream set classified as Unknown rate.   No temporary buffers are used
371      for such stream sets.
372    * all output buffers will be safe to access and have space available
373      for the given maximum output generation rates based on the given number
374      of itemsToDo of the principal input stream set; no further bounds checking
375      is needed.
376    * for final block processing, all input buffers will be extended to be safely
377      treated as containing data corresponding to a full block of the principal
378      input stream set, with the actual data in each buffer padded with null values
379      beyond the end of input.  Similarly, all output buffers will contain space
380      sufficient for the maximum output that can be generated for a full block of
381      input processing.
382    * input and output pointers will be typed to allow convenient and logical access
383      to corresponding streams based on their declared stream set type and processing rate.
384    * for any input pointer p, a GEP instruction with a single int32 index i
385      will produce a pointer to the buffer position corresponding to the ith block of the
386      input stream set, relative to the initial block based on the processedItemCount.
387    * for any output stream set declared with a Fixed or Add1 processing rate with respect
388      to the principal input stream set, a GEP instruction with a single int32 index i
389      will produce a pointer to the buffer position corresponding to the ith block of the
390      stream set, relative to the initial block based on the producedItemCount.
391
392#.  Upon completion of multi-block processing, the Multi-Block Kernel Builder will arrange that
393    processed and produced item counts are updated for all stream sets that have exact
394    processing rate attributes.   Programmers are responsible for updating the counts
395    of any stream set declared with a variable attribute (MaxRatio or Unknown).
396
397#.  An important caveat is that buffer areas may change arbitrarily between
398    calls to the doMultiBlockMethod.   In no case should a kernel store a
399    buffer pointer in its internal state.   Furthermore a kernel must not make
400    any assumptions about the accessibility of stream set data outside of the
401    processing range outside of the block boundaries associated with the given itemsToDo.
402*/
403
404class MultiBlockKernel : public Kernel {
405    friend class BlockOrientedKernel;
406protected:
407
408    MultiBlockKernel(std::string && kernelName,
409                     Bindings && stream_inputs,
410                     Bindings && stream_outputs,
411                     Bindings && scalar_parameters,
412                     Bindings && scalar_outputs,
413                     Bindings && internal_scalars);
414
415    // Each multi-block kernel subtype must provide its own logic for handling
416    // doMultiBlock calls, subject to the requirements laid out above.
417    // The generateMultiBlockLogic must be written to generate this logic, given
418    // a created but empty function.  Upon entry to generateMultiBlockLogic,
419    // the builder insertion point will be set to the entry block; upone
420    // exit the RetVoid instruction will be added to complete the method.
421    //
422    virtual void generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & b, llvm::Value * const numOfStrides) = 0;
423
424private:
425
426    // Given a kernel subtype with an appropriate interface, the generateDoSegment
427    // method of the multi-block kernel builder makes all the necessary arrangements
428    // to translate doSegment calls into a minimal sequence of doMultiBlock calls.
429    void generateKernelMethod(const std::unique_ptr<KernelBuilder> & b) final;
430
431    void writeMultiBlockLogic(const std::unique_ptr<KernelBuilder> & b);
432
433    void checkInputStream(const std::unique_ptr<KernelBuilder> & b, const unsigned index);
434
435    void checkOutputStream(const std::unique_ptr<KernelBuilder> & b, const unsigned index);
436
437    llvm::Value * computePopCountRate(const std::unique_ptr<KernelBuilder> & b, const ProcessingRate & rate, llvm::Value * const maxItems);
438
439    llvm::Value * getPopCountRateItems(const std::unique_ptr<KernelBuilder> & b, const ProcessingRate & rate, llvm::Value * const strideIndex);
440
441    void prepareOverflowBuffers(const std::unique_ptr<KernelBuilder> & b);
442
443    void writeCopyBackLogic(const std::unique_ptr<KernelBuilder> & b);
444
445    void calculateDerivedItemCounts(const std::unique_ptr<KernelBuilder> & b);
446
447    void updateDerivedItemCounts(const std::unique_ptr<KernelBuilder> & b);
448
449    llvm::Value * hasAnotherStride(const std::unique_ptr<KernelBuilder> & b);
450
451    void updateFinalDerivedItemCounts(const std::unique_ptr<KernelBuilder> & b);
452
453    void checkTerminationSignal(const std::unique_ptr<KernelBuilder> & b);
454
455    static bool hasDerivedItemCount(const Binding & binding);
456
457protected:
458
459    llvm::Value *                   mInitiallyFinal;
460    llvm::Value *                   mNumOfStrides;
461    llvm::Value *                   mNumOfStridesInFinalSegment;
462
463    std::vector<llvm::Value *>      mInitialAvailableItemCount;
464
465    std::vector<llvm::Value *>      mInitialProcessedItemCount;
466    std::vector<llvm::Value *>      mAccessibleInputItems;
467    std::vector<llvm::Value *>      mInputStrideLength;
468    std::vector<llvm::Value *>      mPopCountRateArray;
469
470    std::vector<llvm::Value *>      mInitialProducedItemCount;
471    std::vector<llvm::Value *>      mWritableOutputItems;
472    std::vector<llvm::Value *>      mOutputStrideLength;
473};
474
475
476class BlockOrientedKernel : public MultiBlockKernel {
477protected:
478
479    void CreateDoBlockMethodCall(const std::unique_ptr<KernelBuilder> & b);
480
481    // Each kernel builder subtype must provide its own logic for generating
482    // doBlock calls.
483    virtual void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & b) = 0;
484
485    // Each kernel builder subtypre must also specify the logic for processing the
486    // final block of stream data, if there is any special processing required
487    // beyond simply calling the doBlock function.   In the case that the final block
488    // processing may be trivially implemented by dispatching to the doBlock method
489    // without additional preparation, the default generateFinalBlockMethod need
490    // not be overridden.
491
492    virtual void generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & b, llvm::Value * remainingItems);
493
494    BlockOrientedKernel(std::string && kernelName,
495                        Bindings && stream_inputs,
496                        Bindings && stream_outputs,
497                        Bindings && scalar_parameters,
498                        Bindings && scalar_outputs,
499                        Bindings && internal_scalars);
500
501    llvm::Value * getRemainingItems(const std::unique_ptr<KernelBuilder> & b);
502
503private:
504
505    void generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & b, llvm::Value * const numOfStrides) final;
506
507    void writeDoBlockMethod(const std::unique_ptr<KernelBuilder> & b);
508
509    void writeFinalBlockMethod(const std::unique_ptr<KernelBuilder> & b, llvm::Value * remainingItems);
510
511    llvm::Value * incrementDerivedItemCounts(const std::unique_ptr<KernelBuilder> & b);
512
513private:
514
515    llvm::Function *            mDoBlockMethod;
516    llvm::BasicBlock *          mStrideLoopBody;
517    llvm::IndirectBrInst *      mStrideLoopBranch;
518    llvm::PHINode *             mStrideLoopTarget;
519    llvm::PHINode *             mStrideBlockIndex;
520};
521
522}
523
524#endif
Note: See TracBrowser for help on using the repository browser.