source: icGREP/icgrep-devel/icgrep/kernels/kernel.h @ 6034

Last change on this file since 6034 was 5998, checked in by nmedfort, 17 months ago

Added temporary buffer functionality to the pipeline for single stream source buffers. Fixed memory leak from UCD::UnicodeBreakRE()

File size: 21.9 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#ifndef KERNEL_H
7#define KERNEL_H
8
9#include "interface.h"
10#include <boost/container/flat_map.hpp>
11
12namespace llvm { class AllocaInst; }
13namespace llvm { class BasicBlock; }
14namespace llvm { class Constant; }
15namespace llvm { class Function; }
16namespace llvm { class IntegerType; }
17namespace llvm { class IndirectBrInst; }
18namespace llvm { class PHINode; }
19namespace llvm { class LoadInst; }
20namespace llvm { class Type; }
21namespace llvm { class Value; }
22namespace parabix { class StreamSetBuffer; }
23
24namespace kernel {
25   
26class KernelBuilder;
27
28class Kernel : public KernelInterface {
29    friend class KernelBuilder;
30    friend class PipelineGenerator;
31public:
32   
33    enum class Port { Input, Output };
34    using StreamPort = std::pair<Port, unsigned>;
35    using StreamMap = boost::container::flat_map<std::string, StreamPort>;
36    using KernelFieldMap = boost::container::flat_map<std::string, unsigned>;
37    using StreamSetBuffers = std::vector<parabix::StreamSetBuffer *>;
38
39    // Kernel Signatures and Module IDs
40    //
41    // A kernel signature uniquely identifies a kernel and its full functionality.
42    // In the event that a particular kernel instance is to be generated and compiled
43    // to produce object code, and we have a cached kernel object code instance with
44    // the same signature and targetting the same IDISA architecture, then the cached
45    // object code may safely be used to avoid recompilation.
46    //
47    // A kernel signature is a byte string of arbitrary length.
48    //
49    // Kernel developers should take responsibility for designing appropriate signature
50    // mechanisms that are short, inexpensive to compute and guarantee uniqueness
51    // based on the semantics of the kernel.
52    //
53    // If no other mechanism is available, the default makeSignature() method uses the
54    // full LLVM IR (before optimization) of the kernel instance.
55    //
56    // A kernel Module ID is short string that is used as a name for a particular kernel
57    // instance.  Kernel Module IDs are used to look up and retrieve cached kernel
58    // instances and so should be highly likely to uniquely identify a kernel instance.
59    //
60    // The ideal case is that a kernel Module ID serves as a full kernel signature thus
61    // guaranteeing uniqueness.  In this case, hasSignature() should return false.
62    //
63
64    //
65    // Kernel builder subtypes define their logic of kernel construction
66    // in terms of 3 virtual methods for
67    // (a) preparing the Kernel state data structure
68    // (c) defining the logic of the finalBlock function.
69    //
70    // Note: the kernel state data structure must only be finalized after
71    // all scalar fields have been added.   If there are no fields to
72    // be added, the default method for preparing kernel state may be used.
73
74
75    std::string makeSignature(const std::unique_ptr<KernelBuilder> & idb) override;
76
77    // Can the module ID itself serve as the unique signature?
78    virtual bool hasSignature() const { return true; }
79
80    bool isCachable() const override { return false; }
81
82    void bindPorts(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs);
83
84    llvm::Module * setModule(llvm::Module * const module);
85
86    llvm::Module * makeModule(const std::unique_ptr<kernel::KernelBuilder> & idb);
87
88    llvm::Module * getModule() const {
89        return mModule;
90    }
91
92    void generateKernel(const std::unique_ptr<kernel::KernelBuilder> & idb);
93   
94    llvm::Value * createInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) final;
95
96    void initializeInstance(const std::unique_ptr<KernelBuilder> & idb) final;
97
98    void finalizeInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) final;
99
100    StreamPort getStreamPort(const std::string & name) const;
101
102    const Binding & getBinding(const std::string & name) const;
103
104    ProcessingRate::RateValue getLowerBound(const ProcessingRate &rate) const;
105
106    ProcessingRate::RateValue getUpperBound(const ProcessingRate & rate) const;
107
108    bool requiresCopyBack(const Binding & binding) const;
109
110    bool strideOffsetIsTriviallyCalculable(const Binding & binding) const;
111
112    bool permitsNonLinearAccess(const Binding & binding) const;
113
114    bool requiresLinearAccess(const Binding & binding) const;
115
116    bool anyBindingRequiresLinearSpace() const;
117
118    const StreamSetBuffers & getStreamSetInputBuffers() const {
119        return mStreamSetInputBuffers;
120    }
121
122    const parabix::StreamSetBuffer * getStreamSetInputBuffer(const unsigned i) const {
123        assert (i < mStreamSetInputBuffers.size());
124        assert (mStreamSetInputBuffers[i]);
125        return mStreamSetInputBuffers[i];
126    }
127
128    const parabix::StreamSetBuffer * getInputStreamSetBuffer(const std::string & name) const {
129        const auto port = getStreamPort(name);
130        assert (port.first == Port::Input);
131        return getStreamSetInputBuffer(port.second);
132    }
133
134    const StreamSetBuffers & getStreamSetOutputBuffers() const {
135        return mStreamSetOutputBuffers;
136    }
137
138    const Binding & getStreamInput(const unsigned i) const {
139        return KernelInterface::getStreamInput(i);
140    }
141
142    const Binding & getStreamInput(const std::string & name) const {
143        const auto port = getStreamPort(name);
144        assert (port.first == Port::Input);
145        return KernelInterface::getStreamInput(port.second);
146    }
147
148    const Binding & getStreamInput(const parabix::StreamSetBuffer * const buffer) const {
149        for (unsigned i = 0; i < mStreamSetInputBuffers.size(); ++i) {
150            if (mStreamSetInputBuffers[i] == buffer) {
151                return getStreamInput(i);
152            }
153        }
154        throw std::runtime_error("no output binding found given buffer");
155    }
156
157    const parabix::StreamSetBuffer * getStreamSetOutputBuffer(const unsigned i) const {
158        assert (i < mStreamSetOutputBuffers.size());
159        assert (mStreamSetOutputBuffers[i]);
160        return mStreamSetOutputBuffers[i];
161    }
162
163    const parabix::StreamSetBuffer * getOutputStreamSetBuffer(const std::string & name) const {
164        const auto port = getStreamPort(name);
165        assert (port.first == Port::Output);
166        return getStreamSetOutputBuffer(port.second);
167    }
168
169    const Binding & getStreamOutput(const unsigned i) const {
170        return KernelInterface::getStreamOutput(i);
171    }
172
173    const Binding & getStreamOutput(const parabix::StreamSetBuffer * const buffer) const {
174        for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
175            if (mStreamSetOutputBuffers[i] == buffer) {
176                return getStreamOutput(i);
177            }
178        }
179        throw std::runtime_error("no output binding found given buffer");
180    }
181
182    const Binding & getStreamOutput(const std::string & name) const {
183        const auto port = getStreamPort(name);
184        assert (port.first == Port::Output);
185        return KernelInterface::getStreamOutput(port.second);
186    }
187   
188    // Kernels typically perform block-at-a-time processing, but some kernels may require
189    // a different stride.   In the case of multiblock kernels, the stride attribute
190    // determines the number of minimum number of items that will be provided to the kernel
191    // on each doMultiBlock call.
192    //
193   
194    unsigned getStride() const { return mStride; }
195   
196    virtual ~Kernel() = 0;
197
198    void prepareKernel(const std::unique_ptr<KernelBuilder> & b);
199
200    void prepareCachedKernel(const std::unique_ptr<KernelBuilder> & b);
201
202    std::string getCacheName(const std::unique_ptr<KernelBuilder> & b) const;
203
204    bool mayTerminateEarly() const {
205        return hasAttribute(kernel::Attribute::KindId::CanTerminateEarly);
206    }
207
208    bool mustExplicitlyTerminate() const {
209        return hasAttribute(kernel::Attribute::KindId::MustExplicitlyTerminate);
210    }
211
212protected:
213
214    virtual void addInternalKernelProperties(const std::unique_ptr<KernelBuilder> &) { }
215
216    void getDoSegmentFunctionArguments(const std::vector<llvm::Value *> & availItems) const;
217
218    // Constructor
219    Kernel(std::string && kernelName, Bindings && stream_inputs,
220          Bindings && stream_outputs,
221          Bindings && scalar_parameters,
222          Bindings && scalar_outputs,
223          Bindings && internal_scalars);
224
225    unsigned getScalarIndex(const std::string & name) const;
226
227    void linkExternalMethods(const std::unique_ptr<kernel::KernelBuilder> &) override { }
228
229    virtual void generateInitializeMethod(const std::unique_ptr<kernel::KernelBuilder> &) { }
230   
231    virtual void generateKernelMethod(const std::unique_ptr<KernelBuilder> &) = 0;
232
233    virtual void generateFinalizeMethod(const std::unique_ptr<KernelBuilder> &) { }
234
235    // Add an additional scalar field to the KernelState struct.
236    // Must occur before any call to addKernelDeclarations or createKernelModule.
237    unsigned addScalar(llvm::Type * type, const std::string & name);
238
239    unsigned addUnnamedScalar(llvm::Type * type);
240
241    void callGenerateInitializeMethod(const std::unique_ptr<KernelBuilder> & b);
242
243    void callGenerateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & b);
244
245    void callGenerateFinalizeMethod(const std::unique_ptr<KernelBuilder> & b);
246
247    const parabix::StreamSetBuffer * getAnyStreamSetBuffer(const std::string & name) const {
248        unsigned index; Port port;
249        std::tie(port, index) = getStreamPort(name);
250        if (port == Port::Input) {
251            return getStreamSetInputBuffer(index);
252        } else {
253            return getStreamSetOutputBuffer(index);
254        }
255    }
256
257    void setStride(unsigned stride) { mStride = stride; }
258
259    bool treatUnsafeKernelOperationsAsErrors() const { return mTreatUnsafeKernelOperationsAsErrors; }
260
261    bool mustClearOverflowPriorToCopyback(const Binding & binding) const;
262
263private:
264
265    void addBaseKernelProperties(const std::unique_ptr<KernelBuilder> & b);
266
267    llvm::Value * getAvailableItemCount(const unsigned i) const {
268        assert (i < mAvailableItemCount.size());
269        return mAvailableItemCount[i];
270    }
271
272    bool verifyBufferSize(const Binding & binding, const parabix::StreamSetBuffer * const buffer) const;
273
274    void verifyStreamSetDefinitions() const;
275
276protected:
277
278    llvm::Function *                    mCurrentMethod; 
279    unsigned                            mStride;
280    bool                                mTreatUnsafeKernelOperationsAsErrors;
281    llvm::Value *                       mIsFinal;
282    llvm::Value *                       mOutputScalarResult;
283    mutable bool                        mIsGenerated;
284
285    std::vector<llvm::Value *>          mAvailableItemCount;
286
287    KernelFieldMap                      mKernelFieldMap;
288    std::vector<llvm::Type *>           mKernelFields;
289
290    StreamMap                           mStreamMap;
291
292    StreamSetBuffers                    mStreamSetInputBuffers;
293    StreamSetBuffers                    mStreamSetOutputBuffers;
294};
295
296using Kernels = std::vector<Kernel *>;
297
298class SegmentOrientedKernel : public Kernel {
299protected:
300
301    SegmentOrientedKernel(std::string && kernelName,
302                          Bindings && stream_inputs,
303                          Bindings && stream_outputs,
304                          Bindings && scalar_parameters,
305                          Bindings && scalar_outputs,
306                          Bindings && internal_scalars);
307protected:
308
309    void generateKernelMethod(const std::unique_ptr<KernelBuilder> & b) final;
310
311    virtual void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & b) = 0;
312
313};
314
315/*
316The Multi-Block Kernel Builder
317------------------------------
318
319The Multi-Block Kernel Builder is designed to simplify the programming of
320efficient kernels with possibly variable and/or nonaligned output, subject to
321exact or MaxRatio processing constraints.   The following restrictions apply.
322
323#.  The input consists of one or more stream sets, the first of which is
324    known as the principal input stream set.
325
326#.  If there is more than one input stream set, the additional stream sets
327    are first classified as having either a derived processing rate or
328    a variable processing rate.   Stream sets with a derived processing rate
329    have a processing rate defined with respect to the input stream set of one
330    of the following types:  FixedRate, Add1 or RoundUp.    Note that stream sets
331    declared without a processing rate attribute have the FixedRate(1) attribute
332    by default and therefore satisfy this constraint.  All other processing rate
333    types are classified as variable rate.
334
335#.  All output stream sets must be declared with processing rate attributes
336    of one of the following types:
337    *  FixedRate, Add1, Roundup, or MaxRatio with respect to the principal input stream set.
338    *  FixedRate with respect to some other output stream set.
339
340    When using the Multi-Block Kernel Builder to program a new type of kernel,
341    the programmer must implement the generateDoMultiBlockMethod for normal
342    multi-block processing according to the requirements below, as well as
343    providing for special final block processing, if necessary.
344
345#.  The doMultiBlockMethod will be called with the following parameters:
346    * the number of items of the principal input stream to process (itemsToDo),
347    * additional items available parameters for each additional input stream set
348      that is classified as a variable rate stream set
349    * pointers to linear contiguous buffer areas for each of the input stream sets, and
350    * pointers to linear contiguous output buffer areas for each of the output stream sets.
351
352    Notes:
353    * if the kernel has a Lookahead dependency declared on any input stream set, then
354      there will be two buffer pointers for that stream set, one for accessing stream set
355      items without lookahead and one for accessing the items with lookahead.   
356    * pointers are to the beginning of the block corresponding to the
357      processedItemCount or producedItemCount of the given stream set.
358    * the base type of each pointer is the StreamSetBlockType of that streamset
359
360#.  The Multi-Block Kernel Builder will arrange that these input parameters may be
361    processed under the following simplifying assumptions.
362    * the number of itemsToDo will either be an exact multiple of the kernel stride,
363      or, for processing the final block, a value less than the kernel stride
364    * the input buffer of the principal stream set and all input buffers of stream sets
365      with derived processing rates will be safe to access and have data available in
366      accord with their processing rates based on the given number of itemsToDo
367      of the principal input stream set; no further bounds checking is needed. 
368    * input buffers of stream sets with MaxRatio attributes will be safe to access,
369      but will only have valid data as specified by the available items parameter for
370      that stream set.
371    * the kernel programmer is responsible for safe access and bounds checking for any
372      input stream set classified as Unknown rate.   No temporary buffers are used
373      for such stream sets.
374    * all output buffers will be safe to access and have space available
375      for the given maximum output generation rates based on the given number
376      of itemsToDo of the principal input stream set; no further bounds checking
377      is needed.
378    * for final block processing, all input buffers will be extended to be safely
379      treated as containing data corresponding to a full block of the principal
380      input stream set, with the actual data in each buffer padded with null values
381      beyond the end of input.  Similarly, all output buffers will contain space
382      sufficient for the maximum output that can be generated for a full block of
383      input processing.
384    * input and output pointers will be typed to allow convenient and logical access
385      to corresponding streams based on their declared stream set type and processing rate.
386    * for any input pointer p, a GEP instruction with a single int32 index i
387      will produce a pointer to the buffer position corresponding to the ith block of the
388      input stream set, relative to the initial block based on the processedItemCount.
389    * for any output stream set declared with a Fixed or Add1 processing rate with respect
390      to the principal input stream set, a GEP instruction with a single int32 index i
391      will produce a pointer to the buffer position corresponding to the ith block of the
392      stream set, relative to the initial block based on the producedItemCount.
393
394#.  Upon completion of multi-block processing, the Multi-Block Kernel Builder will arrange that
395    processed and produced item counts are updated for all stream sets that have exact
396    processing rate attributes.   Programmers are responsible for updating the counts
397    of any stream set declared with a variable attribute (MaxRatio or Unknown).
398
399#.  An important caveat is that buffer areas may change arbitrarily between
400    calls to the doMultiBlockMethod.   In no case should a kernel store a
401    buffer pointer in its internal state.   Furthermore a kernel must not make
402    any assumptions about the accessibility of stream set data outside of the
403    processing range outside of the block boundaries associated with the given itemsToDo.
404*/
405
406class MultiBlockKernel : public Kernel {
407    friend class BlockOrientedKernel;
408protected:
409
410    MultiBlockKernel(std::string && kernelName,
411                     Bindings && stream_inputs,
412                     Bindings && stream_outputs,
413                     Bindings && scalar_parameters,
414                     Bindings && scalar_outputs,
415                     Bindings && internal_scalars);
416
417    // Each multi-block kernel subtype must provide its own logic for handling
418    // doMultiBlock calls, subject to the requirements laid out above.
419    // The generateMultiBlockLogic must be written to generate this logic, given
420    // a created but empty function.  Upon entry to generateMultiBlockLogic,
421    // the builder insertion point will be set to the entry block; upone
422    // exit the RetVoid instruction will be added to complete the method.
423    //
424    virtual void generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & b, llvm::Value * const numOfStrides) = 0;
425
426private:
427
428    // Given a kernel subtype with an appropriate interface, the generateDoSegment
429    // method of the multi-block kernel builder makes all the necessary arrangements
430    // to translate doSegment calls into a minimal sequence of doMultiBlock calls.
431    void generateKernelMethod(const std::unique_ptr<KernelBuilder> & b) final;
432
433    void writeMultiBlockLogic(const std::unique_ptr<KernelBuilder> & b);
434
435    void checkInputStream(const std::unique_ptr<KernelBuilder> & b, const unsigned index);
436
437    void checkOutputStream(const std::unique_ptr<KernelBuilder> & b, const unsigned index);
438
439    llvm::Value * computePopCountRate(const std::unique_ptr<KernelBuilder> & b, const ProcessingRate & rate, llvm::Value * const maxItems);
440
441    llvm::Value * getPopCountRateItems(const std::unique_ptr<KernelBuilder> & b, const ProcessingRate & rate, llvm::Value * const strideIndex);
442
443    void prepareOverflowBuffers(const std::unique_ptr<KernelBuilder> & b);
444
445    void writeCopyBackLogic(const std::unique_ptr<KernelBuilder> & b);
446
447    void calculateDerivedItemCounts(const std::unique_ptr<KernelBuilder> & b);
448
449    void updateDerivedItemCounts(const std::unique_ptr<KernelBuilder> & b);
450
451    llvm::Value * hasAnotherStride(const std::unique_ptr<KernelBuilder> & b);
452
453    void updateFinalDerivedItemCounts(const std::unique_ptr<KernelBuilder> & b);
454
455    void checkTerminationSignal(const std::unique_ptr<KernelBuilder> & b);
456
457    static bool hasDerivedItemCount(const Binding & binding);
458
459protected:
460
461    llvm::Value *                   mInitiallyFinal;
462    llvm::Value *                   mNumOfStrides;
463    llvm::Value *                   mNumOfStridesInFinalSegment;
464
465    std::vector<llvm::Value *>      mInitialAvailableItemCount;
466
467    std::vector<llvm::Value *>      mInitialProcessedItemCount;
468    std::vector<llvm::Value *>      mAccessibleInputItems;
469    std::vector<llvm::Value *>      mInputStrideLength;
470    std::vector<llvm::Value *>      mPopCountRateArray;
471
472    std::vector<llvm::Value *>      mInitialProducedItemCount;
473    std::vector<llvm::Value *>      mWritableOutputItems;
474    std::vector<llvm::Value *>      mOutputStrideLength;
475};
476
477
478class BlockOrientedKernel : public MultiBlockKernel {
479protected:
480
481    void CreateDoBlockMethodCall(const std::unique_ptr<KernelBuilder> & b);
482
483    // Each kernel builder subtype must provide its own logic for generating
484    // doBlock calls.
485    virtual void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & b) = 0;
486
487    // Each kernel builder subtypre must also specify the logic for processing the
488    // final block of stream data, if there is any special processing required
489    // beyond simply calling the doBlock function.   In the case that the final block
490    // processing may be trivially implemented by dispatching to the doBlock method
491    // without additional preparation, the default generateFinalBlockMethod need
492    // not be overridden.
493
494    virtual void generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & b, llvm::Value * remainingItems);
495
496    BlockOrientedKernel(std::string && kernelName,
497                        Bindings && stream_inputs,
498                        Bindings && stream_outputs,
499                        Bindings && scalar_parameters,
500                        Bindings && scalar_outputs,
501                        Bindings && internal_scalars);
502
503    llvm::Value * getRemainingItems(const std::unique_ptr<KernelBuilder> & b);
504
505private:
506
507    void generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & b, llvm::Value * const numOfStrides) final;
508
509    void writeDoBlockMethod(const std::unique_ptr<KernelBuilder> & b);
510
511    void writeFinalBlockMethod(const std::unique_ptr<KernelBuilder> & b, llvm::Value * remainingItems);
512
513    llvm::Value * incrementDerivedItemCounts(const std::unique_ptr<KernelBuilder> & b);
514
515private:
516
517    llvm::Function *            mDoBlockMethod;
518    llvm::BasicBlock *          mStrideLoopBody;
519    llvm::IndirectBrInst *      mStrideLoopBranch;
520    llvm::PHINode *             mStrideLoopTarget;
521    llvm::PHINode *             mStrideBlockIndex;
522};
523
524}
525
526#endif
Note: See TracBrowser for help on using the repository browser.