source: icGREP/icgrep-devel/icgrep/kernels/kernel.h @ 5782

Last change on this file since 5782 was 5782, checked in by nmedfort, 17 months ago

Initial check-in of LookAhead? support; modified LineBreakKernel? to compute CR+LF using LookAhead?(1) + misc. fixes.

File size: 21.6 KB
Line 
1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#ifndef KERNEL_H
7#define KERNEL_H
8
9#include "interface.h"
10#include <boost/container/flat_map.hpp>
11
12namespace llvm { class BasicBlock; }
13namespace llvm { class Constant; }
14namespace llvm { class Function; }
15namespace llvm { class IntegerType; }
16namespace llvm { class IndirectBrInst; }
17namespace llvm { class PHINode; }
18namespace llvm { class LoadInst; }
19namespace llvm { class Type; }
20namespace llvm { class Value; }
21namespace parabix { class StreamSetBuffer; }
22
23namespace kernel {
24   
25class KernelBuilder;
26
27class Kernel : public KernelInterface {
28    friend class KernelBuilder;
29protected:
30
31    static const std::string DO_BLOCK_SUFFIX;
32    static const std::string FINAL_BLOCK_SUFFIX;
33    static const std::string MULTI_BLOCK_SUFFIX;
34    static const std::string LOGICAL_SEGMENT_NO_SCALAR;
35    static const std::string PROCESSED_ITEM_COUNT_SUFFIX;
36    static const std::string CONSUMED_ITEM_COUNT_SUFFIX;
37    static const std::string PRODUCED_ITEM_COUNT_SUFFIX;
38    static const std::string TERMINATION_SIGNAL;
39    static const std::string BUFFER_PTR_SUFFIX;
40    static const std::string CONSUMER_SUFFIX;
41    static const std::string CYCLECOUNT_SCALAR;
42
43public:
44   
45    enum class Port { Input, Output };
46    using StreamPort = std::pair<Port, unsigned>;
47    using StreamMap = boost::container::flat_map<std::string, StreamPort>;
48    using KernelFieldMap = boost::container::flat_map<std::string, unsigned>;
49    using StreamSetBuffers = std::vector<parabix::StreamSetBuffer *>;
50
51    // Kernel Signatures and Module IDs
52    //
53    // A kernel signature uniquely identifies a kernel and its full functionality.
54    // In the event that a particular kernel instance is to be generated and compiled
55    // to produce object code, and we have a cached kernel object code instance with
56    // the same signature and targetting the same IDISA architecture, then the cached
57    // object code may safely be used to avoid recompilation.
58    //
59    // A kernel signature is a byte string of arbitrary length.
60    //
61    // Kernel developers should take responsibility for designing appropriate signature
62    // mechanisms that are short, inexpensive to compute and guarantee uniqueness
63    // based on the semantics of the kernel.
64    //
65    // If no other mechanism is available, the default makeSignature() method uses the
66    // full LLVM IR (before optimization) of the kernel instance.
67    //
68    // A kernel Module ID is short string that is used as a name for a particular kernel
69    // instance.  Kernel Module IDs are used to look up and retrieve cached kernel
70    // instances and so should be highly likely to uniquely identify a kernel instance.
71    //
72    // The ideal case is that a kernel Module ID serves as a full kernel signature thus
73    // guaranteeing uniqueness.  In this case, hasSignature() should return false.
74    //
75
76    //
77    // Kernel builder subtypes define their logic of kernel construction
78    // in terms of 3 virtual methods for
79    // (a) preparing the Kernel state data structure
80    // (c) defining the logic of the finalBlock function.
81    //
82    // Note: the kernel state data structure must only be finalized after
83    // all scalar fields have been added.   If there are no fields to
84    // be added, the default method for preparing kernel state may be used.
85
86
87    std::string makeSignature(const std::unique_ptr<KernelBuilder> & idb) override;
88
89    // Can the module ID itself serve as the unique signature?
90    virtual bool hasSignature() const { return true; }
91
92    bool isCachable() const override { return false; }
93
94    void bindPorts(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs);
95
96    llvm::Module * setModule(llvm::Module * const module);
97
98    llvm::Module * makeModule(const std::unique_ptr<kernel::KernelBuilder> & idb);
99
100    llvm::Module * getModule() const {
101        return mModule;
102    }
103
104    void generateKernel(const std::unique_ptr<kernel::KernelBuilder> & idb);
105   
106    llvm::Value * createInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) final;
107
108    void initializeInstance(const std::unique_ptr<KernelBuilder> & idb) final;
109
110    void finalizeInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) final;
111
112    bool hasNoTerminateAttribute() const {
113        return mNoTerminateAttribute;
114    }
115
116    StreamPort getStreamPort(const std::string & name) const;
117
118    const Binding & getBinding(const std::string & name) const;
119
120    ProcessingRate::RateValue getLowerBound(const ProcessingRate &rate) const;
121
122    ProcessingRate::RateValue getUpperBound(const ProcessingRate & rate) const;
123
124    const StreamSetBuffers & getStreamSetInputBuffers() const {
125        return mStreamSetInputBuffers;
126    }
127
128    const parabix::StreamSetBuffer * getStreamSetInputBuffer(const unsigned i) const {
129        assert (i < mStreamSetInputBuffers.size());
130        assert (mStreamSetInputBuffers[i]);
131        return mStreamSetInputBuffers[i];
132    }
133
134    const parabix::StreamSetBuffer * getInputStreamSetBuffer(const std::string & name) const {
135        const auto port = getStreamPort(name);
136        assert (port.first == Port::Input);
137        return getStreamSetInputBuffer(port.second);
138    }
139
140    const StreamSetBuffers & getStreamSetOutputBuffers() const {
141        return mStreamSetOutputBuffers;
142    }
143
144    const Binding & getStreamInput(const unsigned i) const {
145        return KernelInterface::getStreamInput(i);
146    }
147
148    const Binding & getStreamInput(const std::string & name) const {
149        const auto port = getStreamPort(name);
150        assert (port.first == Port::Input);
151        return KernelInterface::getStreamInput(port.second);
152    }
153
154    const Binding & getStreamInput(const parabix::StreamSetBuffer * const buffer) const {
155        for (unsigned i = 0; i < mStreamSetInputBuffers.size(); ++i) {
156            if (mStreamSetInputBuffers[i] == buffer) {
157                return getStreamInput(i);
158            }
159        }
160        throw std::runtime_error("no output binding found given buffer");
161    }
162
163    const parabix::StreamSetBuffer * getStreamSetOutputBuffer(const unsigned i) const {
164        assert (i < mStreamSetOutputBuffers.size());
165        assert (mStreamSetOutputBuffers[i]);
166        return mStreamSetOutputBuffers[i];
167    }
168
169    const parabix::StreamSetBuffer * getOutputStreamSetBuffer(const std::string & name) const {
170        const auto port = getStreamPort(name);
171        assert (port.first == Port::Output);
172        return getStreamSetOutputBuffer(port.second);
173    }
174
175    const Binding & getStreamOutput(const unsigned i) const {
176        return KernelInterface::getStreamOutput(i);
177    }
178
179    const Binding & getStreamOutput(const parabix::StreamSetBuffer * const buffer) const {
180        for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
181            if (mStreamSetOutputBuffers[i] == buffer) {
182                return getStreamOutput(i);
183            }
184        }
185        throw std::runtime_error("no output binding found given buffer");
186    }
187
188    const Binding & getStreamOutput(const std::string & name) const {
189        const auto port = getStreamPort(name);
190        assert (port.first == Port::Output);
191        return KernelInterface::getStreamOutput(port.second);
192    }
193   
194    // Kernels typically perform block-at-a-time processing, but some kernels may require
195    // a different stride.   In the case of multiblock kernels, the stride attribute
196    // determines the number of minimum number of items that will be provided to the kernel
197    // on each doMultiBlock call.
198    //
199   
200    unsigned getStride() const { return mStride; }
201   
202    virtual ~Kernel() = 0;
203
204    void prepareKernel(const std::unique_ptr<KernelBuilder> & idb);
205
206    void prepareCachedKernel(const std::unique_ptr<KernelBuilder> & idb);
207
208    std::string getCacheName(const std::unique_ptr<KernelBuilder> & idb) const;
209
210protected:
211
212    virtual void addInternalKernelProperties(const std::unique_ptr<KernelBuilder> & idb) { }
213
214    void getDoSegmentFunctionArguments(const std::vector<llvm::Value *> & availItems) const;
215
216    // Constructor
217    Kernel(std::string && kernelName, Bindings && stream_inputs,
218          Bindings && stream_outputs,
219          Bindings && scalar_parameters,
220          Bindings && scalar_outputs,
221          Bindings && internal_scalars);
222
223    void setNoTerminateAttribute(const bool noTerminate = true) {
224        mNoTerminateAttribute = noTerminate;
225    }
226
227    llvm::Value * getPrincipalItemCount() const {
228        return mAvailablePrincipalItemCount;
229    }
230
231    unsigned getScalarIndex(const std::string & name) const;
232
233    void prepareStreamSetNameMap();
234   
235    void linkExternalMethods(const std::unique_ptr<kernel::KernelBuilder> &) override { }
236
237    virtual void generateInitializeMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) { }
238   
239    virtual void generateKernelMethod(const std::unique_ptr<KernelBuilder> & iBuilder) = 0;
240
241    virtual void generateFinalizeMethod(const std::unique_ptr<KernelBuilder> & iBuilder) { }
242
243    // Add an additional scalar field to the KernelState struct.
244    // Must occur before any call to addKernelDeclarations or createKernelModule.
245    unsigned addScalar(llvm::Type * type, const std::string & name);
246
247    unsigned addUnnamedScalar(llvm::Type * type);
248
249    void callGenerateInitializeMethod(const std::unique_ptr<KernelBuilder> & idb);
250
251    void callGenerateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & idb);
252
253    void callGenerateFinalizeMethod(const std::unique_ptr<KernelBuilder> & idb);
254
255    const parabix::StreamSetBuffer * getAnyStreamSetBuffer(const std::string & name) const {
256        unsigned index; Port port;
257        std::tie(port, index) = getStreamPort(name);
258        if (port == Port::Input) {
259            assert (index < mStreamSetInputBuffers.size());
260            assert (mStreamSetInputBuffers[index]);
261            return mStreamSetInputBuffers[index];
262        } else {
263            assert (index < mStreamSetOutputBuffers.size());
264            assert (mStreamSetOutputBuffers[index]);
265            return mStreamSetOutputBuffers[index];
266        }
267    }
268
269    void setStride(unsigned stride) { mStride = stride; }
270
271private:
272
273    void addBaseKernelProperties(const std::unique_ptr<KernelBuilder> & idb);
274
275    llvm::Value * getStreamSetInputAddress(const std::string & name) const {
276        const Kernel::StreamPort p = getStreamPort(name);
277        assert (p.first == Port::Input);
278        return mStreamSetInputBaseAddress[p.second];
279    }
280
281    llvm::Value * getStreamSetOutputAddress(const std::string & name) const {
282        const Kernel::StreamPort p = getStreamPort(name);
283        assert (p.first == Port::Output);
284        return mStreamSetOutputBaseAddress[p.second];
285    }
286
287    llvm::Value * getAvailableItemCount(const unsigned i) const {
288        return mAvailableItemCount[i];
289    }
290
291    void normalizeStreamProcessingRates();
292
293    bool normalizeRelativeToFixedProcessingRate(const ProcessingRate & base, ProcessingRate & toUpdate);
294
295protected:
296
297    llvm::Function *                    mCurrentMethod;
298    llvm::Value *                       mAvailablePrincipalItemCount;
299    bool                                mNoTerminateAttribute;
300    bool                                mIsGenerated;
301    unsigned                            mStride;
302    llvm::Value *                       mIsFinal;
303    llvm::Value *                       mOutputScalarResult;
304    std::vector<llvm::Value *>          mAvailableItemCount;
305
306    KernelFieldMap                      mKernelFieldMap;
307    std::vector<llvm::Type *>           mKernelFields;
308
309    StreamMap                           mStreamMap;
310
311    StreamSetBuffers                    mStreamSetInputBuffers;
312    std::vector<llvm::Value *>          mStreamSetInputBaseAddress;
313    StreamSetBuffers                    mStreamSetOutputBuffers;
314    std::vector<llvm::Value *>          mStreamSetOutputBaseAddress;
315};
316
317using Kernels = std::vector<Kernel *>;
318
319class SegmentOrientedKernel : public Kernel {
320protected:
321
322    SegmentOrientedKernel(std::string && kernelName,
323                          Bindings && stream_inputs,
324                          Bindings && stream_outputs,
325                          Bindings && scalar_parameters,
326                          Bindings && scalar_outputs,
327                          Bindings && internal_scalars);
328protected:
329
330    void generateKernelMethod(const std::unique_ptr<KernelBuilder> & b) final;
331
332    virtual void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & b) = 0;
333
334};
335
336/*
337The Multi-Block Kernel Builder
338------------------------------
339
340The Multi-Block Kernel Builder is designed to simplify the programming of
341efficient kernels with possibly variable and/or nonaligned output, subject to
342exact or MaxRatio processing constraints.   The following restrictions apply.
343
344#.  The input consists of one or more stream sets, the first of which is
345    known as the principal input stream set.
346
347#.  If there is more than one input stream set, the additional stream sets
348    are first classified as having either a derived processing rate or
349    a variable processing rate.   Stream sets with a derived processing rate
350    have a processing rate defined with respect to the input stream set of one
351    of the following types:  FixedRate, Add1 or RoundUp.    Note that stream sets
352    declared without a processing rate attribute have the FixedRate(1) attribute
353    by default and therefore satisfy this constraint.  All other processing rate
354    types are classified as variable rate.
355
356#.  All output stream sets must be declared with processing rate attributes
357    of one of the following types:
358    *  FixedRate, Add1, Roundup, or MaxRatio with respect to the principal input stream set.
359    *  FixedRate with respect to some other output stream set.
360
361    When using the Multi-Block Kernel Builder to program a new type of kernel,
362    the programmer must implement the generateDoMultiBlockMethod for normal
363    multi-block processing according to the requirements below, as well as
364    providing for special final block processing, if necessary.
365
366#.  The doMultiBlockMethod will be called with the following parameters:
367    * the number of items of the principal input stream to process (itemsToDo),
368    * additional items available parameters for each additional input stream set
369      that is classified as a variable rate stream set
370    * pointers to linear contiguous buffer areas for each of the input stream sets, and
371    * pointers to linear contiguous output buffer areas for each of the output stream sets.
372
373    Notes:
374    * if the kernel has a Lookahead dependency declared on any input stream set, then
375      there will be two buffer pointers for that stream set, one for accessing stream set
376      items without lookahead and one for accessing the items with lookahead.   
377    * pointers are to the beginning of the block corresponding to the
378      processedItemCount or producedItemCount of the given stream set.
379    * the base type of each pointer is the StreamSetBlockType of that streamset
380
381#.  The Multi-Block Kernel Builder will arrange that these input parameters may be
382    processed under the following simplifying assumptions.
383    * the number of itemsToDo will either be an exact multiple of the kernel stride,
384      or, for processing the final block, a value less than the kernel stride
385    * the input buffer of the principal stream set and all input buffers of stream sets
386      with derived processing rates will be safe to access and have data available in
387      accord with their processing rates based on the given number of itemsToDo
388      of the principal input stream set; no further bounds checking is needed. 
389    * input buffers of stream sets with MaxRatio attributes will be safe to access,
390      but will only have valid data as specified by the available items parameter for
391      that stream set.
392    * the kernel programmer is responsible for safe access and bounds checking for any
393      input stream set classified as Unknown rate.   No temporary buffers are used
394      for such stream sets.
395    * all output buffers will be safe to access and have space available
396      for the given maximum output generation rates based on the given number
397      of itemsToDo of the principal input stream set; no further bounds checking
398      is needed.
399    * for final block processing, all input buffers will be extended to be safely
400      treated as containing data corresponding to a full block of the principal
401      input stream set, with the actual data in each buffer padded with null values
402      beyond the end of input.  Similarly, all output buffers will contain space
403      sufficient for the maximum output that can be generated for a full block of
404      input processing.
405    * input and output pointers will be typed to allow convenient and logical access
406      to corresponding streams based on their declared stream set type and processing rate.
407    * for any input pointer p, a GEP instruction with a single int32 index i
408      will produce a pointer to the buffer position corresponding to the ith block of the
409      input stream set, relative to the initial block based on the processedItemCount.
410    * for any output stream set declared with a Fixed or Add1 processing rate with respect
411      to the principal input stream set, a GEP instruction with a single int32 index i
412      will produce a pointer to the buffer position corresponding to the ith block of the
413      stream set, relative to the initial block based on the producedItemCount.
414
415#.  Upon completion of multi-block processing, the Multi-Block Kernel Builder will arrange that
416    processed and produced item counts are updated for all stream sets that have exact
417    processing rate attributes.   Programmers are responsible for updating the counts
418    of any stream set declared with a variable attribute (MaxRatio or Unknown).
419
420#.  An important caveat is that buffer areas may change arbitrarily between
421    calls to the doMultiBlockMethod.   In no case should a kernel store a
422    buffer pointer in its internal state.   Furthermore a kernel must not make
423    any assumptions about the accessibility of stream set data outside of the
424    processing range outside of the block boundaries associated with the given itemsToDo.
425*/
426
427class MultiBlockKernel : public Kernel {
428protected:
429
430    MultiBlockKernel(std::string && kernelName,
431                     Bindings && stream_inputs,
432                     Bindings && stream_outputs,
433                     Bindings && scalar_parameters,
434                     Bindings && scalar_outputs,
435                     Bindings && internal_scalars);
436
437    // Each multi-block kernel subtype must provide its own logic for handling
438    // doMultiBlock calls, subject to the requirements laid out above.
439    // The generateMultiBlockLogic must be written to generate this logic, given
440    // a created but empty function.  Upon entry to generateMultiBlockLogic,
441    // the builder insertion point will be set to the entry block; upone
442    // exit the RetVoid instruction will be added to complete the method.
443    //
444    virtual llvm::Value * generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & b, llvm::Value * const numOfStrides) = 0;
445
446private:
447
448    // Given a kernel subtype with an appropriate interface, the generateDoSegment
449    // method of the multi-block kernel builder makes all the necessary arrangements
450    // to translate doSegment calls into a minimal sequence of doMultiBlock calls.
451    void generateKernelMethod(const std::unique_ptr<KernelBuilder> & b) final;
452
453    unsigned getItemAlignment(const Binding & binding) const;
454
455    bool isTransitivelyUnknownRate(const ProcessingRate & rate) const;
456
457    llvm::Value * getStrideSize(const std::unique_ptr<KernelBuilder> & b, const ProcessingRate & rate);
458
459    bool requiresCopyBack(const ProcessingRate & rate) const;
460
461    void reviseFinalProducedItemCounts(const std::unique_ptr<KernelBuilder> & b);
462
463protected:
464
465    std::vector<llvm::Value *>      mInitialAvailableItemCount;
466    std::vector<llvm::Value *>      mInitialProcessedItemCount;
467    std::vector<llvm::Value *>      mInitialProducedItemCount;
468
469};
470
471
472class BlockOrientedKernel : public MultiBlockKernel {
473protected:
474
475    void CreateDoBlockMethodCall(const std::unique_ptr<KernelBuilder> & b);
476
477    // Each kernel builder subtype must provide its own logic for generating
478    // doBlock calls.
479    virtual void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & b) = 0;
480
481    // Each kernel builder subtypre must also specify the logic for processing the
482    // final block of stream data, if there is any special processing required
483    // beyond simply calling the doBlock function.   In the case that the final block
484    // processing may be trivially implemented by dispatching to the doBlock method
485    // without additional preparation, the default generateFinalBlockMethod need
486    // not be overridden.
487
488    virtual void generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & b, llvm::Value * remainingItems);
489
490    BlockOrientedKernel(std::string && kernelName,
491                        Bindings && stream_inputs,
492                        Bindings && stream_outputs,
493                        Bindings && scalar_parameters,
494                        Bindings && scalar_outputs,
495                        Bindings && internal_scalars);
496
497private:
498
499    llvm::Value * generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & b, llvm::Value * const numOfStrides) final;
500
501    void writeDoBlockMethod(const std::unique_ptr<KernelBuilder> & b);
502
503    void writeFinalBlockMethod(const std::unique_ptr<KernelBuilder> & b, llvm::Value * remainingItems);
504
505    llvm::Value * getRemainingItems(const std::unique_ptr<KernelBuilder> & b);
506
507private:
508
509    llvm::Function *            mDoBlockMethod;
510    llvm::BasicBlock *          mStrideLoopBody;
511    llvm::IndirectBrInst *      mStrideLoopBranch;
512    llvm::PHINode *             mStrideLoopTarget;
513    llvm::PHINode *             mStrideBlockIndex;
514};
515
516}
517
518#endif
Note: See TracBrowser for help on using the repository browser.