source: icGREP/icgrep-devel/icgrep/kernels/kernel.h @ 5446

Last change on this file since 5446 was 5446, checked in by nmedfort, 2 years ago

Refactoring work + correction for getRawItemPointer

File size: 16.6 KB
RevLine 
[4924]1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
[5435]6#ifndef KERNEL_H
7#define KERNEL_H
[5063]8
[5425]9#include "interface.h"
[5260]10#include <boost/container/flat_map.hpp>
[5238]11#include <IR_Gen/idisa_builder.h>
[5425]12#include <toolchain/pipeline.h>
[5418]13#include <llvm/IR/Constants.h>
[5350]14
[5260]15namespace llvm { class Function; }
16namespace llvm { class IntegerType; }
17namespace llvm { class LoadInst; }
18namespace llvm { class Type; }
19namespace llvm { class Value; }
20namespace parabix { class StreamSetBuffer; }
[4924]21
[4974]22namespace kernel {
[5051]23   
[5436]24class KernelBuilder;
25
[5435]26class Kernel : public KernelInterface {
[5440]27    friend class KernelBuilder;
[5398]28protected:
29    using KernelMap = boost::container::flat_map<std::string, unsigned>;
30    enum class Port { Input, Output };
31    using StreamPort = std::pair<Port, unsigned>;
32    using StreamMap = boost::container::flat_map<std::string, StreamPort>;
[5399]33    using StreamSetBuffers = std::vector<parabix::StreamSetBuffer *>;
[5435]34    using Kernels = std::vector<Kernel *>;
[5408]35
[5435]36    static const std::string DO_BLOCK_SUFFIX;
37    static const std::string FINAL_BLOCK_SUFFIX;
[5439]38    static const std::string MULTI_BLOCK_SUFFIX;
[5435]39    static const std::string LOGICAL_SEGMENT_NO_SCALAR;
40    static const std::string PROCESSED_ITEM_COUNT_SUFFIX;
41    static const std::string CONSUMED_ITEM_COUNT_SUFFIX;
42    static const std::string PRODUCED_ITEM_COUNT_SUFFIX;
43    static const std::string TERMINATION_SIGNAL;
44    static const std::string BUFFER_PTR_SUFFIX;
45    static const std::string CONSUMER_SUFFIX;
46
[4924]47public:
[5051]48   
[5392]49    // Kernel Signatures and Module IDs
50    //
51    // A kernel signature uniquely identifies a kernel and its full functionality.
52    // In the event that a particular kernel instance is to be generated and compiled
53    // to produce object code, and we have a cached kernel object code instance with
54    // the same signature and targetting the same IDISA architecture, then the cached
55    // object code may safely be used to avoid recompilation.
56    //
57    // A kernel signature is a byte string of arbitrary length.
58    //
59    // Kernel developers should take responsibility for designing appropriate signature
60    // mechanisms that are short, inexpensive to compute and guarantee uniqueness
61    // based on the semantics of the kernel. 
62    //
63    // If no other mechanism is available, the default generateKernelSignature() method
64    // uses the full LLVM IR (before optimization) of the kernel instance.
65    //
66    // A kernel Module ID is short string that is used as a name for a particular kernel
67    // instance.  Kernel Module IDs are used to look up and retrieve cached kernel instances
68    // and so should be highly likely to uniquely identify a kernel instance.
69    //
70    // The ideal case is that a kernel Module ID serves as a full kernel signature thus
71    // guaranteeing uniqueness.  In this case, the moduleIDisUnique() method
72    // should return true.
73    //
[5431]74       
75    bool isCachable() const override { return false; }
76
[5440]77    std::string makeSignature(const std::unique_ptr<KernelBuilder> & idb) override;
[5431]78
[5392]79    // Can the module ID itself serve as the unique signature?
[5431]80    virtual bool moduleIDisSignature() const { return false; }
81
[5391]82    // Create a module stub for the kernel, populated only with its Module ID.     
83    //
[5431]84
[5446]85    void bindPorts(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs);
[5418]86
[5446]87    llvm::Module * makeModule(const std::unique_ptr<KernelBuilder> & idb);
88
89    llvm::Module * setModule(const std::unique_ptr<KernelBuilder> & idb, llvm::Module * const module);
90
[5440]91    void createKernelStub(const std::unique_ptr<KernelBuilder> & idb, const StreamSetBuffers & inputs, const StreamSetBuffers & outputs, llvm::Module * const kernelModule);
[5431]92
[5418]93    llvm::Module * getModule() const {
94        return mModule;
95    }
96
[5440]97    void generateKernel(const std::unique_ptr<kernel::KernelBuilder> & idb);
[5051]98   
[5440]99    llvm::Value * createInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) final;
[5135]100
[5440]101    void initializeInstance(const std::unique_ptr<KernelBuilder> & idb) final;
[5283]102
[5440]103    void finalizeInstance(const std::unique_ptr<kernel::KernelBuilder> & idb) final;
[5411]104
[5436]105    bool hasNoTerminateAttribute() const {
106        return mNoTerminateAttribute;
107    }
[5292]108
[5435]109    const StreamSetBuffers & getStreamSetInputBuffers() const {
110        return mStreamSetInputBuffers;
[5217]111    }
[5202]112
[5435]113    const parabix::StreamSetBuffer * getStreamSetInputBuffer(const unsigned i) const {
114        return mStreamSetInputBuffers[i];
[5229]115    }
[5435]116
117    const StreamSetBuffers & getStreamSetOutputBuffers() const {
118        return mStreamSetOutputBuffers;
[5217]119    }
[5246]120
[5435]121    const parabix::StreamSetBuffer * getStreamSetOutputBuffer(const unsigned i) const {
122        return mStreamSetOutputBuffers[i];
123    }
[5402]124
[5435]125    virtual ~Kernel() = 0;
[5408]126
[5097]127protected:
[5246]128
129    // Constructor
[5435]130    Kernel(std::string && kernelName,
[5431]131                  std::vector<Binding> && stream_inputs,
132                  std::vector<Binding> && stream_outputs,
133                  std::vector<Binding> && scalar_parameters,
134                  std::vector<Binding> && scalar_outputs,
135                  std::vector<Binding> && internal_scalars);
[5246]136
[5063]137    //
[5074]138    // Kernel builder subtypes define their logic of kernel construction
139    // in terms of 3 virtual methods for
140    // (a) preparing the Kernel state data structure
141    // (b) defining the logic of the doBlock function, and
142    // (c) defining the logic of the finalBlock function.
143    //
144    // Note: the kernel state data structure must only be finalized after
145    // all scalar fields have been added.   If there are no fields to
146    // be added, the default method for preparing kernel state may be used.
[5435]147
[5283]148    void setNoTerminateAttribute(const bool noTerminate = true) {
149        mNoTerminateAttribute = noTerminate;
150    }
151
[5440]152    unsigned getScalarIndex(const std::string & name) const;
153
[5392]154    void prepareStreamSetNameMap();
[5299]155
[5440]156    void linkExternalMethods(const std::unique_ptr<kernel::KernelBuilder> &) override { }
[5425]157
[5440]158    virtual void prepareKernel(const std::unique_ptr<KernelBuilder> & idb);
[5299]159
[5440]160    virtual void generateInitializeMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) { }
[5250]161   
[5440]162    virtual void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & iBuilder) = 0;
[5292]163
[5440]164    virtual void generateFinalizeMethod(const std::unique_ptr<KernelBuilder> & iBuilder) { }
[5411]165
[5074]166    // Add an additional scalar field to the KernelState struct.
167    // Must occur before any call to addKernelDeclarations or createKernelModule.
[5260]168    unsigned addScalar(llvm::Type * type, const std::string & name);
[5227]169
[5283]170    unsigned addUnnamedScalar(llvm::Type * type);
171
[5435]172    llvm::Value * getIsFinal() const {
173        return mIsFinal;
174    }
175
[5440]176    void callGenerateInitializeMethod(const std::unique_ptr<KernelBuilder> & idb);
[5418]177
[5440]178    void callGenerateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & idb);
[5418]179
[5440]180    void callGenerateFinalizeMethod(const std::unique_ptr<KernelBuilder> & idb);
[5418]181
[5398]182    StreamPort getStreamPort(const std::string & name) const;
[5286]183
[5310]184    const parabix::StreamSetBuffer * getInputStreamSetBuffer(const std::string & name) const {
[5398]185        const auto port = getStreamPort(name);
186        assert (port.first == Port::Input);
187        assert (port.second < mStreamSetInputBuffers.size());
[5446]188        assert (mStreamSetInputBuffers[port.second]);
[5398]189        return mStreamSetInputBuffers[port.second];
[5310]190    }
[5286]191
[5310]192    const parabix::StreamSetBuffer * getOutputStreamSetBuffer(const std::string & name) const {
[5398]193        const auto port = getStreamPort(name);
194        assert (port.first == Port::Output);
195        assert (port.second < mStreamSetOutputBuffers.size());
[5446]196        assert (mStreamSetOutputBuffers[port.second]);
[5398]197        return mStreamSetOutputBuffers[port.second];
[5310]198    }
199
[5418]200    const parabix::StreamSetBuffer * getAnyStreamSetBuffer(const std::string & name) const {
201        unsigned index; Port port;
202        std::tie(port, index) = getStreamPort(name);
203        if (port == Port::Input) {
204            assert (index < mStreamSetInputBuffers.size());
[5446]205            assert (mStreamSetInputBuffers[index]);
[5418]206            return mStreamSetInputBuffers[index];
207        } else {
208            assert (index < mStreamSetOutputBuffers.size());
[5446]209            assert (mStreamSetOutputBuffers[index]);
[5418]210            return mStreamSetOutputBuffers[index];
211        }
212    }
[5292]213
[5307]214private:
215
[5440]216    llvm::Value * getAvailableItemCount(const unsigned i) const {
217        return mAvailableItemCount[i];
218    }
[5408]219
[5097]220protected:
[4959]221
[5418]222    llvm::Function *                    mCurrentMethod;
223    bool                                mNoTerminateAttribute;
224    bool                                mIsGenerated;
[5292]225
[5418]226    llvm::Value *                       mIsFinal;
227    std::vector<llvm::Value *>          mAvailableItemCount;
228    llvm::Value *                       mOutputScalarResult;
229
230    std::vector<llvm::Type *>           mKernelFields;
231    KernelMap                           mKernelMap;
232    StreamMap                           mStreamMap;
233    StreamSetBuffers                    mStreamSetInputBuffers;
234    StreamSetBuffers                    mStreamSetOutputBuffers;
235
[4924]236};
[5283]237
[5435]238class SegmentOrientedKernel : public Kernel {
[5287]239protected:
240
[5435]241    SegmentOrientedKernel(std::string && kernelName,
[5287]242                          std::vector<Binding> && stream_inputs,
243                          std::vector<Binding> && stream_outputs,
244                          std::vector<Binding> && scalar_parameters,
245                          std::vector<Binding> && scalar_outputs,
246                          std::vector<Binding> && internal_scalars);
247
248};
249
[5435]250class BlockOrientedKernel : public Kernel {
[5283]251protected:
252
[5440]253    void CreateDoBlockMethodCall(const std::unique_ptr<KernelBuilder> & idb);
[5292]254
[5283]255    // Each kernel builder subtype must provide its own logic for generating
256    // doBlock calls.
[5440]257    virtual void generateDoBlockMethod(const std::unique_ptr<KernelBuilder> & idb) = 0;
[5283]258
259    // Each kernel builder subtypre must also specify the logic for processing the
260    // final block of stream data, if there is any special processing required
261    // beyond simply calling the doBlock function.   In the case that the final block
262    // processing may be trivially implemented by dispatching to the doBlock method
263    // without additional preparation, the default generateFinalBlockMethod need
264    // not be overridden.
265
[5440]266    virtual void generateFinalBlockMethod(const std::unique_ptr<KernelBuilder> & idb, llvm::Value * remainingItems);
[5283]267
[5440]268    void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & idb) final;
[5283]269
[5435]270    BlockOrientedKernel(std::string && kernelName,
[5283]271                        std::vector<Binding> && stream_inputs,
272                        std::vector<Binding> && stream_outputs,
273                        std::vector<Binding> && scalar_parameters,
274                        std::vector<Binding> && scalar_outputs,
275                        std::vector<Binding> && internal_scalars);
276
[5350]277private:
[5307]278
[5440]279    void writeDoBlockMethod(const std::unique_ptr<KernelBuilder> & idb);
[5307]280
[5440]281    void writeFinalBlockMethod(const std::unique_ptr<KernelBuilder> & idb, llvm::Value * remainingItems);
[5292]282
[5347]283private:
284
[5350]285    llvm::Function *        mDoBlockMethod;
286    llvm::BasicBlock *      mStrideLoopBody;
287    llvm::IndirectBrInst *  mStrideLoopBranch;
[5351]288    llvm::PHINode *         mStrideLoopTarget;
[5283]289};
290
[5440]291/*
[5439]292The Multi-Block Kernel Builder
293------------------------------
[5285]294
[5439]295The Multi-Block Kernel Builder is designed to simplify the programming of
296efficient kernels with possibly variable and/or nonaligned output, subject to
297exact or MaxRatio processing constraints.   The following restrictions apply.
[5440]298
[5439]299#.  The input consists of one or more stream sets, the first of which is
[5440]300    known as the principal input stream set.
301
[5439]302#.  If there is more than one input stream set, the additional stream sets must
303    have a processing rate defined with respect to the input stream set of one
304    of the following types:  FixedRate, Add1 or RoundUp.    Note that stream sets
305    declared without a processing rate attribute have the FixedRate(1) attribute
306    by default and therefore satisfy this constraint.
[5440]307
[5439]308#.  All output stream sets must be declared with processing rate attributes
309    of one of the following types:
    *  FixedRate, Add1, RoundUp, or MaxRatio with respect to the principal input stream set.
311    *  FixedRate with respect to some other output stream set.
[5440]312
    When using the Multi-Block Kernel Builder to program a new type of kernel,
    the programmer must implement the generateMultiBlockLogic method for normal
    multi-block processing according to the requirements below, and must also
    provide for special final block processing, if necessary.
[5440]317
[5439]318#.  The doMultiBlockMethod will be called with the following parameters:
319    * the number of items of the principal input stream to process (itemsToDo),
320    * pointers to linear contiguous buffer areas for each of the input stream sets, and
321    * pointers to linear contiguous output buffer areas for each of the output stream sets.
322    * pointers are to the address of the first item of the first stream of the stream set.
323
324#.  The Multi-Block Kernel Builder will arrange that these input parameters may be
325    processed under the following simplifying assumptions.
326    * the number of itemsToDo will either be an exact multiple of the BlockSize,
327      or, for processing the final block, a value less than BlockSize
328    * all input buffers will be safe to access and have data available in
329      accord with their processing rates based on the given number of itemsToDo
330      of the principal input stream set; no further bounds checking is needed.
    * all output buffers will be safe to access and have space available
      for the given maximum output generation rates based on the given number
      of itemsToDo of the principal input stream set; no further bounds checking
      is needed.
335    * for final block processing, all input buffers will be extended to be safely
336      treated as containing data corresponding to a full block of the principal
337      input stream set, with the actual data in each buffer padded with null values
338      beyond the end of input.  Similarly, all output buffers will contain space
339      sufficient for the maximum output that can be generated for a full block of
340      input processing.
341    * input and output pointers will be typed to allow convenient and logical access
342      to corresponding streams based on their declared stream set type and processing rate.
343    * for any input pointer p, a GEP instruction with a single int32 index i
344      will produce a pointer to the buffer position corresponding to the ith block of the
[5440]345      principal input stream set.
[5439]346    * for any output stream set declared with a Fixed or Add1 processing rate with respect
347      to the principal input stream set, a GEP instruction with a single int32 index i
348      will produce a pointer to the buffer position corresponding to the ith block of the
349      principal input stream set.
[5440]350
[5439]351#.  Upon completion of multi-block processing, the Multi-Block Kernel Builder will arrange that
352    processed and produced item counts are updated for all stream sets that have exact
353    processing rate attributes.   Programmers are responsible for updating the producedItemCount
354    of any stream set declared with a variable attribute (MaxRatio).
[5440]355
#.  An important caveat is that buffer areas may change arbitrarily between
    calls to the doMultiBlockMethod.   In no case should a kernel store a
    buffer pointer in its internal state.   Furthermore, a kernel must not make
    any assumptions about the accessibility of stream set data outside of the
    block boundaries associated with the given itemsToDo.
361*/
362
363class MultiBlockKernel : public Kernel {
364protected:
365
366    MultiBlockKernel(std::string && kernelName,
367                     std::vector<Binding> && stream_inputs,
368                     std::vector<Binding> && stream_outputs,
369                     std::vector<Binding> && scalar_parameters,
370                     std::vector<Binding> && scalar_outputs,
371                     std::vector<Binding> && internal_scalars);
372
373    // Each multi-block kernel subtype must provide its own logic for handling
[5440]374    // doMultiBlock calls, subject to the requirements laid out above.
[5439]375    // The generateMultiBlockLogic must be written to generate this logic, given
376    // a created but empty function.  Upon entry to generateMultiBlockLogic,
377    // the builder insertion point will be set to the entry block; upone
378    // exit the RetVoid instruction will be added to complete the method.
[5440]379    //
[5441]380    virtual void generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & idb) = 0;
[5439]381
[5440]382private:
383
[5439]384    // Given a kernel subtype with an appropriate interface, the generateDoSegment
385    // method of the multi-block kernel builder makes all the necessary arrangements
386    // to translate doSegment calls into a minimal sequence of doMultiBlock calls.
[5446]387    void generateDoSegmentMethod(const std::unique_ptr<KernelBuilder> & kb) final;
[5440]388
[5439]389};
[5440]390
391
[4959]392}
[5063]393#endif
Note: See TracBrowser for help on using the repository browser.