source: icGREP/icgrep-devel/icgrep/kernels/kernel.h @ 5418

Last change on this file since 5418 was 5418, checked in by nmedfort, 2 years ago

Removed non-functional CUDA code from icgrep and consolidated grep and multigrep mode into a single function; allowed segment parallel pipeline to utilize process as its initial thread; modified MMapSourceKernel to map and perform mmap directly and advise the OS to drop consumed data streams.

File size: 15.6 KB
RevLine 
[4924]1/*
2 *  Copyright (c) 2016 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
[5063]6#ifndef KERNEL_BUILDER_H
7#define KERNEL_BUILDER_H
8
[5260]9#include "interface.h"      // for KernelInterface
10#include <boost/container/flat_map.hpp>
[5238]11#include <IR_Gen/idisa_builder.h>
[5408]12#include <kernels/pipeline.h>
[5418]13#include <llvm/IR/Constants.h>
[5350]14
[5418]15//#include <string>           // for string
16//#include <memory>           // for unique_ptr
17
[5260]18namespace llvm { class Function; }
19namespace llvm { class IntegerType; }
20namespace llvm { class LoadInst; }
21namespace llvm { class Type; }
22namespace llvm { class Value; }
23namespace parabix { class StreamSetBuffer; }
[4924]24
[4974]25namespace kernel {
[5051]26   
[5063]27class KernelBuilder : public KernelInterface {
[5398]28protected:
29    using KernelMap = boost::container::flat_map<std::string, unsigned>;
30    enum class Port { Input, Output };
31    using StreamPort = std::pair<Port, unsigned>;
32    using StreamMap = boost::container::flat_map<std::string, StreamPort>;
[5399]33    using StreamSetBuffers = std::vector<parabix::StreamSetBuffer *>;
[5418]34    using Kernels = std::vector<KernelBuilder *>;
[5408]35
[5418]36    friend void ::generateSegmentParallelPipeline(IDISA::IDISA_Builder *, const Kernels &);
37    friend void ::generatePipelineLoop(IDISA::IDISA_Builder *, const Kernels &);
38    friend void ::generateParallelPipeline(IDISA::IDISA_Builder *, const Kernels &);
[4924]39public:
[5051]40   
[5392]41    // Kernel Signatures and Module IDs
42    //
43    // A kernel signature uniquely identifies a kernel and its full functionality.
44    // In the event that a particular kernel instance is to be generated and compiled
45    // to produce object code, and we have a cached kernel object code instance with
46    // the same signature and targetting the same IDISA architecture, then the cached
47    // object code may safely be used to avoid recompilation.
48    //
49    // A kernel signature is a byte string of arbitrary length.
50    //
51    // Kernel developers should take responsibility for designing appropriate signature
52    // mechanisms that are short, inexpensive to compute and guarantee uniqueness
53    // based on the semantics of the kernel. 
54    //
55    // If no other mechanism is available, the default generateKernelSignature() method
56    // uses the full LLVM IR (before optimization) of the kernel instance.
57    //
58    // A kernel Module ID is short string that is used as a name for a particular kernel
59    // instance.  Kernel Module IDs are used to look up and retrieve cached kernel instances
60    // and so should be highly likely to uniquely identify a kernel instance.
61    //
62    // The ideal case is that a kernel Module ID serves as a full kernel signature thus
63    // guaranteeing uniqueness.  In this case, the moduleIDisUnique() method
64    // should return true.
65    //
66   
67    // Can the module ID itself serve as the unique signature?
[5401]68    virtual bool moduleIDisSignature() { return false; }
[5392]69   
[5401]70    virtual std::string generateKernelSignature(std::string moduleId);
[5392]71   
[5391]72    // Create a module stub for the kernel, populated only with its Module ID.     
73    //
[5418]74    void createKernelStub(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs);
75
76    llvm::Module * getModule() const {
77        return mModule;
78    }
79
[5074]80    // Generate the Kernel to the current module (iBuilder->getModule()).
[5391]81    void generateKernel();
[5051]82   
[5408]83    llvm::Value * createInstance() final;
[5135]84
[5408]85    void initializeInstance() final;
[5283]86
[5418]87    void finalizeInstance() final;
[5411]88
[5408]89    llvm::Value * getProducedItemCount(const std::string & name, llvm::Value * doFinal = nullptr) const final;
[5283]90
[5408]91    void setProducedItemCount(const std::string & name, llvm::Value * value) const final;
[5292]92
[5408]93    llvm::Value * getProcessedItemCount(const std::string & name) const final;
[5390]94
[5408]95    void setProcessedItemCount(const std::string & name, llvm::Value * value) const final;
96
[5418]97    llvm::Value * getConsumedItemCount(const std::string & name) const final;
98
99    void setConsumedItemCount(const std::string & name, llvm::Value * value) const final;
100
[5408]101    bool hasNoTerminateAttribute() const {
102        return mNoTerminateAttribute;
103    }
[5252]104   
[5408]105    llvm::Value * getTerminationSignal() const final;
[5292]106
[5408]107    void setTerminationSignal() const final;
[5292]108
[5408]109    // Get the value of a scalar field for the current instance.
110    llvm::Value * getScalarFieldPtr(llvm::Value * index) const {
111        return getScalarFieldPtr(getInstance(), index);
112    }
[5292]113
[5408]114    llvm::Value * getScalarFieldPtr(const std::string & fieldName) const {
115        return getScalarFieldPtr(getInstance(), fieldName);
116    }
[5292]117
[5408]118    llvm::Value * getScalarField(const std::string & fieldName) const {
119        return iBuilder->CreateLoad(getScalarFieldPtr(fieldName));
120    }
[5292]121
[5408]122    llvm::Value * getScalarField(llvm::Value * index) const {
123        return iBuilder->CreateLoad(getScalarFieldPtr(index));
124    }
[5292]125
[5408]126    // Set the value of a scalar field for the current instance.
127    void setScalarField(const std::string & fieldName, llvm::Value * value) const {
128        iBuilder->CreateStore(value, getScalarFieldPtr(fieldName));
129    }
130
131    void setScalarField(llvm::Value * index, llvm::Value * value) const {
132        iBuilder->CreateStore(value, getScalarFieldPtr(index));
133    }
134
[5292]135    // Synchronization actions for executing a kernel for a particular logical segment.
136    //
137    // Before the segment is processed, acquireLogicalSegmentNo must be used to load
138    // the segment number of the kernel state to ensure that the previous segment is
139    // complete (by checking that the acquired segment number is equal to the desired segment
140    // number).
141    // After all segment processing actions for the kernel are complete, and any necessary
142    // data has been extracted from the kernel for further pipeline processing, the
143    // segment number must be incremented and stored using releaseLogicalSegmentNo.
[5408]144    llvm::LoadInst * acquireLogicalSegmentNo() const;
[5292]145
[5408]146    void releaseLogicalSegmentNo(llvm::Value * nextSegNo) const;
[5292]147
[5408]148    llvm::Value * getConsumerState(const std::string & name) const;
149
[5292]150    // Get a parameter by name.
151    llvm::Argument * getParameter(llvm::Function * f, const std::string & name) const;
152
[5260]153    inline llvm::IntegerType * getSizeTy() const {
[5217]154        return getBuilder()->getSizeTy();
155    }
[5202]156
[5260]157    inline llvm::Type * getStreamTy(const unsigned FieldWidth = 1) {
[5229]158        return getBuilder()->getStreamTy(FieldWidth);
159    }
160   
[5260]161    inline llvm::Type * getStreamSetTy(const unsigned NumElements = 1, const unsigned FieldWidth = 1) {
[5217]162        return getBuilder()->getStreamSetTy(NumElements, FieldWidth);
163    }
[5408]164       
165    const StreamSetBuffers & getStreamSetInputBuffers() const { return mStreamSetInputBuffers; }
[5246]166
[5402]167    const parabix::StreamSetBuffer * getStreamSetInputBuffer(const unsigned i) const { return mStreamSetInputBuffers[i]; }
168
[5408]169    const StreamSetBuffers & getStreamSetOutputBuffers() const { return mStreamSetOutputBuffers; }
[5251]170
[5402]171    const parabix::StreamSetBuffer * getStreamSetOutputBuffer(const unsigned i) const { return mStreamSetOutputBuffers[i]; }
172
[5403]173    llvm::CallInst * createDoSegmentCall(const std::vector<llvm::Value *> & args) const;
[5287]174
[5418]175    llvm::Value * getAccumulator(const std::string & accumName) const;
[5287]176
[5408]177    virtual ~KernelBuilder() = 0;
178
[5097]179protected:
[5246]180
181    // Constructor
182    KernelBuilder(IDISA::IDISA_Builder * builder,
[5267]183                    std::string && kernelName,
[5283]184                    std::vector<Binding> && stream_inputs,
185                    std::vector<Binding> && stream_outputs,
186                    std::vector<Binding> && scalar_parameters,
187                    std::vector<Binding> && scalar_outputs,
188                    std::vector<Binding> && internal_scalars);
[5246]189
[5063]190    //
[5074]191    // Kernel builder subtypes define their logic of kernel construction
192    // in terms of 3 virtual methods for
193    // (a) preparing the Kernel state data structure
194    // (b) defining the logic of the doBlock function, and
195    // (c) defining the logic of the finalBlock function.
196    //
197    // Note: the kernel state data structure must only be finalized after
198    // all scalar fields have been added.   If there are no fields to
199    // be added, the default method for preparing kernel state may be used.
[5051]200   
[5283]201    void setNoTerminateAttribute(const bool noTerminate = true) {
202        mNoTerminateAttribute = noTerminate;
203    }
204
[5392]205    void prepareStreamSetNameMap();
[5299]206
[5076]207    virtual void prepareKernel();
[5299]208
[5418]209    virtual void generateInitializeMethod() { }
[5250]210   
[5418]211    virtual void generateDoSegmentMethod() = 0;
[5292]212
[5418]213    virtual void generateFinalizeMethod() { }
[5411]214
[5074]215    // Add an additional scalar field to the KernelState struct.
216    // Must occur before any call to addKernelDeclarations or createKernelModule.
[5260]217    unsigned addScalar(llvm::Type * type, const std::string & name);
[5227]218
[5283]219    unsigned addUnnamedScalar(llvm::Type * type);
220
[5063]221    // Run-time access of Kernel State and parameters of methods for
222    // use in implementing kernels.
223   
224    // Get the index of a named scalar field within the kernel state struct.
[5260]225    llvm::ConstantInt * getScalarIndex(const std::string & name) const;
[5292]226
[5317]227    llvm::Value * getInputStreamBlockPtr(const std::string & name, llvm::Value * streamIndex) const;
[5329]228
[5317]229    llvm::Value * loadInputStreamBlock(const std::string & name, llvm::Value * streamIndex) const;
230   
231    llvm::Value * getInputStreamPackPtr(const std::string & name, llvm::Value * streamIndex, llvm::Value * packIndex) const;
232   
233    llvm::Value * loadInputStreamPack(const std::string & name, llvm::Value * streamIndex, llvm::Value * packIndex) const;
234   
[5329]235    llvm::Value * getInputStreamSetCount(const std::string & name) const;
236
[5317]237    llvm::Value * getOutputStreamBlockPtr(const std::string & name, llvm::Value * streamIndex) const;
238   
239    void storeOutputStreamBlock(const std::string & name, llvm::Value * streamIndex, llvm::Value * toStore) const;
240   
241    llvm::Value * getOutputStreamPackPtr(const std::string & name, llvm::Value * streamIndex, llvm::Value * packIndex) const;
242   
243    void storeOutputStreamPack(const std::string & name, llvm::Value * streamIndex, llvm::Value * packIndex, llvm::Value * toStore) const;
[5329]244
245    llvm::Value * getOutputStreamSetCount(const std::string & name) const;
246
[5317]247    llvm::Value * getAdjustedInputStreamBlockPtr(llvm::Value * blockAdjustment, const std::string & name, llvm::Value * streamIndex) const;
[5246]248
[5310]249    llvm::Value * getRawInputPointer(const std::string & name, llvm::Value * streamIndex, llvm::Value * absolutePosition) const;
[5260]250
[5310]251    llvm::Value * getRawOutputPointer(const std::string & name, llvm::Value * streamIndex, llvm::Value * absolutePosition) const;
252
[5418]253    llvm::Value * getBaseAddress(const std::string & name) const;
254
[5398]255    void setBaseAddress(const std::string & name, llvm::Value * addr) const;
[5377]256
[5398]257    llvm::Value * getBufferedSize(const std::string & name) const;
258
259    void setBufferedSize(const std::string & name, llvm::Value * size) const;
260
261    void reserveBytes(const std::string & name, llvm::Value * requested) const;
262
[5402]263    llvm::Value * getAvailableItemCount(const std::string & name) const;
264
[5418]265    llvm::Value * getIsFinal() const {
266        return mIsFinal;
267    }
268
269
[5292]270    llvm::BasicBlock * CreateBasicBlock(std::string && name) const;
271
[5297]272    // Stream set helpers.
273
274    llvm::Value * getStreamSetBufferPtr(const std::string & name) const;
275
[5408]276    llvm::Value * getScalarFieldPtr(llvm::Value * const instance, llvm::Value * index) const {
277        assert ("instance cannot be null!" && instance);
278        return iBuilder->CreateGEP(getInstance(), {iBuilder->getInt32(0), index});
279    }
[5297]280
[5408]281    llvm::Value * getScalarFieldPtr(llvm::Value * const instance, const std::string & fieldName) const {
282        return getScalarFieldPtr(instance, getScalarIndex(fieldName));
283    }
[5297]284
[5418]285    void callGenerateInitializeMethod();
286
287    void callGenerateDoSegmentMethod();
288
289    void callGenerateFinalizeMethod();
290
[5398]291    StreamPort getStreamPort(const std::string & name) const;
[5286]292
[5310]293    const parabix::StreamSetBuffer * getInputStreamSetBuffer(const std::string & name) const {
[5398]294        const auto port = getStreamPort(name);
295        assert (port.first == Port::Input);
296        assert (port.second < mStreamSetInputBuffers.size());
297        return mStreamSetInputBuffers[port.second];
[5310]298    }
[5286]299
[5310]300    const parabix::StreamSetBuffer * getOutputStreamSetBuffer(const std::string & name) const {
[5398]301        const auto port = getStreamPort(name);
302        assert (port.first == Port::Output);
303        assert (port.second < mStreamSetOutputBuffers.size());
304        return mStreamSetOutputBuffers[port.second];
[5310]305    }
306
[5418]307    const parabix::StreamSetBuffer * getAnyStreamSetBuffer(const std::string & name) const {
308        unsigned index; Port port;
309        std::tie(port, index) = getStreamPort(name);
310        if (port == Port::Input) {
311            assert (index < mStreamSetInputBuffers.size());
312            return mStreamSetInputBuffers[index];
313        } else {
314            assert (index < mStreamSetOutputBuffers.size());
315            return mStreamSetOutputBuffers[index];
316        }
317    }
[5292]318
[5307]319private:
320
[5408]321    void setConsumerState(const std::string & name, llvm::Value * value) const;
322
[5307]323    llvm::Value * computeBlockIndex(const std::vector<Binding> & binding, const std::string & name, llvm::Value * itemCount) const;
324
[5097]325protected:
[4959]326
[5418]327    llvm::Module *                      mModule;
328    llvm::Function *                    mCurrentMethod;
329    bool                                mNoTerminateAttribute;
330    bool                                mIsGenerated;
[5292]331
[5418]332    llvm::Value *                       mIsFinal;
333    std::vector<llvm::Value *>          mAvailableItemCount;
334    llvm::Value *                       mOutputScalarResult;
335
336
337    std::vector<llvm::Type *>           mKernelFields;
338    KernelMap                           mKernelMap;
339    StreamMap                           mStreamMap;
340    StreamSetBuffers                    mStreamSetInputBuffers;
341    StreamSetBuffers                    mStreamSetOutputBuffers;
342
[4924]343};
[5283]344
[5287]345class SegmentOrientedKernel : public KernelBuilder {
346protected:
347
348    SegmentOrientedKernel(IDISA::IDISA_Builder * builder,
349                          std::string && kernelName,
350                          std::vector<Binding> && stream_inputs,
351                          std::vector<Binding> && stream_outputs,
352                          std::vector<Binding> && scalar_parameters,
353                          std::vector<Binding> && scalar_outputs,
354                          std::vector<Binding> && internal_scalars);
355
356};
357
[5283]358class BlockOrientedKernel : public KernelBuilder {
359protected:
360
[5347]361    void CreateDoBlockMethodCall();
[5292]362
[5283]363    // Each kernel builder subtype must provide its own logic for generating
364    // doBlock calls.
[5297]365    virtual void generateDoBlockMethod() = 0;
[5283]366
367    // Each kernel builder subtypre must also specify the logic for processing the
368    // final block of stream data, if there is any special processing required
369    // beyond simply calling the doBlock function.   In the case that the final block
370    // processing may be trivially implemented by dispatching to the doBlock method
371    // without additional preparation, the default generateFinalBlockMethod need
372    // not be overridden.
373
[5347]374    virtual void generateFinalBlockMethod(llvm::Value * remainingItems);
[5283]375
[5418]376    void generateDoSegmentMethod() override final;
[5283]377
378    BlockOrientedKernel(IDISA::IDISA_Builder * builder,
379                        std::string && kernelName,
380                        std::vector<Binding> && stream_inputs,
381                        std::vector<Binding> && stream_outputs,
382                        std::vector<Binding> && scalar_parameters,
383                        std::vector<Binding> && scalar_outputs,
384                        std::vector<Binding> && internal_scalars);
385
[5350]386private:
[5307]387
[5350]388    bool useIndirectBr() const {
389        return iBuilder->supportsIndirectBr();
[5347]390    }
[5307]391
[5347]392    void writeDoBlockMethod();
[5292]393
[5350]394    void writeFinalBlockMethod(llvm::Value * remainingItems);
[5347]395
396private:
397
[5350]398    llvm::Function *        mDoBlockMethod;
399    llvm::BasicBlock *      mStrideLoopBody;
400    llvm::IndirectBrInst *  mStrideLoopBranch;
[5351]401    llvm::PHINode *         mStrideLoopTarget;
[5283]402};
403
[5285]404
[4959]405}
[5063]406#endif
Note: See TracBrowser for help on using the repository browser.