source: icGREP/icgrep-devel/icgrep/kernels/pdep_kernel.h @ 6161

Last change on this file since 6161 was 6088, checked in by cameron, 16 months ago

Allow a name suffix for FieldDepositKernel? to distinguish multiple kernel instances

File size: 4.5 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5#ifndef PDEP_KERNEL_H
6#define PDEP_KERNEL_H
7
8#include "kernel.h"
9#include <llvm/IR/Value.h>
10#include <string>
11#include <toolchain/driver.h>
12
13/*
14
15Conceptually, given an unbounded input stream set of k streams and a marker stream, this kernel uses the
16Parallel Bits Deposit (PDEP) instruction to copy the input items from the i-th input stream to the i-th
17output stream at the positions indicated by the marker bits. All other output items are set to zero. E.g.,
18
19 SOURCE >  abcdefgh i0000000 00000000 00000000
20 MARKER >  ...1.1.1 .....11. ..1...1. ...1.1..
21 OUTPUT >  ...a.b.c .....de. ..f...g. ...h.i..
22
23The complicating factor of this Kernel is that it assumes the input streams are *swizzled*. I.e., it
24"divides" each block of the marker stream into k elements, M_1 ... M_k, and applies the PDEP operation
25using M_i to each of the k elements in the i-th input (swizzled) stream.
26
27            CONCEPTUAL VIEW OF INPUT STREAM SET                    ACTUAL LAYOUT OF INPUT STREAM SET
28
29 STREAM 0  abcde...  fg......  hijklm..  nopqrst.     SWIZZLE 0  abcde...  uvwxy...  OPQRS...  89abc...
30 STREAM 1  uvwxy...  zA......  BCDEFG..  HIJKLMN.     SWIZZLE 1  fg......  zA......  TU......  de......
31 STREAM 2  OPQRS...  TU......  VWXYZ0..  1234567.     SWIZZLE 2  hijklm..  BCDEFG..  VWXYZ0..  fghijk..
32 STREAM 3  89abc...  de......  fghijk..  lmnopqr.     SWIZZLE 3  nopqrst.  HIJKLMN.  1234567.  lmnopqr.
33
34
35NOTE: this kernel does *NOT* unswizzle the output. This will eventually be the responsibility of the
36pipeline to ensure it is done when needed.
37
38*/
39
40namespace kernel {
41
42class PDEPkernel final : public MultiBlockKernel {
43public:
44    PDEPkernel(const std::unique_ptr<kernel::KernelBuilder> & b, const unsigned swizzleFactor = 4, std::string name = "PDEP");
45    bool isCachable() const override { return true; }
46    bool hasSignature() const override { return false; }
47private:
48    void generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & b, llvm::Value * const numOfStrides) final;
49private:
50    const unsigned mSwizzleFactor;
51};   
52
53class StreamExpandKernel final : public MultiBlockKernel {
54public:
55    StreamExpandKernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned fw, unsigned sourceStreamCount, unsigned selectedStreamBase, unsigned selectedStreamCount);
56    bool isCachable() const override { return true; }
57    bool hasSignature() const override { return false; }
58protected:
59    void generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & kb, llvm::Value * const numOfBlocks) override;
60private:
61    const unsigned mFieldWidth;
62    const unsigned mSelectedStreamBase;
63    const unsigned mSelectedStreamCount;
64};
65
66class FieldDepositKernel final : public MultiBlockKernel {
67public:
68    FieldDepositKernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned fw, unsigned streamCount, std::string suffix);
69    bool isCachable() const override { return true; }
70    bool hasSignature() const override { return false; }
71protected:
72    void generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & kb, llvm::Value * const numOfStrides) override;
73private:
74    const unsigned mFieldWidth;
75    const unsigned mStreamCount;
76};
77
78class PDEPFieldDepositKernel final : public MultiBlockKernel {
79public:
80    PDEPFieldDepositKernel(const std::unique_ptr<kernel::KernelBuilder> & b, unsigned fw, unsigned streamCount, std::string suffix);
81    bool isCachable() const override { return true; }
82    bool hasSignature() const override { return false; }
83protected:
84    void generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> & kb, llvm::Value * const numOfStrides) override;
85private:
86    const unsigned mPDEPWidth;
87    const unsigned mStreamCount;
88};
89
90class StreamDepositCompiler {
91public:
92    StreamDepositCompiler(Driver & driver, unsigned sourceStreamCount, unsigned selectedStreamBase, unsigned selectedStreamCount, unsigned bufferBlocks = 0) :
93        mDriver(driver),
94        mSourceStreamCount(sourceStreamCount),
95        mSelectedStreamBase(selectedStreamBase),
96        mSelectedStreamCount(selectedStreamCount),
97        mBufferBlocks(bufferBlocks), mFieldWidth(64) {}
98    void setDepositFieldWidth(unsigned fw) {mFieldWidth = fw;}
99    void makeCall(parabix::StreamSetBuffer * mask, parabix::StreamSetBuffer * inputs, parabix::StreamSetBuffer * outputs);
100private:
101    Driver & mDriver;
102    const unsigned mSourceStreamCount;
103    const unsigned mSelectedStreamBase;
104    const unsigned mSelectedStreamCount;
105    unsigned mBufferBlocks;
106    unsigned mFieldWidth;
107};
108
109}
110
111#endif
Note: See TracBrowser for help on using the repository browser.