source: icGREP/icgrep-devel/icgrep/kernels/attributes.h @ 5864

Last change on this file since 5864 was 5856, checked in by nmedfort, 17 months ago

Revised pipeline structure to better control I/O rates

File size: 13.3 KB
Line 
1#ifndef ATTRIBUTES_H
2#define ATTRIBUTES_H
3
4#include <vector>
5
6namespace kernel {
7
8struct Attribute {
9
10    enum class KindId {
11
12        /** INPUT STREAM ATTRIBUTES **/
13
14        LookAhead, /// NOT DONE
15
16        // A LookAhead(n) attribute on an input stream set S declares that the kernel
17        // looks ahead n positions in the input stream.  That is, processing of item
18        // S[i, j] may be defined in terms of S[i, j+n].
19
20        // Guarantee required: the pipeline compiler must ensure that, when
21        // the kernel is called with an available item count of N for the
22        // input stream, that the corresponding input data buffer has
23        // valid items in the buffer up to and including position N+n,
24        // and that the blocks containing the items from N-1 through N+n are
25        // linearly contiguous.  When the final doMultiBlock call is made,
26        // the pipeline compiler must ensure that the n items at lookahead
27        // positions are zero-initialized.
28
29        // (Note: this guarantee anticipates using a single buffer pointer
30        // for both normal stream access and lookahead access.   This avoids
31        // the cost of extra parameters per doMultiBlock call, but requires
32        // that the corresponding circular buffer have a "lookahead extension area"
33        // that holds a copy of the data at the physical start of buffer).
34
35        LookBehind, /// NOT DONE
36
37        // A LookBehind(n) attribute on an input stream S declares that the kernel
38        // requires access to input items up to n positions prior to the current
39        // processed item position.
40
41        // (Notes: this may allow more efficient advances within n (without saving state).
42        // However, a lookbehind extension area prior to the normal buffer base address
43        // is necessary, which must be initially zero-filled.)
44
45        // A LookBehind(n) attribute on an output stream S declares that the kernel
46        // requires access to up to n previously generated output items.
47        // (Example: lz4d lookbehind(65536)).
48
49        Principal,
50
51        // One input stream can be declared as the principal input buffer for a kernel.
52        // If a kernel has a principal input stream, when processing the final stride,
53        // a MultiBlockKernel assumes the item count of the principle is the correct
54        // one and zero extends / truncates all other input streams to match it.
55
56        Deferred,
57
58        // Normally, the processed item count of fixed rate streams is automatically
59        // updated by the MultiBlock kernel. However, some streams behave like Fixed
60        // rate streams (in that they will always eventually process a Fixed amount of
61        // data) but the kernel processes the data in unpredictable chunks. Rather than
62        // declaring those as Unknown or Bounded rates, marking their rate calculation
63        // as Deferred provides the pipeline with a stronger guarantee when it comes to
64        // buffer size calculations.
65
66        IndependentRegionBegin, IndependentRegionEnd, /// NOT DONE
67
68        // Some kernels can divide their processing into concrete non-overlapping regions
69        // between a beginning and ending position. This is a hard guarantee that regardless
70        // of the computations between the start of the stream and the beginning of the first
71        // independent region or between the *beginning* of any two independent regions, A,
72        // B, the calculations that occur prior to the beginning of B do not affect the
73        // calculations after it --- even if A is started at an arbitrary position with a
74        // zeroed-out kernel state.
75
76        // If a kernel K is processed simultaneously by two threads, K_0 and K_1, and K_1 is
77        // waiting K_0 to finish and update it's kernel state for K_1 to resume at, K_1 can
78        // compute what its state will be and begin processing before K_0 is finished. This
79        // requires a the pipeline to intervene and call an optimized "output-less" instance
80        // of the kernel prior to calling B.
81
82        ConditionalRegionBegin, ConditionalRegionEnd, /// NOT DONE
83
84        // Some kernels have clearly demarcated regions in which a MultiBlock kernel will
85        // produce useful outputs for only the inputs within those regions. This attribute
86        // instructs the kernel to "zero-fill" the output of any non-selected regions,
87        // skipping strides entirely whenever possible.
88
89        // If the same regions are also independent, we can avoid the overhead of "masking
90        // out" the input streams. Otherwise a MultiBlock will use temporary buffers for all
91        // uses of the streams and zero out any non-regions from the data.
92
93        /** OUTPUT STREAM ATTRIBUTES **/
94
95        Add,
96
97        // An Add(K) attribute states that K items will be added to this stream after
98        // processing the final block.
99
100        RoundUpTo,
101
102        // A RoundUpTo(k) attribute indicates the final item count of this stream will
103        // be rounded up to the nearest multiple of k
104
105        /** INPUT/OUTPUT STREAM ATTRIBUTES **/
106
107        Misaligned,
108
109        // Assume that we cannot statically compute the alignment of this stream set and
110        // perform any operations accordingly
111
112        BlockSize, /// NOT DONE
113
114        // A BlockSize(K) attribute, where K=2^k for some value of k>=4 declares
115        // that the layout of stream data items within the corresponding input
116        // or output buffer is arranged in blocks of K items each.   In each
117        // block, the data buffer contains K items of the first stream in the
118        // set, followed by K items of the next stream in the set and so on,
119        // up to and including K items of the last stream in the set.
120
121        // (Note: this replaces the concept of swizzling and anticipates that
122        // the pipeline will take on the role of automatically inserting the
123        // swizzling code necessary).
124
125        ReverseRegionBegin, ReverseRegionEnd, /// NOT DONE
126
127        // Conceptually, reversing a stream S is simple: {S_1,...,S_n} -> {S_n,...,S_1}.
128        // However, this means all of the input data must be computed and stored prior to
129        // executing this kernel. In practice, this is unnecessary as in the context of
130        // text parsing, we're almost always searching for the true starting position of
131        // something ambigious after we've found its end position in some prior kernel.
132
133
134
135
136
137//        Here is a revised definition of SegmentedReverse:
138
139//        Given a stream of data bits S that is considered to be divided into
140//        segments, and a marker stream S having a one bit at the final position
141//        of each segment, the R = SegmentedReverse(S, M) when
142
143//        R_{i} = S_{l + (h - i)}
144//              where l = the maximum j such that j <= i and either j = 0 or M_{j-1} = 1
145//          and where h = the minimum j such that j >= i and either j = length(S) -  or M_j = 1
146//          (l and h are the low and high positions of the segment containing i)
147
148//        This is an invertible operation, so we can apply R to a kernel's input
149//        and then to its output to get a SegmentedReverse version of a kernel
150
151//        A kernel which computes segmented reverse is feasible, but seems complex
152//        to implement, and probably too slow.  I have played around with several
153//        ways of tackling it, no good method yet.
154
155//        If there are multiple segments within a block, we could instead use
156//        the following:
157
158//        BlockSegmentedReverse
159
160//        B_{i} = S_{L + (H - i)}
161//             where l = the maximum j such that j <= i and either j = 0 or M_{j-1} = 1
162//                   h = the minimum j such that j >= i and either j = length(S) -  or M_j = 1
163//                   L = l if l div BlockSize < h divBlockSize, otherwise (i div BlockSize) * BlockSize
164//                   H = h if l div BlockSize < h divBlockSize, otherwise L + BlockSize - 1
165
166//        An alternative way of looking at this is to eliminate all but the first
167//        and last marker positions within a block.
168
169//        The main point is that, if we apply B to inputs, perform the kernel
170//        and the apply B to outputs, we get the same result if we applied R
171//        (assuming that the kernel computations do not cross boundaries in M).
172
173//        This will be more efficient to compute, but still involves overhead
174//        for shifting and combining streams.
175
176//        I think it may be better to focus on the ReverseKernel adapter, that
177//        handles the reverse operations for both input and output.   This actually
178//        gives more flexibility, because, in a multiblock scenario, we can process
179//        the longest sequence of blocks such that both the beginning and end blocks
180//        have a one bit.   If there are any interior blocks with one bits, then
181//        they will be handled automatically without special shifting and masking.
182
183//        By the way, in my designs, I am wanting to have a callable Multiblock
184//        function, so that the Multiblock function for a Reversed Kernel just
185//        does a little work before calling the Multiblock function of the base kernel.
186//        That seems to have disappeared in the current system.
187
188
189        /** KERNEL ATTRIBUTES **/
190
191        SelectMinimumInputLength, /// NOT DONE
192
193        // If a kernel has multiple input streams and their final item count differs,
194        // a MultiBlock kernel will select the *minimum* input item count as it's
195        // principle item length and truncate the streams to fit
196
197        // NOTE: this is the default if a kernel does not have SelectMaximumInputLength
198        // set and no PrincipalInputStream was declared.
199
200        SelectMaximumInputLength, /// NOT DONE
201
202        // If a kernel has multiple input streams and their final item count differs,
203        // a MultiBlock kernel will select the *maximum* input item count as it's
204        // principle item length and zero-extend the streams accordingly.
205
206        CanTerminateEarly,
207
208        // Indicates that this kernel can call setTerminationSignal() to terminate the
209        // kernel prior to processing all of its input streams.
210
211    };
212
213    bool isAdd() const {
214        return mKind == KindId::Add;
215    }
216
217    bool isPrincipal() const {
218        return mKind == KindId::Principal;
219    }
220
221    bool isRoundUpTo() const {
222        return mKind == KindId::RoundUpTo;
223    }
224
225    bool isBlockSize() const {
226        return mKind == KindId::BlockSize;
227    }
228
229    unsigned amount() const {
230        return mAmount;
231    }
232
233    void setAmount(const unsigned amount) {
234        mAmount = amount;
235    }
236
237    bool operator == (const Attribute & other) const {
238        return mKind == other.mKind && mAmount == other.mAmount;
239    }
240
241    bool operator != (const Attribute & other) const {
242        return !(*this == other);
243    }
244
245protected:
246
247    KindId getKind() const {
248        return mKind;
249    }
250
251    friend struct AttributeSet;
252    friend struct Binding;
253    friend Attribute Add1();
254    friend Attribute Principal();
255    friend Attribute RoundUpTo(const unsigned);
256    friend Attribute LookAhead(const unsigned);
257    friend Attribute LookBehind(const unsigned);
258    friend Attribute Deferred();
259    friend Attribute Misaligned();
260    friend Attribute ConditionalRegionBegin();
261    friend Attribute ConditionalRegionEnd();
262    friend Attribute CanTerminateEarly();
263
264    Attribute(const KindId kind, const unsigned k) : mKind(kind), mAmount(k) { }
265
266private:
267
268    const KindId    mKind;
269    unsigned        mAmount;
270};
271
272struct AttributeSet : public std::vector<Attribute> {
273
274    using AttributeId = Attribute::KindId;
275
276    const AttributeSet & getAttributes() const {
277        return *this;
278    }
279
280    Attribute & findOrAddAttribute(const AttributeId id) {
281        if (Attribute * const attr = __findAttribute(id)) {
282            return *attr;
283        } else {
284            return addAttribute(Attribute(id, 0));
285        }
286    }
287
288    Attribute & findAttribute(const AttributeId id) const {
289        return *__findAttribute(id);
290    }
291
292    Attribute & addAttribute(Attribute attribute);
293
294    bool hasAttributes() const {
295        return !empty();
296    }
297
298    bool hasAttribute(const AttributeId id) const {
299        return __findAttribute(id) != nullptr;
300    }
301
302    AttributeSet() = default;
303
304    AttributeSet(Attribute && attr) { emplace_back(std::move(attr)); }
305
306    AttributeSet(std::initializer_list<Attribute> attrs) : std::vector<Attribute>(attrs) { }
307
308private:
309
310    Attribute * __findAttribute(const AttributeId id) const;
311
312};
313
314
315inline Attribute Add1() {
316    return Attribute(Attribute::KindId::Add, 1);
317}
318
319inline Attribute RoundUpTo(const unsigned k) {
320    return Attribute(Attribute::KindId::RoundUpTo, k);
321}
322
323inline Attribute Principal() {
324    return Attribute(Attribute::KindId::Principal, 0);
325}
326
327inline Attribute LookAhead(const unsigned k) {
328    return Attribute(Attribute::KindId::LookAhead, k);
329}
330
331inline Attribute LookBehind(const unsigned k) {
332    return Attribute(Attribute::KindId::LookBehind, k);
333}
334
335inline Attribute Deferred() {
336    return Attribute(Attribute::KindId::Deferred, 0);
337}
338
339inline Attribute Misaligned() {
340    return Attribute(Attribute::KindId::Misaligned, 0);
341}
342
343inline Attribute ConditionalRegionBegin() {
344    return Attribute(Attribute::KindId::ConditionalRegionBegin, 0);
345}
346
347inline Attribute ConditionalRegionEnd() {
348    return Attribute(Attribute::KindId::ConditionalRegionEnd, 0);
349}
350
351inline Attribute CanTerminateEarly() {
352    return Attribute(Attribute::KindId::CanTerminateEarly, 0);
353}
354
355}
356
357
358#endif // ATTRIBUTES_H
Note: See TracBrowser for help on using the repository browser.