source: icGREP/icgrep-devel/icgrep/kernels/attributes.h @ 5883

Last change on this file since 5883 was 5883, checked in by nmedfort, 14 months ago

MustExplicitlyTerminate? attribute for Xiangyu

File size: 14.2 KB
Line 
1#ifndef ATTRIBUTES_H
2#define ATTRIBUTES_H
3
4#include <vector>
5
6namespace kernel {
7
8struct Attribute {
9
10    enum class KindId {
11
12        /** INPUT STREAM ATTRIBUTES **/
13
14        LookAhead, /// NOT DONE
15
16        // A LookAhead(n) attribute on an input stream set S declares that the kernel
17        // looks ahead n positions in the input stream.  That is, processing of item
18        // S[i, j] may be defined in terms of S[i, j+n].
19
20        // Guarantee required: the pipeline compiler must ensure that, when
21        // the kernel is called with an available item count of N for the
22        // input stream, that the corresponding input data buffer has
23        // valid items in the buffer up to and including position N+n,
24        // and that the blocks containing the items from N-1 through N+n are
25        // linearly contiguous.  When the final doMultiBlock call is made,
26        // the pipeline compiler must ensure that the n items at lookahead
27        // positions are zero-initialized.
28
29        // (Note: this guarantee anticipates using a single buffer pointer
30        // for both normal stream access and lookahead access.   This avoids
31        // the cost of extra parameters per doMultiBlock call, but requires
32        // that the corresponding circular buffer have a "lookahead extension area"
33        // that holds a copy of the data at the physical start of buffer).
34
35        LookBehind, /// NOT DONE
36
37        // A LookBehind(n) attribute on an input stream S declares that the kernel
38        // requires access to input items up to n positions prior to the current
39        // processed item position.
40
41        // (Notes: this may allow more efficient advances within n (without saving state).
42        // However, a lookbehind extension area prior to the normal buffer base address
43        // is necessary, which must be initially zero-filled.)
44
45        // A LookBehind(n) attribute on an output stream S declares that the kernel
46        // requires access to up to n previously generated output items.
47        // (Example: lz4d lookbehind(65536)).
48
49        Principal,
50
51        // One input stream can be declared as the principal input buffer for a kernel.
52        // If a kernel has a principal input stream, when processing the final stride,
53        // a MultiBlockKernel assumes the item count of the principle is the correct
54        // one and zero extends / truncates all other input streams to match it.
55
56        Deferred,
57
58        // Normally, the processed item count of fixed rate streams is automatically
59        // updated by the MultiBlock kernel. However, some streams behave like Fixed
60        // rate streams (in that they will always eventually process a Fixed amount of
61        // data) but the kernel processes the data in unpredictable chunks. Rather than
62        // declaring those as Unknown or Bounded rates, marking their rate calculation
63        // as Deferred provides the pipeline with a stronger guarantee when it comes to
64        // buffer size calculations.
65
66        IndependentRegionBegin, IndependentRegionEnd, /// NOT DONE
67
68        // Some kernels can divide their processing into concrete non-overlapping regions
69        // between a beginning and ending position. This is a hard guarantee that regardless
70        // of the computations between the start of the stream and the beginning of the first
71        // independent region or between the *beginning* of any two independent regions, A,
72        // B, the calculations that occur prior to the beginning of B do not affect the
73        // calculations after it --- even if A is started at an arbitrary position with a
74        // zeroed-out kernel state.
75
76        // If a kernel K is processed simultaneously by two threads, K_0 and K_1, and K_1 is
77        // waiting K_0 to finish and update it's kernel state for K_1 to resume at, K_1 can
78        // compute what its state will be and begin processing before K_0 is finished. This
79        // requires a the pipeline to intervene and call an optimized "output-less" instance
80        // of the kernel prior to calling B.
81
82        ConditionalRegionBegin, ConditionalRegionEnd, /// NOT DONE
83
84        // Some kernels have clearly demarcated regions in which a MultiBlock kernel will
85        // produce useful outputs for only the inputs within those regions. This attribute
86        // instructs the kernel to "zero-fill" the output of any non-selected regions,
87        // skipping strides entirely whenever possible.
88
89        // If the same regions are also independent, we can avoid the overhead of "masking
90        // out" the input streams. Otherwise a MultiBlock will use temporary buffers for all
91        // uses of the streams and zero out any non-regions from the data.
92
93        AlwaysConsume,
94
95        // Always consume the input (i.e., use the lowerbound to determine whether to there
96        // is enough data to execute a stride rather than the upper bound.)
97
98        /** OUTPUT STREAM ATTRIBUTES **/
99
100        Add,
101
102        // An Add(K) attribute states that K items will be added to this stream after
103        // processing the final block.
104
105        RoundUpTo,
106
107        // A RoundUpTo(k) attribute indicates the final item count of this stream will
108        // be rounded up to the nearest multiple of k
109
110        /** INPUT/OUTPUT STREAM ATTRIBUTES **/
111
112        Misaligned,
113
114        // Assume that we cannot statically compute the alignment of this stream set and
115        // perform any operations accordingly
116
117        BlockSize, /// NOT DONE
118
119        // A BlockSize(K) attribute, where K=2^k for some value of k>=4 declares
120        // that the layout of stream data items within the corresponding input
121        // or output buffer is arranged in blocks of K items each.   In each
122        // block, the data buffer contains K items of the first stream in the
123        // set, followed by K items of the next stream in the set and so on,
124        // up to and including K items of the last stream in the set.
125
126        // (Note: this replaces the concept of swizzling and anticipates that
127        // the pipeline will take on the role of automatically inserting the
128        // swizzling code necessary).
129
130        ReverseRegionBegin, ReverseRegionEnd, /// NOT DONE
131
132        // Conceptually, reversing a stream S is simple: {S_1,...,S_n} -> {S_n,...,S_1}.
133        // However, this means all of the input data must be computed and stored prior to
134        // executing this kernel. In practice, this is unnecessary as in the context of
135        // text parsing, we're almost always searching for the true starting position of
136        // something ambigious after we've found its end position in some prior kernel.
137
138
139        Swizzled,
140
141        // Whether the input streamset is in swizzled form
142
143
144//        Here is a revised definition of SegmentedReverse:
145
146//        Given a stream of data bits S that is considered to be divided into
147//        segments, and a marker stream S having a one bit at the final position
148//        of each segment, the R = SegmentedReverse(S, M) when
149
150//        R_{i} = S_{l + (h - i)}
151//              where l = the maximum j such that j <= i and either j = 0 or M_{j-1} = 1
152//          and where h = the minimum j such that j >= i and either j = length(S) -  or M_j = 1
153//          (l and h are the low and high positions of the segment containing i)
154
155//        This is an invertible operation, so we can apply R to a kernel's input
156//        and then to its output to get a SegmentedReverse version of a kernel
157
158//        A kernel which computes segmented reverse is feasible, but seems complex
159//        to implement, and probably too slow.  I have played around with several
160//        ways of tackling it, no good method yet.
161
162//        If there are multiple segments within a block, we could instead use
163//        the following:
164
165//        BlockSegmentedReverse
166
167//        B_{i} = S_{L + (H - i)}
168//             where l = the maximum j such that j <= i and either j = 0 or M_{j-1} = 1
169//                   h = the minimum j such that j >= i and either j = length(S) -  or M_j = 1
170//                   L = l if l div BlockSize < h divBlockSize, otherwise (i div BlockSize) * BlockSize
171//                   H = h if l div BlockSize < h divBlockSize, otherwise L + BlockSize - 1
172
173//        An alternative way of looking at this is to eliminate all but the first
174//        and last marker positions within a block.
175
176//        The main point is that, if we apply B to inputs, perform the kernel
177//        and the apply B to outputs, we get the same result if we applied R
178//        (assuming that the kernel computations do not cross boundaries in M).
179
180//        This will be more efficient to compute, but still involves overhead
181//        for shifting and combining streams.
182
183//        I think it may be better to focus on the ReverseKernel adapter, that
184//        handles the reverse operations for both input and output.   This actually
185//        gives more flexibility, because, in a multiblock scenario, we can process
186//        the longest sequence of blocks such that both the beginning and end blocks
187//        have a one bit.   If there are any interior blocks with one bits, then
188//        they will be handled automatically without special shifting and masking.
189
190//        By the way, in my designs, I am wanting to have a callable Multiblock
191//        function, so that the Multiblock function for a Reversed Kernel just
192//        does a little work before calling the Multiblock function of the base kernel.
193//        That seems to have disappeared in the current system.
194
195
196        /** KERNEL ATTRIBUTES **/
197
198        SelectMinimumInputLength, /// NOT DONE
199
200        // If a kernel has multiple input streams and their final item count differs,
201        // a MultiBlock kernel will select the *minimum* input item count as it's
202        // principle item length and truncate the streams to fit
203
204        // NOTE: this is the default if a kernel does not have SelectMaximumInputLength
205        // set and no PrincipalInputStream was declared.
206
207        SelectMaximumInputLength, /// NOT DONE
208
209        // If a kernel has multiple input streams and their final item count differs,
210        // a MultiBlock kernel will select the *maximum* input item count as it's
211        // principle item length and zero-extend the streams accordingly.
212
213        CanTerminateEarly,
214
215        // Indicates that this kernel can call setTerminationSignal() to terminate the
216        // kernel prior to processing all of its input streams.
217
218        MustExplicitlyTerminate,
219
220        // This kernel terminates *only* after the programmer calls setTerminationSignal()
221        // and will be called even when there is no new input data when the prior kernels
222        // in the pipeline have also terminated.
223
224    };
225
226    KindId getKind() const {
227        return mKind;
228    }
229
230    bool isAdd() const {
231        return mKind == KindId::Add;
232    }
233
234    bool isPrincipal() const {
235        return mKind == KindId::Principal;
236    }
237
238    bool isRoundUpTo() const {
239        return mKind == KindId::RoundUpTo;
240    }
241
242    bool isBlockSize() const {
243        return mKind == KindId::BlockSize;
244    }
245
246    unsigned amount() const {
247        return mAmount;
248    }
249
250    void setAmount(const unsigned amount) {
251        mAmount = amount;
252    }
253
254    bool operator == (const Attribute & other) const {
255        return mKind == other.mKind && mAmount == other.mAmount;
256    }
257
258    bool operator != (const Attribute & other) const {
259        return !(*this == other);
260    }
261
262protected:
263
264    friend struct AttributeSet;
265    friend struct Binding;
266    friend Attribute Add1();
267    friend Attribute Principal();
268    friend Attribute AlwaysConsume();
269    friend Attribute RoundUpTo(const unsigned);
270    friend Attribute LookAhead(const unsigned);
271    friend Attribute LookBehind(const unsigned);
272    friend Attribute Deferred();
273    friend Attribute Misaligned();
274    friend Attribute ConditionalRegionBegin();
275    friend Attribute ConditionalRegionEnd();
276    friend Attribute Swizzled();
277    friend Attribute CanTerminateEarly();
278    friend Attribute MustExplicitlyTerminate();
279
280    Attribute(const KindId kind, const unsigned k) : mKind(kind), mAmount(k) { }
281
282private:
283
284    const KindId    mKind;
285    unsigned        mAmount;
286};
287
288struct AttributeSet : public std::vector<Attribute> {
289
290    using AttributeId = Attribute::KindId;
291
292    const AttributeSet & getAttributes() const {
293        return *this;
294    }
295
296    Attribute & findOrAddAttribute(const AttributeId id) {
297        if (Attribute * const attr = __findAttribute(id)) {
298            return *attr;
299        } else {
300            return addAttribute(Attribute(id, 0));
301        }
302    }
303
304    Attribute & findAttribute(const AttributeId id) const {
305        return *__findAttribute(id);
306    }
307
308    Attribute & addAttribute(Attribute attribute);
309
310    bool hasAttributes() const {
311        return !empty();
312    }
313
314    bool hasAttribute(const AttributeId id) const {
315        return __findAttribute(id) != nullptr;
316    }
317
318    AttributeSet() = default;
319
320    AttributeSet(Attribute && attr) { emplace_back(std::move(attr)); }
321
322    AttributeSet(std::initializer_list<Attribute> attrs) : std::vector<Attribute>(attrs) { }
323
324private:
325
326    Attribute * __findAttribute(const AttributeId id) const;
327
328};
329
330
331inline Attribute Add1() {
332    return Attribute(Attribute::KindId::Add, 1);
333}
334
335inline Attribute RoundUpTo(const unsigned k) {
336    return Attribute(Attribute::KindId::RoundUpTo, k);
337}
338
339inline Attribute AlwaysConsume() {
340    return Attribute(Attribute::KindId::AlwaysConsume, 0);
341}
342
343inline Attribute Principal() {
344    return Attribute(Attribute::KindId::Principal, 0);
345}
346
347inline Attribute LookAhead(const unsigned k) {
348    return Attribute(Attribute::KindId::LookAhead, k);
349}
350
351inline Attribute LookBehind(const unsigned k) {
352    return Attribute(Attribute::KindId::LookBehind, k);
353}
354
355inline Attribute Deferred() {
356    return Attribute(Attribute::KindId::Deferred, 0);
357}
358
359inline Attribute Misaligned() {
360    return Attribute(Attribute::KindId::Misaligned, 0);
361}
362
363inline Attribute ConditionalRegionBegin() {
364    return Attribute(Attribute::KindId::ConditionalRegionBegin, 0);
365}
366
367inline Attribute ConditionalRegionEnd() {
368    return Attribute(Attribute::KindId::ConditionalRegionEnd, 0);
369}
370
371inline Attribute CanTerminateEarly() {
372    return Attribute(Attribute::KindId::CanTerminateEarly, 0);
373}
374
375inline Attribute MustExplicitlyTerminate() {
376    return Attribute(Attribute::KindId::MustExplicitlyTerminate, 0);
377}
378
379inline Attribute Swizzled() {
380    return Attribute(Attribute::KindId::Swizzled, 0);
381}
382
383}
384
385#endif // ATTRIBUTES_H
Note: See TracBrowser for help on using the repository browser.