source: icGREP/icgrep-devel/icgrep/kernels/attributes.h @ 5816

Last change on this file since 5816 was 5793, checked in by nmedfort, 21 months ago

Bug fix for pipeline: it was terminating too early when there was insufficient output space to process all of the input for a kernel.

File size: 13.0 KB
Line 
1#ifndef ATTRIBUTES_H
2#define ATTRIBUTES_H
3
4#include <vector>
5
6namespace kernel {
7
8struct Attribute {
9
10    enum class KindId {
11
12        /** INPUT STREAM ATTRIBUTES **/
13
14        LookAhead, /// NOT DONE
15
16        // A LookAhead(n) attribute on an input stream set S declares that the kernel
17        // looks ahead n positions in the input stream.  That is, processing of item
18        // S[i, j] may be defined in terms of S[i, j+n].
19
20        // Guarantee required: the pipeline compiler must ensure that, when
21        // the kernel is called with an available item count of N for the
22        // input stream, that the corresponding input data buffer has
23        // valid items in the buffer up to and including position N+n,
24        // and that the blocks containing the items from N-1 through N+n are
25        // linearly contiguous.  When the final doMultiBlock call is made,
26        // the pipeline compiler must ensure that the n items at lookahead
27        // positions are zero-initialized.
28
29        // (Note: this guarantee anticipates using a single buffer pointer
30        // for both normal stream access and lookahead access.   This avoids
31        // the cost of extra parameters per doMultiBlock call, but requires
32        // that the corresponding circular buffer have a "lookahead extension area"
33        // that holds a copy of the data at the physical start of buffer).
34
35        LookBehind, /// NOT DONE
36
37        // A LookBehind(n) attribute on an input stream S declares that the kernel
38        // requires access to input items up to n positions prior to the current
39        // processed item position.
40
41        // (Notes: this may allow more efficient advances within n (without saving state).
42        // However, a lookbehind extension area prior to the normal buffer base address
43        // is necessary, which must be initially zero-filled.)
44
45        // A LookBehind(n) attribute on an output stream S declares that the kernel
46        // requires access to up to n previously generated output items.
47        // (Example: lz4d lookbehind(65536)).
48
49        Principal,
50
51        // One input stream can be declared as the principal input buffer for a kernel.
52        // If a kernel has a principal input stream, when processing the final stride,
53        // a MultiBlockKernel assumes the item count of the principle is the correct
54        // one and zero extends / truncates all other input streams to match it.
55
56        Deferred,
57
58        // Normally, the processed item count of fixed rate streams is automatically
59        // updated by the MultiBlock kernel. However, some streams behave like Fixed
60        // rate streams (in that they will always eventually process a Fixed amount of
61        // data) but the kernel processes the data in unpredictable chunks. Rather than
62        // declaring those as Unknown or Bounded rates, marking their rate calculation
63        // as Deferred provides the pipeline with a stronger guarantee when it comes to
64        // buffer size calculations.
65
66        IndependentRegionBegin, IndependentRegionEnd, /// NOT DONE
67
68        // Some kernels can divide their processing into concrete non-overlapping regions
69        // between a beginning and ending position. This is a hard guarantee that regardless
70        // of the computations between the start of the stream and the beginning of the first
71        // independent region or between the *beginning* of any two independent regions, A,
72        // B, the calculations that occur prior to the beginning of B do not affect the
73        // calculations after it --- even if A is started at an arbitrary position with a
74        // zeroed-out kernel state.
75
76        // If a kernel K is processed simultaneously by two threads, K_0 and K_1, and K_1 is
77        // waiting K_0 to finish and update it's kernel state for K_1 to resume at, K_1 can
78        // compute what its state will be and begin processing before K_0 is finished. This
79        // requires a the pipeline to intervene and call an optimized "output-less" instance
80        // of the kernel prior to calling B.
81
82        ConditionalRegionBegin, ConditionalRegionEnd, /// NOT DONE
83
84        // Some kernels have clearly demarcated regions in which a MultiBlock kernel will
85        // produce useful outputs for only the inputs within those regions. This attribute
86        // instructs the kernel to "zero-fill" the output of any non-selected regions,
87        // skipping strides entirely whenever possible.
88
89        // If the same regions are also independent, we can avoid the overhead of "masking
90        // out" the input streams. Otherwise a MultiBlock will use temporary buffers for all
91        // uses of the streams and zero out any non-regions from the data.
92
93        /** OUTPUT STREAM ATTRIBUTES **/
94
95        Add,
96
97        // An Add(K) attribute states that K items will be added to this stream after
98        // processing the final block.
99
100        RoundUpTo,
101
102        // A RoundUpTo(k) attribute indicates the final item count of this stream will
103        // be rounded up to the nearest multiple of k
104
105        /** INPUT/OUTPUT STREAM ATTRIBUTES **/
106
107        Misaligned,
108
109        // Assume that we cannot statically compute the alignment of this stream set and
110        // perform any operations accordingly
111
112        BlockSize, /// NOT DONE
113
114        // A BlockSize(K) attribute, where K=2^k for some value of k>=4 declares
115        // that the layout of stream data items within the corresponding input
116        // or output buffer is arranged in blocks of K items each.   In each
117        // block, the data buffer contains K items of the first stream in the
118        // set, followed by K items of the next stream in the set and so on,
119        // up to and including K items of the last stream in the set.
120
121        // (Note: this replaces the concept of swizzling and anticipates that
122        // the pipeline will take on the role of automatically inserting the
123        // swizzling code necessary).
124
125        ReverseRegionBegin, ReverseRegionEnd, /// NOT DONE
126
127        // Conceptually, reversing a stream S is simple: {S_1,...,S_n} -> {S_n,...,S_1}.
128        // However, this means all of the input data must be computed and stored prior to
129        // executing this kernel. In practice, this is unnecessary as in the context of
130        // text parsing, we're almost always searching for the true starting position of
131        // something ambigious after we've found its end position in some prior kernel.
132
133
134
135
136
137//        Here is a revised definition of SegmentedReverse:
138
139//        Given a stream of data bits S that is considered to be divided into
140//        segments, and a marker stream S having a one bit at the final position
141//        of each segment, the R = SegmentedReverse(S, M) when
142
143//        R_{i} = S_{l + (h - i)}
144//              where l = the maximum j such that j <= i and either j = 0 or M_{j-1} = 1
145//          and where h = the minimum j such that j >= i and either j = length(S) -  or M_j = 1
146//          (l and h are the low and high positions of the segment containing i)
147
148//        This is an invertible operation, so we can apply R to a kernel's input
149//        and then to its output to get a SegmentedReverse version of a kernel
150
151//        A kernel which computes segmented reverse is feasible, but seems complex
152//        to implement, and probably too slow.  I have played around with several
153//        ways of tackling it, no good method yet.
154
155//        If there are multiple segments within a block, we could instead use
156//        the following:
157
158//        BlockSegmentedReverse
159
160//        B_{i} = S_{L + (H - i)}
161//             where l = the maximum j such that j <= i and either j = 0 or M_{j-1} = 1
162//                   h = the minimum j such that j >= i and either j = length(S) -  or M_j = 1
163//                   L = l if l div BlockSize < h divBlockSize, otherwise (i div BlockSize) * BlockSize
164//                   H = h if l div BlockSize < h divBlockSize, otherwise L + BlockSize - 1
165
166//        An alternative way of looking at this is to eliminate all but the first
167//        and last marker positions within a block.
168
169//        The main point is that, if we apply B to inputs, perform the kernel
170//        and the apply B to outputs, we get the same result if we applied R
171//        (assuming that the kernel computations do not cross boundaries in M).
172
173//        This will be more efficient to compute, but still involves overhead
174//        for shifting and combining streams.
175
176//        I think it may be better to focus on the ReverseKernel adapter, that
177//        handles the reverse operations for both input and output.   This actually
178//        gives more flexibility, because, in a multiblock scenario, we can process
179//        the longest sequence of blocks such that both the beginning and end blocks
180//        have a one bit.   If there are any interior blocks with one bits, then
181//        they will be handled automatically without special shifting and masking.
182
183//        By the way, in my designs, I am wanting to have a callable Multiblock
184//        function, so that the Multiblock function for a Reversed Kernel just
185//        does a little work before calling the Multiblock function of the base kernel.
186//        That seems to have disappeared in the current system.
187
188
189        /** KERNEL ATTRIBUTES **/
190
191        SelectMinimumInputLength, /// NOT DONE
192
193        // If a kernel has multiple input streams and their final item count differs,
194        // a MultiBlock kernel will select the *minimum* input item count as it's
195        // principle item length and truncate the streams to fit
196
197        // NOTE: this is the default if a kernel does not have SelectMaximumInputLength
198        // set and no PrincipalInputStream was declared.
199
200        SelectMaximumInputLength, /// NOT DONE
201
202        // If a kernel has multiple input streams and their final item count differs,
203        // a MultiBlock kernel will select the *maximum* input item count as it's
204        // principle item length and zero-extend the streams accordingly.
205
206    };
207
208    bool isAdd() const {
209        return mKind == KindId::Add;
210    }
211
212    bool isPrincipal() const {
213        return mKind == KindId::Principal;
214    }
215
216    bool isRoundUpTo() const {
217        return mKind == KindId::RoundUpTo;
218    }
219
220    bool isBlockSize() const {
221        return mKind == KindId::BlockSize;
222    }
223
224    unsigned amount() const {
225        return mAmount;
226    }
227
228    void setAmount(const unsigned amount) {
229        mAmount = amount;
230    }
231
232    bool operator == (const Attribute & other) const {
233        return mKind == other.mKind && mAmount == other.mAmount;
234    }
235
236    bool operator != (const Attribute & other) const {
237        return !(*this == other);
238    }
239
240protected:
241
242    KindId getKind() const {
243        return mKind;
244    }
245
246    friend struct AttributeSet;
247    friend struct Binding;
248    friend Attribute Add1();
249    friend Attribute Principal();
250    friend Attribute RoundUpTo(const unsigned);
251    friend Attribute LookAhead(const unsigned);
252    friend Attribute LookBehind(const unsigned);
253    friend Attribute Deferred();
254    friend Attribute Misaligned();
255    friend Attribute ConditionalRegionBegin();
256    friend Attribute ConditionalRegionEnd();
257
258    Attribute(const KindId kind, const unsigned k) : mKind(kind), mAmount(k) { }
259
260private:
261
262    const KindId    mKind;
263    unsigned        mAmount;
264};
265
266struct AttributeSet : public std::vector<Attribute> {
267
268    using AttributeId = Attribute::KindId;
269
270    const AttributeSet & getAttributes() const {
271        return *this;
272    }
273
274    Attribute & findOrAddAttribute(const AttributeId id) {
275        if (Attribute * const attr = __findAttribute(id)) {
276            return *attr;
277        } else {
278            return addAttribute(Attribute(id, 0));
279        }
280    }
281
282    Attribute & findAttribute(const AttributeId id) const {
283        return *__findAttribute(id);
284    }
285
286    Attribute & addAttribute(Attribute attribute);
287
288    bool hasAttributes() const {
289        return !empty();
290    }
291
292    bool hasAttribute(const AttributeId id) const {
293        return __findAttribute(id) != nullptr;
294    }
295
296    AttributeSet() = default;
297
298    AttributeSet(Attribute && attr) { emplace_back(std::move(attr)); }
299
300    AttributeSet(std::initializer_list<Attribute> attrs) : std::vector<Attribute>(attrs) { }
301
302private:
303
304    Attribute * __findAttribute(const AttributeId id) const;
305
306};
307
308
309inline Attribute Add1() {
310    return Attribute(Attribute::KindId::Add, 1);
311}
312
313inline Attribute RoundUpTo(const unsigned k) {
314    return Attribute(Attribute::KindId::RoundUpTo, k);
315}
316
317inline Attribute Principal() {
318    return Attribute(Attribute::KindId::Principal, 0);
319}
320
321inline Attribute LookAhead(const unsigned k) {
322    return Attribute(Attribute::KindId::LookAhead, k);
323}
324
325inline Attribute LookBehind(const unsigned k) {
326    return Attribute(Attribute::KindId::LookBehind, k);
327}
328
329inline Attribute Deferred() {
330    return Attribute(Attribute::KindId::Deferred, 0);
331}
332
333inline Attribute Misaligned() {
334    return Attribute(Attribute::KindId::Misaligned, 0);
335}
336
337inline Attribute ConditionalRegionBegin() {
338    return Attribute(Attribute::KindId::ConditionalRegionBegin, 0);
339}
340
341inline Attribute ConditionalRegionEnd() {
342    return Attribute(Attribute::KindId::ConditionalRegionEnd, 0);
343}
344
345}
346
347
348#endif // ATTRIBUTES_H
Note: See TracBrowser for help on using the repository browser.