source: icGREP/icgrep-devel/icgrep/kernels/attributes.h @ 5941

Last change on this file since 5941 was 5941, checked in by xwa163, 14 months ago
  1. Add attributes to disable some features of multiblock kernel
  2. Fix bug for lz4d new approach in large data, pass all test cases
  3. Disable lz4d related test cases for old approach
File size: 15.2 KB
Line 
1#ifndef ATTRIBUTES_H
2#define ATTRIBUTES_H
3
4#include <vector>
5
6namespace kernel {
7
8struct Attribute {
9
10    enum class KindId {
11
12        /** INPUT STREAM ATTRIBUTES **/
13
14        LookAhead, /// NOT DONE
15
16        // A LookAhead(n) attribute on an input stream set S declares that the kernel
17        // looks ahead n positions in the input stream.  That is, processing of item
18        // S[i, j] may be defined in terms of S[i, j+n].
19
20        // Guarantee required: the pipeline compiler must ensure that, when
21        // the kernel is called with an available item count of N for the
22        // input stream, that the corresponding input data buffer has
23        // valid items in the buffer up to and including position N+n,
24        // and that the blocks containing the items from N-1 through N+n are
25        // linearly contiguous.  When the final doMultiBlock call is made,
26        // the pipeline compiler must ensure that the n items at lookahead
27        // positions are zero-initialized.
28
29        // (Note: this guarantee anticipates using a single buffer pointer
30        // for both normal stream access and lookahead access.   This avoids
31        // the cost of extra parameters per doMultiBlock call, but requires
32        // that the corresponding circular buffer have a "lookahead extension area"
33        // that holds a copy of the data at the physical start of buffer).
34
35        LookBehind, /// NOT DONE
36
37        // A LookBehind(n) attribute on an input stream S declares that the kernel
38        // requires access to input items up to n positions prior to the current
39        // processed item position.
40
41        // (Notes: this may allow more efficient advances within n (without saving state).
42        // However, a lookbehind extension area prior to the normal buffer base address
43        // is necessary, which must be initially zero-filled.)
44
45        // A LookBehind(n) attribute on an output stream S declares that the kernel
46        // requires access to up to n previously generated output items.
47        // (Example: lz4d lookbehind(65536)).
48
49        Principal,
50
51        // One input stream can be declared as the principal input buffer for a kernel.
52        // If a kernel has a principal input stream, when processing the final stride,
53        // a MultiBlockKernel assumes the item count of the principle is the correct
54        // one and zero extends / truncates all other input streams to match it.
55
56        Deferred,
57
58        // Normally, the processed item count of fixed rate streams is automatically
59        // updated by the MultiBlock kernel. However, some streams behave like Fixed
60        // rate streams (in that they will always eventually process a Fixed amount of
61        // data) but the kernel processes the data in unpredictable chunks. Rather than
62        // declaring those as Unknown or Bounded rates, marking their rate calculation
63        // as Deferred provides the pipeline with a stronger guarantee when it comes to
64        // buffer size calculations.
65
66        IndependentRegionBegin, IndependentRegionEnd, /// NOT DONE
67
68        // Some kernels can divide their processing into concrete non-overlapping regions
69        // between a beginning and ending position. This is a hard guarantee that regardless
70        // of the computations between the start of the stream and the beginning of the first
71        // independent region or between the *beginning* of any two independent regions, A,
72        // B, the calculations that occur prior to the beginning of B do not affect the
73        // calculations after it --- even if A is started at an arbitrary position with a
74        // zeroed-out kernel state.
75
76        // If a kernel K is processed simultaneously by two threads, K_0 and K_1, and K_1 is
77        // waiting K_0 to finish and update it's kernel state for K_1 to resume at, K_1 can
78        // compute what its state will be and begin processing before K_0 is finished. This
79        // requires a the pipeline to intervene and call an optimized "output-less" instance
80        // of the kernel prior to calling B.
81
82        ConditionalRegionBegin, ConditionalRegionEnd, /// NOT DONE
83
84        // Some kernels have clearly demarcated regions in which a MultiBlock kernel will
85        // produce useful outputs for only the inputs within those regions. This attribute
86        // instructs the kernel to "zero-fill" the output of any non-selected regions,
87        // skipping strides entirely whenever possible.
88
89        // If the same regions are also independent, we can avoid the overhead of "masking
90        // out" the input streams. Otherwise a MultiBlock will use temporary buffers for all
91        // uses of the streams and zero out any non-regions from the data.
92
93        AlwaysConsume,
94
95        // Always consume the input (i.e., use the lowerbound to determine whether to there
96        // is enough data to execute a stride rather than the upper bound.)
97
98        DisableTemporaryBuffer,
99
100        // Workaround attribute, force disable temporary buffer
101
102        DisableSufficientChecking,
103
104        // Workaround attribute, force disable sufficient data or sufficient space checking in pipelilne, always assume that
105        // the data or space is sufficient
106
107        DisableAvailableItemCountAdjustment,
108
109        // Workaround attribute, keep original availableItemCount in multiblock kernel (do not replace it by linear available item count)
110
111        /** OUTPUT STREAM ATTRIBUTES **/
112
113        Add,
114
115        // An Add(K) attribute states that K items will be added to this stream after
116        // processing the final block.
117
118        RoundUpTo,
119
120        // A RoundUpTo(k) attribute indicates the final item count of this stream will
121        // be rounded up to the nearest multiple of k
122
123        /** INPUT/OUTPUT STREAM ATTRIBUTES **/
124
125        Misaligned,
126
127        // Assume that we cannot statically compute the alignment of this stream set and
128        // perform any operations accordingly
129
130        BlockSize, /// NOT DONE
131
132        // A BlockSize(K) attribute, where K=2^k for some value of k>=4 declares
133        // that the layout of stream data items within the corresponding input
134        // or output buffer is arranged in blocks of K items each.   In each
135        // block, the data buffer contains K items of the first stream in the
136        // set, followed by K items of the next stream in the set and so on,
137        // up to and including K items of the last stream in the set.
138
139        // (Note: this replaces the concept of swizzling and anticipates that
140        // the pipeline will take on the role of automatically inserting the
141        // swizzling code necessary).
142
143        ReverseRegionBegin, ReverseRegionEnd, /// NOT DONE
144
145        // Conceptually, reversing a stream S is simple: {S_1,...,S_n} -> {S_n,...,S_1}.
146        // However, this means all of the input data must be computed and stored prior to
147        // executing this kernel. In practice, this is unnecessary as in the context of
148        // text parsing, we're almost always searching for the true starting position of
149        // something ambigious after we've found its end position in some prior kernel.
150
151
152        Swizzled,
153
154        // Whether the input streamset is in swizzled form
155
156//        Here is a revised definition of SegmentedReverse:
157
158//        Given a stream of data bits S that is considered to be divided into
159//        segments, and a marker stream S having a one bit at the final position
160//        of each segment, the R = SegmentedReverse(S, M) when
161
162//        R_{i} = S_{l + (h - i)}
163//              where l = the maximum j such that j <= i and either j = 0 or M_{j-1} = 1
164//          and where h = the minimum j such that j >= i and either j = length(S) -  or M_j = 1
165//          (l and h are the low and high positions of the segment containing i)
166
167//        This is an invertible operation, so we can apply R to a kernel's input
168//        and then to its output to get a SegmentedReverse version of a kernel
169
170//        A kernel which computes segmented reverse is feasible, but seems complex
171//        to implement, and probably too slow.  I have played around with several
172//        ways of tackling it, no good method yet.
173
174//        If there are multiple segments within a block, we could instead use
175//        the following:
176
177//        BlockSegmentedReverse
178
179//        B_{i} = S_{L + (H - i)}
180//             where l = the maximum j such that j <= i and either j = 0 or M_{j-1} = 1
181//                   h = the minimum j such that j >= i and either j = length(S) -  or M_j = 1
182//                   L = l if l div BlockSize < h divBlockSize, otherwise (i div BlockSize) * BlockSize
183//                   H = h if l div BlockSize < h divBlockSize, otherwise L + BlockSize - 1
184
185//        An alternative way of looking at this is to eliminate all but the first
186//        and last marker positions within a block.
187
188//        The main point is that, if we apply B to inputs, perform the kernel
189//        and the apply B to outputs, we get the same result if we applied R
190//        (assuming that the kernel computations do not cross boundaries in M).
191
192//        This will be more efficient to compute, but still involves overhead
193//        for shifting and combining streams.
194
195//        I think it may be better to focus on the ReverseKernel adapter, that
196//        handles the reverse operations for both input and output.   This actually
197//        gives more flexibility, because, in a multiblock scenario, we can process
198//        the longest sequence of blocks such that both the beginning and end blocks
199//        have a one bit.   If there are any interior blocks with one bits, then
200//        they will be handled automatically without special shifting and masking.
201
202//        By the way, in my designs, I am wanting to have a callable Multiblock
203//        function, so that the Multiblock function for a Reversed Kernel just
204//        does a little work before calling the Multiblock function of the base kernel.
205//        That seems to have disappeared in the current system.
206
207
208        /** KERNEL ATTRIBUTES **/
209
210        SelectMinimumInputLength, /// NOT DONE
211
212        // If a kernel has multiple input streams and their final item count differs,
213        // a MultiBlock kernel will select the *minimum* input item count as it's
214        // principle item length and truncate the streams to fit
215
216        // NOTE: this is the default if a kernel does not have SelectMaximumInputLength
217        // set and no PrincipalInputStream was declared.
218
219        SelectMaximumInputLength, /// NOT DONE
220
221        // If a kernel has multiple input streams and their final item count differs,
222        // a MultiBlock kernel will select the *maximum* input item count as it's
223        // principle item length and zero-extend the streams accordingly.
224
225        CanTerminateEarly,
226
227        // Indicates that this kernel can call setTerminationSignal() to terminate the
228        // kernel prior to processing all of its input streams.
229
230        MustExplicitlyTerminate,
231
232        // This kernel terminates *only* after the programmer calls setTerminationSignal()
233        // and will be called even when there is no new input data when the prior kernels
234        // in the pipeline have also terminated.
235
236    };
237
238    KindId getKind() const {
239        return mKind;
240    }
241
242    bool isAdd() const {
243        return mKind == KindId::Add;
244    }
245
246    bool isPrincipal() const {
247        return mKind == KindId::Principal;
248    }
249
250    bool isRoundUpTo() const {
251        return mKind == KindId::RoundUpTo;
252    }
253
254    bool isBlockSize() const {
255        return mKind == KindId::BlockSize;
256    }
257
258    unsigned amount() const {
259        return mAmount;
260    }
261
262    void setAmount(const unsigned amount) {
263        mAmount = amount;
264    }
265
266    bool operator == (const Attribute & other) const {
267        return mKind == other.mKind && mAmount == other.mAmount;
268    }
269
270    bool operator != (const Attribute & other) const {
271        return !(*this == other);
272    }
273
274protected:
275
276    friend struct AttributeSet;
277    friend struct Binding;
278    friend Attribute Add1();
279    friend Attribute Principal();
280    friend Attribute AlwaysConsume();
281    friend Attribute DisableTemporaryBuffer();
282    friend Attribute DisableSufficientChecking();
283    friend Attribute DisableAvailableItemCountAdjustment();
284    friend Attribute RoundUpTo(const unsigned);
285    friend Attribute LookAhead(const unsigned);
286    friend Attribute LookBehind(const unsigned);
287    friend Attribute Deferred();
288    friend Attribute Misaligned();
289    friend Attribute ConditionalRegionBegin();
290    friend Attribute ConditionalRegionEnd();
291    friend Attribute Swizzled();
292    friend Attribute CanTerminateEarly();
293    friend Attribute MustExplicitlyTerminate();
294
295    Attribute(const KindId kind, const unsigned k) : mKind(kind), mAmount(k) { }
296
297private:
298
299    const KindId    mKind;
300    unsigned        mAmount;
301};
302
303struct AttributeSet : public std::vector<Attribute> {
304
305    using AttributeId = Attribute::KindId;
306
307    const AttributeSet & getAttributes() const {
308        return *this;
309    }
310
311    Attribute & findOrAddAttribute(const AttributeId id) {
312        if (Attribute * const attr = __findAttribute(id)) {
313            return *attr;
314        } else {
315            return addAttribute(Attribute(id, 0));
316        }
317    }
318
319    Attribute & findAttribute(const AttributeId id) const {
320        return *__findAttribute(id);
321    }
322
323    Attribute & addAttribute(Attribute attribute);
324
325    bool hasAttributes() const {
326        return !empty();
327    }
328
329    bool hasAttribute(const AttributeId id) const {
330        return __findAttribute(id) != nullptr;
331    }
332
333    AttributeSet() = default;
334
335    AttributeSet(Attribute && attr) { emplace_back(std::move(attr)); }
336
337    AttributeSet(std::initializer_list<Attribute> attrs) : std::vector<Attribute>(attrs) { }
338
339private:
340
341    Attribute * __findAttribute(const AttributeId id) const;
342
343};
344
345
346inline Attribute Add1() {
347    return Attribute(Attribute::KindId::Add, 1);
348}
349
350inline Attribute RoundUpTo(const unsigned k) {
351    return Attribute(Attribute::KindId::RoundUpTo, k);
352}
353
354inline Attribute AlwaysConsume() {
355    return Attribute(Attribute::KindId::AlwaysConsume, 0);
356}
357
358inline Attribute DisableTemporaryBuffer() {
359    return Attribute(Attribute::KindId::DisableTemporaryBuffer, 0);
360}
361
362inline Attribute DisableAvailableItemCountAdjustment() {
363    return Attribute(Attribute::KindId::DisableAvailableItemCountAdjustment, 0);
364}
365
366inline Attribute DisableSufficientChecking() {
367    return Attribute(Attribute::KindId::DisableSufficientChecking, 0);
368}
369
370inline Attribute Principal() {
371    return Attribute(Attribute::KindId::Principal, 0);
372}
373
374inline Attribute LookAhead(const unsigned k) {
375    return Attribute(Attribute::KindId::LookAhead, k);
376}
377
378inline Attribute LookBehind(const unsigned k) {
379    return Attribute(Attribute::KindId::LookBehind, k);
380}
381
382inline Attribute Deferred() {
383    return Attribute(Attribute::KindId::Deferred, 0);
384}
385
386inline Attribute Misaligned() {
387    return Attribute(Attribute::KindId::Misaligned, 0);
388}
389
390inline Attribute ConditionalRegionBegin() {
391    return Attribute(Attribute::KindId::ConditionalRegionBegin, 0);
392}
393
394inline Attribute ConditionalRegionEnd() {
395    return Attribute(Attribute::KindId::ConditionalRegionEnd, 0);
396}
397
398inline Attribute CanTerminateEarly() {
399    return Attribute(Attribute::KindId::CanTerminateEarly, 0);
400}
401
402inline Attribute MustExplicitlyTerminate() {
403    return Attribute(Attribute::KindId::MustExplicitlyTerminate, 0);
404}
405
406inline Attribute Swizzled() {
407    return Attribute(Attribute::KindId::Swizzled, 0);
408}
409
410}
411
412#endif // ATTRIBUTES_H
Note: See TracBrowser for help on using the repository browser.