source: icGREP/icgrep-devel/icgrep/kernels/attributes.h @ 5948

Last change on this file since 5948 was 5948, checked in by xwa163, 13 months ago
  1. Remove legacy kernels and codes for lz4
  2. Remove old approach for lz4 decoder
  3. Fixed some bugs of lz4 decoder new approach in large file by adding workaround attribute
  4. Add related test cases
File size: 15.5 KB
Line 
1#ifndef ATTRIBUTES_H
2#define ATTRIBUTES_H
3
4#include <vector>
5
6namespace kernel {
7
8struct Attribute {
9
10    enum class KindId {
11
12        /** INPUT STREAM ATTRIBUTES **/
13
14        LookAhead, /// NOT DONE
15
16        // A LookAhead(n) attribute on an input stream set S declares that the kernel
17        // looks ahead n positions in the input stream.  That is, processing of item
18        // S[i, j] may be defined in terms of S[i, j+n].
19
20        // Guarantee required: the pipeline compiler must ensure that, when
21        // the kernel is called with an available item count of N for the
22        // input stream, that the corresponding input data buffer has
23        // valid items in the buffer up to and including position N+n,
24        // and that the blocks containing the items from N-1 through N+n are
25        // linearly contiguous.  When the final doMultiBlock call is made,
26        // the pipeline compiler must ensure that the n items at lookahead
27        // positions are zero-initialized.
28
29        // (Note: this guarantee anticipates using a single buffer pointer
30        // for both normal stream access and lookahead access.   This avoids
31        // the cost of extra parameters per doMultiBlock call, but requires
32        // that the corresponding circular buffer have a "lookahead extension area"
33        // that holds a copy of the data at the physical start of buffer).
34
35        LookBehind, /// NOT DONE
36
37        // A LookBehind(n) attribute on an input stream S declares that the kernel
38        // requires access to input items up to n positions prior to the current
39        // processed item position.
40
41        // (Notes: this may allow more efficient advances within n (without saving state).
42        // However, a lookbehind extension area prior to the normal buffer base address
43        // is necessary, which must be initially zero-filled.)
44
45        // A LookBehind(n) attribute on an output stream S declares that the kernel
46        // requires access to up to n previously generated output items.
47        // (Example: lz4d lookbehind(65536)).
48
49        Principal,
50
51        // One input stream can be declared as the principal input buffer for a kernel.
52        // If a kernel has a principal input stream, when processing the final stride,
53        // a MultiBlockKernel assumes the item count of the principle is the correct
54        // one and zero extends / truncates all other input streams to match it.
55
56        Deferred,
57
58        // Normally, the processed item count of fixed rate streams is automatically
59        // updated by the MultiBlock kernel. However, some streams behave like Fixed
60        // rate streams (in that they will always eventually process a Fixed amount of
61        // data) but the kernel processes the data in unpredictable chunks. Rather than
62        // declaring those as Unknown or Bounded rates, marking their rate calculation
63        // as Deferred provides the pipeline with a stronger guarantee when it comes to
64        // buffer size calculations.
65
66        IndependentRegionBegin, IndependentRegionEnd, /// NOT DONE
67
68        // Some kernels can divide their processing into concrete non-overlapping regions
69        // between a beginning and ending position. This is a hard guarantee that regardless
70        // of the computations between the start of the stream and the beginning of the first
71        // independent region or between the *beginning* of any two independent regions, A,
72        // B, the calculations that occur prior to the beginning of B do not affect the
73        // calculations after it --- even if A is started at an arbitrary position with a
74        // zeroed-out kernel state.
75
76        // If a kernel K is processed simultaneously by two threads, K_0 and K_1, and K_1 is
77        // waiting K_0 to finish and update it's kernel state for K_1 to resume at, K_1 can
78        // compute what its state will be and begin processing before K_0 is finished. This
79        // requires a the pipeline to intervene and call an optimized "output-less" instance
80        // of the kernel prior to calling B.
81
82        ConditionalRegionBegin, ConditionalRegionEnd, /// NOT DONE
83
84        // Some kernels have clearly demarcated regions in which a MultiBlock kernel will
85        // produce useful outputs for only the inputs within those regions. This attribute
86        // instructs the kernel to "zero-fill" the output of any non-selected regions,
87        // skipping strides entirely whenever possible.
88
89        // If the same regions are also independent, we can avoid the overhead of "masking
90        // out" the input streams. Otherwise a MultiBlock will use temporary buffers for all
91        // uses of the streams and zero out any non-regions from the data.
92
93        AlwaysConsume,
94
95        // Always consume the input (i.e., use the lowerbound to determine whether to there
96        // is enough data to execute a stride rather than the upper bound.)
97
98        DisableTemporaryBuffer,
99
100        // Workaround attribute, force disable temporary buffer
101
102        DisableSufficientChecking,
103
104        // Workaround attribute, force disable sufficient data or sufficient space checking in pipelilne, always assume that
105        // the data or space is sufficient
106
107        DisableAvailableItemCountAdjustment,
108
109        // Workaround attribute, keep original availableItemCount in multiblock kernel (do not replace it by linear available item count)
110
111        /** OUTPUT STREAM ATTRIBUTES **/
112
113        Add,
114
115        // An Add(K) attribute states that K items will be added to this stream after
116        // processing the final block.
117
118        RoundUpTo,
119
120        // A RoundUpTo(k) attribute indicates the final item count of this stream will
121        // be rounded up to the nearest multiple of k
122
123        /** INPUT/OUTPUT STREAM ATTRIBUTES **/
124
125        Misaligned,
126
127        // Assume that we cannot statically compute the alignment of this stream set and
128        // perform any operations accordingly
129
130        BlockSize, /// NOT DONE
131
132        // A BlockSize(K) attribute, where K=2^k for some value of k>=4 declares
133        // that the layout of stream data items within the corresponding input
134        // or output buffer is arranged in blocks of K items each.   In each
135        // block, the data buffer contains K items of the first stream in the
136        // set, followed by K items of the next stream in the set and so on,
137        // up to and including K items of the last stream in the set.
138
139        // (Note: this replaces the concept of swizzling and anticipates that
140        // the pipeline will take on the role of automatically inserting the
141        // swizzling code necessary).
142
143        ReverseRegionBegin, ReverseRegionEnd, /// NOT DONE
144
145        // Conceptually, reversing a stream S is simple: {S_1,...,S_n} -> {S_n,...,S_1}.
146        // However, this means all of the input data must be computed and stored prior to
147        // executing this kernel. In practice, this is unnecessary as in the context of
148        // text parsing, we're almost always searching for the true starting position of
149        // something ambigious after we've found its end position in some prior kernel.
150
151
152        Swizzled,
153
154        // Whether the input streamset is in swizzled form
155
156//        Here is a revised definition of SegmentedReverse:
157
158//        Given a stream of data bits S that is considered to be divided into
159//        segments, and a marker stream S having a one bit at the final position
160//        of each segment, the R = SegmentedReverse(S, M) when
161
162//        R_{i} = S_{l + (h - i)}
163//              where l = the maximum j such that j <= i and either j = 0 or M_{j-1} = 1
164//          and where h = the minimum j such that j >= i and either j = length(S) -  or M_j = 1
165//          (l and h are the low and high positions of the segment containing i)
166
167//        This is an invertible operation, so we can apply R to a kernel's input
168//        and then to its output to get a SegmentedReverse version of a kernel
169
170//        A kernel which computes segmented reverse is feasible, but seems complex
171//        to implement, and probably too slow.  I have played around with several
172//        ways of tackling it, no good method yet.
173
174//        If there are multiple segments within a block, we could instead use
175//        the following:
176
177//        BlockSegmentedReverse
178
179//        B_{i} = S_{L + (H - i)}
180//             where l = the maximum j such that j <= i and either j = 0 or M_{j-1} = 1
181//                   h = the minimum j such that j >= i and either j = length(S) -  or M_j = 1
182//                   L = l if l div BlockSize < h divBlockSize, otherwise (i div BlockSize) * BlockSize
183//                   H = h if l div BlockSize < h divBlockSize, otherwise L + BlockSize - 1
184
185//        An alternative way of looking at this is to eliminate all but the first
186//        and last marker positions within a block.
187
188//        The main point is that, if we apply B to inputs, perform the kernel
189//        and the apply B to outputs, we get the same result if we applied R
190//        (assuming that the kernel computations do not cross boundaries in M).
191
192//        This will be more efficient to compute, but still involves overhead
193//        for shifting and combining streams.
194
195//        I think it may be better to focus on the ReverseKernel adapter, that
196//        handles the reverse operations for both input and output.   This actually
197//        gives more flexibility, because, in a multiblock scenario, we can process
198//        the longest sequence of blocks such that both the beginning and end blocks
199//        have a one bit.   If there are any interior blocks with one bits, then
200//        they will be handled automatically without special shifting and masking.
201
202//        By the way, in my designs, I am wanting to have a callable Multiblock
203//        function, so that the Multiblock function for a Reversed Kernel just
204//        does a little work before calling the Multiblock function of the base kernel.
205//        That seems to have disappeared in the current system.
206
207
208        /** KERNEL ATTRIBUTES **/
209
210        SelectMinimumInputLength, /// NOT DONE
211
212        // If a kernel has multiple input streams and their final item count differs,
213        // a MultiBlock kernel will select the *minimum* input item count as it's
214        // principle item length and truncate the streams to fit
215
216        // NOTE: this is the default if a kernel does not have SelectMaximumInputLength
217        // set and no PrincipalInputStream was declared.
218
219        SelectMaximumInputLength, /// NOT DONE
220
221        // If a kernel has multiple input streams and their final item count differs,
222        // a MultiBlock kernel will select the *maximum* input item count as it's
223        // principle item length and zero-extend the streams accordingly.
224
225        CanTerminateEarly,
226
227        // Indicates that this kernel can call setTerminationSignal() to terminate the
228        // kernel prior to processing all of its input streams.
229
230        MustExplicitlyTerminate,
231
232        // This kernel terminates *only* after the programmer calls setTerminationSignal()
233        // and will be called even when there is no new input data when the prior kernels
234        // in the pipeline have also terminated.
235
236        MustConsumeAll,
237
238        //Workaround, the kernel will finish only when all of the inputs are consumed
239
240    };
241
242    KindId getKind() const {
243        return mKind;
244    }
245
246    bool isAdd() const {
247        return mKind == KindId::Add;
248    }
249
250    bool isPrincipal() const {
251        return mKind == KindId::Principal;
252    }
253
254    bool isRoundUpTo() const {
255        return mKind == KindId::RoundUpTo;
256    }
257
258    bool isBlockSize() const {
259        return mKind == KindId::BlockSize;
260    }
261
262    unsigned amount() const {
263        return mAmount;
264    }
265
266    void setAmount(const unsigned amount) {
267        mAmount = amount;
268    }
269
270    bool operator == (const Attribute & other) const {
271        return mKind == other.mKind && mAmount == other.mAmount;
272    }
273
274    bool operator != (const Attribute & other) const {
275        return !(*this == other);
276    }
277
278protected:
279
280    friend struct AttributeSet;
281    friend struct Binding;
282    friend Attribute Add1();
283    friend Attribute Principal();
284    friend Attribute AlwaysConsume();
285    friend Attribute DisableTemporaryBuffer();
286    friend Attribute DisableSufficientChecking();
287    friend Attribute DisableAvailableItemCountAdjustment();
288    friend Attribute RoundUpTo(const unsigned);
289    friend Attribute LookAhead(const unsigned);
290    friend Attribute LookBehind(const unsigned);
291    friend Attribute Deferred();
292    friend Attribute Misaligned();
293    friend Attribute ConditionalRegionBegin();
294    friend Attribute ConditionalRegionEnd();
295    friend Attribute Swizzled();
296    friend Attribute CanTerminateEarly();
297    friend Attribute MustExplicitlyTerminate();
298    friend Attribute MustConsumeAll();
299
300    Attribute(const KindId kind, const unsigned k) : mKind(kind), mAmount(k) { }
301
302private:
303
304    const KindId    mKind;
305    unsigned        mAmount;
306};
307
308struct AttributeSet : public std::vector<Attribute> {
309
310    using AttributeId = Attribute::KindId;
311
312    const AttributeSet & getAttributes() const {
313        return *this;
314    }
315
316    Attribute & findOrAddAttribute(const AttributeId id) {
317        if (Attribute * const attr = __findAttribute(id)) {
318            return *attr;
319        } else {
320            return addAttribute(Attribute(id, 0));
321        }
322    }
323
324    Attribute & findAttribute(const AttributeId id) const {
325        return *__findAttribute(id);
326    }
327
328    Attribute & addAttribute(Attribute attribute);
329
330    bool hasAttributes() const {
331        return !empty();
332    }
333
334    bool hasAttribute(const AttributeId id) const {
335        return __findAttribute(id) != nullptr;
336    }
337
338    AttributeSet() = default;
339
340    AttributeSet(Attribute && attr) { emplace_back(std::move(attr)); }
341
342    AttributeSet(std::initializer_list<Attribute> attrs) : std::vector<Attribute>(attrs) { }
343
344private:
345
346    Attribute * __findAttribute(const AttributeId id) const;
347
348};
349
350
351inline Attribute Add1() {
352    return Attribute(Attribute::KindId::Add, 1);
353}
354
355inline Attribute RoundUpTo(const unsigned k) {
356    return Attribute(Attribute::KindId::RoundUpTo, k);
357}
358
359inline Attribute AlwaysConsume() {
360    return Attribute(Attribute::KindId::AlwaysConsume, 0);
361}
362
363inline Attribute DisableTemporaryBuffer() {
364    return Attribute(Attribute::KindId::DisableTemporaryBuffer, 0);
365}
366
367inline Attribute DisableAvailableItemCountAdjustment() {
368    return Attribute(Attribute::KindId::DisableAvailableItemCountAdjustment, 0);
369}
370
371inline Attribute DisableSufficientChecking() {
372    return Attribute(Attribute::KindId::DisableSufficientChecking, 0);
373}
374
375inline Attribute Principal() {
376    return Attribute(Attribute::KindId::Principal, 0);
377}
378
379inline Attribute LookAhead(const unsigned k) {
380    return Attribute(Attribute::KindId::LookAhead, k);
381}
382
383inline Attribute LookBehind(const unsigned k) {
384    return Attribute(Attribute::KindId::LookBehind, k);
385}
386
387inline Attribute Deferred() {
388    return Attribute(Attribute::KindId::Deferred, 0);
389}
390
391inline Attribute Misaligned() {
392    return Attribute(Attribute::KindId::Misaligned, 0);
393}
394
395inline Attribute ConditionalRegionBegin() {
396    return Attribute(Attribute::KindId::ConditionalRegionBegin, 0);
397}
398
399inline Attribute ConditionalRegionEnd() {
400    return Attribute(Attribute::KindId::ConditionalRegionEnd, 0);
401}
402
403inline Attribute CanTerminateEarly() {
404    return Attribute(Attribute::KindId::CanTerminateEarly, 0);
405}
406
407inline Attribute MustExplicitlyTerminate() {
408    return Attribute(Attribute::KindId::MustExplicitlyTerminate, 0);
409}
410
411inline Attribute MustConsumeAll() {
412    return Attribute(Attribute::KindId::MustConsumeAll, 0);
413}
414
415inline Attribute Swizzled() {
416    return Attribute(Attribute::KindId::Swizzled, 0);
417}
418
419}
420
421#endif // ATTRIBUTES_H
Note: See TracBrowser for help on using the repository browser.