source: icGREP/icgrep-devel/icgrep/kernels/attributes.h @ 5921

Last change on this file since 5921 was 5921, checked in by xwa163, 13 months ago
  1. Initial checkin for new approach for lz4 index decoder that always use 4MB buffer
  2. Add test case for new approach (for now test cases will fail when test file is larger than 4MB)
File size: 14.5 KB
Line 
1#ifndef ATTRIBUTES_H
2#define ATTRIBUTES_H
3
4#include <vector>
5
6namespace kernel {
7
8struct Attribute {
9
10    enum class KindId {
11
12        /** INPUT STREAM ATTRIBUTES **/
13
14        LookAhead, /// NOT DONE
15
16        // A LookAhead(n) attribute on an input stream set S declares that the kernel
17        // looks ahead n positions in the input stream.  That is, processing of item
18        // S[i, j] may be defined in terms of S[i, j+n].
19
20        // Guarantee required: the pipeline compiler must ensure that, when
21        // the kernel is called with an available item count of N for the
22        // input stream, that the corresponding input data buffer has
23        // valid items in the buffer up to and including position N+n,
24        // and that the blocks containing the items from N-1 through N+n are
25        // linearly contiguous.  When the final doMultiBlock call is made,
26        // the pipeline compiler must ensure that the n items at lookahead
27        // positions are zero-initialized.
28
29        // (Note: this guarantee anticipates using a single buffer pointer
30        // for both normal stream access and lookahead access.   This avoids
31        // the cost of extra parameters per doMultiBlock call, but requires
32        // that the corresponding circular buffer have a "lookahead extension area"
33        // that holds a copy of the data at the physical start of buffer).
34
35        LookBehind, /// NOT DONE
36
37        // A LookBehind(n) attribute on an input stream S declares that the kernel
38        // requires access to input items up to n positions prior to the current
39        // processed item position.
40
41        // (Notes: this may allow more efficient advances within n (without saving state).
42        // However, a lookbehind extension area prior to the normal buffer base address
43        // is necessary, which must be initially zero-filled.)
44
45        // A LookBehind(n) attribute on an output stream S declares that the kernel
46        // requires access to up to n previously generated output items.
47        // (Example: lz4d lookbehind(65536)).
48
49        Principal,
50
51        // One input stream can be declared as the principal input buffer for a kernel.
52        // If a kernel has a principal input stream, when processing the final stride,
53        // a MultiBlockKernel assumes the item count of the principle is the correct
54        // one and zero extends / truncates all other input streams to match it.
55
56        Deferred,
57
58        // Normally, the processed item count of fixed rate streams is automatically
59        // updated by the MultiBlock kernel. However, some streams behave like Fixed
60        // rate streams (in that they will always eventually process a Fixed amount of
61        // data) but the kernel processes the data in unpredictable chunks. Rather than
62        // declaring those as Unknown or Bounded rates, marking their rate calculation
63        // as Deferred provides the pipeline with a stronger guarantee when it comes to
64        // buffer size calculations.
65
66        IndependentRegionBegin, IndependentRegionEnd, /// NOT DONE
67
68        // Some kernels can divide their processing into concrete non-overlapping regions
69        // between a beginning and ending position. This is a hard guarantee that regardless
70        // of the computations between the start of the stream and the beginning of the first
71        // independent region or between the *beginning* of any two independent regions, A,
72        // B, the calculations that occur prior to the beginning of B do not affect the
73        // calculations after it --- even if A is started at an arbitrary position with a
74        // zeroed-out kernel state.
75
76        // If a kernel K is processed simultaneously by two threads, K_0 and K_1, and K_1 is
77        // waiting K_0 to finish and update it's kernel state for K_1 to resume at, K_1 can
78        // compute what its state will be and begin processing before K_0 is finished. This
79        // requires a the pipeline to intervene and call an optimized "output-less" instance
80        // of the kernel prior to calling B.
81
82        ConditionalRegionBegin, ConditionalRegionEnd, /// NOT DONE
83
84        // Some kernels have clearly demarcated regions in which a MultiBlock kernel will
85        // produce useful outputs for only the inputs within those regions. This attribute
86        // instructs the kernel to "zero-fill" the output of any non-selected regions,
87        // skipping strides entirely whenever possible.
88
89        // If the same regions are also independent, we can avoid the overhead of "masking
90        // out" the input streams. Otherwise a MultiBlock will use temporary buffers for all
91        // uses of the streams and zero out any non-regions from the data.
92
93        AlwaysConsume,
94
95        // Always consume the input (i.e., use the lowerbound to determine whether to there
96        // is enough data to execute a stride rather than the upper bound.)
97
98        /** OUTPUT STREAM ATTRIBUTES **/
99
100        Add,
101
102        // An Add(K) attribute states that K items will be added to this stream after
103        // processing the final block.
104
105        RoundUpTo,
106
107        // A RoundUpTo(k) attribute indicates the final item count of this stream will
108        // be rounded up to the nearest multiple of k
109
110        /** INPUT/OUTPUT STREAM ATTRIBUTES **/
111
112        Misaligned,
113
114        // Assume that we cannot statically compute the alignment of this stream set and
115        // perform any operations accordingly
116
117        BlockSize, /// NOT DONE
118
119        // A BlockSize(K) attribute, where K=2^k for some value of k>=4 declares
120        // that the layout of stream data items within the corresponding input
121        // or output buffer is arranged in blocks of K items each.   In each
122        // block, the data buffer contains K items of the first stream in the
123        // set, followed by K items of the next stream in the set and so on,
124        // up to and including K items of the last stream in the set.
125
126        // (Note: this replaces the concept of swizzling and anticipates that
127        // the pipeline will take on the role of automatically inserting the
128        // swizzling code necessary).
129
130        ReverseRegionBegin, ReverseRegionEnd, /// NOT DONE
131
132        // Conceptually, reversing a stream S is simple: {S_1,...,S_n} -> {S_n,...,S_1}.
133        // However, this means all of the input data must be computed and stored prior to
134        // executing this kernel. In practice, this is unnecessary as in the context of
135        // text parsing, we're almost always searching for the true starting position of
136        // something ambigious after we've found its end position in some prior kernel.
137
138
139        Swizzled,
140
141        // Whether the input streamset is in swizzled form
142
143        ConstantStrideLengthOne,
144
145        // TODO: Workaround here, the Pack Size is always one
146
147
148//        Here is a revised definition of SegmentedReverse:
149
150//        Given a stream of data bits S that is considered to be divided into
151//        segments, and a marker stream S having a one bit at the final position
152//        of each segment, the R = SegmentedReverse(S, M) when
153
154//        R_{i} = S_{l + (h - i)}
155//              where l = the maximum j such that j <= i and either j = 0 or M_{j-1} = 1
156//          and where h = the minimum j such that j >= i and either j = length(S) -  or M_j = 1
157//          (l and h are the low and high positions of the segment containing i)
158
159//        This is an invertible operation, so we can apply R to a kernel's input
160//        and then to its output to get a SegmentedReverse version of a kernel
161
162//        A kernel which computes segmented reverse is feasible, but seems complex
163//        to implement, and probably too slow.  I have played around with several
164//        ways of tackling it, no good method yet.
165
166//        If there are multiple segments within a block, we could instead use
167//        the following:
168
169//        BlockSegmentedReverse
170
171//        B_{i} = S_{L + (H - i)}
172//             where l = the maximum j such that j <= i and either j = 0 or M_{j-1} = 1
173//                   h = the minimum j such that j >= i and either j = length(S) -  or M_j = 1
174//                   L = l if l div BlockSize < h divBlockSize, otherwise (i div BlockSize) * BlockSize
175//                   H = h if l div BlockSize < h divBlockSize, otherwise L + BlockSize - 1
176
177//        An alternative way of looking at this is to eliminate all but the first
178//        and last marker positions within a block.
179
180//        The main point is that, if we apply B to inputs, perform the kernel
181//        and the apply B to outputs, we get the same result if we applied R
182//        (assuming that the kernel computations do not cross boundaries in M).
183
184//        This will be more efficient to compute, but still involves overhead
185//        for shifting and combining streams.
186
187//        I think it may be better to focus on the ReverseKernel adapter, that
188//        handles the reverse operations for both input and output.   This actually
189//        gives more flexibility, because, in a multiblock scenario, we can process
190//        the longest sequence of blocks such that both the beginning and end blocks
191//        have a one bit.   If there are any interior blocks with one bits, then
192//        they will be handled automatically without special shifting and masking.
193
194//        By the way, in my designs, I am wanting to have a callable Multiblock
195//        function, so that the Multiblock function for a Reversed Kernel just
196//        does a little work before calling the Multiblock function of the base kernel.
197//        That seems to have disappeared in the current system.
198
199
200        /** KERNEL ATTRIBUTES **/
201
202        SelectMinimumInputLength, /// NOT DONE
203
204        // If a kernel has multiple input streams and their final item count differs,
205        // a MultiBlock kernel will select the *minimum* input item count as it's
206        // principle item length and truncate the streams to fit
207
208        // NOTE: this is the default if a kernel does not have SelectMaximumInputLength
209        // set and no PrincipalInputStream was declared.
210
211        SelectMaximumInputLength, /// NOT DONE
212
213        // If a kernel has multiple input streams and their final item count differs,
214        // a MultiBlock kernel will select the *maximum* input item count as it's
215        // principle item length and zero-extend the streams accordingly.
216
217        CanTerminateEarly,
218
219        // Indicates that this kernel can call setTerminationSignal() to terminate the
220        // kernel prior to processing all of its input streams.
221
222        MustExplicitlyTerminate,
223
224        // This kernel terminates *only* after the programmer calls setTerminationSignal()
225        // and will be called even when there is no new input data when the prior kernels
226        // in the pipeline have also terminated.
227
228    };
229
230    KindId getKind() const {
231        return mKind;
232    }
233
234    bool isAdd() const {
235        return mKind == KindId::Add;
236    }
237
238    bool isPrincipal() const {
239        return mKind == KindId::Principal;
240    }
241
242    bool isRoundUpTo() const {
243        return mKind == KindId::RoundUpTo;
244    }
245
246    bool isBlockSize() const {
247        return mKind == KindId::BlockSize;
248    }
249
250    unsigned amount() const {
251        return mAmount;
252    }
253
254    void setAmount(const unsigned amount) {
255        mAmount = amount;
256    }
257
258    bool operator == (const Attribute & other) const {
259        return mKind == other.mKind && mAmount == other.mAmount;
260    }
261
262    bool operator != (const Attribute & other) const {
263        return !(*this == other);
264    }
265
266protected:
267
268    friend struct AttributeSet;
269    friend struct Binding;
270    friend Attribute Add1();
271    friend Attribute Principal();
272    friend Attribute AlwaysConsume();
273    friend Attribute RoundUpTo(const unsigned);
274    friend Attribute LookAhead(const unsigned);
275    friend Attribute LookBehind(const unsigned);
276    friend Attribute Deferred();
277    friend Attribute Misaligned();
278    friend Attribute ConditionalRegionBegin();
279    friend Attribute ConditionalRegionEnd();
280    friend Attribute Swizzled();
281    friend Attribute ConstantStrideLengthOne();
282    friend Attribute CanTerminateEarly();
283    friend Attribute MustExplicitlyTerminate();
284
285    Attribute(const KindId kind, const unsigned k) : mKind(kind), mAmount(k) { }
286
287private:
288
289    const KindId    mKind;
290    unsigned        mAmount;
291};
292
293struct AttributeSet : public std::vector<Attribute> {
294
295    using AttributeId = Attribute::KindId;
296
297    const AttributeSet & getAttributes() const {
298        return *this;
299    }
300
301    Attribute & findOrAddAttribute(const AttributeId id) {
302        if (Attribute * const attr = __findAttribute(id)) {
303            return *attr;
304        } else {
305            return addAttribute(Attribute(id, 0));
306        }
307    }
308
309    Attribute & findAttribute(const AttributeId id) const {
310        return *__findAttribute(id);
311    }
312
313    Attribute & addAttribute(Attribute attribute);
314
315    bool hasAttributes() const {
316        return !empty();
317    }
318
319    bool hasAttribute(const AttributeId id) const {
320        return __findAttribute(id) != nullptr;
321    }
322
323    AttributeSet() = default;
324
325    AttributeSet(Attribute && attr) { emplace_back(std::move(attr)); }
326
327    AttributeSet(std::initializer_list<Attribute> attrs) : std::vector<Attribute>(attrs) { }
328
329private:
330
331    Attribute * __findAttribute(const AttributeId id) const;
332
333};
334
335
336inline Attribute Add1() {
337    return Attribute(Attribute::KindId::Add, 1);
338}
339
340inline Attribute RoundUpTo(const unsigned k) {
341    return Attribute(Attribute::KindId::RoundUpTo, k);
342}
343
344inline Attribute AlwaysConsume() {
345    return Attribute(Attribute::KindId::AlwaysConsume, 0);
346}
347
348inline Attribute Principal() {
349    return Attribute(Attribute::KindId::Principal, 0);
350}
351
352inline Attribute LookAhead(const unsigned k) {
353    return Attribute(Attribute::KindId::LookAhead, k);
354}
355
356inline Attribute LookBehind(const unsigned k) {
357    return Attribute(Attribute::KindId::LookBehind, k);
358}
359
360inline Attribute Deferred() {
361    return Attribute(Attribute::KindId::Deferred, 0);
362}
363
364inline Attribute Misaligned() {
365    return Attribute(Attribute::KindId::Misaligned, 0);
366}
367
368inline Attribute ConditionalRegionBegin() {
369    return Attribute(Attribute::KindId::ConditionalRegionBegin, 0);
370}
371
372inline Attribute ConditionalRegionEnd() {
373    return Attribute(Attribute::KindId::ConditionalRegionEnd, 0);
374}
375
376inline Attribute CanTerminateEarly() {
377    return Attribute(Attribute::KindId::CanTerminateEarly, 0);
378}
379
380inline Attribute MustExplicitlyTerminate() {
381    return Attribute(Attribute::KindId::MustExplicitlyTerminate, 0);
382}
383
384inline Attribute Swizzled() {
385    return Attribute(Attribute::KindId::Swizzled, 0);
386}
387
388inline Attribute ConstantStrideLengthOne() {
389    return Attribute(Attribute::KindId::ConstantStrideLengthOne, 0);
390}
391
392
393
394}
395
396#endif // ATTRIBUTES_H
Note: See TracBrowser for help on using the repository browser.