source: icGREP/icgrep-devel/icgrep/kernels/attributes.h @ 5873

Last change on this file since 5873 was 5873, checked in by xwa163, 14 months ago
  1. Implement LZ4MultiplePdepKernel, which can do PDEP for more than one input streamset
  2. Add attributed Swizzled in StreamSet?, copy at least one whole block when doing temporary buffer copy for Swizzled InputStreamSet?
  3. Bug fixing for character_deposit pipeline
  4. Add more test files for character_deposit pipeline
File size: 13.5 KB
Line 
1#ifndef ATTRIBUTES_H
2#define ATTRIBUTES_H
3
4#include <vector>
5
6namespace kernel {
7
8struct Attribute {
9
10    enum class KindId {
11
12        /** INPUT STREAM ATTRIBUTES **/
13
14        LookAhead, /// NOT DONE
15
16        // A LookAhead(n) attribute on an input stream set S declares that the kernel
17        // looks ahead n positions in the input stream.  That is, processing of item
18        // S[i, j] may be defined in terms of S[i, j+n].
19
20        // Guarantee required: the pipeline compiler must ensure that, when
21        // the kernel is called with an available item count of N for the
22        // input stream, that the corresponding input data buffer has
23        // valid items in the buffer up to and including position N+n,
24        // and that the blocks containing the items from N-1 through N+n are
25        // linearly contiguous.  When the final doMultiBlock call is made,
26        // the pipeline compiler must ensure that the n items at lookahead
27        // positions are zero-initialized.
28
29        // (Note: this guarantee anticipates using a single buffer pointer
30        // for both normal stream access and lookahead access.   This avoids
31        // the cost of extra parameters per doMultiBlock call, but requires
32        // that the corresponding circular buffer have a "lookahead extension area"
33        // that holds a copy of the data at the physical start of buffer).
34
35        LookBehind, /// NOT DONE
36
37        // A LookBehind(n) attribute on an input stream S declares that the kernel
38        // requires access to input items up to n positions prior to the current
39        // processed item position.
40
41        // (Notes: this may allow more efficient advances within n (without saving state).
42        // However, a lookbehind extension area prior to the normal buffer base address
43        // is necessary, which must be initially zero-filled.)
44
45        // A LookBehind(n) attribute on an output stream S declares that the kernel
46        // requires access to up to n previously generated output items.
47        // (Example: lz4d lookbehind(65536)).
48
49        Principal,
50
51        // One input stream can be declared as the principal input buffer for a kernel.
52        // If a kernel has a principal input stream, when processing the final stride,
53        // a MultiBlockKernel assumes the item count of the principle is the correct
54        // one and zero extends / truncates all other input streams to match it.
55
56        Deferred,
57
58        // Normally, the processed item count of fixed rate streams is automatically
59        // updated by the MultiBlock kernel. However, some streams behave like Fixed
60        // rate streams (in that they will always eventually process a Fixed amount of
61        // data) but the kernel processes the data in unpredictable chunks. Rather than
62        // declaring those as Unknown or Bounded rates, marking their rate calculation
63        // as Deferred provides the pipeline with a stronger guarantee when it comes to
64        // buffer size calculations.
65
66        IndependentRegionBegin, IndependentRegionEnd, /// NOT DONE
67
68        // Some kernels can divide their processing into concrete non-overlapping regions
69        // between a beginning and ending position. This is a hard guarantee that regardless
70        // of the computations between the start of the stream and the beginning of the first
71        // independent region or between the *beginning* of any two independent regions, A,
72        // B, the calculations that occur prior to the beginning of B do not affect the
73        // calculations after it --- even if A is started at an arbitrary position with a
74        // zeroed-out kernel state.
75
76        // If a kernel K is processed simultaneously by two threads, K_0 and K_1, and K_1 is
77        // waiting K_0 to finish and update it's kernel state for K_1 to resume at, K_1 can
78        // compute what its state will be and begin processing before K_0 is finished. This
79        // requires a the pipeline to intervene and call an optimized "output-less" instance
80        // of the kernel prior to calling B.
81
82        ConditionalRegionBegin, ConditionalRegionEnd, /// NOT DONE
83
84        // Some kernels have clearly demarcated regions in which a MultiBlock kernel will
85        // produce useful outputs for only the inputs within those regions. This attribute
86        // instructs the kernel to "zero-fill" the output of any non-selected regions,
87        // skipping strides entirely whenever possible.
88
89        // If the same regions are also independent, we can avoid the overhead of "masking
90        // out" the input streams. Otherwise a MultiBlock will use temporary buffers for all
91        // uses of the streams and zero out any non-regions from the data.
92
93        /** OUTPUT STREAM ATTRIBUTES **/
94
95        Add,
96
97        // An Add(K) attribute states that K items will be added to this stream after
98        // processing the final block.
99
100        RoundUpTo,
101
102        // A RoundUpTo(k) attribute indicates the final item count of this stream will
103        // be rounded up to the nearest multiple of k
104
105        /** INPUT/OUTPUT STREAM ATTRIBUTES **/
106
107        Misaligned,
108
109        // Assume that we cannot statically compute the alignment of this stream set and
110        // perform any operations accordingly
111
112        BlockSize, /// NOT DONE
113
114        // A BlockSize(K) attribute, where K=2^k for some value of k>=4 declares
115        // that the layout of stream data items within the corresponding input
116        // or output buffer is arranged in blocks of K items each.   In each
117        // block, the data buffer contains K items of the first stream in the
118        // set, followed by K items of the next stream in the set and so on,
119        // up to and including K items of the last stream in the set.
120
121        // (Note: this replaces the concept of swizzling and anticipates that
122        // the pipeline will take on the role of automatically inserting the
123        // swizzling code necessary).
124
125        ReverseRegionBegin, ReverseRegionEnd, /// NOT DONE
126
127        // Conceptually, reversing a stream S is simple: {S_1,...,S_n} -> {S_n,...,S_1}.
128        // However, this means all of the input data must be computed and stored prior to
129        // executing this kernel. In practice, this is unnecessary as in the context of
130        // text parsing, we're almost always searching for the true starting position of
131        // something ambigious after we've found its end position in some prior kernel.
132
133
134        Swizzled,
135
136        // Whether the input streamset is in swizzled form
137
138
139//        Here is a revised definition of SegmentedReverse:
140
141//        Given a stream of data bits S that is considered to be divided into
142//        segments, and a marker stream S having a one bit at the final position
143//        of each segment, the R = SegmentedReverse(S, M) when
144
145//        R_{i} = S_{l + (h - i)}
146//              where l = the maximum j such that j <= i and either j = 0 or M_{j-1} = 1
147//          and where h = the minimum j such that j >= i and either j = length(S) -  or M_j = 1
148//          (l and h are the low and high positions of the segment containing i)
149
150//        This is an invertible operation, so we can apply R to a kernel's input
151//        and then to its output to get a SegmentedReverse version of a kernel
152
153//        A kernel which computes segmented reverse is feasible, but seems complex
154//        to implement, and probably too slow.  I have played around with several
155//        ways of tackling it, no good method yet.
156
157//        If there are multiple segments within a block, we could instead use
158//        the following:
159
160//        BlockSegmentedReverse
161
162//        B_{i} = S_{L + (H - i)}
163//             where l = the maximum j such that j <= i and either j = 0 or M_{j-1} = 1
164//                   h = the minimum j such that j >= i and either j = length(S) -  or M_j = 1
165//                   L = l if l div BlockSize < h divBlockSize, otherwise (i div BlockSize) * BlockSize
166//                   H = h if l div BlockSize < h divBlockSize, otherwise L + BlockSize - 1
167
168//        An alternative way of looking at this is to eliminate all but the first
169//        and last marker positions within a block.
170
171//        The main point is that, if we apply B to inputs, perform the kernel
172//        and the apply B to outputs, we get the same result if we applied R
173//        (assuming that the kernel computations do not cross boundaries in M).
174
175//        This will be more efficient to compute, but still involves overhead
176//        for shifting and combining streams.
177
178//        I think it may be better to focus on the ReverseKernel adapter, that
179//        handles the reverse operations for both input and output.   This actually
180//        gives more flexibility, because, in a multiblock scenario, we can process
181//        the longest sequence of blocks such that both the beginning and end blocks
182//        have a one bit.   If there are any interior blocks with one bits, then
183//        they will be handled automatically without special shifting and masking.
184
185//        By the way, in my designs, I am wanting to have a callable Multiblock
186//        function, so that the Multiblock function for a Reversed Kernel just
187//        does a little work before calling the Multiblock function of the base kernel.
188//        That seems to have disappeared in the current system.
189
190
191        /** KERNEL ATTRIBUTES **/
192
193        SelectMinimumInputLength, /// NOT DONE
194
195        // If a kernel has multiple input streams and their final item count differs,
196        // a MultiBlock kernel will select the *minimum* input item count as it's
197        // principle item length and truncate the streams to fit
198
199        // NOTE: this is the default if a kernel does not have SelectMaximumInputLength
200        // set and no PrincipalInputStream was declared.
201
202        SelectMaximumInputLength, /// NOT DONE
203
204        // If a kernel has multiple input streams and their final item count differs,
205        // a MultiBlock kernel will select the *maximum* input item count as it's
206        // principle item length and zero-extend the streams accordingly.
207
208        CanTerminateEarly,
209
210        // Indicates that this kernel can call setTerminationSignal() to terminate the
211        // kernel prior to processing all of its input streams.
212
213    };
214
215    bool isAdd() const {
216        return mKind == KindId::Add;
217    }
218
219    bool isPrincipal() const {
220        return mKind == KindId::Principal;
221    }
222
223    bool isRoundUpTo() const {
224        return mKind == KindId::RoundUpTo;
225    }
226
227    bool isBlockSize() const {
228        return mKind == KindId::BlockSize;
229    }
230
231    unsigned amount() const {
232        return mAmount;
233    }
234
235    void setAmount(const unsigned amount) {
236        mAmount = amount;
237    }
238
239    bool operator == (const Attribute & other) const {
240        return mKind == other.mKind && mAmount == other.mAmount;
241    }
242
243    bool operator != (const Attribute & other) const {
244        return !(*this == other);
245    }
246
247protected:
248
249    KindId getKind() const {
250        return mKind;
251    }
252
253    friend struct AttributeSet;
254    friend struct Binding;
255    friend Attribute Add1();
256    friend Attribute Principal();
257    friend Attribute RoundUpTo(const unsigned);
258    friend Attribute LookAhead(const unsigned);
259    friend Attribute LookBehind(const unsigned);
260    friend Attribute Deferred();
261    friend Attribute Misaligned();
262    friend Attribute ConditionalRegionBegin();
263    friend Attribute ConditionalRegionEnd();
264    friend Attribute Swizzled();
265    friend Attribute CanTerminateEarly();
266
267    Attribute(const KindId kind, const unsigned k) : mKind(kind), mAmount(k) { }
268
269private:
270
271    const KindId    mKind;
272    unsigned        mAmount;
273};
274
275struct AttributeSet : public std::vector<Attribute> {
276
277    using AttributeId = Attribute::KindId;
278
279    const AttributeSet & getAttributes() const {
280        return *this;
281    }
282
283    Attribute & findOrAddAttribute(const AttributeId id) {
284        if (Attribute * const attr = __findAttribute(id)) {
285            return *attr;
286        } else {
287            return addAttribute(Attribute(id, 0));
288        }
289    }
290
291    Attribute & findAttribute(const AttributeId id) const {
292        return *__findAttribute(id);
293    }
294
295    Attribute & addAttribute(Attribute attribute);
296
297    bool hasAttributes() const {
298        return !empty();
299    }
300
301    bool hasAttribute(const AttributeId id) const {
302        return __findAttribute(id) != nullptr;
303    }
304
305    AttributeSet() = default;
306
307    AttributeSet(Attribute && attr) { emplace_back(std::move(attr)); }
308
309    AttributeSet(std::initializer_list<Attribute> attrs) : std::vector<Attribute>(attrs) { }
310
311private:
312
313    Attribute * __findAttribute(const AttributeId id) const;
314
315};
316
317
318inline Attribute Add1() {
319    return Attribute(Attribute::KindId::Add, 1);
320}
321
322inline Attribute RoundUpTo(const unsigned k) {
323    return Attribute(Attribute::KindId::RoundUpTo, k);
324}
325
326inline Attribute Principal() {
327    return Attribute(Attribute::KindId::Principal, 0);
328}
329
330inline Attribute LookAhead(const unsigned k) {
331    return Attribute(Attribute::KindId::LookAhead, k);
332}
333
334inline Attribute LookBehind(const unsigned k) {
335    return Attribute(Attribute::KindId::LookBehind, k);
336}
337
338inline Attribute Deferred() {
339    return Attribute(Attribute::KindId::Deferred, 0);
340}
341
342inline Attribute Misaligned() {
343    return Attribute(Attribute::KindId::Misaligned, 0);
344}
345
346inline Attribute ConditionalRegionBegin() {
347    return Attribute(Attribute::KindId::ConditionalRegionBegin, 0);
348}
349
350inline Attribute ConditionalRegionEnd() {
351    return Attribute(Attribute::KindId::ConditionalRegionEnd, 0);
352}
353
354inline Attribute CanTerminateEarly() {
355    return Attribute(Attribute::KindId::CanTerminateEarly, 0);
356}
357
358inline Attribute Swizzled() {
359    return Attribute(Attribute::KindId::Swizzled, 0);
360}
361
362}
363
364#endif // ATTRIBUTES_H
Note: See TracBrowser for help on using the repository browser.