source: icGREP/icgrep-devel/icgrep/kernels/swizzle.h

Last change on this file was 6261, checked in by nmedfort, 6 months ago

Work on OptimizationBranch; revisited pipeline termination

File size: 2.9 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5#ifndef SWIZZLE_H
6#define SWIZZLE_H
7
8#include "kernel.h"
9namespace IDISA { class IDISA_Builder; }
10
11
12// The SwizzleGenerator class creates a kernel that transforms a set of bit streams into a swizzled form.
13// In swizzled form, one "swizzle field" each from a set of streams are grouped together to be processed
14// as a unit using SIMD operations.   For example, for a swizzle field width of 64 and a block size of 256,
15// 4 streams are swizzled together to be operated on as a group.  The ratio of the block size to the
16// swizzle field size is known as the swizzle factor, in this case 4.
17
18// Swizzled form is convenient for performing sequential operations on parallel sets of streams,
19// such as compression of each swizzle field by known counts, or stitching together known numbers
20// of bits from two different sources.
21
22// Any number of bit streams may be swizzled together.  However, the outputSets are always
23// grouped together in multiples of the swizzle factor.   If the bit stream count is not
24// an exact multiple of the swizzle factor, null streams (all zero bits) are added for
25// each swizzle group.
26
27// The input streams may come from any number of parallel input sets, each of the same size.
28// The number of inputSets defaults to 1.
29
30// The output streams may be separated into a number of output sets.  However, the number of
31// streams in each output set must be a multiple of the swizzle factor.
32
33// For example: consider the following 4 streams (32 bits each)
34// Stream 1:   000pomnl 000000kj 00000ihg 00fedcba
35// Stream 2:   00065432 0000001z 00000yxw 00vutsrq
36// Stream 3:   000POMNL 000000KJ 00000IHG 00FEDCBA
37// Stream 4:   00065432 0000001Z 00000YXW 00VUTSRQ
38//
39// The swizzled output using a field width of 8 produces the following swizzles.
40//
41// Swizzle 1:  00VUTSRQ 00FEDCBA 00vutsrq 00fedcba
42// Swizzle 2:  00000YXW 00000IHG 00000yxw 00000ihg
43// Swizzle 3:  0000001Z 000000KJ 0000001z 000000kj
44// Swizzle 4:  00065432 000POMNL 00065432 000pomnl
45//
46// Now it might be convenient to compress all fields of swizzle 1 by 2, all fields of swizzle 2 by 5
47// and so on.
48//
49namespace kernel {
50
51class SwizzleGenerator : public BlockOrientedKernel {
52public:
53
54    SwizzleGenerator(const std::unique_ptr<kernel::KernelBuilder> &, const std::vector<StreamSet *> & inputs, const std::vector<StreamSet *> & outputs, const unsigned fieldWidth = sizeof(size_t) * 8);
55
56protected:
57
58    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & b) override;
59
60private:
61    const unsigned mBitStreamCount;
62    const unsigned mFieldWidth;
63};
64
65
66class SwizzleByGather : public BlockOrientedKernel {
67public:
68    SwizzleByGather(const std::unique_ptr<kernel::KernelBuilder> & b);
69
70protected:
71    void generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
72private:
73};
74
75}
76
77#endif
78
Note: See TracBrowser for help on using the repository browser.