source: icGREP/icgrep-devel/icgrep/kernels/swizzle.h @ 5335

Last change on this file since 5335 was 5332, checked in by cameron, 2 years ago

Swizzle Generator kernel - initial check-in

File size: 2.6 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5#ifndef SWIZZLE_H
6#define SWIZZLE_H
7
8#include "kernel.h"
// Forward declaration of IDISA::IDISA_Builder; this header only refers to it
// through a pointer parameter of the SwizzleGenerator constructor.
9namespace IDISA { class IDISA_Builder; }
10
11
12// The SwizzleGenerator class creates a kernel that transforms a set of bit streams into a swizzled form.
13// In swizzled form, one "swizzle field" each from a set of streams are grouped together to be processed
14// as a unit using SIMD operations.   For example, for a swizzle field width of 64 and a block size of 256,
15// 4 streams are swizzled together to be operated on as a group.  The ratio of the block size to the
16// swizzle field size is known as the swizzle factor, in this case 4.
17
18// Swizzled form is convenient for performing sequential operations on parallel sets of streams,
19// such as compression of each swizzle field by known counts, or stitching together known numbers
20// of bits from two different sources.
21
22// Any number of bit streams may be swizzled together.  However, the outputSets are always
23// grouped together in multiples of the swizzle factor.   If the bit stream count is not
24// an exact multiple of the swizzle factor, null streams (all zero bits) are added for
25// each swizzle group.
26
27// The input streams may come from any number of parallel input sets, each of the same size.
28// The number of inputSets defaults to 1.
29
30// The output streams may be separated into a number of output sets.  However, the size of
31// each output stream set must be a multiple of the swizzle factor.
32
33// For example: consider the following 4 streams (32 bits each)
34// Stream 1:   abcdef00 ghi00000 jk000000 lmnop000
35// Stream 2:   qrstuv00 wxy00000 z1000000 23456000
36// Stream 3:   ABCDEF00 GHI00000 JK000000 LMNOP000
37// Stream 4:   QRSTUV00 WZY00000 Z1000000 23456000
38//
39// The swizzled output using a field width of 8 produces the following swizzles.
40//
41// Swizzle 1:  abcdef00 qrstuv00 ABCDEF00 QRSTUV00
42// Swizzle 2:  ghi00000 wxy00000 GHI00000 WZY00000
43// Swizzle 3:  jk000000 z1000000 JK000000 Z1000000
44// Swizzle 4:  lmnop000 23456000 LMNOP000 23456000
45//
46// Now it might be convenient to shift all fields of swizzle 1 by 2, all fields of swizzle 2 by 5 and so on.
47//
48namespace kernel {
49
// SwizzleGenerator: a BlockOrientedKernel subclass which, per the header comment
// of this file, creates a kernel that transforms a set of bit streams into
// swizzled form.  Only the declaration lives here; the constructor and
// generateDoBlockMethod() are defined elsewhere.
50class SwizzleGenerator : public BlockOrientedKernel {
51public:
52   
    // Constructor parameters:
    //   iBuilder       - pointer to the IDISA builder (how it is used is not
    //                    visible from this header).
    //   bitStreamCount - number of bit streams to swizzle; no default.
    //   outputSets     - number of output stream sets; defaults to 1.
    //   inputSets      - number of parallel input stream sets; defaults to 1.
    //   fieldWidth     - swizzle field width (presumably in bits, as in the
    //                    file's worked example); defaults to 64.
    // NOTE(review): outputSets precedes inputSets in the parameter list, so
    // positional callers must pass them in that order.
53    SwizzleGenerator(IDISA::IDISA_Builder * iBuilder, unsigned bitStreamCount, unsigned outputSets = 1, unsigned inputSets = 1, unsigned fieldWidth = 64);
54   
55protected:
56   
    // Overrides a virtual function inherited from the base class (enforced by
    // `override`).  Presumably emits the per-block swizzle code -- the body is
    // not in this header, so confirm against the implementation file.
57    void generateDoBlockMethod() override;
58   
59private:
    // Immutable configuration.  Members are initialized in the declaration
    // order below, which differs from the constructor's parameter order; keep
    // that in mind if one member is derived from another.
    // Presumably each field stores the like-named constructor argument --
    // TODO confirm in the constructor definition.
60    const unsigned mBitStreamCount;
61    const unsigned mFieldWidth;
    // The file's header comment defines the swizzle factor as the ratio of the
    // block size to the swizzle field size; it is not a constructor parameter,
    // so it is presumably computed in the constructor.
62    const unsigned mSwizzleFactor;
63    const unsigned mInputSets;
64    const unsigned mOutputSets;
65};
66
67}
68   
69#endif
70
Note: See TracBrowser for help on using the repository browser.