source: icGREP/icgrep-devel/icgrep/kernels/swizzle.h @ 5385

Last change on this file since 5385 was 5349, checked in by hongpum, 2 years ago

Minor fix on docs for Swizzle.

File size: 2.7 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5#ifndef SWIZZLE_H
6#define SWIZZLE_H
7
8#include "kernel.h"
9namespace IDISA { class IDISA_Builder; }
10
11
12// The SwizzleGenerator class creates a kernel that transforms a set of bit streams into a swizzled form.
13// In swizzled form, one "swizzle field" from each stream in a set of streams is grouped together
14// so that the fields can be processed as a unit using SIMD operations.   For example, for a swizzle field width of 64 and a block size of 256,
15// 4 streams are swizzled together to be operated on as a group.  The ratio of the block size to the
16// swizzle field size is known as the swizzle factor, in this case 4.
17
18// Swizzled form is convenient for performing sequential operations on parallel sets of streams,
19// such as compression of each swizzle field by known counts, or stitching together known numbers
20// of bits from two different sources.
21
22// Any number of bit streams may be swizzled together.  However, the outputSets are always
23// grouped together in multiples of the swizzle factor.   If the bit stream count is not
24// an exact multiple of the swizzle factor, null streams (all zero bits) are added for
25// each swizzle group.
26
27// The input streams may come from any number of parallel input sets, each of the same size.
28// The number of inputSets defaults to 1.
29
30// The output streams may be separated into a number of output sets.  However, the size of each
31// output stream set must be a multiple of the swizzle factor.
32
33// For example: consider the following 4 streams (32 bits each)
34// Stream 1:   abcdef00 ghi00000 jk000000 lmnop000
35// Stream 2:   qrstuv00 wxy00000 z1000000 23456000
36// Stream 3:   ABCDEF00 GHI00000 JK000000 LMNOP000
37// Stream 4:   QRSTUV00 WXY00000 Z1000000 23456000
38//
39// The swizzled output using a field width of 8 produces the following swizzles.
40//
41// Swizzle 1:  abcdef00 qrstuv00 ABCDEF00 QRSTUV00
42// Swizzle 2:  ghi00000 wxy00000 GHI00000 WXY00000
43// Swizzle 3:  jk000000 z1000000 JK000000 Z1000000
44// Swizzle 4:  lmnop000 23456000 LMNOP000 23456000
45//
46// Now it might be convenient to compress all fields of swizzle 1 by 2, all fields of swizzle 2 by 5
47// and so on.
48//
49namespace kernel {
50
51class SwizzleGenerator : public BlockOrientedKernel {
52public:
53   
54    SwizzleGenerator(IDISA::IDISA_Builder * iBuilder, unsigned bitStreamCount, unsigned outputSets = 1, unsigned inputSets = 1, unsigned fieldWidth = 64);
55   
56protected:
57   
58    void generateDoBlockMethod() override;
59   
60private:
61    const unsigned mBitStreamCount;
62    const unsigned mFieldWidth;
63    const unsigned mSwizzleFactor;
64    const unsigned mInputSets;
65    const unsigned mOutputSets;
66};
67
68}
69   
70#endif
71
Note: See TracBrowser for help on using the repository browser.