source: icGREP/icgrep-devel/icgrep/kernels/swizzle.cpp @ 5425

Last change on this file since 5425 was 5335, checked in by cameron, 2 years ago

Recast to bitBlockType if a fieldwidth other than 64 is chosen

File size: 4.0 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "swizzle.h"
7#include <IR_Gen/idisa_builder.h>
8#include <llvm/IR/Value.h>
9
10using namespace llvm;
11
12namespace kernel {
13
14SwizzleGenerator::SwizzleGenerator(IDISA::IDISA_Builder * iBuilder, unsigned bitStreamCount, unsigned outputSets, unsigned inputSets, unsigned fieldWidth)
15    : BlockOrientedKernel(iBuilder, "swizzle" + std::to_string(fieldWidth) + ":" + std::to_string(bitStreamCount), {}, {}, {}, {}, {})
16    , mBitStreamCount(bitStreamCount)
17    , mFieldWidth(fieldWidth)
18    , mSwizzleFactor(iBuilder->getBitBlockWidth() / fieldWidth)
19    , mInputSets(inputSets)
20    , mOutputSets(outputSets) {
21        assert((fieldWidth > 0) && ((fieldWidth & (fieldWidth - 1)) == 0) && "fieldWidth must be a power of 2");
22        assert(fieldWidth < iBuilder->getBitBlockWidth() && "fieldWidth must be less than the block width");
23        assert(mSwizzleFactor > 1 && "fieldWidth must be less than the block width");
24        unsigned inputStreamsPerSet = (bitStreamCount + inputSets - 1)/inputSets;
25        unsigned outputStreamsPerSet = (bitStreamCount + outputSets - 1)/outputSets;
26        // Maybe the following is unnecessary.
27        //assert(inputStreamsPerSet % swizzleFactor == 0 && "input sets must be an exact multiple of the swizzle factor");
28        assert(outputStreamsPerSet % mSwizzleFactor == 0 && "output sets must be an exact multiple of the swizzle factor");
29        for (unsigned i = 0; i < mInputSets; i++) {
30            mStreamSetInputs.push_back(Binding{iBuilder->getStreamSetTy(inputStreamsPerSet, 1), "inputGroup" + std::to_string(i)});
31        }
32        for (unsigned i = 0; i < mOutputSets; i++) {
33            mStreamSetOutputs.push_back(Binding{iBuilder->getStreamSetTy(outputStreamsPerSet, 1), "outputGroup" + std::to_string(i)});
34        }
35    }
36
37void SwizzleGenerator::generateDoBlockMethod() {
38       
39    // We may need a few passes depending on the swizzle factor
40    unsigned passes = std::log2(mSwizzleFactor);
41   
42    unsigned swizzleGroups = (mBitStreamCount + mSwizzleFactor - 1)/mSwizzleFactor;
43    unsigned inputStreamsPerSet = (mBitStreamCount + mInputSets - 1)/mInputSets;
44    unsigned outputStreamsPerSet = (mBitStreamCount + mOutputSets - 1)/mOutputSets;
45
46    for (unsigned grp = 0; grp < swizzleGroups; grp++) {
47        // First load all the data.
48        std::vector<Value *> sourceBlocks;       
49        std::vector<Value *> targetBlocks;       
50        for (unsigned i = 0; i < mSwizzleFactor; i++) {
51            unsigned streamNo = grp * mSwizzleFactor + i;
52            if (streamNo < mBitStreamCount) {
53                unsigned inputSetNo = streamNo / inputStreamsPerSet;
54                unsigned j = streamNo % inputStreamsPerSet;
55                sourceBlocks.push_back(loadInputStreamBlock("inputGroup" + std::to_string(inputSetNo), iBuilder->getInt32(j)));
56            }
57            else {
58                // Fill in the remaining logically required streams of the last swizzle group with null values.
59                sourceBlocks.push_back(Constant::getNullValue(iBuilder->getBitBlockType()));
60            }
61        }
62        // Now perform the swizzle passes.
63        for (unsigned p = 0; p < passes; p++) {
64            std::vector<Value *> targetBlocks;
65            for (unsigned i = 0; i < mSwizzleFactor/2; i++) {
66                targetBlocks.push_back(iBuilder->esimd_mergel(mFieldWidth, sourceBlocks[i], sourceBlocks[i+mSwizzleFactor/2]));
67                targetBlocks.push_back(iBuilder->esimd_mergeh(mFieldWidth, sourceBlocks[i], sourceBlocks[i+mSwizzleFactor/2]));
68            }
69            sourceBlocks = targetBlocks;
70        }
71        for (unsigned i = 0; i < mSwizzleFactor; i++) {
72            unsigned streamNo = grp * mSwizzleFactor + i;
73            unsigned outputSetNo = streamNo / outputStreamsPerSet;
74            unsigned j = streamNo % outputStreamsPerSet;
75            storeOutputStreamBlock("outputGroup" + std::to_string(outputSetNo), iBuilder->getInt32(j), iBuilder->bitCast(sourceBlocks[i]));
76        }
77    }
78}
79
80
81}
Note: See TracBrowser for help on using the repository browser.