source: icGREP/icgrep-devel/icgrep/kernels/swizzle.cpp @ 5513

Last change on this file since 5513 was 5471, checked in by xuedongx, 2 years ago

bug fix

File size: 4.1 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
#include "swizzle.h"
#include <kernels/kernel_builder.h>
#include <vector>
8
9using namespace llvm;
10
11namespace kernel {
12
13SwizzleGenerator::SwizzleGenerator(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned bitStreamCount, unsigned outputSets, unsigned inputSets, unsigned fieldWidth)
14: BlockOrientedKernel("swizzle" + std::to_string(fieldWidth) + ":" + std::to_string(bitStreamCount), {}, {}, {}, {}, {})
15, mBitStreamCount(bitStreamCount)
16, mFieldWidth(fieldWidth)
17, mSwizzleFactor(iBuilder->getBitBlockWidth() / fieldWidth)
18, mInputSets(inputSets)
19, mOutputSets(outputSets) {
20    assert((fieldWidth > 0) && ((fieldWidth & (fieldWidth - 1)) == 0) && "fieldWidth must be a power of 2");
21    assert(fieldWidth < iBuilder->getBitBlockWidth() && "fieldWidth must be less than the block width");
22    assert(mSwizzleFactor > 1 && "fieldWidth must be less than the block width");
23    unsigned inputStreamsPerSet = (bitStreamCount + inputSets - 1)/inputSets;
24    unsigned outputStreamsPerSet = (bitStreamCount + outputSets - 1)/outputSets;
25    // Maybe the following is unnecessary.
26    //assert(inputStreamsPerSet % swizzleFactor == 0 && "input sets must be an exact multiple of the swizzle factor");
27    assert(outputStreamsPerSet % mSwizzleFactor == 0 && "output sets must be an exact multiple of the swizzle factor");
28    for (unsigned i = 0; i < mInputSets; i++) {
29        mStreamSetInputs.push_back(Binding{iBuilder->getStreamSetTy(inputStreamsPerSet, 1), "inputGroup" + std::to_string(i)});
30    }
31    for (unsigned i = 0; i < mOutputSets; i++) {
32        mStreamSetOutputs.push_back(Binding{iBuilder->getStreamSetTy(outputStreamsPerSet, 1), "outputGroup" + std::to_string(i)});
33    }
34}
35
36void SwizzleGenerator::generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
37       
38    // We may need a few passes depending on the swizzle factor
39    const unsigned swizzleFactor = mSwizzleFactor;
40    const unsigned passes = std::log2(mSwizzleFactor);
41    const unsigned swizzleGroups = (mBitStreamCount + mSwizzleFactor - 1)/mSwizzleFactor;
42    const unsigned inputStreamsPerSet = (mBitStreamCount + mInputSets - 1)/mInputSets;
43    const unsigned outputStreamsPerSet = (mBitStreamCount + mOutputSets - 1)/mOutputSets;
44
45    Value * sourceBlocks[swizzleFactor];
46    Value * targetBlocks[swizzleFactor];
47
48    for (unsigned grp = 0; grp < swizzleGroups; grp++) {
49        // First load all the data.       
50        for (unsigned i = 0; i < swizzleFactor; i++) {
51            unsigned streamNo = grp * swizzleFactor + i;
52            if (streamNo < mBitStreamCount) {
53                unsigned inputSetNo = streamNo / inputStreamsPerSet;
54                unsigned j = streamNo % inputStreamsPerSet;
55                sourceBlocks[i] = iBuilder->loadInputStreamBlock("inputGroup" + std::to_string(inputSetNo), iBuilder->getInt32(j));
56            } else {
57                // Fill in the remaining logically required streams of the last swizzle group with null values.
58                sourceBlocks[i] = Constant::getNullValue(iBuilder->getBitBlockType());
59            }
60        }
61        // Now perform the swizzle passes.
62        for (unsigned p = 0; p < passes; p++) {
63            for (unsigned i = 0; i < swizzleFactor / 2; i++) {
64                targetBlocks[i * 2] = iBuilder->esimd_mergel(mFieldWidth, sourceBlocks[i], sourceBlocks[i + (swizzleFactor / 2)]);
65                targetBlocks[(i * 2) + 1] = iBuilder->esimd_mergeh(mFieldWidth, sourceBlocks[i], sourceBlocks[i + (swizzleFactor / 2)]);
66            }
67            for (unsigned i = 0; i < swizzleFactor; i++) {
68                sourceBlocks[i] = targetBlocks[i];
69            }
70        }
71        for (unsigned i = 0; i < swizzleFactor; i++) {
72            unsigned streamNo = grp * swizzleFactor + i;
73            unsigned outputSetNo = streamNo / outputStreamsPerSet;
74            unsigned j = streamNo % outputStreamsPerSet;
75            iBuilder->storeOutputStreamBlock("outputGroup" + std::to_string(outputSetNo), iBuilder->getInt32(j), iBuilder->bitCast(sourceBlocks[i]));
76        }
77    }
78}
79
80
81}
Note: See TracBrowser for help on using the repository browser.