source: icGREP/icgrep-devel/icgrep/kernels/swizzle.cpp @ 6026

Last change on this file since 6026 was 6026, checked in by xwa163, 15 months ago
  1. Implement SwizzledMultiplePDEPkernel with the same logic as new PDEPkernel, remove LZ4MultiplePDEPkernel, improve the performance
  2. Remove some unnecessary include
  3. Add prefix for some kernels
  4. Remove a legacy kernel
File size: 4.2 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
#include "swizzle.h"
#include <kernels/kernel_builder.h>
#include <string>
#include <vector>
9
10using namespace llvm;
11
12namespace kernel {
13SwizzleGenerator::SwizzleGenerator(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned bitStreamCount, unsigned outputSets, unsigned inputSets, unsigned fieldWidth, std::string prefix)
14: BlockOrientedKernel(prefix + "swizzle" + std::to_string(fieldWidth) + ":" + std::to_string(bitStreamCount) + "_" + std::to_string(outputSets) + "_" + std::to_string(inputSets) , {}, {}, {}, {}, {})
15, mBitStreamCount(bitStreamCount)
16, mFieldWidth(fieldWidth)
17, mSwizzleFactor(iBuilder->getBitBlockWidth() / fieldWidth)
18, mInputSets(inputSets)
19, mOutputSets(outputSets) {
20    assert((fieldWidth > 0) && ((fieldWidth & (fieldWidth - 1)) == 0) && "fieldWidth must be a power of 2");
21    assert(fieldWidth < iBuilder->getBitBlockWidth() && "fieldWidth must be less than the block width");
22    assert(mSwizzleFactor > 1 && "fieldWidth must be less than the block width");
23    unsigned inputStreamsPerSet = (bitStreamCount + inputSets - 1)/inputSets;
24    unsigned outputStreamsPerSet = (bitStreamCount + outputSets - 1)/outputSets;
25    // Maybe the following is unnecessary.
26    //assert(inputStreamsPerSet % swizzleFactor == 0 && "input sets must be an exact multiple of the swizzle factor");
27    assert(outputStreamsPerSet % mSwizzleFactor == 0 && "output sets must be an exact multiple of the swizzle factor");
28    for (unsigned i = 0; i < mInputSets; i++) {
29        mStreamSetInputs.push_back(Binding{iBuilder->getStreamSetTy(inputStreamsPerSet, 1), "inputGroup" + std::to_string(i)});
30    }
31    for (unsigned i = 0; i < mOutputSets; i++) {
32        mStreamSetOutputs.push_back(Binding{iBuilder->getStreamSetTy(outputStreamsPerSet, 1), "outputGroup" + std::to_string(i), FixedRate(1), BlockSize(fieldWidth)});
33    }
34}
35
36void SwizzleGenerator::generateDoBlockMethod(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
37       
38    // We may need a few passes depending on the swizzle factor
39    const unsigned swizzleFactor = mSwizzleFactor;
40    const unsigned passes = std::log2(mSwizzleFactor);
41    const unsigned swizzleGroups = (mBitStreamCount + mSwizzleFactor - 1)/mSwizzleFactor;
42    const unsigned inputStreamsPerSet = (mBitStreamCount + mInputSets - 1)/mInputSets;
43    const unsigned outputStreamsPerSet = (mBitStreamCount + mOutputSets - 1)/mOutputSets;
44
45    Value * sourceBlocks[swizzleFactor];
46    Value * targetBlocks[swizzleFactor];
47
48    for (unsigned grp = 0; grp < swizzleGroups; grp++) {
49        // First load all the data.       
50        for (unsigned i = 0; i < swizzleFactor; i++) {
51            unsigned streamNo = grp * swizzleFactor + i;
52            if (streamNo < mBitStreamCount) {
53                unsigned inputSetNo = streamNo / inputStreamsPerSet;
54                unsigned j = streamNo % inputStreamsPerSet;
55                sourceBlocks[i] = iBuilder->loadInputStreamBlock("inputGroup" + std::to_string(inputSetNo), iBuilder->getInt32(j));
56            } else {
57                // Fill in the remaining logically required streams of the last swizzle group with null values.
58                sourceBlocks[i] = Constant::getNullValue(iBuilder->getBitBlockType());
59            }
60        }
61        // Now perform the swizzle passes.
62        for (unsigned p = 0; p < passes; p++) {
63            for (unsigned i = 0; i < swizzleFactor / 2; i++) {
64                targetBlocks[i * 2] = iBuilder->esimd_mergel(mFieldWidth, sourceBlocks[i], sourceBlocks[i + (swizzleFactor / 2)]);
65                targetBlocks[(i * 2) + 1] = iBuilder->esimd_mergeh(mFieldWidth, sourceBlocks[i], sourceBlocks[i + (swizzleFactor / 2)]);
66            }
67            for (unsigned i = 0; i < swizzleFactor; i++) {
68                sourceBlocks[i] = targetBlocks[i];
69            }
70        }
71        for (unsigned i = 0; i < swizzleFactor; i++) {
72            unsigned streamNo = grp * swizzleFactor + i;
73            unsigned outputSetNo = streamNo / outputStreamsPerSet;
74            unsigned j = streamNo % outputStreamsPerSet;
75            iBuilder->storeOutputStreamBlock("outputGroup" + std::to_string(outputSetNo), iBuilder->getInt32(j), iBuilder->bitCast(sourceBlocks[i]));
76        }
77    }
78}
79
80
81}
Note: See TracBrowser for help on using the repository browser.