source: icGREP/icgrep-devel/icgrep/kernels/swizzle.cpp @ 5436

Last change on this file since 5436 was 5436, checked in by nmedfort, 2 years ago

Continued refactoring work. PabloKernel is now an abstract base type with a 'generatePabloMethod' hook to generate Pablo code.

File size: 3.9 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 */
5
6#include "swizzle.h"
7#include <kernels/kernel_builder.h>
8
9using namespace llvm;
10
11namespace kernel {
12
13SwizzleGenerator::SwizzleGenerator(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, unsigned bitStreamCount, unsigned outputSets, unsigned inputSets, unsigned fieldWidth)
14: BlockOrientedKernel("swizzle" + std::to_string(fieldWidth) + ":" + std::to_string(bitStreamCount), {}, {}, {}, {}, {})
15, mBitStreamCount(bitStreamCount)
16, mFieldWidth(fieldWidth)
17, mSwizzleFactor(iBuilder->getBitBlockWidth() / fieldWidth)
18, mInputSets(inputSets)
19, mOutputSets(outputSets) {
20    assert((fieldWidth > 0) && ((fieldWidth & (fieldWidth - 1)) == 0) && "fieldWidth must be a power of 2");
21    assert(fieldWidth < iBuilder->getBitBlockWidth() && "fieldWidth must be less than the block width");
22    assert(mSwizzleFactor > 1 && "fieldWidth must be less than the block width");
23    unsigned inputStreamsPerSet = (bitStreamCount + inputSets - 1)/inputSets;
24    unsigned outputStreamsPerSet = (bitStreamCount + outputSets - 1)/outputSets;
25    // Maybe the following is unnecessary.
26    //assert(inputStreamsPerSet % swizzleFactor == 0 && "input sets must be an exact multiple of the swizzle factor");
27    assert(outputStreamsPerSet % mSwizzleFactor == 0 && "output sets must be an exact multiple of the swizzle factor");
28    for (unsigned i = 0; i < mInputSets; i++) {
29        mStreamSetInputs.push_back(Binding{iBuilder->getStreamSetTy(inputStreamsPerSet, 1), "inputGroup" + std::to_string(i)});
30    }
31    for (unsigned i = 0; i < mOutputSets; i++) {
32        mStreamSetOutputs.push_back(Binding{iBuilder->getStreamSetTy(outputStreamsPerSet, 1), "outputGroup" + std::to_string(i)});
33    }
34}
35
36void SwizzleGenerator::generateDoBlockMethod() {
37       
38    // We may need a few passes depending on the swizzle factor
39    unsigned passes = std::log2(mSwizzleFactor);
40   
41    unsigned swizzleGroups = (mBitStreamCount + mSwizzleFactor - 1)/mSwizzleFactor;
42    unsigned inputStreamsPerSet = (mBitStreamCount + mInputSets - 1)/mInputSets;
43    unsigned outputStreamsPerSet = (mBitStreamCount + mOutputSets - 1)/mOutputSets;
44
45    for (unsigned grp = 0; grp < swizzleGroups; grp++) {
46        // First load all the data.
47        std::vector<Value *> sourceBlocks;       
48        std::vector<Value *> targetBlocks;       
49        for (unsigned i = 0; i < mSwizzleFactor; i++) {
50            unsigned streamNo = grp * mSwizzleFactor + i;
51            if (streamNo < mBitStreamCount) {
52                unsigned inputSetNo = streamNo / inputStreamsPerSet;
53                unsigned j = streamNo % inputStreamsPerSet;
54                sourceBlocks.push_back(loadInputStreamBlock("inputGroup" + std::to_string(inputSetNo), iBuilder->getInt32(j)));
55            }
56            else {
57                // Fill in the remaining logically required streams of the last swizzle group with null values.
58                sourceBlocks.push_back(Constant::getNullValue(iBuilder->getBitBlockType()));
59            }
60        }
61        // Now perform the swizzle passes.
62        for (unsigned p = 0; p < passes; p++) {
63            std::vector<Value *> targetBlocks;
64            for (unsigned i = 0; i < mSwizzleFactor/2; i++) {
65                targetBlocks.push_back(iBuilder->esimd_mergel(mFieldWidth, sourceBlocks[i], sourceBlocks[i+mSwizzleFactor/2]));
66                targetBlocks.push_back(iBuilder->esimd_mergeh(mFieldWidth, sourceBlocks[i], sourceBlocks[i+mSwizzleFactor/2]));
67            }
68            sourceBlocks = targetBlocks;
69        }
70        for (unsigned i = 0; i < mSwizzleFactor; i++) {
71            unsigned streamNo = grp * mSwizzleFactor + i;
72            unsigned outputSetNo = streamNo / outputStreamsPerSet;
73            unsigned j = streamNo % outputStreamsPerSet;
74            storeOutputStreamBlock("outputGroup" + std::to_string(outputSetNo), iBuilder->getInt32(j), iBuilder->bitCast(sourceBlocks[i]));
75        }
76    }
77}
78
79
80}
Note: See TracBrowser for help on using the repository browser.