Ignore:
Timestamp:
Feb 10, 2017, 1:46:17 PM (2 years ago)
Author:
nmedfort
Message:

Replaced short vector construction in IDISA_Builder with stack-allocated arrays.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_sse_builder.cpp

    r5260 r5309  
    66
    77#include "idisa_sse_builder.h"
    8 #include <llvm/IR/IRBuilder.h>
    9 #include <llvm/IR/Constants.h>
    10 #include <llvm/IR/Intrinsics.h>
    11 #include <llvm/IR/Function.h>
    12 #include <llvm/IR/Module.h>
    138
    149namespace IDISA {
    15 
    1610
    1711Value * IDISA_SSE2_Builder::hsimd_packh(unsigned fw, Value * a, Value * b) {
     
    4135            Type * bitBlock_f64type = VectorType::get(getDoubleTy(), mBitBlockWidth/64);
    4236            Value * a_as_pd = CreateBitCast(a, bitBlock_f64type);
    43             Value * mask = CreateCall(signmask_f64func, a_as_pd);
    44             return mask;
     37            return CreateCall(signmask_f64func, a_as_pd);
    4538        }
    4639        if (fw == 8) {
    4740            Value * pmovmskb_func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_sse2_pmovmskb_128);
    48             Value * mask = CreateCall(pmovmskb_func, fwCast(8, a));
    49             return mask;
     41            return CreateCall(pmovmskb_func, fwCast(8, a));
    5042        }
    5143    }
    52     const unsigned fieldCount = mBitBlockWidth / fw;
     44    const auto fieldCount = mBitBlockWidth / fw;
    5345    if ((fieldCount > 4) && (fieldCount <= 16)) {
    5446        Value * pmovmskb_func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_sse2_pmovmskb_128);
    55         int fieldBytes = fw/8;
     47        int fieldBytes = fw / 8;
    5648        int hiByte = fieldBytes - 1;
    57         std::vector<Constant*> Idxs;
     49        Constant * Idxs[16];
    5850        for (unsigned i = 0; i < fieldCount; i++) {
    59             Idxs.push_back(getInt32(fieldBytes*i+hiByte));
     51            Idxs[i] = getInt32(fieldBytes * i + hiByte);
    6052        }
    6153        for (unsigned i = fieldCount; i < 16; i++) {
    62             Idxs.push_back(getInt32(mBitBlockWidth/8));
     54            Idxs[i] = getInt32(mBitBlockWidth / 8);
    6355        }
    64         Value * packh = CreateShuffleVector(fwCast(8, a), fwCast(8, allZeroes()), ConstantVector::get(Idxs));
    65         Value * mask = CreateCall(pmovmskb_func, packh);
    66         return mask;
     56        Value * packh = CreateShuffleVector(fwCast(8, a), fwCast(8, allZeroes()), ConstantVector::get({Idxs, 16}));
     57        return CreateCall(pmovmskb_func, packh);
    6758    }
    6859    // Otherwise use default SSE logic.
     
    7970            return CreateCall(signmask_f32func, a_as_ps);
    8071        }
    81     }
    82     else if ((fw == 64) && (mBitBlockWidth == 256)) {
     72    } else if ((fw == 64) && (mBitBlockWidth == 256)) {
    8373        Type * bitBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth/32);
    8474        Value * a_as_ps = CreateBitCast(a, bitBlock_f32type);
    85         std::vector<Constant*> Idxs;
    86         for (unsigned i = 0; i < mBitBlockWidth/fw; i++) {
    87             Idxs.push_back(getInt32(2*i+1));
     75        Constant * Idxs[4];
     76        for (unsigned i = 0; i < 4; i++) {
     77            Idxs[i] = getInt32(2 * i + 1);
    8878        }
    89         Value * packh = CreateShuffleVector(a_as_ps, UndefValue::get(bitBlock_f32type), ConstantVector::get(Idxs));
     79        Value * packh = CreateShuffleVector(a_as_ps, UndefValue::get(bitBlock_f32type), ConstantVector::get({Idxs, 4}));
    9080        Type * halfBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth/64);
    9181        Value * pack_as_ps = CreateBitCast(packh, halfBlock_f32type);
Note: See TracChangeset for help on using the changeset viewer.