Ignore:
Timestamp:
Feb 10, 2017, 1:46:17 PM (2 years ago)
Author:
nmedfort
Message:

Replaced short vector construction in IDISA_Builder with stack-allocated arrays.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.cpp

    r5260 r5309  
    66
    77#include "idisa_avx_builder.h"
    8 #include <llvm/IR/IRBuilder.h>
    9 #include <llvm/IR/Constants.h>
    10 #include <llvm/IR/Intrinsics.h>
    11 #include <llvm/IR/Function.h>
    12 #include <llvm/IR/Module.h>
    138
    149namespace IDISA {
     
    2116            Type * bitBlock_f64type = VectorType::get(getDoubleTy(), mBitBlockWidth/64);
    2217            Value * a_as_pd = CreateBitCast(a, bitBlock_f64type);
    23             Value * mask = CreateCall(signmask_f64func, std::vector<Value *>({a_as_pd}));
    24             return mask;
    25         }
    26         else if (fw == 32) {
     18            return CreateCall(signmask_f64func, a_as_pd);
     19        } else if (fw == 32) {
    2720            Value * signmask_f32func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_avx_movmsk_ps_256);
    2821            Type * bitBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth/32);
    2922            Value * a_as_ps = CreateBitCast(a, bitBlock_f32type);
    30             Value * mask = CreateCall(signmask_f32func, std::vector<Value *>({a_as_ps}));
    31             return mask;
     23            return CreateCall(signmask_f32func, a_as_ps);
    3224        }
    33     }
    34     else if (mBitBlockWidth == 512) {
     25    } else if (mBitBlockWidth == 512) {
    3526        if (fw == 64) {
    36             Type * bitBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth/32);
     27            Type * bitBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth / 32);
    3728            Value * a_as_ps = CreateBitCast(a, bitBlock_f32type);
    38             std::vector<Constant*> Idxs;
     29            Constant * indicies[8];
    3930            for (unsigned i = 0; i < 8; i++) {
    40                 Idxs.push_back(getInt32(2*i+1));
     31                indicies[i] = getInt32(2 * i + 1);
    4132            }
    42             Value * packh = CreateShuffleVector(a_as_ps, UndefValue::get(bitBlock_f32type), ConstantVector::get(Idxs));
     33            Value * packh = CreateShuffleVector(a_as_ps, UndefValue::get(bitBlock_f32type), ConstantVector::get({indicies, 8}));
    4334            Type * halfBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth/64);
    4435            Value * pack_as_ps = CreateBitCast(packh, halfBlock_f32type);
    4536            Value * signmask_f32func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_avx_movmsk_ps_256);
    46             Value * mask = CreateCall(signmask_f32func, std::vector<Value *>({pack_as_ps}));
    47             return mask;
     37            return CreateCall(signmask_f32func, pack_as_ps);
    4838        }
    4939    }
     
    5343   
    5444Value * IDISA_AVX2_Builder::hsimd_packh(unsigned fw, Value * a, Value * b) {
    55     unsigned field_count = 2 * mBitBlockWidth/fw;
    56     Value * aVec = fwCast(fw/2, a);
    57     Value * bVec = fwCast(fw/2, b);
    58     if (fw <= 64) {
    59         std::vector<Constant*> Idxs;
    60         for (unsigned i = 0; i < field_count/4; i++) {
    61             Idxs.push_back(getInt32(2*i));
     45    if (fw <= 64) {       
     46        Value * aVec = fwCast(fw / 2, a);
     47        Value * bVec = fwCast(fw / 2, b);
     48        const auto field_count = 2 * mBitBlockWidth / fw;
     49        Constant * Idxs[field_count];
     50        const auto H = (field_count / 2);
     51        const auto Q = (field_count / 4);
     52        for (unsigned i = 0; i < Q; i++) {
     53            Idxs[i] = getInt32(2 * i);
     54            Idxs[i + Q] = getInt32((2 * i) + 1);
     55            Idxs[i + H] = getInt32((2 * i) + H);
     56            Idxs[i + H + Q] = getInt32((2 * i) + 1 + H);
    6257        }
    63         for (unsigned i = 0; i < field_count/4; i++) {
    64             Idxs.push_back(getInt32(2*i + 1));
    65         }
    66         for (unsigned i = 0; i < field_count/4; i++) {
    67             Idxs.push_back(getInt32(field_count/2 + 2*i));
    68         }
    69         for (unsigned i = 0; i < field_count/4; i++) {
    70             Idxs.push_back(getInt32(field_count/2 + 2*i + 1));
    71         }
    72         Value * shufa = CreateShuffleVector(aVec, aVec, ConstantVector::get(Idxs));
    73         Value * shufb = CreateShuffleVector(bVec, bVec, ConstantVector::get(Idxs));
    74         return hsimd_packh(mBitBlockWidth/2, shufa, shufb);
     58        Value * shufa = CreateShuffleVector(aVec, aVec, ConstantVector::get({Idxs, field_count}));
     59        Value * shufb = CreateShuffleVector(bVec, bVec, ConstantVector::get({Idxs, field_count}));
     60        return hsimd_packh(mBitBlockWidth / 2, shufa, shufb);
    7561    }
    7662    // Otherwise use default SSE logic.
     
    7965
    8066Value * IDISA_AVX2_Builder::hsimd_packl(unsigned fw, Value * a, Value * b) {
    81     unsigned field_count = 2 * mBitBlockWidth/fw;
    82     Value * aVec = fwCast(fw/2, a);
    83     Value * bVec = fwCast(fw/2, b);
    8467    if (fw <= 64) {
    85         std::vector<Constant*> Idxs;
    86         for (unsigned i = 0; i < field_count/4; i++) {
    87             Idxs.push_back(getInt32(2*i));
     68        Value * aVec = fwCast(fw / 2, a);
     69        Value * bVec = fwCast(fw / 2, b);
     70        const auto field_count = 2 * mBitBlockWidth / fw;
     71        Constant * Idxs[field_count];
     72        const auto H = (field_count / 2);
     73        const auto Q = (field_count / 4);
     74        for (unsigned i = 0; i < Q; i++) {
     75            Idxs[i] = getInt32(2 * i);
     76            Idxs[i + Q] = getInt32((2 * i) + 1);
     77            Idxs[i + H] = getInt32((2 * i) + H);
     78            Idxs[i + H + Q] = getInt32((2 * i) + H + 1);
    8879        }
    89         for (unsigned i = 0; i < field_count/4; i++) {
    90             Idxs.push_back(getInt32(2*i + 1));
    91         }
    92         for (unsigned i = 0; i < field_count/4; i++) {
    93             Idxs.push_back(getInt32(field_count/2 + 2*i));
    94         }
    95         for (unsigned i = 0; i < field_count/4; i++) {
    96             Idxs.push_back(getInt32(field_count/2 + 2*i + 1));
    97         }
    98         Value * shufa = CreateShuffleVector(aVec, aVec, ConstantVector::get(Idxs));
    99         Value * shufb = CreateShuffleVector(bVec, bVec, ConstantVector::get(Idxs));
    100         return hsimd_packl(mBitBlockWidth/2, shufa, shufb);
     80        Value * shufa = CreateShuffleVector(aVec, aVec, ConstantVector::get({Idxs, field_count}));
     81        Value * shufb = CreateShuffleVector(bVec, bVec, ConstantVector::get({Idxs, field_count}));
     82        return hsimd_packl(mBitBlockWidth / 2, shufa, shufb);
    10183    }
    10284    // Otherwise use default SSE logic.
     
    127109        Value * a_low = fwCast(16, simd_and(a, simd_lomask(fw)));
    128110        Value * b_low = fwCast(16, simd_and(b, simd_lomask(fw)));
    129         Value * pack = CreateCall(vpackuswbfunc, {a_low, b_low});
    130         return pack;
     111        return CreateCall(vpackuswbfunc, {a_low, b_low});
    131112    }
    132113    // Otherwise use default SSE logic.
     
    139120        Value * a_low = simd_srli(fw, a, fw/2);
    140121        Value * b_low = simd_srli(fw, b, fw/2);
    141         Value * pack = CreateCall(vpackuswbfunc, {a_low, b_low});
    142         return pack;
     122        return CreateCall(vpackuswbfunc, {a_low, b_low});
    143123    }
    144124    // Otherwise use default SSE logic.
     
    167147        carry_out = bitCast(CreateZExt(carry_out, getIntNTy(mBitBlockWidth)));
    168148    }
    169     return std::pair<Value *, Value *>(carry_out, bitCast(sum));
     149    return std::pair<Value *, Value *>{carry_out, bitCast(sum)};
    170150}
    171151   
Note: See TracChangeset for help on using the changeset viewer.