Changeset 4955 for icGREP


Ignore:
Timestamp:
Mar 5, 2016, 8:21:03 AM (3 years ago)
Author:
cameron
Message:

Improved implementation of simd_pack on AVX2

Location:
icGREP/icgrep-devel/icgrep/IDISA
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IDISA/idisa_avx_builder.cpp

    r4954 r4955  
    1414
    1515Value * IDISA_AVX_Builder::hsimd_signmask(unsigned fw, Value * a) {
    16     if (mBitBlockWidth == 256) {
    17         if (fw == 64) {
    18             Value * signmask_f64func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_avx_movmsk_pd_256);
    19             Type * bitBlock_f64type = VectorType::get(getDoubleTy(), mBitBlockWidth/64);
    20             Value * a_as_pd = CreateBitCast(a, bitBlock_f64type);
    21             Value * mask = CreateCall(signmask_f64func, std::vector<Value *>({a_as_pd}));
    22             return mask;
    23         }
    24         else if (fw == 32) {
    25             Value * signmask_f32func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_avx_movmsk_ps_256);
    26             Type * bitBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth/32);
    27             Value * a_as_ps = CreateBitCast(a, bitBlock_f32type);
    28             Value * mask = CreateCall(signmask_f32func, std::vector<Value *>({a_as_ps}));
    29             return mask;
    30         }
     16    if (fw == 64) {
     17        Value * signmask_f64func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_avx_movmsk_pd_256);
     18        Type * bitBlock_f64type = VectorType::get(getDoubleTy(), mBitBlockWidth/64);
     19        Value * a_as_pd = CreateBitCast(a, bitBlock_f64type);
     20        Value * mask = CreateCall(signmask_f64func, std::vector<Value *>({a_as_pd}));
     21        return mask;
    3122    }
    32     else if (mBitBlockWidth == 512) {
    33         if (fw == 64) {
    34             Type * bitBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth/32);
    35             Value * a_as_ps = CreateBitCast(a, bitBlock_f32type);
    36             std::vector<Constant*> Idxs;
    37             for (unsigned i = 0; i < 8; i++) {
    38                 Idxs.push_back(getInt32(2*i+1));
    39             }
    40             Value * packh = CreateShuffleVector(a_as_ps, UndefValue::get(bitBlock_f32type), ConstantVector::get(Idxs));
    41             Type * halfBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth/64);
    42             Value * pack_as_ps = CreateBitCast(packh, halfBlock_f32type);
    43             Value * signmask_f32func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_avx_movmsk_ps_256);
    44             Value * mask = CreateCall(signmask_f32func, std::vector<Value *>({pack_as_ps}));
    45             return mask;
    46         }
     23    else if (fw == 32) {
     24        Value * signmask_f32func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_avx_movmsk_ps_256);
     25        Type * bitBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth/32);
     26        Value * a_as_ps = CreateBitCast(a, bitBlock_f32type);
     27        Value * mask = CreateCall(signmask_f32func, std::vector<Value *>({a_as_ps}));
     28        return mask;
    4729    }
    4830    Value * mask = CreateICmpSLT(fwCast(fw, a), ConstantAggregateZero::get(fwVectorType(fw)));
     
    5032}
    5133   
     34Value * IDISA_AVX2_Builder::hsimd_packh(unsigned fw, Value * a, Value * b) {
     35    unsigned field_count = 2 * mBitBlockWidth/fw;
     36    Value * aVec = fwCast(fw/2, a);
     37    Value * bVec = fwCast(fw/2, b);
     38    if (fw <= 64) {
     39        std::vector<Constant*> Idxs;
     40        for (unsigned i = 0; i < field_count/4; i++) {
     41            Idxs.push_back(getInt32(2*i + 1));
     42        }
     43        for (unsigned i = 0; i < field_count/4; i++) {
     44            Idxs.push_back(getInt32(2*i));
     45        }
     46        for (unsigned i = 0; i < field_count/4; i++) {
     47            Idxs.push_back(getInt32(field_count/2 + 2*i + 1));
     48        }
     49        for (unsigned i = 0; i < field_count/4; i++) {
     50            Idxs.push_back(getInt32(field_count/2 + 2*i));
     51        }
     52        Value * shufa = CreateShuffleVector(aVec, aVec, ConstantVector::get(Idxs));
     53        Value * shufb = CreateShuffleVector(bVec, bVec, ConstantVector::get(Idxs));
     54        Value * pk = hsimd_packh(128, shufa, shufb);
     55        return pk;
     56    }
     57    else {
     58        std::vector<Constant*> Idxs;
     59        for (unsigned i = 0; i < field_count; i++) {
     60            Idxs.push_back(getInt32(2*i));
     61        }
     62        return CreateShuffleVector(aVec, bVec, ConstantVector::get(Idxs));
     63    }
    5264}
     65
     66Value * IDISA_AVX2_Builder::hsimd_packl(unsigned fw, Value * a, Value * b) {
     67    unsigned field_count = 2 * mBitBlockWidth/fw;
     68    Value * aVec = fwCast(fw/2, a);
     69    Value * bVec = fwCast(fw/2, b);
     70    if (fw <= 64) {
     71        std::vector<Constant*> Idxs;
     72        for (unsigned i = 0; i < field_count/4; i++) {
     73            Idxs.push_back(getInt32(2*i + 1));
     74        }
     75        for (unsigned i = 0; i < field_count/4; i++) {
     76            Idxs.push_back(getInt32(2*i));
     77        }
     78        for (unsigned i = 0; i < field_count/4; i++) {
     79            Idxs.push_back(getInt32(field_count/2 + 2*i + 1));
     80        }
     81        for (unsigned i = 0; i < field_count/4; i++) {
     82            Idxs.push_back(getInt32(field_count/2 + 2*i));
     83        }
     84        Value * shufa = CreateShuffleVector(aVec, aVec, ConstantVector::get(Idxs));
     85        Value * shufb = CreateShuffleVector(bVec, bVec, ConstantVector::get(Idxs));
     86        return hsimd_packl(128, shufa, shufb);
     87    }
     88    else {
     89        std::vector<Constant*> Idxs;
     90        for (unsigned i = 0; i < field_count; i++) {
     91            Idxs.push_back(getInt32(2*i+1));
     92        }
     93        return CreateShuffleVector(aVec, bVec, ConstantVector::get(Idxs));
     94    }
     95}
     96}
  • icGREP/icgrep-devel/icgrep/IDISA/idisa_avx_builder.h

    r4903 r4955  
    2828    IDISA_AVX2_Builder(Module * m, Type * bitBlockType) : IDISA_AVX_Builder(m, bitBlockType) {
    2929    }
    30 
     30    Value * hsimd_packh(unsigned fw, Value * a, Value * b) override;
     31    Value * hsimd_packl(unsigned fw, Value * a, Value * b) override;
    3132    ~IDISA_AVX2_Builder() {};
    3233};
Note: See TracChangeset for help on using the changeset viewer.