Ignore:
Timestamp:
Mar 5, 2016, 8:35:53 AM (3 years ago)
Author:
cameron
Message:

Merge in 512-bit hsimd_signmask

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IDISA/idisa_avx_builder.cpp

    r4955 r4956  
    14 14
    15 15 Value * IDISA_AVX_Builder::hsimd_signmask(unsigned fw, Value * a) {
    16     if (fw == 64) {
    17         Value * signmask_f64func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_avx_movmsk_pd_256);
    18         Type * bitBlock_f64type = VectorType::get(getDoubleTy(), mBitBlockWidth/64);
    19         Value * a_as_pd = CreateBitCast(a, bitBlock_f64type);
    20         Value * mask = CreateCall(signmask_f64func, std::vector<Value *>({a_as_pd}));
    21         return mask;
     16    if (mBitBlockWidth == 256) {
     17        if (fw == 64) {
     18            Value * signmask_f64func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_avx_movmsk_pd_256);
     19            Type * bitBlock_f64type = VectorType::get(getDoubleTy(), mBitBlockWidth/64);
     20            Value * a_as_pd = CreateBitCast(a, bitBlock_f64type);
     21            Value * mask = CreateCall(signmask_f64func, std::vector<Value *>({a_as_pd}));
     22            return mask;
     23        }
     24        else if (fw == 32) {
     25            Value * signmask_f32func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_avx_movmsk_ps_256);
     26            Type * bitBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth/32);
     27            Value * a_as_ps = CreateBitCast(a, bitBlock_f32type);
     28            Value * mask = CreateCall(signmask_f32func, std::vector<Value *>({a_as_ps}));
     29            return mask;
     30        }
    22 31    }
    23     else if (fw == 32) {
    24         Value * signmask_f32func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_avx_movmsk_ps_256);
    25         Type * bitBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth/32);
    26         Value * a_as_ps = CreateBitCast(a, bitBlock_f32type);
    27         Value * mask = CreateCall(signmask_f32func, std::vector<Value *>({a_as_ps}));
    28         return mask;
     32    else if (mBitBlockWidth == 512) {
     33        if (fw == 64) {
     34            Type * bitBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth/32);
     35            Value * a_as_ps = CreateBitCast(a, bitBlock_f32type);
     36            std::vector<Constant*> Idxs;
     37            for (unsigned i = 0; i < 8; i++) {
     38                Idxs.push_back(getInt32(2*i+1));
     39            }
     40            Value * packh = CreateShuffleVector(a_as_ps, UndefValue::get(bitBlock_f32type), ConstantVector::get(Idxs));
     41            Type * halfBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth/64);
     42            Value * pack_as_ps = CreateBitCast(packh, halfBlock_f32type);
     43            Value * signmask_f32func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_avx_movmsk_ps_256);
     44            Value * mask = CreateCall(signmask_f32func, std::vector<Value *>({pack_as_ps}));
     45            return mask;
     46        }
    29 47    }
    30 48    Value * mask = CreateICmpSLT(fwCast(fw, a), ConstantAggregateZero::get(fwVectorType(fw)));
Note: See TracChangeset for help on using the changeset viewer.