Changeset 4998


Ignore:
Timestamp:
Mar 31, 2016, 1:00:09 PM (18 months ago)
Author:
cameron
Message:

SSE/SSE2 overrides for hsimd_signmask; support BlockSize=256 long addition on SSE

Location:
icGREP/icgrep-devel/icgrep/IDISA
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IDISA/idisa_sse_builder.cpp

    r4996 r4998  
    1717    if ((fw == 16) && (mBitBlockWidth == 128)) {
    1818        Value * packuswb_func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_sse2_packuswb_128);
    19         return CreateCall(packuswb_func, std::vector<Value *>({simd_srli(16, a, 8), simd_srli(16, b, 8)}));
     19        return CreateCall2(packuswb_func, simd_srli(16, a, 8), simd_srli(16, b, 8));
    2020    }
    2121    // Otherwise use default logic.
     
    2727        Value * packuswb_func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_sse2_packuswb_128);
    2828        Value * mask = simd_lomask(16);
    29         return CreateCall(packuswb_func, std::vector<Value *>({fwCast(16, simd_and(a, mask)), fwCast(16, simd_and(b, mask))}));
     29        return CreateCall2(packuswb_func, fwCast(16, simd_and(a, mask)), fwCast(16, simd_and(b, mask)));
    3030    }
    3131    // Otherwise use default logic.
     
    3333}
    3434
     35Value * IDISA_SSE2_Builder::hsimd_signmask(unsigned fw, Value * a) {
     36    // SSE2 special case using Intrinsic::x86_sse2_movmsk_pd (fw=64 with a 128-bit block only).
     37    if (mBitBlockWidth == 128) {
     38        if (fw == 64) {
     39            Value * signmask_f64func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_sse2_movmsk_pd);
     40            Type * bitBlock_f64type = VectorType::get(getDoubleTy(), mBitBlockWidth/64);  // vector of mBitBlockWidth/64 doubles
     41            Value * a_as_pd = CreateBitCast(a, bitBlock_f64type);  // reinterpret the block's bits as doubles for the intrinsic call
     42            Value * mask = CreateCall(signmask_f64func, a_as_pd);
     43            return mask;
     44        }
     45    }
     46    // Every other (fw, block width) combination is delegated to the SSE base-class implementation.
     47    return IDISA_SSE_Builder::hsimd_signmask(fw, a);
    3548}
     49
     50Value * IDISA_SSE_Builder::hsimd_signmask(unsigned fw, Value * a) {
     51    // SSE special cases using Intrinsic::x86_sse_movmsk_ps (fw=32 with a 128-bit block, fw=64 with a 256-bit block).
     52    if (fw == 32) {
     53        Value * signmask_f32func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_sse_movmsk_ps);
     54        Type * bitBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth/32);
     55        Value * a_as_ps = CreateBitCast(a, bitBlock_f32type);  // reinterpret the block's bits as floats
     56        if (mBitBlockWidth == 128) {
     57            return CreateCall(signmask_f32func, a_as_ps);
     58        }  // NOTE(review): for other block widths the declaration and cast above go unused and control falls through to the default logic
     59    }
     60    else if ((fw == 64) && (mBitBlockWidth == 256)) {
     61        Type * bitBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth/32);
     62        Value * a_as_ps = CreateBitCast(a, bitBlock_f32type);
     63        std::vector<Constant*> Idxs;
     64        for (unsigned i = 0; i < mBitBlockWidth/fw; i++) {
     65            Idxs.push_back(getInt32(2*i+1));  // odd float lanes 1, 3, 5, ... — presumably the upper 32 bits of each 64-bit field (little-endian); confirm
     66        }
     67        Value * packh = CreateShuffleVector(a_as_ps, UndefValue::get(bitBlock_f32type), ConstantVector::get(Idxs));  // gather the selected lanes; the second operand is undef and never indexed
     68        Type * halfBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth/64);
     69        Value * pack_as_ps = CreateBitCast(packh, halfBlock_f32type);  // NOTE(review): packh appears to already have mBitBlockWidth/64 float lanes, so this cast looks redundant — confirm
     70        Value * signmask_f32func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_sse_movmsk_ps);
     71        Value * mask = CreateCall(signmask_f32func, pack_as_ps);
     72        return mask;
     73    }
     74    // Otherwise use the generic IDISA_Builder logic.
     75    return IDISA_Builder::hsimd_signmask(fw, a);
     76}
     77   
     78}
  • icGREP/icgrep-devel/icgrep/IDISA/idisa_sse_builder.h

    r4902 r4998  
    2222    IDISA_SSE_Builder(Module * m, Type * bitBlockType) : IDISA_Builder(m, bitBlockType) {
    2323    }
     24    Value * hsimd_signmask(unsigned fw, Value * a) override;
    2425    ~IDISA_SSE_Builder() {};
    2526
     
    3132    IDISA_SSE2_Builder(Module * m, Type * bitBlockType) : IDISA_SSE_Builder(m, bitBlockType) {
    3233    }
     34    Value * hsimd_signmask(unsigned fw, Value * a) override;
    3335    Value * hsimd_packh(unsigned fw, Value * a, Value * b) override;
    3436    Value * hsimd_packl(unsigned fw, Value * a, Value * b) override;
Note: See TracChangeset for help on using the changeset viewer.