Changeset 4954 for icGREP


Ignore:
Timestamp:
Mar 3, 2016, 12:49:35 PM (3 years ago)
Author:
cameron
Message:

Support for -BlockSize=512 on AVX2

Location:
icGREP/icgrep-devel/icgrep
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IDISA/idisa_avx_builder.cpp

    r4903 r4954  
    1414
    1515Value * IDISA_AVX_Builder::hsimd_signmask(unsigned fw, Value * a) {
    16     if (fw == 64) {
    17         Value * signmask_f64func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_avx_movmsk_pd_256);
    18         Type * bitBlock_f64type = VectorType::get(getDoubleTy(), mBitBlockWidth/64);
    19         Value * a_as_pd = CreateBitCast(a, bitBlock_f64type);
    20         Value * mask = CreateCall(signmask_f64func, std::vector<Value *>({a_as_pd}));
    21         return mask;
     16    if (mBitBlockWidth == 256) {
     17        if (fw == 64) {
     18            Value * signmask_f64func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_avx_movmsk_pd_256);
     19            Type * bitBlock_f64type = VectorType::get(getDoubleTy(), mBitBlockWidth/64);
     20            Value * a_as_pd = CreateBitCast(a, bitBlock_f64type);
     21            Value * mask = CreateCall(signmask_f64func, std::vector<Value *>({a_as_pd}));
     22            return mask;
     23        }
     24        else if (fw == 32) {
     25            Value * signmask_f32func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_avx_movmsk_ps_256);
     26            Type * bitBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth/32);
     27            Value * a_as_ps = CreateBitCast(a, bitBlock_f32type);
     28            Value * mask = CreateCall(signmask_f32func, std::vector<Value *>({a_as_ps}));
     29            return mask;
     30        }
    2231    }
    23     else if (fw == 32) {
    24         Value * signmask_f32func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_avx_movmsk_ps_256);
    25         Type * bitBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth/32);
    26         Value * a_as_ps = CreateBitCast(a, bitBlock_f32type);
    27         Value * mask = CreateCall(signmask_f32func, std::vector<Value *>({a_as_ps}));
    28         return mask;
     32    else if (mBitBlockWidth == 512) {
     33        if (fw == 64) {
     34            Type * bitBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth/32);
     35            Value * a_as_ps = CreateBitCast(a, bitBlock_f32type);
     36            std::vector<Constant*> Idxs;
     37            for (unsigned i = 0; i < 8; i++) {
     38                Idxs.push_back(getInt32(2*i+1));
     39            }
     40            Value * packh = CreateShuffleVector(a_as_ps, UndefValue::get(bitBlock_f32type), ConstantVector::get(Idxs));
     41            Type * halfBlock_f32type = VectorType::get(getFloatTy(), mBitBlockWidth/64);
     42            Value * pack_as_ps = CreateBitCast(packh, halfBlock_f32type);
     43            Value * signmask_f32func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_avx_movmsk_ps_256);
     44            Value * mask = CreateCall(signmask_f32func, std::vector<Value *>({pack_as_ps}));
     45            return mask;
     46        }
    2947    }
    3048    Value * mask = CreateICmpSLT(fwCast(fw, a), ConstantAggregateZero::get(fwVectorType(fw)));
  • icGREP/icgrep-devel/icgrep/pablo/carry_manager.cpp

    r4951 r4954  
    131131        Value * carry_out_strm = iBuilder->simd_or(carrygen, iBuilder->simd_and(carryprop, iBuilder->CreateNot(sum)));
    132132        setCarryOut(localIndex, carry_out_strm);
    133     } else if (mBitBlockWidth == 256) {
     133    } else if (mBitBlockWidth >= 256) {
    134134        // using LONG_ADD
    135135        Value * carryq_value = getCarryIn(localIndex);
  • icGREP/icgrep-devel/icgrep/toolchain.cpp

    r4952 r4954  
    217217   
    218218    int blockSize = bitBlockType->isIntegerTy() ? cast<IntegerType>(bitBlockType)->getIntegerBitWidth() : cast<VectorType>(bitBlockType)->getBitWidth();
    219     if (blockSize == 256) {
     219    if (blockSize >= 256) {
    220220        if (hasAVX2) {
    221221            return new IDISA::IDISA_AVX2_Builder(mod, bitBlockType);
Note: See TracChangeset for help on using the changeset viewer.