Ignore:
Timestamp:
Nov 25, 2015, 11:36:18 AM (3 years ago)
Author:
cameron
Message:

Parallel long addition within icgrep improves performance on AVX2

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IDISA/idisa_builder.cpp

    r4879 r4881  
    186186}
    187187
     // SIGNMASK_AVX2 is defined only when BLOCK_SIZE equals 256; it enables the
     // movmsk-intrinsic branches inside hsimd_signmask below.
     188#if (BLOCK_SIZE==256)
     189#define SIGNMASK_AVX2
     190#endif
     191
     // Emits IR that computes a sign mask of `a` treated as fields of width `fw`.
     //
     // fw: field width in bits.
     // a:  the block value whose per-field sign bits are gathered.
     //
     // Returns the Value holding the mask. In the generic path this is an integer
     // of mBitBlockWidth/fw bits; in the SIGNMASK_AVX2 branches it is whatever the
     // movmsk intrinsic call yields.
     // NOTE(review): the intrinsic result type is presumably i32, which differs
     // from the generic path's result type -- confirm callers accept both.
    188192Value * IDISA_Builder::hsimd_signmask(unsigned fw, Value * a) {
     193#ifdef SIGNMASK_AVX2
     // 64-bit fields: reinterpret `a` as a vector of mBitBlockWidth/64 doubles
     // and call the x86_avx_movmsk_pd_256 intrinsic on it.
     194    if (fw == 64) {
     195        Value * signmask_f64func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_avx_movmsk_pd_256);
     196        Type * bitBlock_f64type = VectorType::get(mLLVMBuilder->getDoubleTy(), mBitBlockWidth/64);
     197        Value * a_as_pd = mLLVMBuilder->CreateBitCast(a, bitBlock_f64type);
     198        Value * mask = mLLVMBuilder->CreateCall(signmask_f64func, std::vector<Value *>({a_as_pd}));
     199        return mask;
     200    }
     // 32-bit fields: same approach with a vector of mBitBlockWidth/32 floats
     // and the x86_avx_movmsk_ps_256 intrinsic.
     201    else if (fw == 32) {
     202        Value * signmask_f32func = Intrinsic::getDeclaration(mMod, Intrinsic::x86_avx_movmsk_ps_256);
     203        Type * bitBlock_f32type = VectorType::get(mLLVMBuilder->getFloatTy(), mBitBlockWidth/32);
     204        Value * a_as_ps = mLLVMBuilder->CreateBitCast(a, bitBlock_f32type);
     205        Value * mask = mLLVMBuilder->CreateCall(signmask_f32func, std::vector<Value *>({a_as_ps}));
     206        return mask;
     207    }
     208#endif
     // Generic path (also reached for other field widths when SIGNMASK_AVX2 is
     // defined): signed less-than-zero compare of each field, then bitcast the
     // comparison result to an integer of mBitBlockWidth/fw bits.
    189209    Value * mask = mLLVMBuilder->CreateICmpSLT(fwCast(fw, a), ConstantAggregateZero::get(fwVectorType(fw)));
    190210    return mLLVMBuilder->CreateBitCast(mask, mLLVMBuilder->getIntNTy(mBitBlockWidth/fw));
     
    194214    Value * aVec = fwCast(fw, a);
    195215    return mLLVMBuilder->CreateExtractElement(aVec, mLLVMBuilder->getInt32(fieldIndex));
     216}
     217
     // Emits IR that inserts `elt` into `blk` at element position `fieldIndex`.
     //
     // fw:         field width passed to fwCast to choose the vector view of `blk`.
     // blk:        the block value to insert into.
     // elt:        the element value to insert.
     // fieldIndex: index of the destination element, emitted as an i32 constant.
     //
     // Returns the Value produced by CreateInsertElement.
     // NOTE(review): fwCast is defined elsewhere; presumably it reinterprets
     // `blk` as a vector of fw-bit fields -- confirm.
     218Value * IDISA_Builder::mvmd_insert(unsigned fw, Value * blk, Value * elt, unsigned fieldIndex) {
     219    Value * vec = fwCast(fw, blk);
     220    return mLLVMBuilder->CreateInsertElement(vec, elt, mLLVMBuilder->getInt32(fieldIndex));
    196221}
    197222
Note: See TracChangeset for help on using the changeset viewer.