Ignore:
Timestamp:
Jun 9, 2018, 1:51:10 PM (11 months ago)
Author:
cameron
Message:

IDISA_SSSE3 builder; other IDISA operations

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.cpp

    r6063 r6076  
    387387
    388388   
    389 llvm::Value * IDISA_AVX2_Builder::mvmd_shuffle(unsigned fw, llvm::Value * a, llvm::Value * shuffle_table) {
     389llvm::Value * IDISA_AVX2_Builder::mvmd_shuffle(unsigned fw, llvm::Value * a, llvm::Value * index_vector) {
    390390    if (mBitBlockWidth == 256 && fw > 32) {
     391        const unsigned fieldCount = mBitBlockWidth/fw;
    391392        // Create a table for shuffling with smaller field widths.
     393        Constant * idxMask = ConstantVector::getSplat(fieldCount, ConstantInt::get(getIntNTy(fw), fieldCount-1));
     394        Value * idx = simd_and(index_vector, idxMask);
    392395        unsigned half_fw = fw/2;
    393396        unsigned field_count = mBitBlockWidth/half_fw;
     
    395398        Constant * Idxs[field_count];
    396399        for (unsigned int i = 0; i < field_count; i++) {
    397             Idxs[i] = getInt32(i & 1);
     400            Idxs[i] = ConstantInt::get(getIntNTy(fw/2), i & 1);
    398401        }
    399402        Constant * splat01 = ConstantVector::get({Idxs, field_count});
    400         Value * half_shuffle_table = simd_or(shuffle_table, mvmd_slli(half_fw, shuffle_table, 1));
    401         half_shuffle_table = simd_add(fw, simd_add(fw, half_shuffle_table, half_shuffle_table), splat01);
    402         Value * rslt = mvmd_shuffle(half_fw, a, half_shuffle_table);
     403        Value * half_fw_indexes = simd_or(idx, mvmd_slli(half_fw, idx, 1));
     404        half_fw_indexes = simd_add(fw, simd_add(fw, half_fw_indexes, half_fw_indexes), splat01);
     405        Value * rslt = mvmd_shuffle(half_fw, a, half_fw_indexes);
    403406        return rslt;
    404407    }
    405408    if (mBitBlockWidth == 256 && fw == 32) {
    406409        Value * shuf32Func = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_avx2_permd);
    407         return CreateCall(shuf32Func, {fwCast(32, a), fwCast(32, shuffle_table)});
    408     }
    409     return IDISA_Builder::mvmd_shuffle(fw, a, shuffle_table);
     410        return CreateCall(shuf32Func, {fwCast(32, a), fwCast(32, index_vector)});
     411    }
     412    return IDISA_Builder::mvmd_shuffle(fw, a, index_vector);
    410413}
    411414
     
    580583}
    581584
    582 llvm::Value * IDISA_AVX512F_Builder::mvmd_shuffle(unsigned fw, llvm::Value * a, llvm::Value * shuffle_table) {
     585llvm::Value * IDISA_AVX512F_Builder::mvmd_shuffle(unsigned fw, llvm::Value * data_table, llvm::Value * index_vector) {
    583586    const unsigned fieldCount = mBitBlockWidth/fw;
    584587    if (mBitBlockWidth == 512 && fw == 32) {
    585588        Value * permuteFunc = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_avx512_mask_vpermt2var_d_512);
    586589        Constant * mask = ConstantInt::getAllOnesValue(getIntNTy(fieldCount));
    587         return CreateCall(permuteFunc, {fwCast(fw, shuffle_table), fwCast(fw, a), fwCast(fw, a), mask});
     590        return CreateCall(permuteFunc, {fwCast(fw, index_vector), fwCast(fw, data_table), fwCast(fw, data_table), mask});
    588591    }
    589592    if (mBitBlockWidth == 512 && fw == 64) {
    590593        Value * permuteFunc = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_avx512_mask_vpermt2var_q_512);
    591594        Constant * mask = ConstantInt::getAllOnesValue(getIntNTy(fieldCount));
    592         return CreateCall(permuteFunc, {fwCast(fw, shuffle_table), fwCast(fw, a), fwCast(fw, a), mask});
     595        return CreateCall(permuteFunc, {fwCast(fw, index_vector), fwCast(fw, data_table), fwCast(fw, data_table), mask});
    593596    }
    594597    if (mBitBlockWidth == 512 && fw == 16 && hostCPUFeatures.hasAVX512BW) {
    595598        Value * permuteFunc = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_avx512_maskz_vpermt2var_hi_512);
    596599        Constant * mask = ConstantInt::getAllOnesValue(getIntNTy(fieldCount));
    597         return CreateCall(permuteFunc, {fwCast(fw, shuffle_table), fwCast(fw, a), fwCast(fw, a), mask});
    598     }
    599     return IDISA_Builder::mvmd_shuffle(fw, a, shuffle_table);
    600 }
    601 
    602 llvm::Value * IDISA_AVX512F_Builder::mvmd_shuffle2(unsigned fw, Value * a, Value * b, llvm::Value * shuffle_table) {
     600        return CreateCall(permuteFunc, {fwCast(fw, index_vector), fwCast(fw, data_table), fwCast(fw, data_table), mask});
     601    }
     602    return IDISA_Builder::mvmd_shuffle(fw, data_table, index_vector);
     603}
     604
     605llvm::Value * IDISA_AVX512F_Builder::mvmd_shuffle2(unsigned fw, Value * table0, llvm::Value * table1, llvm::Value * index_vector) {
    603606    const unsigned fieldCount = mBitBlockWidth/fw;
    604607    if (mBitBlockWidth == 512 && fw == 32) {
    605608        Value * permuteFunc = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_avx512_mask_vpermt2var_d_512);
    606609        Constant * mask = ConstantInt::getAllOnesValue(getIntNTy(fieldCount));
    607         return CreateCall(permuteFunc, {fwCast(fw, shuffle_table), fwCast(fw, a), fwCast(fw, b), mask});
     610        return CreateCall(permuteFunc, {fwCast(fw, index_vector), fwCast(fw, table0), fwCast(fw, table1), mask});
    608611    }
    609612    if (mBitBlockWidth == 512 && fw == 64) {
    610613        Value * permuteFunc = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_avx512_mask_vpermt2var_q_512);
    611614        Constant * mask = ConstantInt::getAllOnesValue(getIntNTy(fieldCount));
    612         return CreateCall(permuteFunc, {fwCast(fw, shuffle_table), fwCast(fw, a), fwCast(fw, b), mask});
     615        return CreateCall(permuteFunc, {fwCast(fw, index_vector), fwCast(fw, table0), fwCast(fw, table1), mask});
    613616    }
    614617    if (mBitBlockWidth == 512 && fw == 16 && hostCPUFeatures.hasAVX512BW) {
    615618        Value * permuteFunc = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_avx512_maskz_vpermt2var_hi_512);
    616619        Constant * mask = ConstantInt::getAllOnesValue(getIntNTy(fieldCount));
    617         return CreateCall(permuteFunc, {fwCast(fw, shuffle_table), fwCast(fw, a), fwCast(fw, b), mask});
    618     }
    619     return IDISA_Builder::mvmd_shuffle2(fw, a, b, shuffle_table);
     620        return CreateCall(permuteFunc, {fwCast(fw, index_vector), fwCast(fw, table0), fwCast(fw, table1), mask});
     621    }
     622    return IDISA_Builder::mvmd_shuffle2(fw, table0, table1, index_vector);
    620623}
    621624
Note: See TracChangeset for help on using the changeset viewer.