Changeset 6103


Ignore:
Timestamp:
Jun 19, 2018, 1:49:49 PM (3 months ago)
Author:
cameron
Message:

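AVX-512 pshufb has a new, unmasked intrinsic (x86_avx512_pshuf_b_512) as of LLVM 4.0; use it there, and keep the masked intrinsic (x86_avx512_mask_pshuf_b_512) for older LLVM versions.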

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.cpp

    r6102 r6103  
    754754    if (hostCPUFeatures.hasAVX512BW && ((fw == 1) || (fw == 2))) {
    755755        // Bit interleave using shuffle.
    756         Value * shufFn = Intrinsic::getDeclaration(getModule(),  Intrinsic::x86_avx512_mask_pshuf_b_512);
    757756        // Make a shuffle table that translates the lower 4 bits of each byte in
    758757        // order to spread out the bits: xxxxdcba => .d.c.b.a
     
    761760        // Merge the bytes.
    762761        Value * byte_merge = esimd_mergeh(8, a, b);
     762#if LLVM_VERSION_INTEGER < LLVM_VERSION_CODE(4, 0, 0)       
     763        Value * shufFn = Intrinsic::getDeclaration(getModule(),  Intrinsic::x86_avx512_mask_pshuf_b_512);
     764        // Make a shuffle table that translates the lower 4 bits of each byte in
     765        // order to spread out the bits: xxxxdcba => .d.c.b.a
     766        // We use two copies of the table for the AVX2 _mm256_shuffle_epi8
    763767        Value * zeroByteSplat = fwCast(8, allZeroes());
    764768        Constant * mask = ConstantInt::getAllOnesValue(getInt64Ty());
    765769        Value * low_bits = CreateCall(shufFn, {interleave_table, fwCast(8, simd_and(byte_merge, simd_lomask(8))), zeroByteSplat, mask});
    766770        Value * high_bits = simd_slli(16, CreateCall(shufFn, {interleave_table, fwCast(8, simd_srli(8, byte_merge, 4)), zeroByteSplat, mask}), fw);
     771#else
     772        Value * shufFn = Intrinsic::getDeclaration(getModule(),  Intrinsic::x86_avx512_pshuf_b_512);
     773        Value * low_bits = CreateCall(shufFn, {interleave_table, fwCast(8, simd_and(byte_merge, simd_lomask(8)))});
     774        Value * high_bits = simd_slli(16, CreateCall(shufFn, {interleave_table, fwCast(8, simd_srli(8, byte_merge, 4))}), fw);
     775#endif
    767776        Value * lo_move_back = simd_srli(16, low_bits, 8-fw);
    768777        Value * hi_move_fwd = simd_slli(16, high_bits, 8-fw);
     
    804813    if (hostCPUFeatures.hasAVX512BW && ((fw == 1) || (fw == 2))) {
    805814        // Bit interleave using shuffle.
    806         Value * shufFn = Intrinsic::getDeclaration(getModule(),  Intrinsic::x86_avx512_mask_pshuf_b_512);
    807815        // Make a shuffle table that translates the lower 4 bits of each byte in
    808816        // order to spread out the bits: xxxxdcba => .d.c.b.a
     
    811819        // Merge the bytes.
    812820        Value * byte_merge = esimd_mergel(8, a, b);
     821       
     822#if LLVM_VERSION_INTEGER < LLVM_VERSION_CODE(4, 0, 0)       
     823        Value * shufFn = Intrinsic::getDeclaration(getModule(),  Intrinsic::x86_avx512_mask_pshuf_b_512);
     824        // Make a shuffle table that translates the lower 4 bits of each byte in
     825        // order to spread out the bits: xxxxdcba => .d.c.b.a
     826        // We use two copies of the table for the AVX2 _mm256_shuffle_epi8
    813827        Value * zeroByteSplat = fwCast(8, allZeroes());
    814828        Constant * mask = ConstantInt::getAllOnesValue(getInt64Ty());
    815829        Value * low_bits = CreateCall(shufFn, {interleave_table, fwCast(8, simd_and(byte_merge, simd_lomask(8))), zeroByteSplat, mask});
    816830        Value * high_bits = simd_slli(16, CreateCall(shufFn, {interleave_table, fwCast(8, simd_srli(8, byte_merge, 4)), zeroByteSplat, mask}), fw);
     831#else
     832        Value * shufFn = Intrinsic::getDeclaration(getModule(),  Intrinsic::x86_avx512_pshuf_b_512);
     833        Value * low_bits = CreateCall(shufFn, {interleave_table, fwCast(8, simd_and(byte_merge, simd_lomask(8)))});
     834        Value * high_bits = simd_slli(16, CreateCall(shufFn, {interleave_table, fwCast(8, simd_srli(8, byte_merge, 4))}), fw);
     835#endif
    817836        Value * lo_move_back = simd_srli(16, low_bits, 8-fw);
    818837        Value * hi_move_fwd = simd_slli(16, high_bits, 8-fw);
Note: See TracChangeset for help on using the changeset viewer.