Changeset 6098


Ignore:
Timestamp:
Jun 16, 2018, 6:52:23 PM (3 months ago)
Author:
cameron
Message:

esimd_mergel/h 1 for AVX-512

Location:
icGREP/icgrep-devel/icgrep/IR_Gen
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.cpp

    r6087 r6098  
    751751}
    752752
     753Value * IDISA_AVX512F_Builder::esimd_mergeh(unsigned fw, Value * a, Value * b) {
     754    if (hostCPUFeatures.hasAVX512BW && ((fw == 1) || (fw == 2))) {
     755        // Bit interleave using shuffle.
     756        Value * shufFn = Intrinsic::getDeclaration(getModule(),  Intrinsic::x86_avx512_mask_pshuf_b_512);
     757        // Make a shuffle table that translates the lower 4 bits of each byte in
     758        // order to spread out the bits: xxxxdcba => .d.c.b.a
     759        // We use two copies of the table for the AVX2 _mm256_shuffle_epi8
     760        Constant * interleave_table = bit_interleave_byteshuffle_table(fw);
     761        // Merge the bytes.
     762        Value * byte_merge = esimd_mergeh(8, a, b);
     763        Value * zeroByteSplat = fwCast(8, allZeroes());
     764        Constant * mask = ConstantInt::getAllOnesValue(getInt64Ty());
     765        Value * low_bits = CreateCall(shufFn, {interleave_table, fwCast(8, simd_and(byte_merge, simd_lomask(8))), zeroByteSplat, mask});
     766        Value * high_bits = simd_slli(16, CreateCall(shufFn, {interleave_table, fwCast(8, simd_srli(8, byte_merge, 4)), zeroByteSplat, mask}), fw);
     767        // For each 16-bit field, interleave the low bits of the two bytes.
     768        low_bits = simd_or(simd_and(low_bits, simd_lomask(16)), simd_srli(16, low_bits, 8-fw));
     769        // For each 16-bit field, interleave the high bits of the two bytes.
     770        high_bits = simd_or(simd_and(high_bits, simd_himask(16)), simd_slli(16, high_bits, 8-fw));
     771        return simd_or(low_bits, high_bits);
     772    }
     773    // Otherwise use default AVX2 logic.
     774    return IDISA_AVX2_Builder::esimd_mergeh(fw, a, b);
     775}
     776
     777Value * IDISA_AVX512F_Builder::esimd_mergel(unsigned fw, Value * a, Value * b) {
     778    if (hostCPUFeatures.hasAVX512BW && ((fw == 1) || (fw == 2))) {
     779        // Bit interleave using shuffle.
     780        Value * shufFn = Intrinsic::getDeclaration(getModule(),  Intrinsic::x86_avx512_mask_pshuf_b_512);
     781        // Make a shuffle table that translates the lower 4 bits of each byte in
     782        // order to spread out the bits: xxxxdcba => .d.c.b.a
     783        // We use two copies of the table for the AVX2 _mm256_shuffle_epi8
     784        Constant * interleave_table = bit_interleave_byteshuffle_table(fw);
     785        // Merge the bytes.
     786        Value * byte_merge = esimd_mergel(8, a, b);
     787        Value * zeroByteSplat = fwCast(8, allZeroes());
     788        Constant * mask = ConstantInt::getAllOnesValue(getInt64Ty());
     789        Value * low_bits = CreateCall(shufFn, {interleave_table, fwCast(8, simd_and(byte_merge, simd_lomask(8))), zeroByteSplat, mask});
     790        Value * high_bits = simd_slli(16, CreateCall(shufFn, {interleave_table, fwCast(8, simd_srli(8, byte_merge, 4)), zeroByteSplat, mask}), fw);
     791        // For each 16-bit field, interleave the low bits of the two bytes.
     792        low_bits = simd_or(simd_and(low_bits, simd_lomask(16)), simd_srli(16, low_bits, 8-fw));
     793        // For each 16-bit field, interleave the high bits of the two bytes.
     794        high_bits = simd_or(simd_and(high_bits, simd_himask(16)), simd_slli(16, high_bits, 8-fw));
     795        return simd_or(low_bits, high_bits);
     796    }
     797    // Otherwise use default AVX2 logic.
     798    return IDISA_AVX2_Builder::esimd_mergel(fw, a, b);
     799}
     800
     801
    753802void IDISA_AVX512F_Builder::getAVX512Features() {
    754803    llvm::StringMap<bool> features;
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.h

    r6076 r6098  
    7474    llvm::Value * hsimd_packh(unsigned fw, llvm::Value * a, llvm::Value * b) override;
    7575    llvm::Value * hsimd_packl(unsigned fw, llvm::Value * a, llvm::Value * b) override;
     76    llvm::Value * esimd_mergeh(unsigned fw, llvm::Value * a, llvm::Value * b) override;
     77    llvm::Value * esimd_mergel(unsigned fw, llvm::Value * a, llvm::Value * b) override;
    7678    llvm::Value * esimd_bitspread(unsigned fw, llvm::Value * bitmask) override;
    7779    llvm::Value * simd_popcount(unsigned fw, llvm::Value * a) override;
Note: See TracChangeset for help on using the changeset viewer.