Changeset 6100


Ignore:
Timestamp:
Jun 17, 2018, 1:10:11 PM (4 weeks ago)
Author:
cameron
Message:

esimd bit merge optimizations for AVX-512

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.cpp

    r6099 r6100  
    765765        Value * low_bits = CreateCall(shufFn, {interleave_table, fwCast(8, simd_and(byte_merge, simd_lomask(8))), zeroByteSplat, mask});
    766766        Value * high_bits = simd_slli(16, CreateCall(shufFn, {interleave_table, fwCast(8, simd_srli(8, byte_merge, 4)), zeroByteSplat, mask}), fw);
    767         // For each 16-bit field, interleave the low bits of the two bytes.
    768         low_bits = simd_or(simd_and(low_bits, simd_lomask(16)), simd_srli(16, low_bits, 8-fw));
    769         // For each 16-bit field, interleave the high bits of the two bytes.
    770         high_bits = simd_or(simd_and(high_bits, simd_himask(16)), simd_slli(16, high_bits, 8-fw));
    771         return simd_or(low_bits, high_bits);
     767        Value * lo_move_back = simd_srli(16, low_bits, 8-fw);
     768        Value * hi_move_fwd = simd_slli(16, high_bits, 8-fw);
     769        return simd_or(simd_if(1, simd_himask(16), high_bits, low_bits), simd_or(lo_move_back, hi_move_fwd));
    772770    }
    773771    // Otherwise use default AVX2 logic.
     
    789787        Value * low_bits = CreateCall(shufFn, {interleave_table, fwCast(8, simd_and(byte_merge, simd_lomask(8))), zeroByteSplat, mask});
    790788        Value * high_bits = simd_slli(16, CreateCall(shufFn, {interleave_table, fwCast(8, simd_srli(8, byte_merge, 4)), zeroByteSplat, mask}), fw);
    791         // For each 16-bit field, interleave the low bits of the two bytes.
    792         low_bits = simd_or(simd_and(low_bits, simd_lomask(16)), simd_srli(16, low_bits, 8-fw));
    793         // For each 16-bit field, interleave the high bits of the two bytes.
    794         high_bits = simd_or(simd_and(high_bits, simd_himask(16)), simd_slli(16, high_bits, 8-fw));
    795         return simd_or(low_bits, high_bits);
     789        Value * lo_move_back = simd_srli(16, low_bits, 8-fw);
     790        Value * hi_move_fwd = simd_slli(16, high_bits, 8-fw);
     791        return simd_or(simd_if(1, simd_himask(16), high_bits, low_bits), simd_or(lo_move_back, hi_move_fwd));
    796792    }
    797793    // Otherwise use default AVX2 logic.
Note: See TracChangeset for help on using the changeset viewer.