Ignore:
Timestamp:
Jun 13, 2018, 9:51:17 PM (10 months ago)
Author:
cameron
Message:

IDISA builder updates for esimd_mergel/esimd_mergeh

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.cpp

    r6076 r6087  
    100100
    101101Value * IDISA_AVX2_Builder::esimd_mergeh(unsigned fw, Value * a, Value * b) {
    102     if ((fw == 1) && (mBitBlockWidth == 256)) {
     102    if ((fw == 1) || (fw == 2)) {
    103103        // Bit interleave using shuffle.   
    104104        Value * shufFn = Intrinsic::getDeclaration(getModule(),  Intrinsic::x86_avx2_pshuf_b);
     
    106106        // order to spread out the bits: xxxxdcba => .d.c.b.a
    107107        // We use two copies of the table for the AVX2 _mm256_shuffle_epi8
    108         Constant * bit_interleave[32];
    109         for (unsigned i = 0; i < 32; i++) {
    110             bit_interleave[i] = getInt8((i & 1) | ((i & 2) << 1) | ((i & 4) << 2) | ((i & 8) << 3));
    111         }
    112         Constant * interleave_table = ConstantVector::get({bit_interleave, 32});
     108        Constant * interleave_table = bit_interleave_byteshuffle_table(fw);
    113109        // Merge the bytes.
    114110        Value * byte_merge = esimd_mergeh(8, a, b);
    115111        Value * low_bits = CreateCall(shufFn, {interleave_table,  fwCast(8, simd_and(byte_merge, simd_lomask(8)))});
    116         Value * high_bits = simd_slli(16, CreateCall(shufFn, {interleave_table, fwCast(8, simd_srli(8, byte_merge, 4))}), 1);
     112        Value * high_bits = simd_slli(16, CreateCall(shufFn, {interleave_table, fwCast(8, simd_srli(8, byte_merge, 4))}), fw);
    117113        // For each 16-bit field, interleave the low bits of the two bytes.
    118         low_bits = simd_or(simd_and(low_bits, simd_lomask(16)), simd_srli(16, low_bits, 7));
     114        low_bits = simd_or(simd_and(low_bits, simd_lomask(16)), simd_srli(16, low_bits, 8-fw));
    119115        // For each 16-bit field, interleave the high bits of the two bytes.
    120         high_bits = simd_or(simd_and(high_bits, simd_himask(16)), simd_slli(16, high_bits, 7));
     116        high_bits = simd_or(simd_and(high_bits, simd_himask(16)), simd_slli(16, high_bits, 8-fw));
    121117        return simd_or(low_bits, high_bits);
    122118    }
     
    132128
    133129Value * IDISA_AVX2_Builder::esimd_mergel(unsigned fw, Value * a, Value * b) {
    134     if ((fw == 1) && (mBitBlockWidth == 256)) {
     130    if ((fw == 1) || (fw == 2)) {
    135131        // Bit interleave using shuffle.   
    136132        Value * shufFn = Intrinsic::getDeclaration(getModule(),  Intrinsic::x86_avx2_pshuf_b);
     
    138134        // order to spread out the bits: xxxxdcba => .d.c.b.a
    139135        // We use two copies of the table for the AVX2 _mm256_shuffle_epi8
    140         Constant * bit_interleave[32];
    141         for (unsigned i = 0; i < 32; i++) {
    142             bit_interleave[i] = getInt8((i & 1) | ((i & 2) << 1) | ((i & 4) << 2) | ((i & 8) << 3));
    143         }
    144         Constant * interleave_table = ConstantVector::get({bit_interleave, 32});
     136        Constant * interleave_table = bit_interleave_byteshuffle_table(fw);
    145137        // Merge the bytes.
    146138        Value * byte_merge = esimd_mergel(8, a, b);
    147139        Value * low_bits = CreateCall(shufFn, {interleave_table,  fwCast(8, simd_and(byte_merge, simd_lomask(8)))});
    148         Value * high_bits = simd_slli(16, CreateCall(shufFn, {interleave_table, fwCast(8, simd_srli(8, byte_merge, 4))}), 1);
     140        Value * high_bits = simd_slli(16, CreateCall(shufFn, {interleave_table, fwCast(8, simd_srli(8, byte_merge, 4))}), fw);
    149141        // For each 16-bit field, interleave the low bits of the two bytes.
    150         low_bits = simd_or(simd_and(low_bits, simd_lomask(16)), simd_srli(16, low_bits, 7));
     142        low_bits = simd_or(simd_and(low_bits, simd_lomask(16)), simd_srli(16, low_bits, 8-fw));
    151143        // For each 16-bit field, interleave the high bits of the two bytes.
    152         high_bits = simd_or(simd_and(high_bits, simd_himask(16)), simd_slli(16, high_bits, 7));
     144        high_bits = simd_or(simd_and(high_bits, simd_himask(16)), simd_slli(16, high_bits, 8-fw));
    153145        return simd_or(low_bits, high_bits);
    154146    }
Note: See TracChangeset for help on using the changeset viewer.