Changeset 6063


Ignore:
Timestamp:
May 31, 2018, 4:36:43 PM (5 months ago)
Author:
cameron
Message:

bit interleave (esimd_merge<1>) for AVX2

Location:
icGREP/icgrep-devel/icgrep/IR_Gen
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.cpp

    r6058 r6063  
    100100
    101101Value * IDISA_AVX2_Builder::esimd_mergeh(unsigned fw, Value * a, Value * b) {
     102    if ((fw == 1) && (mBitBlockWidth == 256)) {
     103        // Bit interleave using shuffle.   
     104        Value * shufFn = Intrinsic::getDeclaration(getModule(),  Intrinsic::x86_avx2_pshuf_b);
     105        // Make a shuffle table that translates the lower 4 bits of each byte in
     106        // order to spread out the bits: xxxxdcba => .d.c.b.a
     107        // We use two copies of the table for the AVX2 _mm256_shuffle_epi8
     108        Constant * bit_interleave[32];
     109        for (unsigned i = 0; i < 32; i++) {
     110            bit_interleave[i] = getInt8((i & 1) | ((i & 2) << 1) | ((i & 4) << 2) | ((i & 8) << 3));
     111        }
     112        Constant * interleave_table = ConstantVector::get({bit_interleave, 32});
     113        // Merge the bytes.
     114        Value * byte_merge = esimd_mergeh(8, a, b);
     115        Value * low_bits = CreateCall(shufFn, {interleave_table,  fwCast(8, simd_and(byte_merge, simd_lomask(8)))});
     116        Value * high_bits = simd_slli(16, CreateCall(shufFn, {interleave_table, fwCast(8, simd_srli(8, byte_merge, 4))}), 1);
     117        // For each 16-bit field, interleave the low bits of the two bytes.
     118        low_bits = simd_or(simd_and(low_bits, simd_lomask(16)), simd_srli(16, low_bits, 7));
     119        // For each 16-bit field, interleave the high bits of the two bytes.
     120        high_bits = simd_or(simd_and(high_bits, simd_himask(16)), simd_slli(16, high_bits, 7));
     121        return simd_or(low_bits, high_bits);
     122    }
    102123#if LLVM_VERSION_INTEGER < LLVM_VERSION_CODE(6, 0, 0)
    103124    if ((fw == 128) && (mBitBlockWidth == 256)) {
     
    111132
    112133Value * IDISA_AVX2_Builder::esimd_mergel(unsigned fw, Value * a, Value * b) {
     134    if ((fw == 1) && (mBitBlockWidth == 256)) {
     135        // Bit interleave using shuffle.   
     136        Value * shufFn = Intrinsic::getDeclaration(getModule(),  Intrinsic::x86_avx2_pshuf_b);
     137        // Make a shuffle table that translates the lower 4 bits of each byte in
     138        // order to spread out the bits: xxxxdcba => .d.c.b.a
     139        // We use two copies of the table for the AVX2 _mm256_shuffle_epi8
     140        Constant * bit_interleave[32];
     141        for (unsigned i = 0; i < 32; i++) {
     142            bit_interleave[i] = getInt8((i & 1) | ((i & 2) << 1) | ((i & 4) << 2) | ((i & 8) << 3));
     143        }
     144        Constant * interleave_table = ConstantVector::get({bit_interleave, 32});
     145        // Merge the bytes.
     146        Value * byte_merge = esimd_mergel(8, a, b);
     147        Value * low_bits = CreateCall(shufFn, {interleave_table,  fwCast(8, simd_and(byte_merge, simd_lomask(8)))});
     148        Value * high_bits = simd_slli(16, CreateCall(shufFn, {interleave_table, fwCast(8, simd_srli(8, byte_merge, 4))}), 1);
     149        // For each 16-bit field, interleave the low bits of the two bytes.
     150        low_bits = simd_or(simd_and(low_bits, simd_lomask(16)), simd_srli(16, low_bits, 7));
     151        // For each 16-bit field, interleave the high bits of the two bytes.
     152        high_bits = simd_or(simd_and(high_bits, simd_himask(16)), simd_slli(16, high_bits, 7));
     153        return simd_or(low_bits, high_bits);
     154    }
    113155#if LLVM_VERSION_INTEGER < LLVM_VERSION_CODE(6, 0, 0)
    114156    if ((fw == 128) && (mBitBlockWidth == 256)) {
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.cpp

    r6059 r6063  
    138138
    139139Value * IDISA_Builder::simd_ugt(unsigned fw, Value * a, Value * b) {
     140    if (fw == 1) return simd_and(a, simd_not(b));
    140141    if (fw < 8) report_fatal_error("Unsupported field width: ugt " + std::to_string(fw));
    141142    return CreateSExt(CreateICmpUGT(fwCast(fw, a), fwCast(fw, b)), fwVectorType(fw));
Note: See TracChangeset for help on using the changeset viewer.