Changeset 6087


Ignore:
Timestamp:
Jun 13, 2018, 9:51:17 PM (7 days ago)
Author:
cameron
Message:

IDISA builder updates for esimd_mergel/h

Location:
icGREP/icgrep-devel/icgrep/IR_Gen
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_avx_builder.cpp

    r6076 r6087  
    100100
    101101Value * IDISA_AVX2_Builder::esimd_mergeh(unsigned fw, Value * a, Value * b) {
    102     if ((fw == 1) && (mBitBlockWidth == 256)) {
     102    if ((fw == 1) || (fw == 2)) {
    103103        // Bit interleave using shuffle.   
    104104        Value * shufFn = Intrinsic::getDeclaration(getModule(),  Intrinsic::x86_avx2_pshuf_b);
     
    106106        // order to spread out the bits: xxxxdcba => .d.c.b.a
    107107        // We use two copies of the table for the AVX2 _mm256_shuffle_epi8
    108         Constant * bit_interleave[32];
    109         for (unsigned i = 0; i < 32; i++) {
    110             bit_interleave[i] = getInt8((i & 1) | ((i & 2) << 1) | ((i & 4) << 2) | ((i & 8) << 3));
    111         }
    112         Constant * interleave_table = ConstantVector::get({bit_interleave, 32});
     108        Constant * interleave_table = bit_interleave_byteshuffle_table(fw);
    113109        // Merge the bytes.
    114110        Value * byte_merge = esimd_mergeh(8, a, b);
    115111        Value * low_bits = CreateCall(shufFn, {interleave_table,  fwCast(8, simd_and(byte_merge, simd_lomask(8)))});
    116         Value * high_bits = simd_slli(16, CreateCall(shufFn, {interleave_table, fwCast(8, simd_srli(8, byte_merge, 4))}), 1);
     112        Value * high_bits = simd_slli(16, CreateCall(shufFn, {interleave_table, fwCast(8, simd_srli(8, byte_merge, 4))}), fw);
    117113        // For each 16-bit field, interleave the low bits of the two bytes.
    118         low_bits = simd_or(simd_and(low_bits, simd_lomask(16)), simd_srli(16, low_bits, 7));
     114        low_bits = simd_or(simd_and(low_bits, simd_lomask(16)), simd_srli(16, low_bits, 8-fw));
    119115        // For each 16-bit field, interleave the high bits of the two bytes.
    120         high_bits = simd_or(simd_and(high_bits, simd_himask(16)), simd_slli(16, high_bits, 7));
     116        high_bits = simd_or(simd_and(high_bits, simd_himask(16)), simd_slli(16, high_bits, 8-fw));
    121117        return simd_or(low_bits, high_bits);
    122118    }
     
    132128
    133129Value * IDISA_AVX2_Builder::esimd_mergel(unsigned fw, Value * a, Value * b) {
    134     if ((fw == 1) && (mBitBlockWidth == 256)) {
     130    if ((fw == 1) || (fw == 2)) {
    135131        // Bit interleave using shuffle.   
    136132        Value * shufFn = Intrinsic::getDeclaration(getModule(),  Intrinsic::x86_avx2_pshuf_b);
     
    138134        // order to spread out the bits: xxxxdcba => .d.c.b.a
    139135        // We use two copies of the table for the AVX2 _mm256_shuffle_epi8
    140         Constant * bit_interleave[32];
    141         for (unsigned i = 0; i < 32; i++) {
    142             bit_interleave[i] = getInt8((i & 1) | ((i & 2) << 1) | ((i & 4) << 2) | ((i & 8) << 3));
    143         }
    144         Constant * interleave_table = ConstantVector::get({bit_interleave, 32});
     136        Constant * interleave_table = bit_interleave_byteshuffle_table(fw);
    145137        // Merge the bytes.
    146138        Value * byte_merge = esimd_mergel(8, a, b);
    147139        Value * low_bits = CreateCall(shufFn, {interleave_table,  fwCast(8, simd_and(byte_merge, simd_lomask(8)))});
    148         Value * high_bits = simd_slli(16, CreateCall(shufFn, {interleave_table, fwCast(8, simd_srli(8, byte_merge, 4))}), 1);
     140        Value * high_bits = simd_slli(16, CreateCall(shufFn, {interleave_table, fwCast(8, simd_srli(8, byte_merge, 4))}), fw);
    149141        // For each 16-bit field, interleave the low bits of the two bytes.
    150         low_bits = simd_or(simd_and(low_bits, simd_lomask(16)), simd_srli(16, low_bits, 7));
     142        low_bits = simd_or(simd_and(low_bits, simd_lomask(16)), simd_srli(16, low_bits, 8-fw));
    151143        // For each 16-bit field, interleave the high bits of the two bytes.
    152         high_bits = simd_or(simd_and(high_bits, simd_himask(16)), simd_slli(16, high_bits, 7));
     144        high_bits = simd_or(simd_and(high_bits, simd_himask(16)), simd_slli(16, high_bits, 8-fw));
    153145        return simd_or(low_bits, high_bits);
    154146    }
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.cpp

    r6083 r6087  
    758758}
    759759
     760
     761Constant * IDISA_Builder::bit_interleave_byteshuffle_table(unsigned fw) {
     762    const unsigned fieldCount = mBitBlockWidth/8;
     763    if (fw > 2) llvm::report_fatal_error("bit_interleave_byteshuffle_table requires fw == 1 or fw == 2");
     764    // Bit interleave using shuffle.
     765    // Make a shuffle table that translates the lower 4 bits of each byte in
     766    // order to spread out the bits: xxxxdcba => .d.c.b.a (fw = 1)
     767    Constant * bit_interleave[fieldCount];
     768    for (unsigned i = 0; i < fieldCount; i++) {
     769        if (fw == 1)
     770            bit_interleave[i] = getInt8((i & 1) | ((i & 2) << 1) | ((i & 4) << 2) | ((i & 8) << 3));
     771        else bit_interleave[i] = getInt8((i & 3) | ((i & 0x0C) << 2));
     772    }
     773    return ConstantVector::get({bit_interleave, fieldCount});
     774}
     775
     776
    760777IDISA_Builder::IDISA_Builder(LLVMContext & C, unsigned vectorWidth, unsigned stride)
    761778: CBuilder(C)
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.h

    r6076 r6087  
    201201
    202202protected:
     203    llvm::Constant * bit_interleave_byteshuffle_table(unsigned fw);  // support function for merge using shuffles.
     204
    203205    const unsigned              mBitBlockWidth;
    204206    const unsigned              mStride;
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_sse_builder.cpp

    r6076 r6087  
    188188}
    189189
    190 Constant * IDISA_SSSE3_Builder::bit_interleave_byteshuffle_table(unsigned fw) {
    191     const unsigned fieldCount = mBitBlockWidth/8;
    192     if (fw > 2) llvm::report_fatal_error("bit_interleave_byteshuffle_table requires fw == 1 or fw == 2");
    193     // Bit interleave using shuffle.
    194     // Make a shuffle table that translates the lower 4 bits of each byte in
    195     // order to spread out the bits: xxxxdcba => .d.c.b.a (fw = 1)
    196     Constant * bit_interleave[fieldCount];
    197     for (unsigned i = 0; i < fieldCount; i++) {
    198         if (fw == 1)
    199             bit_interleave[i] = getInt8((i & 1) | ((i & 2) << 1) | ((i & 4) << 2) | ((i & 8) << 3));
    200         else bit_interleave[i] = getInt8((i & 3) | ((i & 0x0C) << 2));
    201     }
    202     return ConstantVector::get({bit_interleave, fieldCount});
    203 }
    204 
    205190Value * IDISA_SSSE3_Builder::esimd_mergeh(unsigned fw, Value * a, Value * b) {
    206191    if ((fw == 1) || (fw == 2)) {
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_sse_builder.h

    r6076 r6087  
    5959    ~IDISA_SSSE3_Builder() {}
    6060
    61 private:
    62     llvm::Constant * bit_interleave_byteshuffle_table(unsigned fw);
    6361};
    6462
Note: See TracChangeset for help on using the changeset viewer.