Ignore:
Timestamp:
Jun 9, 2018, 1:51:10 PM (11 months ago)
Author:
cameron
Message:

IDISA_SSSE3 builder; other IDISA operations

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_sse_builder.cpp

    r6057 r6076  
    11/*
    2  *  Copyright (c) 2016 International Characters.
     2 *  Copyright (c) 2018 International Characters.
    33 *  This software is licensed to the public under the Open Software License 3.0.
    44 *  icgrep is a trademark of International Characters.
     
    1414std::string IDISA_SSE_Builder::getBuilderUniqueName() { return mBitBlockWidth != 128 ? "SSE_" + std::to_string(mBitBlockWidth) : "SSE";}
    1515std::string IDISA_SSE2_Builder::getBuilderUniqueName() { return mBitBlockWidth != 128 ? "SSE2_" + std::to_string(mBitBlockWidth) : "SSE2";}
     16std::string IDISA_SSSE3_Builder::getBuilderUniqueName() { return mBitBlockWidth != 128 ? "SSSE3_" + std::to_string(mBitBlockWidth) : "SSSE3";}
    1617
    1718Value * IDISA_SSE2_Builder::hsimd_packh(unsigned fw, Value * a, Value * b) {   
     
    2122    }
    2223    // Otherwise use default logic.
    23     return IDISA_Builder::hsimd_packh(fw, a, b);
     24    return IDISA_SSE_Builder::hsimd_packh(fw, a, b);
    2425}
    2526
     
    154155}
    155156   
    156 Value * IDISA_SSE2_Builder::mvmd_shuffle(unsigned fw, Value * a, Value * shuffle_table) {
     157Value * IDISA_SSE2_Builder::mvmd_shuffle(unsigned fw, Value * a, Value * index_vector) {
    157158    if ((mBitBlockWidth == 128) && (fw == 64)) {
    158159        // First create a vector with exchanged values of the 2 fields.
     
    165166        Value * xchg_vec = ConstantVector::get({xchg, 2});
    166167        Constant * oneSplat = ConstantVector::getSplat(2, ConstantInt::get(getInt64Ty(), 1));
    167         Value * exchange_mask = simd_eq(fw, simd_and(shuffle_table, oneSplat), xchg_vec);
     168        Value * exchange_mask = simd_eq(fw, simd_and(index_vector, oneSplat), xchg_vec);
    168169        Value * rslt = simd_xor(simd_and(changed, exchange_mask), a);
    169170        return rslt;
    170171    }
    171     return IDISA_Builder::mvmd_shuffle(fw, a, shuffle_table);
     172    return IDISA_SSE_Builder::mvmd_shuffle(fw, a, index_vector);
    172173}
    173174
     
    187188}
    188189
    189 
    190 }
     190Constant * IDISA_SSSE3_Builder::bit_interleave_byteshuffle_table(unsigned fw) {
     191    const unsigned fieldCount = mBitBlockWidth/8;
     192    if (fw > 2) llvm::report_fatal_error("bit_interleave_byteshuffle_table requires fw == 1 or fw == 2");
     193    // Bit interleave using shuffle.
     194    // Make a shuffle table that translates the lower 4 bits of each byte in
     195    // order to spread out the bits: xxxxdcba => .d.c.b.a (fw = 1)
     196    Constant * bit_interleave[fieldCount];
     197    for (unsigned i = 0; i < fieldCount; i++) {
     198        if (fw == 1)
     199            bit_interleave[i] = getInt8((i & 1) | ((i & 2) << 1) | ((i & 4) << 2) | ((i & 8) << 3));
     200        else bit_interleave[i] = getInt8((i & 3) | ((i & 0x0C) << 2));
     201    }
     202    return ConstantVector::get({bit_interleave, fieldCount});
     203}
     204
     205Value * IDISA_SSSE3_Builder::esimd_mergeh(unsigned fw, Value * a, Value * b) {
     206    if ((fw == 1) || (fw == 2)) {
     207        Constant * interleave_table = bit_interleave_byteshuffle_table(fw);
     208        // Merge the bytes.
     209        Value * byte_merge = esimd_mergeh(8, a, b);
     210        Value * low_bits = mvmd_shuffle(8, interleave_table, fwCast(8, simd_and(byte_merge, simd_lomask(8))));
     211        Value * high_bits = simd_slli(16, mvmd_shuffle(8, interleave_table, fwCast(8, simd_srli(8, byte_merge, 4))), fw);
     212        // For each 16-bit field, interleave the low bits of the two bytes.
     213        low_bits = simd_or(simd_and(low_bits, simd_lomask(16)), simd_srli(16, low_bits, 8-fw));
     214        // For each 16-bit field, interleave the high bits of the two bytes.
     215        high_bits = simd_or(simd_and(high_bits, simd_himask(16)), simd_slli(16, high_bits, 8-fw));
     216        return simd_or(low_bits, high_bits);
     217    }
     218    // Otherwise use default SSE logic.
     219    return IDISA_SSE2_Builder::esimd_mergeh(fw, a, b);
     220}
     221
     222Value * IDISA_SSSE3_Builder::esimd_mergel(unsigned fw, Value * a, Value * b) {
     223    if ((fw == 1) || (fw == 2)) {
     224        Constant * interleave_table = bit_interleave_byteshuffle_table(fw);
     225        // Merge the bytes.
     226        Value * byte_merge = esimd_mergel(8, a, b);
     227        Value * low_bits = mvmd_shuffle(8, interleave_table, fwCast(8, simd_and(byte_merge, simd_lomask(8))));
     228        Value * high_bits = simd_slli(16, mvmd_shuffle(8, interleave_table, fwCast(8, simd_srli(8, byte_merge, 4))), fw);
     229        // For each 16-bit field, interleave the low bits of the two bytes.
     230        low_bits = simd_or(simd_and(low_bits, simd_lomask(16)), simd_srli(16, low_bits, 8-fw));
     231        // For each 16-bit field, interleave the high bits of the two bytes.
     232        high_bits = simd_or(simd_and(high_bits, simd_himask(16)), simd_slli(16, high_bits, 8-fw));
     233        return simd_or(low_bits, high_bits);
     234    }
     235    // Otherwise use default SSE2 logic.
     236    return IDISA_SSE2_Builder::esimd_mergel(fw, a, b);
     237}
     238
     239llvm::Value * IDISA_SSSE3_Builder::mvmd_shuffle(unsigned fw, llvm::Value * data_table, llvm::Value * index_vector) {
     240    if (mBitBlockWidth == 128 && fw > 8) {
     241        // Create a table for shuffling with smaller field widths.
     242        const unsigned fieldCount = mBitBlockWidth/fw;
     243        Constant * idxMask = ConstantVector::getSplat(fieldCount, ConstantInt::get(getIntNTy(fw), fieldCount-1));
     244        Value * idx = simd_and(index_vector, idxMask);
     245        unsigned half_fw = fw/2;
     246        unsigned field_count = mBitBlockWidth/half_fw;
     247        // Build a ConstantVector of alternating 0 and 1 values.
     248        Constant * Idxs[field_count];
     249        for (unsigned int i = 0; i < field_count; i++) {
     250            Idxs[i] = ConstantInt::get(getIntNTy(fw/2), i & 1);
     251        }
     252        Constant * splat01 = ConstantVector::get({Idxs, field_count});
     253       
     254        Value * half_fw_indexes = simd_or(idx, mvmd_slli(half_fw, idx, 1));
     255        half_fw_indexes = simd_add(fw, simd_add(fw, half_fw_indexes, half_fw_indexes), splat01);
     256        Value * rslt = mvmd_shuffle(half_fw, data_table, half_fw_indexes);
     257        return rslt;
     258    }
     259    if (mBitBlockWidth == 128 && fw == 8) {
     260        Value * shuf8Func = Intrinsic::getDeclaration(getModule(), Intrinsic::x86_ssse3_pshuf_b_128);
     261        return CreateCall(shuf8Func, {fwCast(8, data_table), fwCast(8, simd_and(index_vector, simd_lomask(8)))});
     262    }
     263    return IDISA_SSE2_Builder::mvmd_shuffle(fw, data_table, index_vector);
     264}
     265
     266
     267}
Note: See TracChangeset for help on using the changeset viewer.