Ignore:
Timestamp:
May 14, 2015, 11:51:32 AM (4 years ago)
Author:
nmedfort
Message:

Possible fix for 256-bit mode

Location:
icGREP/icgrep-devel/icgrep
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/include/simd-lib/idisa_cpp/idisa_avx2.cpp

    r4151 r4552  
    610610#define avx_general_combine256(x, y) \
    611611    (_mm256_insertf128_si256(_mm256_castsi128_si256(y), x, 1))
     612
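     613// Clamped subtraction for template shift arguments: yields (x - y) only when x > y, otherwise y, so the argument never goes negative. Presumably needed because clang still instantiates the untaken branch of a ternary, which caused erroneous template expansion compile errors. Should not be used unless necessary!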
     614#define TEMPLATE_SUBTRACT(x, y) \
     615    (((x) > (y)) ? ((x) - (y)) : (y))
     616
    612617//The total number of operations is 2.0
    613618IDISA_ALWAYS_INLINE bitblock256_t simd_nor(bitblock256_t arg1, bitblock256_t arg2)
     
    11091114template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::srli(bitblock256_t arg1)
    11101115{
    1111         return ((sh < 128) ? simd_or(simd256<128>::srli<sh>(arg1), simd256<128>::slli<(128-sh)>(_mm256_castsi128_si256(avx_select_hi128(arg1)))) : simd256<128>::srli<(sh-128)>(avx_move_hi128_to_lo128(arg1)));
     1116    return ((sh < 128) ? simd_or(simd256<128>::srli<sh>(arg1), simd256<128>::slli<(128-sh)>(_mm256_castsi128_si256(avx_select_hi128(arg1)))) : simd256<128>::srli<TEMPLATE_SUBTRACT(sh,128)>(avx_move_hi128_to_lo128(arg1)));
    11121117}
    11131118
     
    15811586
    15821587//The total number of operations is 9.5
    1583 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::slli(bitblock256_t arg1)
    1584 {
    1585         return ((sh < 128) ? simd_or(simd256<128>::slli<sh>(arg1), avx_move_lo128_to_hi128(simd256<128>::srli<(128-sh)>(arg1))) : simd256<128>::slli<(sh-128)>(avx_move_lo128_to_hi128(arg1)));
     1588template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::slli(bitblock256_t arg1) {
     1589    return (sh < 128) ? simd_or(simd256<128>::slli<sh>(arg1), avx_move_lo128_to_hi128(simd256<128>::srli<(sh < 128) ? (128 - sh) : 0>(arg1))) : simd256<128>::slli<TEMPLATE_SUBTRACT(sh, 128)>(avx_move_lo128_to_hi128(arg1));
    15861590}
    15871591
     
    20912095template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::srai(bitblock256_t arg1)
    20922096{
    2093         return simd_or(simd_and(simd256<64>::himask(), simd256<(32)>::srai<((sh < (32)) ? sh : (32))>(arg1)), ((sh <= (32)) ? simd256<64>::srli<sh>(arg1) : simd256<(32)>::srai<(sh-(32))>(simd256<64>::srli<(32)>(arg1))));
     2097    return simd_or(simd_and(simd256<64>::himask(), simd256<(32)>::srai<((sh < (32)) ? sh : (32))>(arg1)), ((sh <= (32)) ? simd256<64>::srli<sh>(arg1) : simd256<(32)>::srai<TEMPLATE_SUBTRACT(sh,32)>(simd256<64>::srli<(32)>(arg1))));
    20942098}
    20952099
     
    20972101template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::srai(bitblock256_t arg1)
    20982102{
    2099         return simd_or(simd_and(simd256<128>::himask(), simd256<(64)>::srai<((sh < (64)) ? sh : (64))>(arg1)), ((sh <= (64)) ? simd256<128>::srli<sh>(arg1) : simd256<(64)>::srai<(sh-(64))>(simd256<128>::srli<(64)>(arg1))));
     2103    return simd_or(simd_and(simd256<128>::himask(), simd256<(64)>::srai<((sh < (64)) ? sh : (64))>(arg1)), ((sh <= (64)) ? simd256<128>::srli<sh>(arg1) : simd256<(64)>::srai<TEMPLATE_SUBTRACT(sh,64)>(simd256<128>::srli<(64)>(arg1))));
    21002104}
    21012105
     
    21032107template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::srai(bitblock256_t arg1)
    21042108{
    2105         return simd_or(simd_and(simd256<256>::himask(), simd256<(128)>::srai<((sh < (128)) ? sh : (128))>(arg1)), ((sh <= (128)) ? simd256<256>::srli<sh>(arg1) : simd256<(128)>::srai<(sh-(128))>(simd256<256>::srli<(128)>(arg1))));
     2109    return simd_or(simd_and(simd256<256>::himask(), simd256<(128)>::srai<((sh < (128)) ? sh : (128))>(arg1)), ((sh <= (128)) ? simd256<256>::srli<sh>(arg1) : simd256<(128)>::srai<TEMPLATE_SUBTRACT(sh,128)>(simd256<256>::srli<(128)>(arg1))));
    21062110}
    21072111
     
    36733677}
    36743678
     3679#undef TEMPLATE_SUBTRACT
     3680
    36753681#endif
  • icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp

    r4549 r4552  
    10251025    Value* advanceq_longint = b.CreateBitCast(genCarryDataLoad(advanceIndex), b.getIntNTy(BLOCK_SIZE));
    10261026    Value* strm_longint = b.CreateBitCast(strm_value, b.getIntNTy(BLOCK_SIZE));
    1027     Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - shift_amount), "advance");
     1027    Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, shift_amount), b.CreateLShr(advanceq_longint, BLOCK_SIZE - shift_amount), "advance");
    10281028    result_value = b.CreateBitCast(adv_longint, mBitBlockType);
    10291029   
Note: See TracChangeset for help on using the changeset viewer.