Changeset 4552
- Timestamp: May 14, 2015, 11:51:32 AM (4 years ago)
- Location: icGREP/icgrep-devel/icgrep
- Files: 2 edited
Legend:
- Unmodified
- Added
- Removed
icGREP/icgrep-devel/icgrep/include/simd-lib/idisa_cpp/idisa_avx2.cpp
r4151 r4552 610 610 #define avx_general_combine256(x, y) \ 611 611 (_mm256_insertf128_si256(_mm256_castsi128_si256(y), x, 1)) 612 613 // Prevents erroneous clang template expansion compile errors. Should not be used unless necessary! 614 #define TEMPLATE_SUBTRACT(x, y) \ 615 (((x) > (y)) ? ((x) - (y)) : (y)) 616 612 617 //The total number of operations is 2.0 613 618 IDISA_ALWAYS_INLINE bitblock256_t simd_nor(bitblock256_t arg1, bitblock256_t arg2) … … 1109 1114 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::srli(bitblock256_t arg1) 1110 1115 { 1111 return ((sh < 128) ? simd_or(simd256<128>::srli<sh>(arg1), simd256<128>::slli<(128-sh)>(_mm256_castsi128_si256(avx_select_hi128(arg1)))) : simd256<128>::srli<(sh-128)>(avx_move_hi128_to_lo128(arg1)));1116 return ((sh < 128) ? simd_or(simd256<128>::srli<sh>(arg1), simd256<128>::slli<(128-sh)>(_mm256_castsi128_si256(avx_select_hi128(arg1)))) : simd256<128>::srli<TEMPLATE_SUBTRACT(sh,128)>(avx_move_hi128_to_lo128(arg1))); 1112 1117 } 1113 1118 … … 1581 1586 1582 1587 //The total number of operations is 9.5 1583 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::slli(bitblock256_t arg1) 1584 { 1585 return ((sh < 128) ? simd_or(simd256<128>::slli<sh>(arg1), avx_move_lo128_to_hi128(simd256<128>::srli<(128-sh)>(arg1))) : simd256<128>::slli<(sh-128)>(avx_move_lo128_to_hi128(arg1))); 1588 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::slli(bitblock256_t arg1) { 1589 return (sh < 128) ? simd_or(simd256<128>::slli<sh>(arg1), avx_move_lo128_to_hi128(simd256<128>::srli<(sh < 128) ? (128 - sh) : 0>(arg1))) : simd256<128>::slli<TEMPLATE_SUBTRACT(sh, 128)>(avx_move_lo128_to_hi128(arg1)); 1586 1590 } 1587 1591 … … 2091 2095 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::srai(bitblock256_t arg1) 2092 2096 { 2093 return simd_or(simd_and(simd256<64>::himask(), simd256<(32)>::srai<((sh < (32)) ? 
sh : (32))>(arg1)), ((sh <= (32)) ? simd256<64>::srli<sh>(arg1) : simd256<(32)>::srai<(sh-(32))>(simd256<64>::srli<(32)>(arg1))));2097 return simd_or(simd_and(simd256<64>::himask(), simd256<(32)>::srai<((sh < (32)) ? sh : (32))>(arg1)), ((sh <= (32)) ? simd256<64>::srli<sh>(arg1) : simd256<(32)>::srai<TEMPLATE_SUBTRACT(sh,32)>(simd256<64>::srli<(32)>(arg1)))); 2094 2098 } 2095 2099 … … 2097 2101 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::srai(bitblock256_t arg1) 2098 2102 { 2099 return simd_or(simd_and(simd256<128>::himask(), simd256<(64)>::srai<((sh < (64)) ? sh : (64))>(arg1)), ((sh <= (64)) ? simd256<128>::srli<sh>(arg1) : simd256<(64)>::srai<(sh-(64))>(simd256<128>::srli<(64)>(arg1))));2103 return simd_or(simd_and(simd256<128>::himask(), simd256<(64)>::srai<((sh < (64)) ? sh : (64))>(arg1)), ((sh <= (64)) ? simd256<128>::srli<sh>(arg1) : simd256<(64)>::srai<TEMPLATE_SUBTRACT(sh,64)>(simd256<128>::srli<(64)>(arg1)))); 2100 2104 } 2101 2105 … … 2103 2107 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::srai(bitblock256_t arg1) 2104 2108 { 2105 return simd_or(simd_and(simd256<256>::himask(), simd256<(128)>::srai<((sh < (128)) ? sh : (128))>(arg1)), ((sh <= (128)) ? simd256<256>::srli<sh>(arg1) : simd256<(128)>::srai<(sh-(128))>(simd256<256>::srli<(128)>(arg1))));2109 return simd_or(simd_and(simd256<256>::himask(), simd256<(128)>::srai<((sh < (128)) ? sh : (128))>(arg1)), ((sh <= (128)) ? simd256<256>::srli<sh>(arg1) : simd256<(128)>::srai<TEMPLATE_SUBTRACT(sh,128)>(simd256<256>::srli<(128)>(arg1)))); 2106 2110 } 2107 2111 … … 3673 3677 } 3674 3678 3679 #undef TEMPLATE_SUBTRACT 3680 3675 3681 #endif -
icGREP/icgrep-devel/icgrep/pablo/pablo_compiler.cpp
r4549 r4552 1025 1025 Value* advanceq_longint = b.CreateBitCast(genCarryDataLoad(advanceIndex), b.getIntNTy(BLOCK_SIZE)); 1026 1026 Value* strm_longint = b.CreateBitCast(strm_value, b.getIntNTy(BLOCK_SIZE)); 1027 Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, block_shift), b.CreateLShr(advanceq_longint, BLOCK_SIZE - shift_amount), "advance");1027 Value* adv_longint = b.CreateOr(b.CreateShl(strm_longint, shift_amount), b.CreateLShr(advanceq_longint, BLOCK_SIZE - shift_amount), "advance"); 1028 1028 result_value = b.CreateBitCast(adv_longint, mBitBlockType); 1029 1029
Note: See TracChangeset for help on using the changeset viewer.