Changeset 2191 for trunk


Ignore:
Timestamp:
May 30, 2012, 11:14:12 AM (7 years ago)
Author:
cameron
Message:

Replace sh-128 with sh&127

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/idisa_cpp/idisa_avx.cpp

    r2143 r2191  
    1212
    1313#include "immintrin.h"
     14
     15#include "idisa_sse2.cpp"
    1416
    1517typedef __m256 bitblock256_t;
     
    9294{
    9395public:
    94         static IDISA_ALWAYS_INLINE bitblock256_t sll(bitblock256_t arg1, bitblock256_t arg2);
    95         static IDISA_ALWAYS_INLINE bitblock256_t srl(bitblock256_t arg1, bitblock256_t arg2);
    9696        static IDISA_ALWAYS_INLINE bitblock256_t load_unaligned(const bitblock256_t* arg1);
    9797        template <uint64_t sh> static IDISA_ALWAYS_INLINE bitblock256_t srli(bitblock256_t arg1);
     
    103103        static IDISA_ALWAYS_INLINE bitblock256_t load_aligned(const bitblock256_t* arg1);
    104104        static IDISA_ALWAYS_INLINE void store_unaligned(bitblock256_t arg1, bitblock256_t* arg2);
     105        static IDISA_ALWAYS_INLINE bitblock256_t sll(bitblock256_t arg1, bitblock256_t arg2);
     106        static IDISA_ALWAYS_INLINE bitblock256_t srl(bitblock256_t arg1, bitblock256_t arg2);
    105107};
    106108
     
    110112IDISA_ALWAYS_INLINE bitblock256_t simd_andc(bitblock256_t arg1, bitblock256_t arg2);
    111113IDISA_ALWAYS_INLINE bitblock256_t simd_or(bitblock256_t arg1, bitblock256_t arg2);
     114IDISA_ALWAYS_INLINE bitblock256_t simd_and(bitblock256_t arg1, bitblock256_t arg2);
    112115IDISA_ALWAYS_INLINE bitblock256_t simd_xor(bitblock256_t arg1, bitblock256_t arg2);
    113 IDISA_ALWAYS_INLINE bitblock256_t simd_and(bitblock256_t arg1, bitblock256_t arg2);
    114116template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::max(bitblock256_t arg1, bitblock256_t arg2);
    115117template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::max(bitblock256_t arg1, bitblock256_t arg2);
     
    531533
    532534//Implementation Part
    533 #include "idisa_sse2.cpp"
    534535
    535536#define avx_move_lo128_to_hi128(x) \
     
    578579
    579580//The total number of operations is 1.0
     581IDISA_ALWAYS_INLINE bitblock256_t simd_and(bitblock256_t arg1, bitblock256_t arg2)
     582{
     583        return _mm256_and_ps(arg1, arg2);
     584}
     585
     586//The total number of operations is 1.0
    580587IDISA_ALWAYS_INLINE bitblock256_t simd_xor(bitblock256_t arg1, bitblock256_t arg2)
    581588{
    582589        return _mm256_xor_ps(arg1, arg2);
    583 }
    584 
    585 //The total number of operations is 1.0
    586 IDISA_ALWAYS_INLINE bitblock256_t simd_and(bitblock256_t arg1, bitblock256_t arg2)
    587 {
    588         return _mm256_and_ps(arg1, arg2);
    589590}
    590591
     
    10691070template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::srli(bitblock256_t arg1)
    10701071{
    1071         return (((sh%8) == 0) ? avx_byte_shift_right(arg1, (sh/8)) : ((sh >= 64) ? simd256<64>::srli<(sh-64)>(avx_byte_shift_right(arg1, 8)) : simd_or(simd256<64>::srli<sh>(arg1), avx_byte_shift_right(simd256<64>::slli<(64-sh)>(arg1), 8))));
     1072        return (((sh%8) == 0) ? avx_byte_shift_right(arg1, (sh/8)) : ((sh >= 64) ? simd256<64>::srli<(sh&63)>(avx_byte_shift_right(arg1, 8)) : simd_or(simd256<64>::srli<sh>(arg1), avx_byte_shift_right(simd256<64>::slli<((128-sh)&63)>(arg1), 8))));
    10721073}
    10731074
     
    10751076template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::srli(bitblock256_t arg1)
    10761077{
    1077         return ((sh < 128) ? simd_or(simd256<128>::srli<sh>(arg1), simd256<128>::slli<(128-sh)>(((bitblock256_t)(_mm256_castsi128_si256(avx_select_hi128(arg1)))))) : simd256<128>::srli<(sh-128)>(avx_move_hi128_to_lo128(arg1)));
     1078        return ((sh < 128) ? simd_or(simd256<128>::srli<sh>(arg1), simd256<128>::slli<((256-sh)&127)>(((bitblock256_t)(_mm256_castsi128_si256(avx_select_hi128(arg1)))))) : simd256<128>::srli<(sh&127)>(avx_move_hi128_to_lo128(arg1)));
    10781079}
    10791080
     
    13901391template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::slli(bitblock256_t arg1)
    13911392{
    1392         return (((sh%8) == 0) ? avx_byte_shift_left(arg1, (sh/8)) : ((sh >= 64) ? simd256<64>::slli<(sh-64)>(avx_byte_shift_left(arg1, 8)) : simd_or(simd256<64>::slli<sh>(arg1), avx_byte_shift_left(simd256<64>::srli<(64-sh)>(arg1), 8))));
     1393        return (((sh%8) == 0) ? avx_byte_shift_left(arg1, (sh/8)) : ((sh >= 64) ? simd256<64>::slli<(sh&63)>(avx_byte_shift_left(arg1, 8)) : simd_or(simd256<64>::slli<sh>(arg1), avx_byte_shift_left(simd256<64>::srli<((128-sh)&63)>(arg1), 8))));
    13931394}
    13941395
     
    13961397template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::slli(bitblock256_t arg1)
    13971398{
    1398         return ((sh < 128) ? simd_or(simd256<128>::slli<sh>(arg1), avx_move_lo128_to_hi128(simd256<128>::srli<(128-sh)>(arg1))) : simd256<128>::slli<(sh-128)>(avx_move_lo128_to_hi128(arg1)));
     1399        return ((sh < 128) ? simd_or(simd256<128>::slli<sh>(arg1), avx_move_lo128_to_hi128(simd256<128>::srli<((256-sh)&127)>(arg1))) : simd256<128>::slli<(sh&127)>(avx_move_lo128_to_hi128(arg1)));
    13991400}
    14001401
     
    34433444}
    34443445
    3445 
    34463446#endif
Note: See TracChangeset for help on using the changeset viewer.