Changeset 2127 for trunk/lib/idisa_cpp


Ignore:
Timestamp:
May 22, 2012, 9:13:57 AM (7 years ago)
Author:
cameron
Message:

Hand coded bitblock256::srl, sll

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/idisa_cpp/idisa_avx.cpp

    r2123 r2127  
    9292{
    9393public:
     94        static IDISA_ALWAYS_INLINE bitblock256_t sll(bitblock256_t arg1, bitblock256_t arg2);
     95        static IDISA_ALWAYS_INLINE bitblock256_t srl(bitblock256_t arg1, bitblock256_t arg2);
    9496        static IDISA_ALWAYS_INLINE bitblock256_t load_unaligned(const bitblock256_t* arg1);
    9597        template <uint64_t sh> static IDISA_ALWAYS_INLINE bitblock256_t srli(bitblock256_t arg1);
     
    108110IDISA_ALWAYS_INLINE bitblock256_t simd_andc(bitblock256_t arg1, bitblock256_t arg2);
    109111IDISA_ALWAYS_INLINE bitblock256_t simd_or(bitblock256_t arg1, bitblock256_t arg2);
     112IDISA_ALWAYS_INLINE bitblock256_t simd_xor(bitblock256_t arg1, bitblock256_t arg2);
    110113IDISA_ALWAYS_INLINE bitblock256_t simd_and(bitblock256_t arg1, bitblock256_t arg2);
    111 IDISA_ALWAYS_INLINE bitblock256_t simd_xor(bitblock256_t arg1, bitblock256_t arg2);
    112114template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::max(bitblock256_t arg1, bitblock256_t arg2);
    113115template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::max(bitblock256_t arg1, bitblock256_t arg2);
     
    523525
    524526//Implementation Part
     527#include "idisa_sse2.cpp"
    525528
    526529#define avx_move_lo128_to_hi128(x) \
     
    569572
    570573//The total number of operations is 1.0
     // simd_xor: returns the result of _mm256_xor_ps on the two blocks, i.e. the
     // bitwise XOR of arg1 and arg2 (bitblock256_t is presumably __m256 -- confirm
     // against the typedef, which is not shown in this hunk).
     574IDISA_ALWAYS_INLINE bitblock256_t simd_xor(bitblock256_t arg1, bitblock256_t arg2)
     575{
     576        return _mm256_xor_ps(arg1, arg2);
     577}
     578
     579//The total number of operations is 1.0
    // simd_and: returns the result of _mm256_and_ps on the two blocks, i.e. the
    // bitwise AND of arg1 and arg2.
    // NOTE(review): the lines below that carry a single gutter number appear to
    // be the old simd_xor definition shown as removed by the changeset viewer;
    // the closing brace with two gutter numbers closes simd_and -- confirm.
    571580IDISA_ALWAYS_INLINE bitblock256_t simd_and(bitblock256_t arg1, bitblock256_t arg2)
    572581{
    573582        return _mm256_and_ps(arg1, arg2);
    574 }
    575 
    576 //The total number of operations is 1.0
    577 IDISA_ALWAYS_INLINE bitblock256_t simd_xor(bitblock256_t arg1, bitblock256_t arg2)
    578 {
    579         return _mm256_xor_ps(arg1, arg2);
    580583}
    581584
     
    33623365}
    33633366
     // bitblock256::sll: hand-coded shift of the 256-bit block r, assembled from
     // 128-bit shifts of its two halves. Only the part of shft returned by
     // avx_select_lo128 is used as the shift count.
     // Presumably a logical left shift of the whole block; the helper macros and
     // bitblock128::sll/srl are defined elsewhere, so their exact semantics
     // (including behaviour for out-of-range counts) should be confirmed there.
     // NOTE(review): x comes from avx_select_lo128(r) and y from
     // avx_select_hi128(r), yet the three-term expression (which receives the
     // cross-half carry terms) starts from x shifted by s while the carry terms
     // are taken from y, and the other combine argument is y shifted by s alone.
     // For a 256-bit left shift one would expect the carry terms to come from the
     // low half -- the roles of x and y look swapped; confirm against the macro
     // definitions and a test.
     3367IDISA_ALWAYS_INLINE bitblock256_t bitblock256::sll(bitblock256_t r, bitblock256_t shft)
     3368{
     3369        bitblock128_t s = avx_select_lo128(shft);  // shift count
     3370        bitblock128_t c128 = _mm_cvtsi32_si128(128);  // constant 128 in the low 32-bit lane, other lanes zero
     3371        bitblock128_t x = avx_select_lo128(r);
     3372        bitblock128_t y = avx_select_hi128(r);
     3373
     3374        return
     3375        avx_general_combine256(
     3376           _mm_or_si128(  // OR of: same-half shift, whole-half carry (count - 128), partial carry (128 - count)
     3377                _mm_or_si128(bitblock128::sll(x, s), bitblock128::sll(y, _mm_sub_epi32(s, c128))),  // s - 128 wraps per 32-bit lane when s < 128; presumably relies on the 128-bit shift yielding zero then -- confirm
     3378                bitblock128::srl(y, _mm_sub_epi32(c128, s))),  // 128 - s wraps when s > 128; same assumption
     3379        bitblock128::sll(y, s));
     3380}
     3381
     // bitblock256::srl: hand-coded shift of the 256-bit block r, assembled from
     // 128-bit shifts of its two halves. Only the part of shft returned by
     // avx_select_lo128 is used as the shift count.
     // Presumably a logical right shift of the whole block; the helper macros and
     // bitblock128::sll/srl are defined elsewhere, so their exact semantics
     // (including behaviour for out-of-range counts) should be confirmed there.
     // NOTE(review): x comes from avx_select_lo128(r) and y from
     // avx_select_hi128(r), yet the three-term expression (which receives the
     // cross-half carry terms) starts from y shifted by s while the carry terms
     // are taken from x, and the other combine argument is x shifted by s alone.
     // For a 256-bit right shift one would expect the carry terms to come from
     // the high half -- the roles of x and y look swapped; confirm against the
     // macro definitions and a test.
     3382IDISA_ALWAYS_INLINE bitblock256_t bitblock256::srl(bitblock256_t r, bitblock256_t shft)
     3383{
     3384        bitblock128_t s = avx_select_lo128(shft);  // shift count
     3385        bitblock128_t c128 = _mm_cvtsi32_si128(128);  // constant 128 in the low 32-bit lane, other lanes zero
     3386        bitblock128_t x = avx_select_lo128(r);
     3387        bitblock128_t y = avx_select_hi128(r);
     3388
     3389        return
     3390        avx_general_combine256(
     3391           bitblock128::srl(x, s),
     3392           _mm_or_si128(  // OR of: same-half shift, whole-half carry (count - 128), partial carry (128 - count)
     3393                _mm_or_si128(bitblock128::srl(y, s), bitblock128::srl(x, _mm_sub_epi32(s, c128))),  // s - 128 wraps per 32-bit lane when s < 128; presumably relies on the 128-bit shift yielding zero then -- confirm
     3394                bitblock128::sll(x, _mm_sub_epi32(c128, s))));  // 128 - s wraps when s > 128; same assumption
     3395}
     3396
    33643397#endif
Note: See TracChangeset for help on using the changeset viewer.