Ignore:
Timestamp: Mar 3, 2012, 1:37:28 PM (7 years ago)
Author: cameron
Message: Updates for AVX, reverted casts
File: 1 edited

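Legend: Unmodified lines show both the old (r1884) and new (r1953) line numbers; Added lines show only the new line number; Removed lines show only the old line number.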
  • trunk/lib/idisa_cpp/idisa_avx.cpp

    r1884 r1953  
    92 92 {
    93 93 public:
    94         static IDISA_ALWAYS_INLINE bitblock256_t load_unaligned(const float const* arg1);
     94        static IDISA_ALWAYS_INLINE bitblock256_t load_unaligned(const float* arg1);
    95 95         template <uint64_t sh> static IDISA_ALWAYS_INLINE bitblock256_t srli(bitblock256_t arg1);
    96 96         static IDISA_ALWAYS_INLINE void store_aligned(bitblock256_t arg1, float* arg2);
     
    99 99         static IDISA_ALWAYS_INLINE uint64_t popcount(bitblock256_t arg1);
    100 100         template <uint64_t sh> static IDISA_ALWAYS_INLINE bitblock256_t slli(bitblock256_t arg1);
    101         static IDISA_ALWAYS_INLINE bitblock256_t load_aligned(const float const* arg1);
     101        static IDISA_ALWAYS_INLINE bitblock256_t load_aligned(const float* arg1);
    102 102         static IDISA_ALWAYS_INLINE void store_unaligned(bitblock256_t arg1, float* arg2);
    103 103 };
     
    1066 1066 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::srli(bitblock256_t arg1)
    1067 1067 {
    1068         return ((sh < 128) ? simd_or(simd256<128>::srli<sh>(arg1), simd256<128>::slli<(128-sh)>(reinterpret_cast<bitblock256_t>(_mm256_castsi128_si256(avx_select_hi128(arg1))))) : simd256<128>::srli<(sh-128)>(avx_move_hi128_to_lo128(arg1)));
     1068        return ((sh < 128) ? simd_or(simd256<128>::srli<sh>(arg1), simd256<128>::slli<(128-sh)>(((bitblock256_t)(_mm256_castsi128_si256(avx_select_hi128(arg1)))))) : simd256<128>::srli<(sh-128)>(avx_move_hi128_to_lo128(arg1)));
    1069 1069 }
    1070 1070
     
    1596 1596 template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::constant()
    1597 1597 {
    1598         return reinterpret_cast<bitblock256_t>(_mm256_set_epi32((int32_t)((val>>32)), (int32_t)(val), (int32_t)((val>>32)), (int32_t)(val), (int32_t)((val>>32)), (int32_t)(val), (int32_t)((val>>32)), (int32_t)(val)));
     1598        return ((bitblock256_t)(_mm256_set_epi32((int32_t)((val>>32)), (int32_t)(val), (int32_t)((val>>32)), (int32_t)(val), (int32_t)((val>>32)), (int32_t)(val), (int32_t)((val>>32)), (int32_t)(val))));
    1599 1599 }
    1600 1600
     
    1602 1602 template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::constant()
    1603 1603 {
    1604         return reinterpret_cast<bitblock256_t>(_mm256_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)((val>>32)), (int32_t)(val), (int32_t)(0), (int32_t)(0), (int32_t)((val>>32)), (int32_t)(val)));
     1604        return ((bitblock256_t)(_mm256_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)((val>>32)), (int32_t)(val), (int32_t)(0), (int32_t)(0), (int32_t)((val>>32)), (int32_t)(val))));
    1605 1605 }
    1606 1606
     
    1608 1608 template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::constant()
    1609 1609 {
    1610         return reinterpret_cast<bitblock256_t>(_mm256_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)((val>>32)), (int32_t)(val)));
     1610        return ((bitblock256_t)(_mm256_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)((val>>32)), (int32_t)(val))));
    1611 1611 }
    1612 1612
     
    1711 1711 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::lomask()
    1712 1712 {
    1713         return reinterpret_cast<bitblock256_t>(_mm256_set_epi32((int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1)));
     1713        return ((bitblock256_t)(_mm256_set_epi32((int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1))));
    1714 1714 }
    1715 1715
     
    1717 1717 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::lomask()
    1718 1718 {
    1719         return reinterpret_cast<bitblock256_t>(_mm256_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1)));
     1719        return ((bitblock256_t)(_mm256_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1))));
    1720 1720 }
    1721 1721
     
    1723 1723 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::lomask()
    1724 1724 {
    1725         return reinterpret_cast<bitblock256_t>(_mm256_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1), (int32_t)(-1), (int32_t)(-1)));
     1725        return ((bitblock256_t)(_mm256_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1), (int32_t)(-1), (int32_t)(-1))));
    1726 1726 }
    1727 1727
     
    1987 1987 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::himask()
    1988 1988 {
    1989         return reinterpret_cast<bitblock256_t>(_mm256_set_epi32((int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0)));
     1989        return ((bitblock256_t)(_mm256_set_epi32((int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0))));
    1990 1990 }
    1991 1991
     
    1993 1993 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::himask()
    1994 1994 {
    1995         return reinterpret_cast<bitblock256_t>(_mm256_set_epi32((int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0)));
     1995        return ((bitblock256_t)(_mm256_set_epi32((int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0))));
    1996 1996 }
    1997 1997
     
    1999 1999 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::himask()
    2000 2000 {
    2001         return reinterpret_cast<bitblock256_t>(_mm256_set_epi32((int32_t)(-1), (int32_t)(-1), (int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(0)));
     2001        return ((bitblock256_t)(_mm256_set_epi32((int32_t)(-1), (int32_t)(-1), (int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(0))));
    2002 2002 }
    2003 2003
     
    2290 2290 template <> IDISA_ALWAYS_INLINE uint64_t hsimd256<8>::signmask(bitblock256_t arg1)
    2291 2291 {
    2292         return ((reinterpret_cast<uint64_t>(_mm_movemask_epi8(reinterpret_cast<__m128i>(avx_select_hi128(arg1))))<<16)|reinterpret_cast<uint64_t>(_mm_movemask_epi8(reinterpret_cast<__m128i>(avx_select_lo128(arg1)))));
     2292        return ((((uint64_t)(_mm_movemask_epi8(((__m128i)(avx_select_hi128(arg1))))))<<16)|((uint64_t)(_mm_movemask_epi8(((__m128i)(avx_select_lo128(arg1)))))));
    2293 2293 }
    2294 2294
     
    2957 2957 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd256<32>::extract(bitblock256_t arg1)
    2958 2958 {
    2959         return ((pos < 4) ? (reinterpret_cast<uint64_t>(((4294967296ULL)-1))&_mm_extract_epi32(avx_select_lo128(arg1), (int32_t)(pos))) : (reinterpret_cast<uint64_t>(((4294967296ULL)-1))&_mm_extract_epi32(avx_select_hi128(arg1), (int32_t)((pos-4)))));
     2959        return ((pos < 4) ? (((uint64_t)(((4294967296ULL)-1)))&_mm_extract_epi32(avx_select_lo128(arg1), (int32_t)(pos))) : (((uint64_t)(((4294967296ULL)-1)))&_mm_extract_epi32(avx_select_hi128(arg1), (int32_t)((pos-4)))));
    2960 2960 }
    2961 2961
     
    3309 3309
    3310 3310 //The total number of operations is 1.0
    3311 IDISA_ALWAYS_INLINE bitblock256_t bitblock256::load_unaligned(const float const* arg1)
    3312 {
    3313         return _mm256_loadu_ps((float const*)(arg1));
     3311IDISA_ALWAYS_INLINE bitblock256_t bitblock256::load_unaligned(const float* arg1)
     3312{
     3313        return _mm256_loadu_ps((float*)(arg1));
    3314 3314 }
    3315 3315
     
    3323 3323 IDISA_ALWAYS_INLINE void bitblock256::store_aligned(bitblock256_t arg1, float* arg2)
    3324 3324 {
    3325         _mm256_store_ps((float*)(arg1), arg2);
     3325        _mm256_store_ps((float*)(arg2), arg1);
    3326 3326 }
    3327 3327
     
    3335 3335 IDISA_ALWAYS_INLINE bool bitblock256::all(bitblock256_t arg1)
    3336 3336 {
    3337         return _mm256_testz_si256(reinterpret_cast<__m256i>(simd_not(arg1)), reinterpret_cast<__m256i>(simd256<8>::constant<-1>())) == 1;
     3337        return _mm256_testz_si256(((__m256i)(simd_not(arg1))), ((__m256i)(simd256<8>::constant<-1>()))) == 1;
    3338 3338 }
    3339 3339
     
    3347 3347 IDISA_ALWAYS_INLINE bool bitblock256::any(bitblock256_t arg1)
    3348 3348 {
    3349         return _mm256_testz_si256(reinterpret_cast<__m256i>(arg1), reinterpret_cast<__m256i>(arg1)) == 0;
    3350 }
    3351 
    3352 //The total number of operations is 1.0
    3353 IDISA_ALWAYS_INLINE bitblock256_t bitblock256::load_aligned(const float const* arg1)
    3354 {
    3355         return _mm256_load_ps((float const*)(arg1));
     3349        return _mm256_testz_si256(((__m256i)(arg1)), ((__m256i)(arg1))) == 0;
     3350}
     3351
     3352//The total number of operations is 1.0
     3353IDISA_ALWAYS_INLINE bitblock256_t bitblock256::load_aligned(const float* arg1)
     3354{
     3355        return _mm256_load_ps((float*)(arg1));
    3356 3356 }
    3357 3357
     
    3359 3359 IDISA_ALWAYS_INLINE void bitblock256::store_unaligned(bitblock256_t arg1, float* arg2)
    3360 3360 {
    3361         _mm256_storeu_ps((float*)(arg1), arg2);
     3361        _mm256_storeu_ps((float*)(arg2), arg1);
    3362 3362 }
    3363 3363
Note: See TracChangeset for help on using the changeset viewer.