Changeset 1953 for trunk/lib/idisa_cpp


Ignore:
Timestamp:
Mar 3, 2012, 1:37:28 PM (7 years ago)
Author:
cameron
Message:

Updates for AVX; reverted reinterpret_cast expressions to C-style casts

Location:
trunk/lib/idisa_cpp
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/idisa_cpp/idisa_avx.cpp

    r1884 r1953  
    9292{
    9393public:
    94         static IDISA_ALWAYS_INLINE bitblock256_t load_unaligned(const float const* arg1);
     94        static IDISA_ALWAYS_INLINE bitblock256_t load_unaligned(const float* arg1);
    9595        template <uint64_t sh> static IDISA_ALWAYS_INLINE bitblock256_t srli(bitblock256_t arg1);
    9696        static IDISA_ALWAYS_INLINE void store_aligned(bitblock256_t arg1, float* arg2);
     
    9999        static IDISA_ALWAYS_INLINE uint64_t popcount(bitblock256_t arg1);
    100100        template <uint64_t sh> static IDISA_ALWAYS_INLINE bitblock256_t slli(bitblock256_t arg1);
    101         static IDISA_ALWAYS_INLINE bitblock256_t load_aligned(const float const* arg1);
     101        static IDISA_ALWAYS_INLINE bitblock256_t load_aligned(const float* arg1);
    102102        static IDISA_ALWAYS_INLINE void store_unaligned(bitblock256_t arg1, float* arg2);
    103103};
     
    10661066template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::srli(bitblock256_t arg1)
    10671067{
    1068         return ((sh < 128) ? simd_or(simd256<128>::srli<sh>(arg1), simd256<128>::slli<(128-sh)>(reinterpret_cast<bitblock256_t>(_mm256_castsi128_si256(avx_select_hi128(arg1))))) : simd256<128>::srli<(sh-128)>(avx_move_hi128_to_lo128(arg1)));
     1068        return ((sh < 128) ? simd_or(simd256<128>::srli<sh>(arg1), simd256<128>::slli<(128-sh)>(((bitblock256_t)(_mm256_castsi128_si256(avx_select_hi128(arg1)))))) : simd256<128>::srli<(sh-128)>(avx_move_hi128_to_lo128(arg1)));
    10691069}
    10701070
     
    15961596template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::constant()
    15971597{
    1598         return reinterpret_cast<bitblock256_t>(_mm256_set_epi32((int32_t)((val>>32)), (int32_t)(val), (int32_t)((val>>32)), (int32_t)(val), (int32_t)((val>>32)), (int32_t)(val), (int32_t)((val>>32)), (int32_t)(val)));
     1598        return ((bitblock256_t)(_mm256_set_epi32((int32_t)((val>>32)), (int32_t)(val), (int32_t)((val>>32)), (int32_t)(val), (int32_t)((val>>32)), (int32_t)(val), (int32_t)((val>>32)), (int32_t)(val))));
    15991599}
    16001600
     
    16021602template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::constant()
    16031603{
    1604         return reinterpret_cast<bitblock256_t>(_mm256_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)((val>>32)), (int32_t)(val), (int32_t)(0), (int32_t)(0), (int32_t)((val>>32)), (int32_t)(val)));
     1604        return ((bitblock256_t)(_mm256_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)((val>>32)), (int32_t)(val), (int32_t)(0), (int32_t)(0), (int32_t)((val>>32)), (int32_t)(val))));
    16051605}
    16061606
     
    16081608template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::constant()
    16091609{
    1610         return reinterpret_cast<bitblock256_t>(_mm256_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)((val>>32)), (int32_t)(val)));
     1610        return ((bitblock256_t)(_mm256_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)((val>>32)), (int32_t)(val))));
    16111611}
    16121612
     
    17111711template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::lomask()
    17121712{
    1713         return reinterpret_cast<bitblock256_t>(_mm256_set_epi32((int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1)));
     1713        return ((bitblock256_t)(_mm256_set_epi32((int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1))));
    17141714}
    17151715
     
    17171717template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::lomask()
    17181718{
    1719         return reinterpret_cast<bitblock256_t>(_mm256_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1)));
     1719        return ((bitblock256_t)(_mm256_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1))));
    17201720}
    17211721
     
    17231723template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::lomask()
    17241724{
    1725         return reinterpret_cast<bitblock256_t>(_mm256_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1), (int32_t)(-1), (int32_t)(-1)));
     1725        return ((bitblock256_t)(_mm256_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1), (int32_t)(-1), (int32_t)(-1))));
    17261726}
    17271727
     
    19871987template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::himask()
    19881988{
    1989         return reinterpret_cast<bitblock256_t>(_mm256_set_epi32((int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0)));
     1989        return ((bitblock256_t)(_mm256_set_epi32((int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0))));
    19901990}
    19911991
     
    19931993template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::himask()
    19941994{
    1995         return reinterpret_cast<bitblock256_t>(_mm256_set_epi32((int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0)));
     1995        return ((bitblock256_t)(_mm256_set_epi32((int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0))));
    19961996}
    19971997
     
    19991999template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::himask()
    20002000{
    2001         return reinterpret_cast<bitblock256_t>(_mm256_set_epi32((int32_t)(-1), (int32_t)(-1), (int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(0)));
     2001        return ((bitblock256_t)(_mm256_set_epi32((int32_t)(-1), (int32_t)(-1), (int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(0))));
    20022002}
    20032003
     
    22902290template <> IDISA_ALWAYS_INLINE uint64_t hsimd256<8>::signmask(bitblock256_t arg1)
    22912291{
    2292         return ((reinterpret_cast<uint64_t>(_mm_movemask_epi8(reinterpret_cast<__m128i>(avx_select_hi128(arg1))))<<16)|reinterpret_cast<uint64_t>(_mm_movemask_epi8(reinterpret_cast<__m128i>(avx_select_lo128(arg1)))));
     2292        return ((((uint64_t)(_mm_movemask_epi8(((__m128i)(avx_select_hi128(arg1))))))<<16)|((uint64_t)(_mm_movemask_epi8(((__m128i)(avx_select_lo128(arg1)))))));
    22932293}
    22942294
     
    29572957template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd256<32>::extract(bitblock256_t arg1)
    29582958{
    2959         return ((pos < 4) ? (reinterpret_cast<uint64_t>(((4294967296ULL)-1))&_mm_extract_epi32(avx_select_lo128(arg1), (int32_t)(pos))) : (reinterpret_cast<uint64_t>(((4294967296ULL)-1))&_mm_extract_epi32(avx_select_hi128(arg1), (int32_t)((pos-4)))));
     2959        return ((pos < 4) ? (((uint64_t)(((4294967296ULL)-1)))&_mm_extract_epi32(avx_select_lo128(arg1), (int32_t)(pos))) : (((uint64_t)(((4294967296ULL)-1)))&_mm_extract_epi32(avx_select_hi128(arg1), (int32_t)((pos-4)))));
    29602960}
    29612961
     
    33093309
    33103310//The total number of operations is 1.0
    3311 IDISA_ALWAYS_INLINE bitblock256_t bitblock256::load_unaligned(const float const* arg1)
    3312 {
    3313         return _mm256_loadu_ps((float const*)(arg1));
     3311IDISA_ALWAYS_INLINE bitblock256_t bitblock256::load_unaligned(const float* arg1)
     3312{
     3313        return _mm256_loadu_ps((float*)(arg1));
    33143314}
    33153315
     
    33233323IDISA_ALWAYS_INLINE void bitblock256::store_aligned(bitblock256_t arg1, float* arg2)
    33243324{
    3325         _mm256_store_ps((float*)(arg1), arg2);
     3325        _mm256_store_ps((float*)(arg2), arg1);
    33263326}
    33273327
     
    33353335IDISA_ALWAYS_INLINE bool bitblock256::all(bitblock256_t arg1)
    33363336{
    3337         return _mm256_testz_si256(reinterpret_cast<__m256i>(simd_not(arg1)), reinterpret_cast<__m256i>(simd256<8>::constant<-1>())) == 1;
     3337        return _mm256_testz_si256(((__m256i)(simd_not(arg1))), ((__m256i)(simd256<8>::constant<-1>()))) == 1;
    33383338}
    33393339
     
    33473347IDISA_ALWAYS_INLINE bool bitblock256::any(bitblock256_t arg1)
    33483348{
    3349         return _mm256_testz_si256(reinterpret_cast<__m256i>(arg1), reinterpret_cast<__m256i>(arg1)) == 0;
    3350 }
    3351 
    3352 //The total number of operations is 1.0
    3353 IDISA_ALWAYS_INLINE bitblock256_t bitblock256::load_aligned(const float const* arg1)
    3354 {
    3355         return _mm256_load_ps((float const*)(arg1));
     3349        return _mm256_testz_si256(((__m256i)(arg1)), ((__m256i)(arg1))) == 0;
     3350}
     3351
     3352//The total number of operations is 1.0
     3353IDISA_ALWAYS_INLINE bitblock256_t bitblock256::load_aligned(const float* arg1)
     3354{
     3355        return _mm256_load_ps((float*)(arg1));
    33563356}
    33573357
     
    33593359IDISA_ALWAYS_INLINE void bitblock256::store_unaligned(bitblock256_t arg1, float* arg2)
    33603360{
    3361         _mm256_storeu_ps((float*)(arg1), arg2);
     3361        _mm256_storeu_ps((float*)(arg2), arg1);
    33623362}
    33633363
  • trunk/lib/idisa_cpp/idisa_neon.cpp

    r1884 r1953  
    795795template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::srli(bitblock128_t arg1)
    796796{
    797         return ((sh == 0) ? arg1 : reinterpret_cast<bitblock128_t>(vshrq_n_u8((uint8x16_t)(arg1), (int32_t)(sh))));
     797        return ((sh == 0) ? arg1 : ((bitblock128_t)(vshrq_n_u8((uint8x16_t)(arg1), (int32_t)(sh)))));
    798798}
    799799
     
    801801template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::srli(bitblock128_t arg1)
    802802{
    803         return ((sh == 0) ? arg1 : reinterpret_cast<bitblock128_t>(vshrq_n_u16((uint16x8_t)(arg1), (int32_t)(sh))));
     803        return ((sh == 0) ? arg1 : ((bitblock128_t)(vshrq_n_u16((uint16x8_t)(arg1), (int32_t)(sh)))));
    804804}
    805805
     
    807807template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::srli(bitblock128_t arg1)
    808808{
    809         return ((sh == 0) ? arg1 : reinterpret_cast<bitblock128_t>(vshrq_n_u32((uint32x4_t)(arg1), (int32_t)(sh))));
     809        return ((sh == 0) ? arg1 : ((bitblock128_t)(vshrq_n_u32((uint32x4_t)(arg1), (int32_t)(sh)))));
    810810}
    811811
     
    813813template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srli(bitblock128_t arg1)
    814814{
    815         return ((sh == 0) ? arg1 : reinterpret_cast<bitblock128_t>(vshrq_n_u64((uint64x2_t)(arg1), (int32_t)(sh))));
     815        return ((sh == 0) ? arg1 : ((bitblock128_t)(vshrq_n_u64((uint64x2_t)(arg1), (int32_t)(sh)))));
    816816}
    817817
     
    10791079template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::slli(bitblock128_t arg1)
    10801080{
    1081         return ((sh == 8) ? simd128<32>::constant<0>() : reinterpret_cast<bitblock128_t>(vshlq_n_u8((uint8x16_t)(arg1), (int32_t)(sh))));
     1081        return ((sh == 8) ? simd128<32>::constant<0>() : ((bitblock128_t)(vshlq_n_u8((uint8x16_t)(arg1), (int32_t)(sh)))));
    10821082}
    10831083
     
    10851085template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::slli(bitblock128_t arg1)
    10861086{
    1087         return ((sh == 16) ? simd128<32>::constant<0>() : reinterpret_cast<bitblock128_t>(vshlq_n_u16((uint16x8_t)(arg1), (int32_t)(sh))));
     1087        return ((sh == 16) ? simd128<32>::constant<0>() : ((bitblock128_t)(vshlq_n_u16((uint16x8_t)(arg1), (int32_t)(sh)))));
    10881088}
    10891089
     
    10911091template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::slli(bitblock128_t arg1)
    10921092{
    1093         return ((sh == 32) ? simd128<32>::constant<0>() : reinterpret_cast<bitblock128_t>(vshlq_n_u32((uint32x4_t)(arg1), (int32_t)(sh))));
     1093        return ((sh == 32) ? simd128<32>::constant<0>() : ((bitblock128_t)(vshlq_n_u32((uint32x4_t)(arg1), (int32_t)(sh)))));
    10941094}
    10951095
     
    10971097template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::slli(bitblock128_t arg1)
    10981098{
    1099         return ((sh == 64) ? simd128<32>::constant<0>() : reinterpret_cast<bitblock128_t>(vshlq_n_u64((uint64x2_t)(arg1), (int32_t)(sh))));
     1099        return ((sh == 64) ? simd128<32>::constant<0>() : ((bitblock128_t)(vshlq_n_u64((uint64x2_t)(arg1), (int32_t)(sh)))));
    11001100}
    11011101
     
    15691569template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::srai(bitblock128_t arg1)
    15701570{
    1571         return ((sh == 0) ? arg1 : reinterpret_cast<bitblock128_t>(vshrq_n_s8((int8x16_t)(arg1), (int32_t)(sh))));
     1571        return ((sh == 0) ? arg1 : ((bitblock128_t)(vshrq_n_s8((int8x16_t)(arg1), (int32_t)(sh)))));
    15721572}
    15731573
     
    15751575template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::srai(bitblock128_t arg1)
    15761576{
    1577         return ((sh == 0) ? arg1 : reinterpret_cast<bitblock128_t>(vshrq_n_s16((int16x8_t)(arg1), (int32_t)(sh))));
     1577        return ((sh == 0) ? arg1 : ((bitblock128_t)(vshrq_n_s16((int16x8_t)(arg1), (int32_t)(sh)))));
    15781578}
    15791579
     
    15811581template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::srai(bitblock128_t arg1)
    15821582{
    1583         return ((sh == 0) ? arg1 : reinterpret_cast<bitblock128_t>(vshrq_n_s32((int32x4_t)(arg1), (int32_t)(sh))));
     1583        return ((sh == 0) ? arg1 : ((bitblock128_t)(vshrq_n_s32((int32x4_t)(arg1), (int32_t)(sh)))));
    15841584}
    15851585
     
    15871587template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srai(bitblock128_t arg1)
    15881588{
    1589         return ((sh == 0) ? arg1 : reinterpret_cast<bitblock128_t>(vshrq_n_s64((int64x2_t)(arg1), (int32_t)(sh))));
     1589        return ((sh == 0) ? arg1 : ((bitblock128_t)(vshrq_n_s64((int64x2_t)(arg1), (int32_t)(sh)))));
    15901590}
    15911591
  • trunk/lib/idisa_cpp/idisa_sse4_1.cpp

    r1924 r1953  
    26902690template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd128<32>::extract(bitblock128_t arg1)
    26912691{
    2692         return (reinterpret_cast<uint64_t>(((4294967296ULL)-1))&_mm_extract_epi32(arg1, (int32_t)(pos)));
     2692        return (((uint64_t)(((4294967296ULL)-1)))&_mm_extract_epi32(arg1, (int32_t)(pos)));
    26932693}
    26942694
  • trunk/lib/idisa_cpp/idisa_sse4_2.cpp

    r1924 r1953  
    26722672template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd128<32>::extract(bitblock128_t arg1)
    26732673{
    2674         return (reinterpret_cast<uint64_t>(((4294967296ULL)-1))&_mm_extract_epi32(arg1, (int32_t)(pos)));
     2674        return (((uint64_t)(((4294967296ULL)-1)))&_mm_extract_epi32(arg1, (int32_t)(pos)));
    26752675}
    26762676
Note: See TracChangeset for help on using the changeset viewer.