Changeset 2230 for trunk/lib/idisa_cpp


Ignore:
Timestamp:
Jul 2, 2012, 2:39:28 PM (7 years ago)
Author:
cameron
Message:

Library updates: simplify simd<fw>::constant

Location:
trunk/lib/idisa_cpp
Files:
7 edited

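Legend:

Unmodified — the line carries both the old and the new line number, fused together (e.g. `103103`)
Added — the line carries only the new line number; the old-number column is blank (e.g. ` 112`)
Removed — the line carries only the old line number; the new-number column is blank (e.g. `105 `)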
  • trunk/lib/idisa_cpp/idisa_avx.cpp

    r2191 r2230  
    103103        static IDISA_ALWAYS_INLINE bitblock256_t load_aligned(const bitblock256_t* arg1);
    104104        static IDISA_ALWAYS_INLINE void store_unaligned(bitblock256_t arg1, bitblock256_t* arg2);
    105         static IDISA_ALWAYS_INLINE bitblock256_t sll(bitblock256_t arg1, bitblock256_t arg2);
    106         static IDISA_ALWAYS_INLINE bitblock256_t srl(bitblock256_t arg1, bitblock256_t arg2);
    107105};
    108106
     
    112110IDISA_ALWAYS_INLINE bitblock256_t simd_andc(bitblock256_t arg1, bitblock256_t arg2);
    113111IDISA_ALWAYS_INLINE bitblock256_t simd_or(bitblock256_t arg1, bitblock256_t arg2);
     112IDISA_ALWAYS_INLINE bitblock256_t simd_xor(bitblock256_t arg1, bitblock256_t arg2);
    114113IDISA_ALWAYS_INLINE bitblock256_t simd_and(bitblock256_t arg1, bitblock256_t arg2);
    115 IDISA_ALWAYS_INLINE bitblock256_t simd_xor(bitblock256_t arg1, bitblock256_t arg2);
    116114template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::max(bitblock256_t arg1, bitblock256_t arg2);
    117115template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::max(bitblock256_t arg1, bitblock256_t arg2);
     
    579577
    580578//The total number of operations is 1.0
     579IDISA_ALWAYS_INLINE bitblock256_t simd_xor(bitblock256_t arg1, bitblock256_t arg2)
     580{
     581        return _mm256_xor_ps(arg1, arg2);
     582}
     583
     584//The total number of operations is 1.0
    581585IDISA_ALWAYS_INLINE bitblock256_t simd_and(bitblock256_t arg1, bitblock256_t arg2)
    582586{
    583587        return _mm256_and_ps(arg1, arg2);
    584 }
    585 
    586 //The total number of operations is 1.0
    587 IDISA_ALWAYS_INLINE bitblock256_t simd_xor(bitblock256_t arg1, bitblock256_t arg2)
    588 {
    589         return _mm256_xor_ps(arg1, arg2);
    590588}
    591589
     
    15761574template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::constant()
    15771575{
    1578         return ((val < 0) ? simd256<(4)>::constant<((val<<2)|(val^(-4)))>() : simd256<(4)>::constant<((val<<2)|val)>());
     1576        return simd256<(4)>::constant<((val<<2)|(val&(3)))>();
    15791577}
    15801578
     
    15821580template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::constant()
    15831581{
    1584         return ((val < 0) ? simd256<(8)>::constant<((val<<4)|(val^(-16)))>() : simd256<(8)>::constant<((val<<4)|val)>());
     1582        return simd256<(8)>::constant<((val<<4)|(val&(15)))>();
    15851583}
    15861584
     
    29402938}
    29412939
    2942 //The total number of operations is 1.0
     2940//The total number of operations is 13.0
    29432941template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::fill(uint64_t val1)
    29442942{
     
    29462944}
    29472945
    2948 //The total number of operations is 5.0
     2946//The total number of operations is 29.0
    29492947template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<256>::fill(uint64_t val1)
    29502948{
     
    31203118}
    31213119
    3122 //The total number of operations is 5.0
     3120//The total number of operations is 29.0
    31233121template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4)
    31243122{
     
    32103208}
    32113209
    3212 //The total number of operations is 1.0
     3210//The total number of operations is 13.0
    32133211template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::fill2(uint64_t val1, uint64_t val2)
    32143212{
    3215         return mvmd256<(32)>::fill4(0, val1, 0, val2);
    3216 }
    3217 
    3218 //The total number of operations is 5.0
     3213        return simd256<1>::ifh(simd256<(128)>::himask(), mvmd256<64>::fill(val1), mvmd256<64>::fill(val2));
     3214}
     3215
     3216//The total number of operations is 29.0
    32193217template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::fill2(uint64_t val1, uint64_t val2)
    32203218{
     
    34083406}
    34093407
    3410 IDISA_ALWAYS_INLINE bitblock256_t bitblock256::sll(bitblock256_t r, bitblock256_t shft)
    3411 {
    3412         bitblock128_t s = avx_select_lo128(shft);
    3413         bitblock128_t c128 = _mm_cvtsi32_si128(128);
    3414         bitblock128_t x = avx_select_hi128(r);
    3415         bitblock128_t y = avx_select_lo128(r);
    3416 
    3417         if (bitblock128::any(simd128<16>::srli<7>(s))) {
    3418           x = bitblock128::sll(y, _mm_sub_epi32(s, c128));
    3419           y = simd128<1>::constant<0>();
    3420         }
    3421         else {
    3422           x = simd_or(bitblock128::sll(x, s), bitblock128::srl(y, _mm_sub_epi32(c128, s)));
    3423           y = bitblock128::sll(y, s);
    3424         }
    3425         return avx_general_combine256(x, y);
    3426 }
    3427 
    3428 IDISA_ALWAYS_INLINE bitblock256_t bitblock256::srl(bitblock256_t r, bitblock256_t shft)
    3429 {
    3430         bitblock128_t s = avx_select_lo128(shft);
    3431         bitblock128_t c128 = _mm_cvtsi32_si128(128);
    3432         bitblock128_t x = avx_select_hi128(r);
    3433         bitblock128_t y = avx_select_lo128(r);
    3434 
    3435         if (bitblock128::any(simd128<16>::srli<7>(s))) {
    3436           y = bitblock128::srl(x, _mm_sub_epi32(s, c128));
    3437           x = simd128<1>::constant<0>();
    3438         }
    3439         else {
    3440           y = simd_or(bitblock128::srl(y, s), bitblock128::sll(x, _mm_sub_epi32(c128, s)));
    3441           x = bitblock128::srl(x, s);
    3442         }
    3443         return avx_general_combine256(x, y);
    3444 }
    3445 
    34463408#endif
  • trunk/lib/idisa_cpp/idisa_neon.cpp

    r1953 r2230  
    107107IDISA_ALWAYS_INLINE bitblock128_t simd_andc(bitblock128_t arg1, bitblock128_t arg2);
    108108IDISA_ALWAYS_INLINE bitblock128_t simd_or(bitblock128_t arg1, bitblock128_t arg2);
     109IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2);
    109110IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2);
    110 IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2);
    111111template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::max(bitblock128_t arg1, bitblock128_t arg2);
    112112template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::max(bitblock128_t arg1, bitblock128_t arg2);
     
    409409template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::fill(uint64_t val1);
    410410template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::fill(uint64_t val1);
     411template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::fill(uint64_t val1);
    411412template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd128<1>::extract(bitblock128_t arg1);
    412413template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd128<2>::extract(bitblock128_t arg1);
     
    500501
    501502//The total number of operations is 1.0
     503IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2)
     504{
     505        return veorq_u64(arg1, arg2);
     506}
     507
     508//The total number of operations is 1.0
    502509IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2)
    503510{
    504511        return vandq_u64(arg1, arg2);
    505 }
    506 
    507 //The total number of operations is 1.0
    508 IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2)
    509 {
    510         return veorq_u64(arg1, arg2);
    511512}
    512513
     
    819820template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srli(bitblock128_t arg1)
    820821{
    821         return ((sh == 64) ? neon_shift_right_64_bits(arg1) : ((sh > 64) ? simd128<64>::srli<(sh-64)>(neon_shift_right_64_bits(arg1)) : simd_or(neon_shift_right_64_bits(simd128<64>::slli<(64-sh)>(arg1)), simd128<64>::srli<sh>(arg1))));
     822        return ((sh == 64) ? neon_shift_right_64_bits(arg1) : ((sh > 64) ? simd128<64>::srli<(sh&63)>(neon_shift_right_64_bits(arg1)) : simd_or(neon_shift_right_64_bits(simd128<64>::slli<(64-sh)>(arg1)), simd128<64>::srli<sh>(arg1))));
    822823}
    823824
     
    11031104template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::slli(bitblock128_t arg1)
    11041105{
    1105         return ((sh == 128) ? simd128<32>::constant<0>() : ((sh >= 64) ? simd128<64>::slli<(sh-64)>(neon_shift_left_64_bits(arg1)) : simd_or(neon_shift_left_64_bits(simd128<64>::srli<(64-sh)>(arg1)), simd128<64>::slli<sh>(arg1))));
     1106        return ((sh == 128) ? simd128<32>::constant<0>() : ((sh >= 64) ? simd128<64>::slli<(sh&63)>(neon_shift_left_64_bits(arg1)) : simd_or(neon_shift_left_64_bits(simd128<64>::srli<(64-sh)>(arg1)), simd128<64>::slli<sh>(arg1))));
    11061107}
    11071108
     
    12571258template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::constant()
    12581259{
    1259         return ((val < 0) ? simd128<(4)>::constant<((val<<2)|(val^(-4)))>() : simd128<(4)>::constant<((val<<2)|val)>());
     1260        return simd128<(4)>::constant<((val<<2)|(val&(3)))>();
    12601261}
    12611262
     
    12631264template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::constant()
    12641265{
    1265         return ((val < 0) ? simd128<(8)>::constant<((val<<4)|(val^(-16)))>() : simd128<(8)>::constant<((val<<4)|val)>());
     1266        return simd128<(8)>::constant<((val<<4)|(val&(15)))>();
    12661267}
    12671268
     
    24512452}
    24522453
     2454//The total number of operations is 3.0
     2455template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::fill(uint64_t val1)
     2456{
     2457        return mvmd128<(64)>::fill2(0, val1);
     2458}
     2459
    24532460//The total number of operations is 1.0
    24542461template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd128<1>::extract(bitblock128_t arg1)
  • trunk/lib/idisa_cpp/idisa_sse2.cpp

    r1924 r2230  
    113113IDISA_ALWAYS_INLINE bitblock128_t simd_andc(bitblock128_t arg1, bitblock128_t arg2);
    114114IDISA_ALWAYS_INLINE bitblock128_t simd_or(bitblock128_t arg1, bitblock128_t arg2);
     115IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2);
    115116IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2);
    116 IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2);
    117117template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::max(bitblock128_t arg1, bitblock128_t arg2);
    118118template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::max(bitblock128_t arg1, bitblock128_t arg2);
     
    526526
    527527//The total number of operations is 1.0
     528IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2)
     529{
     530        return _mm_xor_si128(arg1, arg2);
     531}
     532
     533//The total number of operations is 1.0
    528534IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2)
    529535{
    530536        return _mm_and_si128(arg1, arg2);
    531 }
    532 
    533 //The total number of operations is 1.0
    534 IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2)
    535 {
    536         return _mm_xor_si128(arg1, arg2);
    537537}
    538538
     
    938938template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srli(bitblock128_t arg1)
    939939{
    940         return (((sh%8) == 0) ? _mm_srli_si128(arg1, (int32_t)((sh/8))) : ((sh >= 64) ? simd128<64>::srli<(sh-64)>(_mm_srli_si128(arg1, (int32_t)(8))) : simd_or(simd128<64>::srli<sh>(arg1), _mm_srli_si128(simd128<64>::slli<(64-sh)>(arg1), (int32_t)(8)))));
     940        return (((sh%8) == 0) ? _mm_srli_si128(arg1, (int32_t)((sh/8))) : ((sh >= 64) ? simd128<64>::srli<(sh&63)>(_mm_srli_si128(arg1, (int32_t)(8))) : simd_or(simd128<64>::srli<sh>(arg1), _mm_srli_si128(simd128<64>::slli<((128-sh)&63)>(arg1), (int32_t)(8)))));
    941941}
    942942
     
    12341234template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::slli(bitblock128_t arg1)
    12351235{
    1236         return (((sh%8) == 0) ? _mm_slli_si128(arg1, (int32_t)((sh/8))) : ((sh >= 64) ? simd128<64>::slli<(sh-64)>(_mm_slli_si128(arg1, (int32_t)(8))) : simd_or(simd128<64>::slli<sh>(arg1), _mm_slli_si128(simd128<64>::srli<(64-sh)>(arg1), (int32_t)(8)))));
     1236        return (((sh%8) == 0) ? _mm_slli_si128(arg1, (int32_t)((sh/8))) : ((sh >= 64) ? simd128<64>::slli<(sh&63)>(_mm_slli_si128(arg1, (int32_t)(8))) : simd_or(simd128<64>::slli<sh>(arg1), _mm_slli_si128(simd128<64>::srli<((128-sh)&63)>(arg1), (int32_t)(8)))));
    12371237}
    12381238
     
    14011401template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::constant()
    14021402{
    1403         return ((val < 0) ? simd128<(4)>::constant<((val<<2)|(val^(-4)))>() : simd128<(4)>::constant<((val<<2)|val)>());
     1403        return simd128<(4)>::constant<((val<<2)|(val&(3)))>();
    14041404}
    14051405
     
    14071407template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::constant()
    14081408{
    1409         return ((val < 0) ? simd128<(8)>::constant<((val<<4)|(val^(-16)))>() : simd128<(8)>::constant<((val<<4)|val)>());
     1409        return simd128<(8)>::constant<((val<<4)|(val&(15)))>();
    14101410}
    14111411
  • trunk/lib/idisa_cpp/idisa_sse3.cpp

    r1924 r2230  
    113113IDISA_ALWAYS_INLINE bitblock128_t simd_andc(bitblock128_t arg1, bitblock128_t arg2);
    114114IDISA_ALWAYS_INLINE bitblock128_t simd_or(bitblock128_t arg1, bitblock128_t arg2);
     115IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2);
    115116IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2);
    116 IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2);
    117117template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::max(bitblock128_t arg1, bitblock128_t arg2);
    118118template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::max(bitblock128_t arg1, bitblock128_t arg2);
     
    526526
    527527//The total number of operations is 1.0
     528IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2)
     529{
     530        return _mm_xor_si128(arg1, arg2);
     531}
     532
     533//The total number of operations is 1.0
    528534IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2)
    529535{
    530536        return _mm_and_si128(arg1, arg2);
    531 }
    532 
    533 //The total number of operations is 1.0
    534 IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2)
    535 {
    536         return _mm_xor_si128(arg1, arg2);
    537537}
    538538
     
    938938template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srli(bitblock128_t arg1)
    939939{
    940         return (((sh%8) == 0) ? _mm_srli_si128(arg1, (int32_t)((sh/8))) : ((sh >= 64) ? simd128<64>::srli<(sh-64)>(_mm_srli_si128(arg1, (int32_t)(8))) : simd_or(simd128<64>::srli<sh>(arg1), _mm_srli_si128(simd128<64>::slli<(64-sh)>(arg1), (int32_t)(8)))));
     940        return (((sh%8) == 0) ? _mm_srli_si128(arg1, (int32_t)((sh/8))) : ((sh >= 64) ? simd128<64>::srli<(sh&63)>(_mm_srli_si128(arg1, (int32_t)(8))) : simd_or(simd128<64>::srli<sh>(arg1), _mm_srli_si128(simd128<64>::slli<((128-sh)&63)>(arg1), (int32_t)(8)))));
    941941}
    942942
     
    12341234template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::slli(bitblock128_t arg1)
    12351235{
    1236         return (((sh%8) == 0) ? _mm_slli_si128(arg1, (int32_t)((sh/8))) : ((sh >= 64) ? simd128<64>::slli<(sh-64)>(_mm_slli_si128(arg1, (int32_t)(8))) : simd_or(simd128<64>::slli<sh>(arg1), _mm_slli_si128(simd128<64>::srli<(64-sh)>(arg1), (int32_t)(8)))));
     1236        return (((sh%8) == 0) ? _mm_slli_si128(arg1, (int32_t)((sh/8))) : ((sh >= 64) ? simd128<64>::slli<(sh&63)>(_mm_slli_si128(arg1, (int32_t)(8))) : simd_or(simd128<64>::slli<sh>(arg1), _mm_slli_si128(simd128<64>::srli<((128-sh)&63)>(arg1), (int32_t)(8)))));
    12371237}
    12381238
     
    14011401template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::constant()
    14021402{
    1403         return ((val < 0) ? simd128<(4)>::constant<((val<<2)|(val^(-4)))>() : simd128<(4)>::constant<((val<<2)|val)>());
     1403        return simd128<(4)>::constant<((val<<2)|(val&(3)))>();
    14041404}
    14051405
     
    14071407template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::constant()
    14081408{
    1409         return ((val < 0) ? simd128<(8)>::constant<((val<<4)|(val^(-16)))>() : simd128<(8)>::constant<((val<<4)|val)>());
     1409        return simd128<(8)>::constant<((val<<4)|(val&(15)))>();
    14101410}
    14111411
  • trunk/lib/idisa_cpp/idisa_sse4_1.cpp

    r1953 r2230  
    114114IDISA_ALWAYS_INLINE bitblock128_t simd_andc(bitblock128_t arg1, bitblock128_t arg2);
    115115IDISA_ALWAYS_INLINE bitblock128_t simd_or(bitblock128_t arg1, bitblock128_t arg2);
     116IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2);
    116117IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2);
    117 IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2);
    118118template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::max(bitblock128_t arg1, bitblock128_t arg2);
    119119template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::max(bitblock128_t arg1, bitblock128_t arg2);
     
    531531
    532532//The total number of operations is 1.0
     533IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2)
     534{
     535        return _mm_xor_si128(arg1, arg2);
     536}
     537
     538//The total number of operations is 1.0
    533539IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2)
    534540{
    535541        return _mm_and_si128(arg1, arg2);
    536 }
    537 
    538 //The total number of operations is 1.0
    539 IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2)
    540 {
    541         return _mm_xor_si128(arg1, arg2);
    542542}
    543543
     
    948948template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srli(bitblock128_t arg1)
    949949{
    950         return (((sh%8) == 0) ? _mm_srli_si128(arg1, (int32_t)((sh/8))) : ((sh >= 64) ? simd128<64>::srli<(sh-64)>(_mm_srli_si128(arg1, (int32_t)(8))) : simd_or(simd128<64>::srli<sh>(arg1), _mm_srli_si128(simd128<64>::slli<(64-sh)>(arg1), (int32_t)(8)))));
     950        return (((sh%8) == 0) ? _mm_srli_si128(arg1, (int32_t)((sh/8))) : ((sh >= 64) ? simd128<64>::srli<(sh&63)>(_mm_srli_si128(arg1, (int32_t)(8))) : simd_or(simd128<64>::srli<sh>(arg1), _mm_srli_si128(simd128<64>::slli<((128-sh)&63)>(arg1), (int32_t)(8)))));
    951951}
    952952
     
    12441244template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::slli(bitblock128_t arg1)
    12451245{
    1246         return (((sh%8) == 0) ? _mm_slli_si128(arg1, (int32_t)((sh/8))) : ((sh >= 64) ? simd128<64>::slli<(sh-64)>(_mm_slli_si128(arg1, (int32_t)(8))) : simd_or(simd128<64>::slli<sh>(arg1), _mm_slli_si128(simd128<64>::srli<(64-sh)>(arg1), (int32_t)(8)))));
     1246        return (((sh%8) == 0) ? _mm_slli_si128(arg1, (int32_t)((sh/8))) : ((sh >= 64) ? simd128<64>::slli<(sh&63)>(_mm_slli_si128(arg1, (int32_t)(8))) : simd_or(simd128<64>::slli<sh>(arg1), _mm_slli_si128(simd128<64>::srli<((128-sh)&63)>(arg1), (int32_t)(8)))));
    12471247}
    12481248
     
    14111411template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::constant()
    14121412{
    1413         return ((val < 0) ? simd128<(4)>::constant<((val<<2)|(val^(-4)))>() : simd128<(4)>::constant<((val<<2)|val)>());
     1413        return simd128<(4)>::constant<((val<<2)|(val&(3)))>();
    14141414}
    14151415
     
    14171417template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::constant()
    14181418{
    1419         return ((val < 0) ? simd128<(8)>::constant<((val<<4)|(val^(-16)))>() : simd128<(8)>::constant<((val<<4)|val)>());
     1419        return simd128<(8)>::constant<((val<<4)|(val&(15)))>();
    14201420}
    14211421
  • trunk/lib/idisa_cpp/idisa_sse4_2.cpp

    r1953 r2230  
    114114IDISA_ALWAYS_INLINE bitblock128_t simd_andc(bitblock128_t arg1, bitblock128_t arg2);
    115115IDISA_ALWAYS_INLINE bitblock128_t simd_or(bitblock128_t arg1, bitblock128_t arg2);
     116IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2);
    116117IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2);
    117 IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2);
    118118template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::max(bitblock128_t arg1, bitblock128_t arg2);
    119119template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::max(bitblock128_t arg1, bitblock128_t arg2);
     
    531531
    532532//The total number of operations is 1.0
     533IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2)
     534{
     535        return _mm_xor_si128(arg1, arg2);
     536}
     537
     538//The total number of operations is 1.0
    533539IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2)
    534540{
    535541        return _mm_and_si128(arg1, arg2);
    536 }
    537 
    538 //The total number of operations is 1.0
    539 IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2)
    540 {
    541         return _mm_xor_si128(arg1, arg2);
    542542}
    543543
     
    940940template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srli(bitblock128_t arg1)
    941941{
    942         return (((sh%8) == 0) ? _mm_srli_si128(arg1, (int32_t)((sh/8))) : ((sh >= 64) ? simd128<64>::srli<(sh-64)>(_mm_srli_si128(arg1, (int32_t)(8))) : simd_or(simd128<64>::srli<sh>(arg1), _mm_srli_si128(simd128<64>::slli<(64-sh)>(arg1), (int32_t)(8)))));
     942        return (((sh%8) == 0) ? _mm_srli_si128(arg1, (int32_t)((sh/8))) : ((sh >= 64) ? simd128<64>::srli<(sh&63)>(_mm_srli_si128(arg1, (int32_t)(8))) : simd_or(simd128<64>::srli<sh>(arg1), _mm_srli_si128(simd128<64>::slli<((128-sh)&63)>(arg1), (int32_t)(8)))));
    943943}
    944944
     
    12341234template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::slli(bitblock128_t arg1)
    12351235{
    1236         return (((sh%8) == 0) ? _mm_slli_si128(arg1, (int32_t)((sh/8))) : ((sh >= 64) ? simd128<64>::slli<(sh-64)>(_mm_slli_si128(arg1, (int32_t)(8))) : simd_or(simd128<64>::slli<sh>(arg1), _mm_slli_si128(simd128<64>::srli<(64-sh)>(arg1), (int32_t)(8)))));
     1236        return (((sh%8) == 0) ? _mm_slli_si128(arg1, (int32_t)((sh/8))) : ((sh >= 64) ? simd128<64>::slli<(sh&63)>(_mm_slli_si128(arg1, (int32_t)(8))) : simd_or(simd128<64>::slli<sh>(arg1), _mm_slli_si128(simd128<64>::srli<((128-sh)&63)>(arg1), (int32_t)(8)))));
    12371237}
    12381238
     
    14011401template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::constant()
    14021402{
    1403         return ((val < 0) ? simd128<(4)>::constant<((val<<2)|(val^(-4)))>() : simd128<(4)>::constant<((val<<2)|val)>());
     1403        return simd128<(4)>::constant<((val<<2)|(val&(3)))>();
    14041404}
    14051405
     
    14071407template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::constant()
    14081408{
    1409         return ((val < 0) ? simd128<(8)>::constant<((val<<4)|(val^(-16)))>() : simd128<(8)>::constant<((val<<4)|val)>());
     1409        return simd128<(8)>::constant<((val<<4)|(val&(15)))>();
    14101410}
    14111411
  • trunk/lib/idisa_cpp/idisa_ssse3.cpp

    r1924 r2230  
    114114IDISA_ALWAYS_INLINE bitblock128_t simd_andc(bitblock128_t arg1, bitblock128_t arg2);
    115115IDISA_ALWAYS_INLINE bitblock128_t simd_or(bitblock128_t arg1, bitblock128_t arg2);
     116IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2);
    116117IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2);
    117 IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2);
    118118template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::max(bitblock128_t arg1, bitblock128_t arg2);
    119119template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::max(bitblock128_t arg1, bitblock128_t arg2);
     
    531531
    532532//The total number of operations is 1.0
     533IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2)
     534{
     535        return _mm_xor_si128(arg1, arg2);
     536}
     537
     538//The total number of operations is 1.0
    533539IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2)
    534540{
    535541        return _mm_and_si128(arg1, arg2);
    536 }
    537 
    538 //The total number of operations is 1.0
    539 IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2)
    540 {
    541         return _mm_xor_si128(arg1, arg2);
    542542}
    543543
     
    943943template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srli(bitblock128_t arg1)
    944944{
    945         return (((sh%8) == 0) ? _mm_srli_si128(arg1, (int32_t)((sh/8))) : ((sh >= 64) ? simd128<64>::srli<(sh-64)>(_mm_srli_si128(arg1, (int32_t)(8))) : simd_or(simd128<64>::srli<sh>(arg1), _mm_srli_si128(simd128<64>::slli<(64-sh)>(arg1), (int32_t)(8)))));
     945        return (((sh%8) == 0) ? _mm_srli_si128(arg1, (int32_t)((sh/8))) : ((sh >= 64) ? simd128<64>::srli<(sh&63)>(_mm_srli_si128(arg1, (int32_t)(8))) : simd_or(simd128<64>::srli<sh>(arg1), _mm_srli_si128(simd128<64>::slli<((128-sh)&63)>(arg1), (int32_t)(8)))));
    946946}
    947947
     
    12391239template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::slli(bitblock128_t arg1)
    12401240{
    1241         return (((sh%8) == 0) ? _mm_slli_si128(arg1, (int32_t)((sh/8))) : ((sh >= 64) ? simd128<64>::slli<(sh-64)>(_mm_slli_si128(arg1, (int32_t)(8))) : simd_or(simd128<64>::slli<sh>(arg1), _mm_slli_si128(simd128<64>::srli<(64-sh)>(arg1), (int32_t)(8)))));
     1241        return (((sh%8) == 0) ? _mm_slli_si128(arg1, (int32_t)((sh/8))) : ((sh >= 64) ? simd128<64>::slli<(sh&63)>(_mm_slli_si128(arg1, (int32_t)(8))) : simd_or(simd128<64>::slli<sh>(arg1), _mm_slli_si128(simd128<64>::srli<((128-sh)&63)>(arg1), (int32_t)(8)))));
    12421242}
    12431243
     
    14061406template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::constant()
    14071407{
    1408         return ((val < 0) ? simd128<(4)>::constant<((val<<2)|(val^(-4)))>() : simd128<(4)>::constant<((val<<2)|val)>());
     1408        return simd128<(4)>::constant<((val<<2)|(val&(3)))>();
    14091409}
    14101410
     
    14121412template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::constant()
    14131413{
    1414         return ((val < 0) ? simd128<(8)>::constant<((val<<4)|(val^(-16)))>() : simd128<(8)>::constant<((val<<4)|val)>());
     1414        return simd128<(8)>::constant<((val<<4)|(val&(15)))>();
    14151415}
    14161416
Note: See TracChangeset for help on using the changeset viewer.