Changeset 1740 for trunk/lib


Ignore:
Timestamp:
Nov 27, 2011, 2:38:59 PM (7 years ago)
Author:
huah
Message:

Added support for ARM NEON: idisa128.hpp now includes idisa_cpp/idisa_neon.cpp when USE_NEON is defined.

Location:
trunk/lib
Files:
1 added
6 edited

Legend:

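Unmodified (line shows both the old and the new line number)
Added (line shows only the new line number)
Removed (line shows only the old line number)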
  • trunk/lib/idisa128.hpp

    r1548 r1740  
    1010#elif defined USE_SSE4_2
    1111#include "idisa_cpp/idisa_sse4_2.cpp"
     12#elif defined USE_NEON
     13#include "idisa_cpp/idisa_neon.cpp"
    1214#else
    1315#include "idisa_cpp/idisa_sse2.cpp"
  • trunk/lib/idisa_cpp/idisa_sse2.cpp

    r1661 r1740  
    106106IDISA_ALWAYS_INLINE bitblock128_t simd_andc(bitblock128_t arg1, bitblock128_t arg2);
    107107IDISA_ALWAYS_INLINE bitblock128_t simd_or(bitblock128_t arg1, bitblock128_t arg2);
     108IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2);
    108109IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2);
    109 IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2);
    110110template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::max(bitblock128_t arg1, bitblock128_t arg2);
    111111template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::max(bitblock128_t arg1, bitblock128_t arg2);
     
    519519
    520520//The total number of operations is 1
     521IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2)
     522{
     523        return _mm_and_si128(arg1, arg2);
     524}
     525
     526//The total number of operations is 1
    521527IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2)
    522528{
    523529        return _mm_xor_si128(arg1, arg2);
    524 }
    525 
    526 //The total number of operations is 1
    527 IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2)
    528 {
    529         return _mm_and_si128(arg1, arg2);
    530530}
    531531
     
    573573}
    574574
    575 //The total number of operations is 65
     575//The total number of operations is 64
    576576template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::max(bitblock128_t arg1, bitblock128_t arg2)
    577577{
    578         bitblock128_t hiAns = simd128<(64)>::max(arg1, arg2);
    579         bitblock128_t loAns = simd128<(64)>::umax(arg1, arg2);
    580         bitblock128_t eqMask1 = simd128<128>::srli<(64)>(simd128<(64)>::eq(hiAns, arg1));
    581         bitblock128_t eqMask2 = simd128<128>::srli<(64)>(simd128<(64)>::eq(hiAns, arg2));
    582         return simd128<1>::ifh(simd128<128>::himask(), hiAns, simd128<1>::ifh(eqMask1, simd128<1>::ifh(eqMask2, loAns, arg1), arg2));
     578        return simd128<1>::ifh(simd128<128>::gt(arg1, arg2), arg1, arg2);
    583579}
    584580
     
    708704}
    709705
    710 //The total number of operations is 66
     706//The total number of operations is 61
    711707template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::gt(bitblock128_t arg1, bitblock128_t arg2)
    712708{
     
    825821template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::ult(bitblock128_t arg1, bitblock128_t arg2)
    826822{
    827         bitblock128_t high_bit = simd128<32>::constant<(2147483648UL)>();
     823        bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>();
    828824        return simd128<32>::lt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    829825}
     
    838834}
    839835
    840 //The total number of operations is 55
     836//The total number of operations is 50
    841837template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::ult(bitblock128_t arg1, bitblock128_t arg2)
    842838{
     
    887883template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::lt(bitblock128_t arg1, bitblock128_t arg2)
    888884{
    889         bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808UL)>();
     885        bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808ULL)>();
    890886        return simd128<64>::ult(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    891887}
    892888
    893 //The total number of operations is 75
     889//The total number of operations is 70
    894890template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lt(bitblock128_t arg1, bitblock128_t arg2)
    895891{
     
    10421038template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::ugt(bitblock128_t arg1, bitblock128_t arg2)
    10431039{
    1044         bitblock128_t high_bit = simd128<32>::constant<(2147483648UL)>();
     1040        bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>();
    10451041        return simd128<32>::gt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    10461042}
     
    10551051}
    10561052
    1057 //The total number of operations is 51
     1053//The total number of operations is 46
    10581054template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::ugt(bitblock128_t arg1, bitblock128_t arg2)
    10591055{
     
    14841480}
    14851481
    1486 //The total number of operations is 65
     1482//The total number of operations is 64
    14871483template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::min(bitblock128_t arg1, bitblock128_t arg2)
    14881484{
    1489         bitblock128_t hiAns = simd128<(64)>::min(arg1, arg2);
    1490         bitblock128_t loAns = simd128<(64)>::umin(arg1, arg2);
    1491         bitblock128_t eqMask1 = simd128<128>::srli<(64)>(simd128<(64)>::eq(hiAns, arg1));
    1492         bitblock128_t eqMask2 = simd128<128>::srli<(64)>(simd128<(64)>::eq(hiAns, arg2));
    1493         return simd128<1>::ifh(simd128<128>::himask(), hiAns, simd128<1>::ifh(eqMask1, simd128<1>::ifh(eqMask2, loAns, arg1), arg2));
     1485        return simd128<1>::ifh(simd128<128>::gt(arg1, arg2), arg2, arg1);
    14941486}
    14951487
     
    15701562template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::umin(bitblock128_t arg1, bitblock128_t arg2)
    15711563{
    1572         bitblock128_t high_bit = simd128<32>::constant<(2147483648UL)>();
     1564        bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>();
    15731565        return simd_xor(simd128<32>::min(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
    15741566}
     
    17321724template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srai(bitblock128_t arg1)
    17331725{
    1734         bitblock128_t tmp = simd128<64>::srli<((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh))>(arg1);
    1735         return simd_or(tmp, simd128<64>::sub(simd128<64>::constant<0>(), simd_and(simd128<64>::slli<((64-((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh)))-1)>(simd128<64>::constant<1>()), tmp)));
    1736 }
    1737 
    1738 //The total number of operations is 21
     1726        return simd_or(simd_and(simd128<64>::himask(), simd128<(32)>::srai<((sh < (32)) ? sh : (32))>(arg1)), ((sh <= (32)) ? simd128<64>::srli<sh>(arg1) : simd128<(32)>::srai<(sh-(32))>(simd128<64>::srli<(32)>(arg1))));
     1727}
     1728
     1729//The total number of operations is 16
    17391730template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srai(bitblock128_t arg1)
    17401731{
    1741         bitblock128_t tmp = simd128<128>::srli<((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh))>(arg1);
    1742         return simd_or(tmp, simd128<128>::sub(simd128<128>::constant<0>(), simd_and(simd128<128>::slli<((128-((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh)))-1)>(simd128<128>::constant<1>()), tmp)));
     1732        return simd_or(simd_and(simd128<128>::himask(), simd128<(64)>::srai<((sh < (64)) ? sh : (64))>(arg1)), ((sh <= (64)) ? simd128<128>::srli<sh>(arg1) : simd128<(64)>::srai<(sh-(64))>(simd128<128>::srli<(64)>(arg1))));
    17431733}
    17441734
     
    18711861template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::umax(bitblock128_t arg1, bitblock128_t arg2)
    18721862{
    1873         bitblock128_t high_bit = simd128<32>::constant<(2147483648UL)>();
     1863        bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>();
    18741864        return simd_xor(simd128<32>::max(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
    18751865}
     
    20212011}
    20222012
    2023 //The total number of operations is 288
     2013//The total number of operations is 268
    20242014template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<128>::packss(bitblock128_t arg1, bitblock128_t arg2)
    20252015{
     
    20492039}
    20502040
    2051 //The total number of operations is 25
     2041//The total number of operations is 8
    20522042template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<32>::signmask(bitblock128_t arg1)
    20532043{
    2054         return hsimd128<(16)>::signmask(hsimd128<32>::packh(simd128<32>::constant<0>(), arg1));
    2055 }
    2056 
    2057 //The total number of operations is 32
     2044        return (((((mvmd128<32>::extract<3>(arg1)>>28)&8)|((mvmd128<32>::extract<2>(arg1)>>29)&4))|((mvmd128<32>::extract<1>(arg1)>>30)&2))|(mvmd128<32>::extract<0>(arg1)>>31));
     2045}
     2046
     2047//The total number of operations is 8
    20582048template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<64>::signmask(bitblock128_t arg1)
    20592049{
    2060         return hsimd128<(32)>::signmask(hsimd128<64>::packh(simd128<64>::constant<0>(), arg1));
    2061 }
    2062 
    2063 //The total number of operations is 39
     2050        return (((mvmd128<64>::extract<1>(arg1)>>62)&2)|(mvmd128<64>::extract<0>(arg1)>>63));
     2051}
     2052
     2053//The total number of operations is 15
    20642054template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<128>::signmask(bitblock128_t arg1)
    20652055{
     
    24572447}
    24582448
    2459 //The total number of operations is 21
     2449//The total number of operations is 16
    24602450template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<64>::signextendh(bitblock128_t arg1)
    24612451{
     
    24992489}
    25002490
    2501 //The total number of operations is 25
     2491//The total number of operations is 20
    25022492template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<64>::signextendl(bitblock128_t arg1)
    25032493{
     
    27902780template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::srli(bitblock128_t arg1)
    27912781{
    2792         return mvmd128<(2)>::srli<(sh*2)>(arg1);
     2782        return simd128<128>::srli<(sh*4)>(arg1);
    27932783}
    27942784
  • trunk/lib/idisa_cpp/idisa_sse3.cpp

    r1661 r1740  
    106106IDISA_ALWAYS_INLINE bitblock128_t simd_andc(bitblock128_t arg1, bitblock128_t arg2);
    107107IDISA_ALWAYS_INLINE bitblock128_t simd_or(bitblock128_t arg1, bitblock128_t arg2);
     108IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2);
    108109IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2);
    109 IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2);
    110110template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::max(bitblock128_t arg1, bitblock128_t arg2);
    111111template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::max(bitblock128_t arg1, bitblock128_t arg2);
     
    519519
    520520//The total number of operations is 1
     521IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2)
     522{
     523        return _mm_and_si128(arg1, arg2);
     524}
     525
     526//The total number of operations is 1
    521527IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2)
    522528{
    523529        return _mm_xor_si128(arg1, arg2);
    524 }
    525 
    526 //The total number of operations is 1
    527 IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2)
    528 {
    529         return _mm_and_si128(arg1, arg2);
    530530}
    531531
     
    573573}
    574574
    575 //The total number of operations is 65
     575//The total number of operations is 64
    576576template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::max(bitblock128_t arg1, bitblock128_t arg2)
    577577{
    578         bitblock128_t hiAns = simd128<(64)>::max(arg1, arg2);
    579         bitblock128_t loAns = simd128<(64)>::umax(arg1, arg2);
    580         bitblock128_t eqMask1 = simd128<128>::srli<(64)>(simd128<(64)>::eq(hiAns, arg1));
    581         bitblock128_t eqMask2 = simd128<128>::srli<(64)>(simd128<(64)>::eq(hiAns, arg2));
    582         return simd128<1>::ifh(simd128<128>::himask(), hiAns, simd128<1>::ifh(eqMask1, simd128<1>::ifh(eqMask2, loAns, arg1), arg2));
     578        return simd128<1>::ifh(simd128<128>::gt(arg1, arg2), arg1, arg2);
    583579}
    584580
     
    708704}
    709705
    710 //The total number of operations is 66
     706//The total number of operations is 61
    711707template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::gt(bitblock128_t arg1, bitblock128_t arg2)
    712708{
     
    825821template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::ult(bitblock128_t arg1, bitblock128_t arg2)
    826822{
    827         bitblock128_t high_bit = simd128<32>::constant<(2147483648UL)>();
     823        bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>();
    828824        return simd128<32>::lt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    829825}
     
    838834}
    839835
    840 //The total number of operations is 55
     836//The total number of operations is 50
    841837template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::ult(bitblock128_t arg1, bitblock128_t arg2)
    842838{
     
    887883template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::lt(bitblock128_t arg1, bitblock128_t arg2)
    888884{
    889         bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808UL)>();
     885        bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808ULL)>();
    890886        return simd128<64>::ult(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    891887}
    892888
    893 //The total number of operations is 75
     889//The total number of operations is 70
    894890template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lt(bitblock128_t arg1, bitblock128_t arg2)
    895891{
     
    10421038template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::ugt(bitblock128_t arg1, bitblock128_t arg2)
    10431039{
    1044         bitblock128_t high_bit = simd128<32>::constant<(2147483648UL)>();
     1040        bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>();
    10451041        return simd128<32>::gt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    10461042}
     
    10551051}
    10561052
    1057 //The total number of operations is 51
     1053//The total number of operations is 46
    10581054template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::ugt(bitblock128_t arg1, bitblock128_t arg2)
    10591055{
     
    14841480}
    14851481
    1486 //The total number of operations is 65
     1482//The total number of operations is 64
    14871483template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::min(bitblock128_t arg1, bitblock128_t arg2)
    14881484{
    1489         bitblock128_t hiAns = simd128<(64)>::min(arg1, arg2);
    1490         bitblock128_t loAns = simd128<(64)>::umin(arg1, arg2);
    1491         bitblock128_t eqMask1 = simd128<128>::srli<(64)>(simd128<(64)>::eq(hiAns, arg1));
    1492         bitblock128_t eqMask2 = simd128<128>::srli<(64)>(simd128<(64)>::eq(hiAns, arg2));
    1493         return simd128<1>::ifh(simd128<128>::himask(), hiAns, simd128<1>::ifh(eqMask1, simd128<1>::ifh(eqMask2, loAns, arg1), arg2));
     1485        return simd128<1>::ifh(simd128<128>::gt(arg1, arg2), arg2, arg1);
    14941486}
    14951487
     
    15701562template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::umin(bitblock128_t arg1, bitblock128_t arg2)
    15711563{
    1572         bitblock128_t high_bit = simd128<32>::constant<(2147483648UL)>();
     1564        bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>();
    15731565        return simd_xor(simd128<32>::min(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
    15741566}
     
    17321724template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srai(bitblock128_t arg1)
    17331725{
    1734         bitblock128_t tmp = simd128<64>::srli<((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh))>(arg1);
    1735         return simd_or(tmp, simd128<64>::sub(simd128<64>::constant<0>(), simd_and(simd128<64>::slli<((64-((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh)))-1)>(simd128<64>::constant<1>()), tmp)));
    1736 }
    1737 
    1738 //The total number of operations is 21
     1726        return simd_or(simd_and(simd128<64>::himask(), simd128<(32)>::srai<((sh < (32)) ? sh : (32))>(arg1)), ((sh <= (32)) ? simd128<64>::srli<sh>(arg1) : simd128<(32)>::srai<(sh-(32))>(simd128<64>::srli<(32)>(arg1))));
     1727}
     1728
     1729//The total number of operations is 16
    17391730template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srai(bitblock128_t arg1)
    17401731{
    1741         bitblock128_t tmp = simd128<128>::srli<((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh))>(arg1);
    1742         return simd_or(tmp, simd128<128>::sub(simd128<128>::constant<0>(), simd_and(simd128<128>::slli<((128-((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh)))-1)>(simd128<128>::constant<1>()), tmp)));
     1732        return simd_or(simd_and(simd128<128>::himask(), simd128<(64)>::srai<((sh < (64)) ? sh : (64))>(arg1)), ((sh <= (64)) ? simd128<128>::srli<sh>(arg1) : simd128<(64)>::srai<(sh-(64))>(simd128<128>::srli<(64)>(arg1))));
    17431733}
    17441734
     
    18711861template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::umax(bitblock128_t arg1, bitblock128_t arg2)
    18721862{
    1873         bitblock128_t high_bit = simd128<32>::constant<(2147483648UL)>();
     1863        bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>();
    18741864        return simd_xor(simd128<32>::max(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
    18751865}
     
    20212011}
    20222012
    2023 //The total number of operations is 288
     2013//The total number of operations is 268
    20242014template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<128>::packss(bitblock128_t arg1, bitblock128_t arg2)
    20252015{
     
    20492039}
    20502040
    2051 //The total number of operations is 25
     2041//The total number of operations is 8
    20522042template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<32>::signmask(bitblock128_t arg1)
    20532043{
    2054         return hsimd128<(16)>::signmask(hsimd128<32>::packh(simd128<32>::constant<0>(), arg1));
    2055 }
    2056 
    2057 //The total number of operations is 32
     2044        return (((((mvmd128<32>::extract<3>(arg1)>>28)&8)|((mvmd128<32>::extract<2>(arg1)>>29)&4))|((mvmd128<32>::extract<1>(arg1)>>30)&2))|(mvmd128<32>::extract<0>(arg1)>>31));
     2045}
     2046
     2047//The total number of operations is 8
    20582048template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<64>::signmask(bitblock128_t arg1)
    20592049{
    2060         return hsimd128<(32)>::signmask(hsimd128<64>::packh(simd128<64>::constant<0>(), arg1));
    2061 }
    2062 
    2063 //The total number of operations is 39
     2050        return (((mvmd128<64>::extract<1>(arg1)>>62)&2)|(mvmd128<64>::extract<0>(arg1)>>63));
     2051}
     2052
     2053//The total number of operations is 15
    20642054template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<128>::signmask(bitblock128_t arg1)
    20652055{
     
    24572447}
    24582448
    2459 //The total number of operations is 21
     2449//The total number of operations is 16
    24602450template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<64>::signextendh(bitblock128_t arg1)
    24612451{
     
    24992489}
    25002490
    2501 //The total number of operations is 25
     2491//The total number of operations is 20
    25022492template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<64>::signextendl(bitblock128_t arg1)
    25032493{
     
    27902780template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::srli(bitblock128_t arg1)
    27912781{
    2792         return mvmd128<(2)>::srli<(sh*2)>(arg1);
     2782        return simd128<128>::srli<(sh*4)>(arg1);
    27932783}
    27942784
     
    28082798template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::srli(bitblock128_t arg1)
    28092799{
    2810         return mvmd128<(16)>::srli<(sh*2)>(arg1);
     2800        return simd128<128>::srli<(sh*32)>(arg1);
    28112801}
    28122802
     
    28202810template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::srli(bitblock128_t arg1)
    28212811{
    2822         return mvmd128<(64)>::srli<(sh*2)>(arg1);
     2812        return simd128<128>::srli<(sh*128)>(arg1);
    28232813}
    28242814
  • trunk/lib/idisa_cpp/idisa_sse4_1.cpp

    r1661 r1740  
    107107IDISA_ALWAYS_INLINE bitblock128_t simd_andc(bitblock128_t arg1, bitblock128_t arg2);
    108108IDISA_ALWAYS_INLINE bitblock128_t simd_or(bitblock128_t arg1, bitblock128_t arg2);
     109IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2);
    109110IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2);
    110 IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2);
    111111template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::max(bitblock128_t arg1, bitblock128_t arg2);
    112112template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::max(bitblock128_t arg1, bitblock128_t arg2);
     
    524524
    525525//The total number of operations is 1
     526IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2)
     527{
     528        return _mm_and_si128(arg1, arg2);
     529}
     530
     531//The total number of operations is 1
    526532IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2)
    527533{
    528534        return _mm_xor_si128(arg1, arg2);
    529 }
    530 
    531 //The total number of operations is 1
    532 IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2)
    533 {
    534         return _mm_and_si128(arg1, arg2);
    535535}
    536536
     
    714714}
    715715
    716 //The total number of operations is 62
     716//The total number of operations is 57
    717717template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::gt(bitblock128_t arg1, bitblock128_t arg2)
    718718{
     
    831831template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::ult(bitblock128_t arg1, bitblock128_t arg2)
    832832{
    833         bitblock128_t high_bit = simd128<32>::constant<(2147483648UL)>();
     833        bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>();
    834834        return simd128<32>::lt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    835835}
     
    841841}
    842842
    843 //The total number of operations is 48
     843//The total number of operations is 43
    844844template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::ult(bitblock128_t arg1, bitblock128_t arg2)
    845845{
     
    893893template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::lt(bitblock128_t arg1, bitblock128_t arg2)
    894894{
    895         bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808UL)>();
     895        bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808ULL)>();
    896896        return simd128<64>::ult(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    897897}
    898898
    899 //The total number of operations is 65
     899//The total number of operations is 60
    900900template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lt(bitblock128_t arg1, bitblock128_t arg2)
    901901{
     
    10481048template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::ugt(bitblock128_t arg1, bitblock128_t arg2)
    10491049{
    1050         bitblock128_t high_bit = simd128<32>::constant<(2147483648UL)>();
     1050        bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>();
    10511051        return simd128<32>::gt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    10521052}
     
    10611061}
    10621062
    1063 //The total number of operations is 47
     1063//The total number of operations is 42
    10641064template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::ugt(bitblock128_t arg1, bitblock128_t arg2)
    10651065{
     
    17341734template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srai(bitblock128_t arg1)
    17351735{
    1736         bitblock128_t tmp = simd128<64>::srli<((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh))>(arg1);
    1737         return simd_or(tmp, simd128<64>::sub(simd128<64>::constant<0>(), simd_and(simd128<64>::slli<((64-((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh)))-1)>(simd128<64>::constant<1>()), tmp)));
    1738 }
    1739 
    1740 //The total number of operations is 21
     1736        return simd_or(simd_and(simd128<64>::himask(), simd128<(32)>::srai<((sh < (32)) ? sh : (32))>(arg1)), ((sh <= (32)) ? simd128<64>::srli<sh>(arg1) : simd128<(32)>::srai<(sh-(32))>(simd128<64>::srli<(32)>(arg1))));
     1737}
     1738
     1739//The total number of operations is 16
    17411740template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srai(bitblock128_t arg1)
    17421741{
    1743         bitblock128_t tmp = simd128<128>::srli<((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh))>(arg1);
    1744         return simd_or(tmp, simd128<128>::sub(simd128<128>::constant<0>(), simd_and(simd128<128>::slli<((128-((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh)))-1)>(simd128<128>::constant<1>()), tmp)));
     1742        return simd_or(simd_and(simd128<128>::himask(), simd128<(64)>::srai<((sh < (64)) ? sh : (64))>(arg1)), ((sh <= (64)) ? simd128<128>::srli<sh>(arg1) : simd128<(64)>::srai<(sh-(64))>(simd128<128>::srli<(64)>(arg1))));
    17451743}
    17461744
     
    20212019}
    20222020
    2023 //The total number of operations is 272
     2021//The total number of operations is 252
    20242022template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<128>::packss(bitblock128_t arg1, bitblock128_t arg2)
    20252023{
     
    20492047}
    20502048
    2051 //The total number of operations is 7
     2049//The total number of operations is 4
    20522050template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<32>::signmask(bitblock128_t arg1)
    20532051{
    2054         return hsimd128<(16)>::signmask(hsimd128<32>::packh(simd128<32>::constant<0>(), arg1));
    2055 }
    2056 
    2057 //The total number of operations is 10
     2052        return (((((mvmd128<32>::extract<3>(arg1)>>28)&8)|((mvmd128<32>::extract<2>(arg1)>>29)&4))|((mvmd128<32>::extract<1>(arg1)>>30)&2))|(mvmd128<32>::extract<0>(arg1)>>31));
     2053}
     2054
     2055//The total number of operations is 4
    20582056template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<64>::signmask(bitblock128_t arg1)
    20592057{
    2060         return hsimd128<(32)>::signmask(hsimd128<64>::packh(simd128<64>::constant<0>(), arg1));
    2061 }
    2062 
    2063 //The total number of operations is 17
     2058        return (((mvmd128<64>::extract<1>(arg1)>>62)&2)|(mvmd128<64>::extract<0>(arg1)>>63));
     2059}
     2060
     2061//The total number of operations is 11
    20642062template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<128>::signmask(bitblock128_t arg1)
    20652063{
     
    24532451}
    24542452
    2455 //The total number of operations is 21
     2453//The total number of operations is 16
    24562454template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<64>::signextendh(bitblock128_t arg1)
    24572455{
     
    24952493}
    24962494
    2497 //The total number of operations is 25
     2495//The total number of operations is 20
    24982496template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<64>::signextendl(bitblock128_t arg1)
    24992497{
     
    26902688template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd128<32>::extract(bitblock128_t arg1)
    26912689{
    2692         return (((uint64_t)((4294967296UL)-1))&_mm_extract_epi32(arg1, (int32_t)(pos)));
     2690        return (((uint64_t)((4294967296ULL)-1))&_mm_extract_epi32(arg1, (int32_t)(pos)));
    26932691}
    26942692
     
    28222820template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::srli(bitblock128_t arg1)
    28232821{
    2824         return mvmd128<(2)>::srli<(sh*2)>(arg1);
     2822        return simd128<128>::srli<(sh*4)>(arg1);
    28252823}
    28262824
     
    28402838template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::srli(bitblock128_t arg1)
    28412839{
    2842         return mvmd128<(16)>::srli<(sh*2)>(arg1);
     2840        return simd128<128>::srli<(sh*32)>(arg1);
    28432841}
    28442842
     
    28522850template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::srli(bitblock128_t arg1)
    28532851{
    2854         return mvmd128<(64)>::srli<(sh*2)>(arg1);
     2852        return simd128<128>::srli<(sh*128)>(arg1);
    28552853}
    28562854
  • trunk/lib/idisa_cpp/idisa_sse4_2.cpp

    r1661 r1740  
    107107IDISA_ALWAYS_INLINE bitblock128_t simd_andc(bitblock128_t arg1, bitblock128_t arg2);
    108108IDISA_ALWAYS_INLINE bitblock128_t simd_or(bitblock128_t arg1, bitblock128_t arg2);
     109IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2);
    109110IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2);
    110 IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2);
    111111template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::max(bitblock128_t arg1, bitblock128_t arg2);
    112112template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::max(bitblock128_t arg1, bitblock128_t arg2);
     
    524524
    525525//The total number of operations is 1
     526IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2)
     527{
     528        return _mm_and_si128(arg1, arg2);
     529}
     530
     531//The total number of operations is 1
    526532IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2)
    527533{
    528534        return _mm_xor_si128(arg1, arg2);
    529 }
    530 
    531 //The total number of operations is 1
    532 IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2)
    533 {
    534         return _mm_and_si128(arg1, arg2);
    535535}
    536536
     
    706706}
    707707
    708 //The total number of operations is 37
     708//The total number of operations is 32
    709709template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::gt(bitblock128_t arg1, bitblock128_t arg2)
    710710{
     
    823823template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::ult(bitblock128_t arg1, bitblock128_t arg2)
    824824{
    825         bitblock128_t high_bit = simd128<32>::constant<(2147483648UL)>();
     825        bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>();
    826826        return simd128<32>::lt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    827827}
     
    830830template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::ult(bitblock128_t arg1, bitblock128_t arg2)
    831831{
    832         bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808UL)>();
     832        bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808ULL)>();
    833833        return simd128<64>::lt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    834834}
    835835
    836 //The total number of operations is 40
     836//The total number of operations is 35
    837837template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::ult(bitblock128_t arg1, bitblock128_t arg2)
    838838{
     
    889889}
    890890
    891 //The total number of operations is 45
     891//The total number of operations is 40
    892892template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lt(bitblock128_t arg1, bitblock128_t arg2)
    893893{
     
    10401040template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::ugt(bitblock128_t arg1, bitblock128_t arg2)
    10411041{
    1042         bitblock128_t high_bit = simd128<32>::constant<(2147483648UL)>();
     1042        bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>();
    10431043        return simd128<32>::gt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    10441044}
     
    10471047template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::ugt(bitblock128_t arg1, bitblock128_t arg2)
    10481048{
    1049         bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808UL)>();
     1049        bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808ULL)>();
    10501050        return simd128<64>::gt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    10511051}
    10521052
    1053 //The total number of operations is 36
     1053//The total number of operations is 31
    10541054template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::ugt(bitblock128_t arg1, bitblock128_t arg2)
    10551055{
     
    15711571template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::umin(bitblock128_t arg1, bitblock128_t arg2)
    15721572{
    1573         bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808UL)>();
     1573        bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808ULL)>();
    15741574        return simd_xor(simd128<64>::min(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
    15751575}
     
    17181718template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srai(bitblock128_t arg1)
    17191719{
    1720         bitblock128_t tmp = simd128<64>::srli<((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh))>(arg1);
    1721         return simd_or(tmp, simd128<64>::sub(simd128<64>::constant<0>(), simd_and(simd128<64>::slli<((64-((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh)))-1)>(simd128<64>::constant<1>()), tmp)));
    1722 }
    1723 
    1724 //The total number of operations is 21
     1720        return simd_or(simd_and(simd128<64>::himask(), simd128<(32)>::srai<((sh < (32)) ? sh : (32))>(arg1)), ((sh <= (32)) ? simd128<64>::srli<sh>(arg1) : simd128<(32)>::srai<(sh-(32))>(simd128<64>::srli<(32)>(arg1))));
     1721}
     1722
     1723//The total number of operations is 16
    17251724template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srai(bitblock128_t arg1)
    17261725{
    1727         bitblock128_t tmp = simd128<128>::srli<((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh))>(arg1);
    1728         return simd_or(tmp, simd128<128>::sub(simd128<128>::constant<0>(), simd_and(simd128<128>::slli<((128-((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh)))-1)>(simd128<128>::constant<1>()), tmp)));
     1726        return simd_or(simd_and(simd128<128>::himask(), simd128<(64)>::srai<((sh < (64)) ? sh : (64))>(arg1)), ((sh <= (64)) ? simd128<128>::srli<sh>(arg1) : simd128<(64)>::srai<(sh-(64))>(simd128<128>::srli<(64)>(arg1))));
    17291727}
    17301728
     
    18621860template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::umax(bitblock128_t arg1, bitblock128_t arg2)
    18631861{
    1864         bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808UL)>();
     1862        bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808ULL)>();
    18651863        return simd_xor(simd128<64>::max(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
    18661864}
     
    20032001}
    20042002
    2005 //The total number of operations is 172
     2003//The total number of operations is 152
    20062004template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<128>::packss(bitblock128_t arg1, bitblock128_t arg2)
    20072005{
     
    20312029}
    20322030
    2033 //The total number of operations is 7
     2031//The total number of operations is 4
    20342032template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<32>::signmask(bitblock128_t arg1)
    20352033{
    2036         return hsimd128<(16)>::signmask(hsimd128<32>::packh(simd128<32>::constant<0>(), arg1));
    2037 }
    2038 
    2039 //The total number of operations is 10
     2034        return (((((mvmd128<32>::extract<3>(arg1)>>28)&8)|((mvmd128<32>::extract<2>(arg1)>>29)&4))|((mvmd128<32>::extract<1>(arg1)>>30)&2))|(mvmd128<32>::extract<0>(arg1)>>31));
     2035}
     2036
     2037//The total number of operations is 4
    20402038template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<64>::signmask(bitblock128_t arg1)
    20412039{
    2042         return hsimd128<(32)>::signmask(hsimd128<64>::packh(simd128<64>::constant<0>(), arg1));
    2043 }
    2044 
    2045 //The total number of operations is 17
     2040        return (((mvmd128<64>::extract<1>(arg1)>>62)&2)|(mvmd128<64>::extract<0>(arg1)>>63));
     2041}
     2042
     2043//The total number of operations is 11
    20462044template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<128>::signmask(bitblock128_t arg1)
    20472045{
     
    24352433}
    24362434
    2437 //The total number of operations is 21
     2435//The total number of operations is 16
    24382436template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<64>::signextendh(bitblock128_t arg1)
    24392437{
     
    24772475}
    24782476
    2479 //The total number of operations is 25
     2477//The total number of operations is 20
    24802478template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<64>::signextendl(bitblock128_t arg1)
    24812479{
     
    26722670template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd128<32>::extract(bitblock128_t arg1)
    26732671{
    2674         return (((uint64_t)((4294967296UL)-1))&_mm_extract_epi32(arg1, (int32_t)(pos)));
     2672        return (((uint64_t)((4294967296ULL)-1))&_mm_extract_epi32(arg1, (int32_t)(pos)));
    26752673}
    26762674
     
    28042802template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::srli(bitblock128_t arg1)
    28052803{
    2806         return mvmd128<(2)>::srli<(sh*2)>(arg1);
     2804        return simd128<128>::srli<(sh*4)>(arg1);
    28072805}
    28082806
     
    28222820template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::srli(bitblock128_t arg1)
    28232821{
    2824         return mvmd128<(16)>::srli<(sh*2)>(arg1);
     2822        return simd128<128>::srli<(sh*32)>(arg1);
    28252823}
    28262824
     
    28342832template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::srli(bitblock128_t arg1)
    28352833{
    2836         return mvmd128<(64)>::srli<(sh*2)>(arg1);
     2834        return simd128<128>::srli<(sh*128)>(arg1);
    28372835}
    28382836
  • trunk/lib/idisa_cpp/idisa_ssse3.cpp

    r1661 r1740  
    107107IDISA_ALWAYS_INLINE bitblock128_t simd_andc(bitblock128_t arg1, bitblock128_t arg2);
    108108IDISA_ALWAYS_INLINE bitblock128_t simd_or(bitblock128_t arg1, bitblock128_t arg2);
     109IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2);
    109110IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2);
    110 IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2);
    111111template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::max(bitblock128_t arg1, bitblock128_t arg2);
    112112template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::max(bitblock128_t arg1, bitblock128_t arg2);
     
    524524
    525525//The total number of operations is 1
     526IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2)
     527{
     528        return _mm_and_si128(arg1, arg2);
     529}
     530
     531//The total number of operations is 1
    526532IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2)
    527533{
    528534        return _mm_xor_si128(arg1, arg2);
    529 }
    530 
    531 //The total number of operations is 1
    532 IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2)
    533 {
    534         return _mm_and_si128(arg1, arg2);
    535535}
    536536
     
    578578}
    579579
    580 //The total number of operations is 65
     580//The total number of operations is 64
    581581template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::max(bitblock128_t arg1, bitblock128_t arg2)
    582582{
    583         bitblock128_t hiAns = simd128<(64)>::max(arg1, arg2);
    584         bitblock128_t loAns = simd128<(64)>::umax(arg1, arg2);
    585         bitblock128_t eqMask1 = simd128<128>::srli<(64)>(simd128<(64)>::eq(hiAns, arg1));
    586         bitblock128_t eqMask2 = simd128<128>::srli<(64)>(simd128<(64)>::eq(hiAns, arg2));
    587         return simd128<1>::ifh(simd128<128>::himask(), hiAns, simd128<1>::ifh(eqMask1, simd128<1>::ifh(eqMask2, loAns, arg1), arg2));
     583        return simd128<1>::ifh(simd128<128>::gt(arg1, arg2), arg1, arg2);
    588584}
    589585
     
    713709}
    714710
    715 //The total number of operations is 66
     711//The total number of operations is 61
    716712template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::gt(bitblock128_t arg1, bitblock128_t arg2)
    717713{
     
    830826template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::ult(bitblock128_t arg1, bitblock128_t arg2)
    831827{
    832         bitblock128_t high_bit = simd128<32>::constant<(2147483648UL)>();
     828        bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>();
    833829        return simd128<32>::lt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    834830}
     
    843839}
    844840
    845 //The total number of operations is 55
     841//The total number of operations is 50
    846842template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::ult(bitblock128_t arg1, bitblock128_t arg2)
    847843{
     
    892888template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::lt(bitblock128_t arg1, bitblock128_t arg2)
    893889{
    894         bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808UL)>();
     890        bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808ULL)>();
    895891        return simd128<64>::ult(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    896892}
    897893
    898 //The total number of operations is 75
     894//The total number of operations is 70
    899895template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lt(bitblock128_t arg1, bitblock128_t arg2)
    900896{
     
    10471043template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::ugt(bitblock128_t arg1, bitblock128_t arg2)
    10481044{
    1049         bitblock128_t high_bit = simd128<32>::constant<(2147483648UL)>();
     1045        bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>();
    10501046        return simd128<32>::gt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    10511047}
     
    10601056}
    10611057
    1062 //The total number of operations is 51
     1058//The total number of operations is 46
    10631059template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::ugt(bitblock128_t arg1, bitblock128_t arg2)
    10641060{
     
    14891485}
    14901486
    1491 //The total number of operations is 65
     1487//The total number of operations is 64
    14921488template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::min(bitblock128_t arg1, bitblock128_t arg2)
    14931489{
    1494         bitblock128_t hiAns = simd128<(64)>::min(arg1, arg2);
    1495         bitblock128_t loAns = simd128<(64)>::umin(arg1, arg2);
    1496         bitblock128_t eqMask1 = simd128<128>::srli<(64)>(simd128<(64)>::eq(hiAns, arg1));
    1497         bitblock128_t eqMask2 = simd128<128>::srli<(64)>(simd128<(64)>::eq(hiAns, arg2));
    1498         return simd128<1>::ifh(simd128<128>::himask(), hiAns, simd128<1>::ifh(eqMask1, simd128<1>::ifh(eqMask2, loAns, arg1), arg2));
     1490        return simd128<1>::ifh(simd128<128>::gt(arg1, arg2), arg2, arg1);
    14991491}
    15001492
     
    15751567template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::umin(bitblock128_t arg1, bitblock128_t arg2)
    15761568{
    1577         bitblock128_t high_bit = simd128<32>::constant<(2147483648UL)>();
     1569        bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>();
    15781570        return simd_xor(simd128<32>::min(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
    15791571}
     
    17341726template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srai(bitblock128_t arg1)
    17351727{
    1736         bitblock128_t tmp = simd128<64>::srli<((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh))>(arg1);
    1737         return simd_or(tmp, simd128<64>::sub(simd128<64>::constant<0>(), simd_and(simd128<64>::slli<((64-((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh)))-1)>(simd128<64>::constant<1>()), tmp)));
    1738 }
    1739 
    1740 //The total number of operations is 21
     1728        return simd_or(simd_and(simd128<64>::himask(), simd128<(32)>::srai<((sh < (32)) ? sh : (32))>(arg1)), ((sh <= (32)) ? simd128<64>::srli<sh>(arg1) : simd128<(32)>::srai<(sh-(32))>(simd128<64>::srli<(32)>(arg1))));
     1729}
     1730
     1731//The total number of operations is 16
    17411732template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srai(bitblock128_t arg1)
    17421733{
    1743         bitblock128_t tmp = simd128<128>::srli<((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh))>(arg1);
    1744         return simd_or(tmp, simd128<128>::sub(simd128<128>::constant<0>(), simd_and(simd128<128>::slli<((128-((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh)))-1)>(simd128<128>::constant<1>()), tmp)));
     1734        return simd_or(simd_and(simd128<128>::himask(), simd128<(64)>::srai<((sh < (64)) ? sh : (64))>(arg1)), ((sh <= (64)) ? simd128<128>::srli<sh>(arg1) : simd128<(64)>::srai<(sh-(64))>(simd128<128>::srli<(64)>(arg1))));
    17451735}
    17461736
     
    18731863template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::umax(bitblock128_t arg1, bitblock128_t arg2)
    18741864{
    1875         bitblock128_t high_bit = simd128<32>::constant<(2147483648UL)>();
     1865        bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>();
    18761866        return simd_xor(simd128<32>::max(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
    18771867}
     
    20232013}
    20242014
    2025 //The total number of operations is 288
     2015//The total number of operations is 268
    20262016template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<128>::packss(bitblock128_t arg1, bitblock128_t arg2)
    20272017{
     
    20572047}
    20582048
    2059 //The total number of operations is 10
     2049//The total number of operations is 8
    20602050template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<64>::signmask(bitblock128_t arg1)
    20612051{
    2062         return hsimd128<(32)>::signmask(hsimd128<64>::packh(simd128<64>::constant<0>(), arg1));
    2063 }
    2064 
    2065 //The total number of operations is 17
     2052        return (((mvmd128<64>::extract<1>(arg1)>>62)&2)|(mvmd128<64>::extract<0>(arg1)>>63));
     2053}
     2054
     2055//The total number of operations is 15
    20662056template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<128>::signmask(bitblock128_t arg1)
    20672057{
     
    24562446}
    24572447
    2458 //The total number of operations is 21
     2448//The total number of operations is 16
    24592449template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<64>::signextendh(bitblock128_t arg1)
    24602450{
     
    24982488}
    24992489
    2500 //The total number of operations is 25
     2490//The total number of operations is 20
    25012491template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<64>::signextendl(bitblock128_t arg1)
    25022492{
     
    28252815template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::srli(bitblock128_t arg1)
    28262816{
    2827         return mvmd128<(2)>::srli<(sh*2)>(arg1);
     2817        return simd128<128>::srli<(sh*4)>(arg1);
    28282818}
    28292819
     
    28432833template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::srli(bitblock128_t arg1)
    28442834{
    2845         return mvmd128<(16)>::srli<(sh*2)>(arg1);
     2835        return simd128<128>::srli<(sh*32)>(arg1);
    28462836}
    28472837
     
    28552845template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::srli(bitblock128_t arg1)
    28562846{
    2857         return mvmd128<(64)>::srli<(sh*2)>(arg1);
     2847        return simd128<128>::srli<(sh*128)>(arg1);
    28582848}
    28592849
Note: See TracChangeset for help on using the changeset viewer.