Changeset 1740 for trunk/lib/idisa_cpp/idisa_sse4_2.cpp
 Timestamp:
 Nov 27, 2011, 2:38:59 PM (7 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

trunk/lib/idisa_cpp/idisa_sse4_2.cpp
r1661 r1740
  107   107   IDISA_ALWAYS_INLINE bitblock128_t simd_andc(bitblock128_t arg1, bitblock128_t arg2);
  108   108   IDISA_ALWAYS_INLINE bitblock128_t simd_or(bitblock128_t arg1, bitblock128_t arg2);
        109 + IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2);
  109   110   IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2);
  110       - IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2);
  111   111   template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::max(bitblock128_t arg1, bitblock128_t arg2);
  112   112   template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::max(bitblock128_t arg1, bitblock128_t arg2);
    …     …
  524   524
  525   525   //The total number of operations is 1
        526 + IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2)
        527 + {
        528 + return _mm_and_si128(arg1, arg2);
        529 + }
        530 +
        531 + //The total number of operations is 1
  526   532   IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2)
  527   533   {
  528   534   return _mm_xor_si128(arg1, arg2);
  529       - }
  530       -
  531       - //The total number of operations is 1
  532       - IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2)
  533       - {
  534       - return _mm_and_si128(arg1, arg2);
  535   535   }
  536   536
    …     …
  706   706   }
  707   707
  708       - //The total number of operations is 37
        708 + //The total number of operations is 32
  709   709   template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::gt(bitblock128_t arg1, bitblock128_t arg2)
  710   710   {
    …     …
  823   823   template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::ult(bitblock128_t arg1, bitblock128_t arg2)
  824   824   {
  825       - bitblock128_t high_bit = simd128<32>::constant<(2147483648UL)>();
        825 + bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>();
  826   826   return simd128<32>::lt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
  827   827   }
    …     …
  830   830   template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::ult(bitblock128_t arg1, bitblock128_t arg2)
  831   831   {
  832       - bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808UL)>();
        832 + bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808ULL)>();
  833   833   return simd128<64>::lt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
  834   834   }
  835   835
  836       - //The total number of operations is 40
        836 + //The total number of operations is 35
  837   837   template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::ult(bitblock128_t arg1, bitblock128_t arg2)
  838   838   {
    …     …
  889   889   }
  890   890
  891       - //The total number of operations is 45
        891 + //The total number of operations is 40
  892   892   template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lt(bitblock128_t arg1, bitblock128_t arg2)
  893   893   {
    …     …
 1040  1040   template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::ugt(bitblock128_t arg1, bitblock128_t arg2)
 1041  1041   {
 1042       - bitblock128_t high_bit = simd128<32>::constant<(2147483648UL)>();
       1042 + bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>();
 1043  1043   return simd128<32>::gt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
 1044  1044   }
    …     …
 1047  1047   template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::ugt(bitblock128_t arg1, bitblock128_t arg2)
 1048  1048   {
 1049       - bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808UL)>();
       1049 + bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808ULL)>();
 1050  1050   return simd128<64>::gt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
 1051  1051   }
 1052  1052
 1053       - //The total number of operations is 36
       1053 + //The total number of operations is 31
 1054  1054   template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::ugt(bitblock128_t arg1, bitblock128_t arg2)
 1055  1055   {
    …     …
 1571  1571   template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::umin(bitblock128_t arg1, bitblock128_t arg2)
 1572  1572   {
 1573       - bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808UL)>();
       1573 + bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808ULL)>();
 1574  1574   return simd_xor(simd128<64>::min(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
 1575  1575   }
    …     …
 1718  1718   template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srai(bitblock128_t arg1)
 1719  1719   {
 1720       - bitblock128_t tmp = simd128<64>::srli<((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh))>(arg1);
 1721       - return simd_or(tmp, simd128<64>::sub(simd128<64>::constant<0>(), simd_and(simd128<64>::slli<((64-((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh)))-1)>(simd128<64>::constant<1>()), tmp)));
 1722       - }
 1723       -
 1724       - //The total number of operations is 21
       1720 + return simd_or(simd_and(simd128<64>::himask(), simd128<(32)>::srai<((sh < (32)) ? sh : (32))>(arg1)), ((sh <= (32)) ? simd128<64>::srli<sh>(arg1) : simd128<(32)>::srai<(sh-(32))>(simd128<64>::srli<(32)>(arg1))));
       1721 + }
       1722 +
       1723 + //The total number of operations is 16
 1725  1724   template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srai(bitblock128_t arg1)
 1726  1725   {
 1727       - bitblock128_t tmp = simd128<128>::srli<((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh))>(arg1);
 1728       - return simd_or(tmp, simd128<128>::sub(simd128<128>::constant<0>(), simd_and(simd128<128>::slli<((128-((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh)))-1)>(simd128<128>::constant<1>()), tmp)));
       1726 + return simd_or(simd_and(simd128<128>::himask(), simd128<(64)>::srai<((sh < (64)) ? sh : (64))>(arg1)), ((sh <= (64)) ? simd128<128>::srli<sh>(arg1) : simd128<(64)>::srai<(sh-(64))>(simd128<128>::srli<(64)>(arg1))));
 1729  1727   }
 1730  1728
    …     …
 1862  1860   template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::umax(bitblock128_t arg1, bitblock128_t arg2)
 1863  1861   {
 1864       - bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808UL)>();
       1862 + bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808ULL)>();
 1865  1863   return simd_xor(simd128<64>::max(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
 1866  1864   }
    …     …
 2003  2001   }
 2004  2002
 2005       - //The total number of operations is 172
       2003 + //The total number of operations is 152
 2006  2004   template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<128>::packss(bitblock128_t arg1, bitblock128_t arg2)
 2007  2005   {
    …     …
 2031  2029   }
 2032  2030
 2033       - //The total number of operations is 7
       2031 + //The total number of operations is 4
 2034  2032   template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<32>::signmask(bitblock128_t arg1)
 2035  2033   {
 2036       - return hsimd128<(16)>::signmask(hsimd128<32>::packh(simd128<32>::constant<0>(), arg1));
 2037       - }
 2038       -
 2039       - //The total number of operations is 10
       2034 + return (((((mvmd128<32>::extract<3>(arg1)>>28)&8)|((mvmd128<32>::extract<2>(arg1)>>29)&4))|((mvmd128<32>::extract<1>(arg1)>>30)&2))|(mvmd128<32>::extract<0>(arg1)>>31));
       2035 + }
       2036 +
       2037 + //The total number of operations is 4
 2040  2038   template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<64>::signmask(bitblock128_t arg1)
 2041  2039   {
 2042       - return hsimd128<(32)>::signmask(hsimd128<64>::packh(simd128<64>::constant<0>(), arg1));
 2043       - }
 2044       -
 2045       - //The total number of operations is 17
       2040 + return (((mvmd128<64>::extract<1>(arg1)>>62)&2)|(mvmd128<64>::extract<0>(arg1)>>63));
       2041 + }
       2042 +
       2043 + //The total number of operations is 11
 2046  2044   template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<128>::signmask(bitblock128_t arg1)
 2047  2045   {
    …     …
 2435  2433   }
 2436  2434
 2437       - //The total number of operations is 21
       2435 + //The total number of operations is 16
 2438  2436   template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<64>::signextendh(bitblock128_t arg1)
 2439  2437   {
    …     …
 2477  2475   }
 2478  2476
 2479       - //The total number of operations is 25
       2477 + //The total number of operations is 20
 2480  2478   template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<64>::signextendl(bitblock128_t arg1)
 2481  2479   {
    …     …
 2672  2670   template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd128<32>::extract(bitblock128_t arg1)
 2673  2671   {
 2674       - return (((uint64_t)((4294967296UL)-1))&_mm_extract_epi32(arg1, (int32_t)(pos)));
       2672 + return (((uint64_t)((4294967296ULL)-1))&_mm_extract_epi32(arg1, (int32_t)(pos)));
 2675  2673   }
 2676  2674
    …     …
 2804  2802   template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::srli(bitblock128_t arg1)
 2805  2803   {
 2806       - return mvmd128<(2)>::srli<(sh*2)>(arg1);
       2804 + return simd128<128>::srli<(sh*4)>(arg1);
 2807  2805   }
 2808  2806
    …     …
 2822  2820   template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::srli(bitblock128_t arg1)
 2823  2821   {
 2824       - return mvmd128<(16)>::srli<(sh*2)>(arg1);
       2822 + return simd128<128>::srli<(sh*32)>(arg1);
 2825  2823   }
 2826  2824
    …     …
 2834  2832   template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::srli(bitblock128_t arg1)
 2835  2833   {
 2836       - return mvmd128<(64)>::srli<(sh*2)>(arg1);
       2834 + return simd128<128>::srli<(sh*128)>(arg1);
 2837  2835   }
 2838  2836
Note: See TracChangeset for help on using the changeset viewer.