Changeset 1740 for trunk/lib/idisa_cpp/idisa_sse2.cpp
 Timestamp:
 Nov 27, 2011, 2:38:59 PM (7 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

trunk/lib/idisa_cpp/idisa_sse2.cpp
r1661 r1740 106 106 IDISA_ALWAYS_INLINE bitblock128_t simd_andc(bitblock128_t arg1, bitblock128_t arg2); 107 107 IDISA_ALWAYS_INLINE bitblock128_t simd_or(bitblock128_t arg1, bitblock128_t arg2); 108 IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2); 108 109 IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2); 109 IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2);110 110 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::max(bitblock128_t arg1, bitblock128_t arg2); 111 111 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::max(bitblock128_t arg1, bitblock128_t arg2); … … 519 519 520 520 //The total number of operations is 1 521 IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2) 522 { 523 return _mm_and_si128(arg1, arg2); 524 } 525 526 //The total number of operations is 1 521 527 IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2) 522 528 { 523 529 return _mm_xor_si128(arg1, arg2); 524 }525 526 //The total number of operations is 1527 IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2)528 {529 return _mm_and_si128(arg1, arg2);530 530 } 531 531 … … 573 573 } 574 574 575 //The total number of operations is 6 5575 //The total number of operations is 64 576 576 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::max(bitblock128_t arg1, bitblock128_t arg2) 577 577 { 578 bitblock128_t hiAns = simd128<(64)>::max(arg1, arg2); 579 bitblock128_t loAns = simd128<(64)>::umax(arg1, arg2); 580 bitblock128_t eqMask1 = simd128<128>::srli<(64)>(simd128<(64)>::eq(hiAns, arg1)); 581 bitblock128_t eqMask2 = simd128<128>::srli<(64)>(simd128<(64)>::eq(hiAns, arg2)); 582 return simd128<1>::ifh(simd128<128>::himask(), hiAns, simd128<1>::ifh(eqMask1, simd128<1>::ifh(eqMask2, loAns, arg1), arg2)); 578 return simd128<1>::ifh(simd128<128>::gt(arg1, arg2), arg1, arg2); 583 579 } 584 
580 … … 708 704 } 709 705 710 //The total number of operations is 6 6706 //The total number of operations is 61 711 707 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::gt(bitblock128_t arg1, bitblock128_t arg2) 712 708 { … … 825 821 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::ult(bitblock128_t arg1, bitblock128_t arg2) 826 822 { 827 bitblock128_t high_bit = simd128<32>::constant<(2147483648UL )>();823 bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>(); 828 824 return simd128<32>::lt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)); 829 825 } … … 838 834 } 839 835 840 //The total number of operations is 5 5836 //The total number of operations is 50 841 837 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::ult(bitblock128_t arg1, bitblock128_t arg2) 842 838 { … … 887 883 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::lt(bitblock128_t arg1, bitblock128_t arg2) 888 884 { 889 bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808UL )>();885 bitblock128_t high_bit = simd128<64>::constant<(9223372036854775808ULL)>(); 890 886 return simd128<64>::ult(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)); 891 887 } 892 888 893 //The total number of operations is 7 5889 //The total number of operations is 70 894 890 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lt(bitblock128_t arg1, bitblock128_t arg2) 895 891 { … … 1042 1038 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::ugt(bitblock128_t arg1, bitblock128_t arg2) 1043 1039 { 1044 bitblock128_t high_bit = simd128<32>::constant<(2147483648UL )>();1040 bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>(); 1045 1041 return simd128<32>::gt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)); 1046 1042 } … … 1055 1051 } 1056 1052 1057 //The total number of operations is 511053 //The total number of operations is 46 1058 1054 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::ugt(bitblock128_t arg1, 
bitblock128_t arg2) 1059 1055 { … … 1484 1480 } 1485 1481 1486 //The total number of operations is 6 51482 //The total number of operations is 64 1487 1483 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::min(bitblock128_t arg1, bitblock128_t arg2) 1488 1484 { 1489 bitblock128_t hiAns = simd128<(64)>::min(arg1, arg2); 1490 bitblock128_t loAns = simd128<(64)>::umin(arg1, arg2); 1491 bitblock128_t eqMask1 = simd128<128>::srli<(64)>(simd128<(64)>::eq(hiAns, arg1)); 1492 bitblock128_t eqMask2 = simd128<128>::srli<(64)>(simd128<(64)>::eq(hiAns, arg2)); 1493 return simd128<1>::ifh(simd128<128>::himask(), hiAns, simd128<1>::ifh(eqMask1, simd128<1>::ifh(eqMask2, loAns, arg1), arg2)); 1485 return simd128<1>::ifh(simd128<128>::gt(arg1, arg2), arg2, arg1); 1494 1486 } 1495 1487 … … 1570 1562 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::umin(bitblock128_t arg1, bitblock128_t arg2) 1571 1563 { 1572 bitblock128_t high_bit = simd128<32>::constant<(2147483648UL )>();1564 bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>(); 1573 1565 return simd_xor(simd128<32>::min(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit); 1574 1566 } … … 1732 1724 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srai(bitblock128_t arg1) 1733 1725 { 1734 bitblock128_t tmp = simd128<64>::srli<((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh))>(arg1); 1735 return simd_or(tmp, simd128<64>::sub(simd128<64>::constant<0>(), simd_and(simd128<64>::slli<((64-((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh)))-1)>(simd128<64>::constant<1>()), tmp))); 1736 } 1737 1738 //The total number of operations is 21 1726 return simd_or(simd_and(simd128<64>::himask(), simd128<(32)>::srai<((sh < (32)) ? sh : (32))>(arg1)), ((sh <= (32)) ? 
simd128<64>::srli<sh>(arg1) : simd128<(32)>::srai<(sh-(32))>(simd128<64>::srli<(32)>(arg1)))); 1727 } 1728 1729 //The total number of operations is 16 1739 1730 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srai(bitblock128_t arg1) 1740 1731 { 1741 bitblock128_t tmp = simd128<128>::srli<((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh))>(arg1); 1742 return simd_or(tmp, simd128<128>::sub(simd128<128>::constant<0>(), simd_and(simd128<128>::slli<((128-((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh)))-1)>(simd128<128>::constant<1>()), tmp))); 1732 return simd_or(simd_and(simd128<128>::himask(), simd128<(64)>::srai<((sh < (64)) ? sh : (64))>(arg1)), ((sh <= (64)) ? simd128<128>::srli<sh>(arg1) : simd128<(64)>::srai<(sh-(64))>(simd128<128>::srli<(64)>(arg1)))); 1743 1733 } 1744 1734 … … 1871 1861 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::umax(bitblock128_t arg1, bitblock128_t arg2) 1872 1862 { 1873 bitblock128_t high_bit = simd128<32>::constant<(2147483648UL )>();1863 bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>(); 1874 1864 return simd_xor(simd128<32>::max(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit); 1875 1865 } … … 2021 2011 } 2022 2012 2023 //The total number of operations is 2 882013 //The total number of operations is 268 2024 2014 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<128>::packss(bitblock128_t arg1, bitblock128_t arg2) 2025 2015 { … … 2049 2039 } 2050 2040 2051 //The total number of operations is 252041 //The total number of operations is 8 2052 2042 template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<32>::signmask(bitblock128_t arg1) 2053 2043 { 2054 return hsimd128<(16)>::signmask(hsimd128<32>::packh(simd128<32>::constant<0>(), arg1));2055 } 2056 2057 //The total number of operations is 322044 return (((((mvmd128<32>::extract<3>(arg1)>>28)&8)|((mvmd128<32>::extract<2>(arg1)>>29)&4))|((mvmd128<32>::extract<1>(arg1)>>30)&2))|(mvmd128<32>::extract<0>(arg1)>>31)); 2045 } 2046 
2047 //The total number of operations is 8 2058 2048 template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<64>::signmask(bitblock128_t arg1) 2059 2049 { 2060 return hsimd128<(32)>::signmask(hsimd128<64>::packh(simd128<64>::constant<0>(), arg1));2061 } 2062 2063 //The total number of operations is 392050 return (((mvmd128<64>::extract<1>(arg1)>>62)&2)|(mvmd128<64>::extract<0>(arg1)>>63)); 2051 } 2052 2053 //The total number of operations is 15 2064 2054 template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<128>::signmask(bitblock128_t arg1) 2065 2055 { … … 2457 2447 } 2458 2448 2459 //The total number of operations is 212449 //The total number of operations is 16 2460 2450 template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<64>::signextendh(bitblock128_t arg1) 2461 2451 { … … 2499 2489 } 2500 2490 2501 //The total number of operations is 2 52491 //The total number of operations is 20 2502 2492 template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<64>::signextendl(bitblock128_t arg1) 2503 2493 { … … 2790 2780 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::srli(bitblock128_t arg1) 2791 2781 { 2792 return mvmd128<(2)>::srli<(sh*2)>(arg1);2782 return simd128<128>::srli<(sh*4)>(arg1); 2793 2783 } 2794 2784
Note: See TracChangeset for help on using the changeset viewer.