Changeset 1573 for trunk/lib/idisa_cpp/idisa_sse3.cpp
 Timestamp:
 Oct 22, 2011, 6:46:08 PM (8 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

trunk/lib/idisa_cpp/idisa_sse3.cpp
r1570 r1573 191 191 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::popcount(bitblock128_t arg1); 192 192 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::popcount(bitblock128_t arg1); 193 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::neg(bitblock128_t arg1);194 193 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::neg(bitblock128_t arg1); 195 194 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::neg(bitblock128_t arg1); … … 300 299 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::add(bitblock128_t arg1, bitblock128_t arg2); 301 300 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::add(bitblock128_t arg1, bitblock128_t arg2); 302 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::abs(bitblock128_t arg1);303 301 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1); 304 302 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::abs(bitblock128_t arg1); … … 528 526 } 529 527 530 //The total number of operations is 39528 //The total number of operations is 1 531 529 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::max(bitblock128_t arg1, bitblock128_t arg2) 532 530 { 533 bitblock128_t high_bit = simd128<1>::constant<(1)>(); 534 return simd_xor(simd128<1>::umax(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit); 535 } 536 537 //The total number of operations is 19 531 return simd_and(arg1, arg2); 532 } 533 534 //The total number of operations is 18 538 535 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::max(bitblock128_t arg1, bitblock128_t arg2) 539 536 { 540 return simd128<1>::ifh(simd128<2>:: himask(), simd_and(arg1, arg2), simd_or(simd_and(arg2, simd128<128>::srli<1>(simd_or(arg1, simd_not(arg2)))), simd_and(arg1, simd128<128>::srli<1>(simd_or(simd_not(arg1), arg2)))));537 return simd128<1>::ifh(simd128<2>::lt(arg1, arg2), arg2, arg1); 541 538 } 542 539 … … 582 579 } 583 580 584 //The total number of operations is 57581 //The total number of operations is 1 585 582 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::mult(bitblock128_t arg1, bitblock128_t arg2) 586 583 { 587 bitblock128_t loMask = simd128<(2)>::lomask(); 588 bitblock128_t tmpAns1 = simd128<(2)>::mult(simd_and(loMask, arg1), simd_and(loMask, arg2)); 589 bitblock128_t tmpAns2 = simd128<(2)>::mult(simd128<(2)>::srli<1>(arg1), simd128<(2)>::srli<1>(arg2)); 590 return simd128<1>::ifh(loMask, tmpAns1, simd128<(2)>::slli<1>(tmpAns2)); 584 return simd_and(arg1, arg2); 591 585 } 592 586 … … 660 654 } 661 655 662 //The total number of operations is 42656 //The total number of operations is 1 663 657 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::gt(bitblock128_t arg1, bitblock128_t arg2) 664 658 { 665 bitblock128_t high_bit = simd128<1>::constant<(1)>(); 666 return simd128<1>::ugt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)); 667 } 668 669 //The total number of operations is 18 659 return simd_andc(arg2, arg1); 660 } 661 662 //The total number of operations is 15 670 663 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::gt(bitblock128_t arg1, bitblock128_t arg2) 671 664 { 672 bitblock128_t tmp = simd_not(arg1); 673 bitblock128_t tmpAns = simd_or(simd_and(tmp, arg2), simd_and(simd128<128>::slli<1>(simd_and(arg1, simd_not(arg2))), simd_or(tmp, arg2))); 674 return simd128<1>::ifh(simd128<2>::himask(), tmpAns, simd128<128>::srli<1>(tmpAns)); 665 bitblock128_t hiAns = simd128<(1)>::gt(arg1, arg2); 666 bitblock128_t loAns = simd128<(1)>::ugt(arg1, arg2); 667 bitblock128_t mask = simd_and(loAns, simd128<2>::srli<(1)>(simd128<(1)>::eq(arg1, arg2))); 668 mask = simd_or(mask, simd128<2>::slli<(1)>(mask)); 669 return simd_or(simd128<2>::srai<(1)>(hiAns), mask); 675 670 } 676 671 … … 788 783 } 789 784 790 //The total number of operations is 40785 //The total number of operations is 1 791 786 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::ult(bitblock128_t arg1, bitblock128_t arg2) 792 787 { 793 return simd 128<1>::ifh(simd128<(2)>::himask(), simd128<(2)>::ult(arg1, simd_and(simd128<(2)>::himask(), arg2)), simd128<(2)>::ult(simd_andc(arg1, simd128<(2)>::himask()), simd_andc(arg2, simd128<(2)>::himask())));794 } 795 796 //The total number of operations is 1 7788 return simd_andc(arg2, arg1); 789 } 790 791 //The total number of operations is 14 797 792 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::ult(bitblock128_t arg1, bitblock128_t arg2) 798 793 { 799 bitblock128_t tmp = simd_not(arg1); 800 bitblock128_t tmpAns = simd_or(simd_and(tmp, arg2), simd_and(simd128<128>::slli<1>(simd_and(tmp, arg2)), simd_or(tmp, arg2))); 801 return simd128<1>::ifh(simd128<2>::himask(), tmpAns, simd128<128>::srli<1>(tmpAns)); 794 bitblock128_t tmpAns = simd128<(1)>::ult(arg1, arg2); 795 bitblock128_t mask = simd_and(tmpAns, simd128<2>::srli<(1)>(simd128<(1)>::eq(arg1, arg2))); 796 mask = simd_or(mask, simd128<2>::slli<(1)>(mask)); 797 return simd_or(simd128<2>::srai<(1)>(tmpAns), mask); 802 798 } 803 799 … … 844 840 } 845 841 846 //The total number of operations is 42842 //The total number of operations is 1 847 843 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::lt(bitblock128_t arg1, bitblock128_t arg2) 848 844 { 849 bitblock128_t high_bit = simd128<1>::constant<(1)>(); 850 return simd128<1>::ult(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)); 851 } 852 853 //The total number of operations is 18 845 return simd_andc(arg1, arg2); 846 } 847 848 //The total number of operations is 15 854 849 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::lt(bitblock128_t arg1, bitblock128_t arg2) 855 850 { 856 bitblock128_t tmp = simd_not(arg2); 857 bitblock128_t tmpAns = simd_or(simd_and(arg1, tmp), simd_and(simd128<128>::slli<1>(simd_and(simd_not(arg1), arg2)), simd_or(arg1, tmp))); 858 return simd128<1>::ifh(simd128<2>::himask(), tmpAns, simd128<128>::srli<1>(tmpAns)); 851 bitblock128_t hiAns = simd128<(1)>::lt(arg1, arg2); 852 bitblock128_t loAns = simd128<(1)>::ult(arg1, arg2); 853 bitblock128_t mask = simd_and(loAns, simd128<2>::srli<(1)>(simd128<(1)>::eq(arg1, arg2))); 854 mask = simd_or(mask, simd128<2>::slli<(1)>(mask)); 855 return simd_or(simd128<2>::srai<(1)>(hiAns), mask); 859 856 } 860 857 … … 1003 1000 } 1004 1001 1005 //The total number of operations is 401002 //The total number of operations is 1 1006 1003 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::ugt(bitblock128_t arg1, bitblock128_t arg2) 1007 1004 { 1008 return simd 128<1>::ifh(simd128<(2)>::himask(), simd128<(2)>::ugt(simd_and(simd128<(2)>::himask(), arg1), arg2), simd128<(2)>::ugt(simd_andc(arg1, simd128<(2)>::himask()), simd_andc(arg2, simd128<(2)>::himask())));1009 } 1010 1011 //The total number of operations is 1 71005 return simd_andc(arg1, arg2); 1006 } 1007 1008 //The total number of operations is 14 1012 1009 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::ugt(bitblock128_t arg1, bitblock128_t arg2) 1013 1010 { 1014 bitblock128_t tmp = simd_not(arg2); 1015 bitblock128_t tmpAns = simd_or(simd_and(arg1, tmp), simd_and(simd128<128>::slli<1>(simd_and(arg1, tmp)), simd_or(arg1, tmp))); 1016 return simd128<1>::ifh(simd128<2>::himask(), tmpAns, simd128<128>::srli<1>(tmpAns)); 1011 bitblock128_t tmpAns = simd128<(1)>::ugt(arg1, arg2); 1012 bitblock128_t mask = simd_and(tmpAns, simd128<2>::srli<(1)>(simd128<(1)>::eq(arg1, arg2))); 1013 mask = simd_or(mask, simd128<2>::slli<(1)>(mask)); 1014 return simd_or(simd128<2>::srai<(1)>(tmpAns), mask); 1017 1015 } 1018 1016 … … 1153 1151 } 1154 1152 1155 //The total number of operations is 11156 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::neg(bitblock128_t arg1)1157 {1158 return simd128<1>::sub(simd128<1>::constant<0>(), arg1);1159 }1160 1161 1153 //The total number of operations is 8 1162 1154 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::neg(bitblock128_t arg1) … … 1487 1479 } 1488 1480 1489 //The total number of operations is 391481 //The total number of operations is 1 1490 1482 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::min(bitblock128_t arg1, bitblock128_t arg2) 1491 1483 { 1492 bitblock128_t high_bit = simd128<1>::constant<(1)>(); 1493 return simd_xor(simd128<1>::umin(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit); 1494 } 1495 1496 //The total number of operations is 19 1484 return simd_or(arg1, arg2); 1485 } 1486 1487 //The total number of operations is 18 1497 1488 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::min(bitblock128_t arg1, bitblock128_t arg2) 1498 1489 { 1499 bitblock128_t high_bit = simd128<2>::constant<(2)>(); 1500 return simd_xor(simd128<2>::umin(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit); 1490 return simd128<1>::ifh(simd128<2>::lt(arg1, arg2), arg1, arg2); 1501 1491 } 1502 1492 … … 1542 1532 } 1543 1533 1544 //The total number of operations is 361534 //The total number of operations is 1 1545 1535 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umin(bitblock128_t arg1, bitblock128_t arg2) 1546 1536 { 1547 return simd_ or(simd_and(simd128<(2)>::himask(), simd128<(2)>::umin(arg1, arg2)), simd128<(2)>::umin(simd_and(simd128<(2)>::lomask(), arg1), simd_and(simd128<(2)>::lomask(), arg2)));1537 return simd_and(arg1, arg2); 1548 1538 } 1549 1539 … … 1598 1588 } 1599 1589 1600 //The total number of operations is 361590 //The total number of operations is 1 1601 1591 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umax(bitblock128_t arg1, bitblock128_t arg2) 1602 1592 { 1603 return simd_or( simd_and(simd128<(2)>::himask(), simd128<(2)>::umax(arg1, arg2)), simd128<(2)>::umax(simd_and(simd128<(2)>::lomask(), arg1), simd_and(simd128<(2)>::lomask(), arg2)));1593 return simd_or(arg1, arg2); 1604 1594 } 1605 1595 … … 1654 1644 } 1655 1645 1656 //The total number of operations is 371646 //The total number of operations is 2 1657 1647 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::eq(bitblock128_t arg1, bitblock128_t arg2) 1658 1648 { 1659 return simd_ or(simd_and(simd128<(2)>::himask(), simd128<(2)>::eq(simd_and(simd128<(2)>::himask(), arg1), simd_and(simd128<(2)>::himask(), arg2))), simd_and(simd128<(2)>::lomask(), simd128<(2)>::eq(simd_and(simd128<(2)>::lomask(), arg1), simd_and(simd128<(2)>::lomask(), arg2))));1660 } 1661 1662 //The total number of operations is 151649 return simd_not(simd_xor(arg1, arg2)); 1650 } 1651 1652 //The total number of operations is 8 1663 1653 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::eq(bitblock128_t arg1, bitblock128_t arg2) 1664 1654 { 1665 bitblock128_t tmp = simd_xor(arg1, arg2); 1666 bitblock128_t tmpAns = simd_and(simd_not(simd128<128>::slli<1>(tmp)), simd_not(tmp)); 1667 return simd128<1>::ifh(simd128<2>::himask(), tmpAns, simd128<128>::srli<1>(tmpAns)); 1655 bitblock128_t tmpAns = simd128<(1)>::eq(arg1, arg2); 1656 bitblock128_t loMask = simd_and(tmpAns, simd128<2>::srli<(1)>(tmpAns)); 1657 bitblock128_t hiMask = simd128<2>::slli<(1)>(loMask); 1658 return simd_or(loMask, hiMask); 1668 1659 } 1669 1660 … … 1850 1841 } 1851 1842 1852 //The total number of operations is 461853 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::abs(bitblock128_t arg1)1854 {1855 bitblock128_t gtMask = simd128<1>::gt(arg1, simd128<1>::constant<0>());1856 return simd128<1>::ifh(gtMask, arg1, simd128<1>::sub(gtMask, arg1));1857 }1858 1859 1843 //The total number of operations is 9 1860 1844 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1) … … 1905 1889 } 1906 1890 1907 //The total number of operations is 1281891 //The total number of operations is 93 1908 1892 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::umin_hl(bitblock128_t arg1, bitblock128_t arg2) 1909 1893 { … … 1989 1973 } 1990 1974 1991 //The total number of operations is 1 321975 //The total number of operations is 120 1992 1976 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::packss(bitblock128_t arg1, bitblock128_t arg2) 1993 1977 { … … 2163 2147 } 2164 2148 2165 //The total number of operations is 1312149 //The total number of operations is 93 2166 2150 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::min_hl(bitblock128_t arg1, bitblock128_t arg2) 2167 2151 { … … 2169 2153 } 2170 2154 2171 //The total number of operations is 8 32155 //The total number of operations is 82 2172 2156 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<4>::min_hl(bitblock128_t arg1, bitblock128_t arg2) 2173 2157 { … … 2205 2189 } 2206 2190 2207 //The total number of operations is 992191 //The total number of operations is 85 2208 2192 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::packus(bitblock128_t arg1, bitblock128_t arg2) 2209 2193 {
Note: See TracChangeset
for help on using the changeset viewer.