Changeset 1573 for trunk/lib/idisa_cpp/idisa_avx.cpp
 Timestamp:
 Oct 22, 2011, 6:46:08 PM (8 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

trunk/lib/idisa_cpp/idisa_avx.cpp
r1570 r1573 197 197 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::popcount(bitblock256_t arg1); 198 198 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::popcount(bitblock256_t arg1); 199 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::neg(bitblock256_t arg1);200 199 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::neg(bitblock256_t arg1); 201 200 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::neg(bitblock256_t arg1); … … 318 317 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::add(bitblock256_t arg1, bitblock256_t arg2); 319 318 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::add(bitblock256_t arg1, bitblock256_t arg2); 320 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::abs(bitblock256_t arg1);321 319 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::abs(bitblock256_t arg1); 322 320 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::abs(bitblock256_t arg1); … … 573 571 } 574 572 575 //The total number of operations is 95573 //The total number of operations is 1 576 574 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::max(bitblock256_t arg1, bitblock256_t arg2) 577 575 { 578 bitblock256_t high_bit = simd256<1>::constant<(1)>(); 579 return simd_xor(simd256<1>::umax(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit); 580 } 581 582 //The total number of operations is 47 576 return simd_and(arg1, arg2); 577 } 578 579 //The total number of operations is 29 583 580 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::max(bitblock256_t arg1, bitblock256_t arg2) 584 581 { 585 bitblock256_t high_bit = simd256<2>::constant<(2)>(); 586 return simd_xor(simd256<2>::umax(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit); 582 bitblock256_t hiAns = simd256<(1)>::max(arg1, arg2); 583 bitblock256_t loAns = simd256<(1)>::umax(arg1, arg2); 584 bitblock256_t eqMask1 = simd256<2>::srli<(1)>(simd256<(1)>::eq(hiAns, arg1)); 585 bitblock256_t eqMask2 = simd256<2>::srli<(1)>(simd256<(1)>::eq(hiAns, arg2)); 586 return simd256<1>::ifh(simd256<2>::himask(), hiAns, simd256<1>::ifh(eqMask1, simd256<1>::ifh(eqMask2, loAns, arg1), arg2)); 587 587 } 588 588 … … 638 638 } 639 639 640 //The total number of operations is 216640 //The total number of operations is 1 641 641 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::mult(bitblock256_t arg1, bitblock256_t arg2) 642 642 { 643 bitblock256_t loMask = simd256<(2)>::lomask(); 644 bitblock256_t tmpAns1 = simd256<(2)>::mult(simd_and(loMask, arg1), simd_and(loMask, arg2)); 645 bitblock256_t tmpAns2 = simd256<(2)>::mult(simd256<(2)>::srli<1>(arg1), simd256<(2)>::srli<1>(arg2)); 646 return simd256<1>::ifh(loMask, tmpAns1, simd256<(2)>::slli<1>(tmpAns2)); 643 return simd_and(arg1, arg2); 647 644 } 648 645 … … 727 724 } 728 725 729 //The total number of operations is 1 24726 //The total number of operations is 1 730 727 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::gt(bitblock256_t arg1, bitblock256_t arg2) 731 728 { 732 bitblock256_t high_bit = simd256<1>::constant<(1)>(); 733 return simd256<1>::ugt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)); 734 } 735 736 //The total number of operations is 60 729 return simd_andc(arg2, arg1); 730 } 731 732 //The total number of operations is 30 737 733 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::gt(bitblock256_t arg1, bitblock256_t arg2) 738 734 { 739 bitblock256_t high_bit = simd256<2>::constant<(2)>(); 740 return simd256<2>::ugt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)); 735 bitblock256_t hiAns = simd256<(1)>::gt(arg1, arg2); 736 bitblock256_t loAns = simd256<(1)>::ugt(arg1, arg2); 737 bitblock256_t mask = simd_and(loAns, simd256<2>::srli<(1)>(simd256<(1)>::eq(arg1, arg2))); 738 mask = simd_or(mask, simd256<2>::slli<(1)>(mask)); 739 return simd_or(simd256<2>::srai<(1)>(hiAns), mask); 741 740 } 742 741 … … 879 878 } 880 879 881 //The total number of operations is 1 82880 //The total number of operations is 1 882 881 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::ult(bitblock256_t arg1, bitblock256_t arg2) 883 882 { 884 return simd 256<1>::ifh(simd256<(2)>::himask(), simd256<(2)>::ult(arg1, simd_and(simd256<(2)>::himask(), arg2)), simd256<(2)>::ult(simd_andc(arg1, simd256<(2)>::himask()), simd_andc(arg2, simd256<(2)>::himask())));885 } 886 887 //The total number of operations is 88883 return simd_andc(arg2, arg1); 884 } 885 886 //The total number of operations is 29 888 887 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::ult(bitblock256_t arg1, bitblock256_t arg2) 889 888 { 890 return simd_and(simd256<2>::srai<(1)>(simd_or(simd_and(simd_not(arg1), arg2), simd_and(simd_not(simd_xor(arg1, arg2)), simd256<2>::sub(arg1, arg2)))), simd_not(simd256<2>::eq(arg1, arg2))); 889 bitblock256_t tmpAns = simd256<(1)>::ult(arg1, arg2); 890 bitblock256_t mask = simd_and(tmpAns, simd256<2>::srli<(1)>(simd256<(1)>::eq(arg1, arg2))); 891 mask = simd_or(mask, simd256<2>::slli<(1)>(mask)); 892 return simd_or(simd256<2>::srai<(1)>(tmpAns), mask); 891 893 } 892 894 … … 940 942 } 941 943 942 //The total number of operations is 1 84944 //The total number of operations is 1 943 945 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::lt(bitblock256_t arg1, bitblock256_t arg2) 944 946 { 945 bitblock256_t high_bit = simd256<1>::constant<(1)>(); 946 return simd256<1>::ult(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)); 947 } 948 949 //The total number of operations is 90 947 return simd_andc(arg1, arg2); 948 } 949 950 //The total number of operations is 30 950 951 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::lt(bitblock256_t arg1, bitblock256_t arg2) 951 952 { 952 bitblock256_t high_bit = simd256<2>::constant<(2)>(); 953 return simd256<2>::ult(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)); 953 bitblock256_t hiAns = simd256<(1)>::lt(arg1, arg2); 954 bitblock256_t loAns = simd256<(1)>::ult(arg1, arg2); 955 bitblock256_t mask = simd_and(loAns, simd256<2>::srli<(1)>(simd256<(1)>::eq(arg1, arg2))); 956 mask = simd_or(mask, simd256<2>::slli<(1)>(mask)); 957 return simd_or(simd256<2>::srai<(1)>(hiAns), mask); 954 958 } 955 959 … … 1107 1111 } 1108 1112 1109 //The total number of operations is 1 221113 //The total number of operations is 1 1110 1114 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::ugt(bitblock256_t arg1, bitblock256_t arg2) 1111 1115 { 1112 return simd 256<1>::ifh(simd256<(2)>::himask(), simd256<(2)>::ugt(simd_and(simd256<(2)>::himask(), arg1), arg2), simd256<(2)>::ugt(simd_andc(arg1, simd256<(2)>::himask()), simd_andc(arg2, simd256<(2)>::himask())));1113 } 1114 1115 //The total number of operations is 581116 return simd_andc(arg1, arg2); 1117 } 1118 1119 //The total number of operations is 29 1116 1120 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::ugt(bitblock256_t arg1, bitblock256_t arg2) 1117 1121 { 1118 return simd256<1>::ifh(simd256<(4)>::himask(), simd256<(4)>::ugt(simd_and(simd256<(4)>::himask(), arg1), arg2), simd256<(4)>::ugt(simd_andc(arg1, simd256<(4)>::himask()), simd_andc(arg2, simd256<(4)>::himask()))); 1122 bitblock256_t tmpAns = simd256<(1)>::ugt(arg1, arg2); 1123 bitblock256_t mask = simd_and(tmpAns, simd256<2>::srli<(1)>(simd256<(1)>::eq(arg1, arg2))); 1124 mask = simd_or(mask, simd256<2>::slli<(1)>(mask)); 1125 return simd_or(simd256<2>::srai<(1)>(tmpAns), mask); 1119 1126 } 1120 1127 … … 1273 1280 bitblock256_t tmpAns = simd256<(128)>::popcount(arg1); 1274 1281 return simd256<(128)>::add(simd_and(tmpAns, simd256<256>::lomask()), simd256<256>::srli<(128)>(tmpAns)); 1275 }1276 1277 //The total number of operations is 11278 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::neg(bitblock256_t arg1)1279 {1280 return simd256<1>::sub(simd256<1>::constant<0>(), arg1);1281 1282 } 1282 1283 … … 1647 1648 } 1648 1649 1649 //The total number of operations is 951650 //The total number of operations is 1 1650 1651 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::min(bitblock256_t arg1, bitblock256_t arg2) 1651 1652 { 1652 bitblock256_t high_bit = simd256<1>::constant<(1)>(); 1653 return simd_xor(simd256<1>::umin(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit); 1654 } 1655 1656 //The total number of operations is 47 1653 return simd_or(arg1, arg2); 1654 } 1655 1656 //The total number of operations is 29 1657 1657 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::min(bitblock256_t arg1, bitblock256_t arg2) 1658 1658 { 1659 bitblock256_t high_bit = simd256<2>::constant<(2)>(); 1660 return simd_xor(simd256<2>::umin(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit); 1659 bitblock256_t hiAns = simd256<(1)>::min(arg1, arg2); 1660 bitblock256_t loAns = simd256<(1)>::umin(arg1, arg2); 1661 bitblock256_t eqMask1 = simd256<2>::srli<(1)>(simd256<(1)>::eq(hiAns, arg1)); 1662 bitblock256_t eqMask2 = simd256<2>::srli<(1)>(simd256<(1)>::eq(hiAns, arg2)); 1663 return simd256<1>::ifh(simd256<2>::himask(), hiAns, simd256<1>::ifh(eqMask1, simd256<1>::ifh(eqMask2, loAns, arg1), arg2)); 1661 1664 } 1662 1665 … … 1712 1715 } 1713 1716 1714 //The total number of operations is 921717 //The total number of operations is 1 1715 1718 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::umin(bitblock256_t arg1, bitblock256_t arg2) 1716 1719 { 1717 return simd_ or(simd_and(simd256<(2)>::himask(), simd256<(2)>::umin(arg1, arg2)), simd256<(2)>::umin(simd_and(simd256<(2)>::lomask(), arg1), simd_and(simd256<(2)>::lomask(), arg2)));1718 } 1719 1720 //The total number of operations is 441720 return simd_and(arg1, arg2); 1721 } 1722 1723 //The total number of operations is 28 1721 1724 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::umin(bitblock256_t arg1, bitblock256_t arg2) 1722 1725 { 1723 return simd_or(simd_and(simd256<(4)>::himask(), simd256<(4)>::umin(arg1, arg2)), simd256<(4)>::umin(simd_and(simd256<(4)>::lomask(), arg1), simd_and(simd256<(4)>::lomask(), arg2))); 1726 bitblock256_t tmpAns = simd256<(1)>::umin(arg1, arg2); 1727 bitblock256_t eqMask1 = simd256<2>::srli<(1)>(simd256<(1)>::eq(tmpAns, arg1)); 1728 bitblock256_t eqMask2 = simd256<2>::srli<(1)>(simd256<(1)>::eq(tmpAns, arg2)); 1729 return simd256<1>::ifh(simd256<2>::himask(), tmpAns, simd256<1>::ifh(eqMask1, simd256<1>::ifh(eqMask2, tmpAns, arg1), arg2)); 1724 1730 } 1725 1731 … … 1773 1779 } 1774 1780 1775 //The total number of operations is 921781 //The total number of operations is 1 1776 1782 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::umax(bitblock256_t arg1, bitblock256_t arg2) 1777 1783 { 1778 return simd_or( simd_and(simd256<(2)>::himask(), simd256<(2)>::umax(arg1, arg2)), simd256<(2)>::umax(simd_and(simd256<(2)>::lomask(), arg1), simd_and(simd256<(2)>::lomask(), arg2)));1779 } 1780 1781 //The total number of operations is 441784 return simd_or(arg1, arg2); 1785 } 1786 1787 //The total number of operations is 28 1782 1788 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::umax(bitblock256_t arg1, bitblock256_t arg2) 1783 1789 { 1784 return simd_or(simd_and(simd256<(4)>::himask(), simd256<(4)>::umax(arg1, arg2)), simd256<(4)>::umax(simd_and(simd256<(4)>::lomask(), arg1), simd_and(simd256<(4)>::lomask(), arg2))); 1790 bitblock256_t tmpAns = simd256<(1)>::umax(arg1, arg2); 1791 bitblock256_t eqMask1 = simd256<2>::srli<(1)>(simd256<(1)>::eq(tmpAns, arg1)); 1792 bitblock256_t eqMask2 = simd256<2>::srli<(1)>(simd256<(1)>::eq(tmpAns, arg2)); 1793 return simd256<1>::ifh(simd256<2>::himask(), tmpAns, simd256<1>::ifh(eqMask1, simd256<1>::ifh(eqMask2, tmpAns, arg1), arg2)); 1785 1794 } 1786 1795 … … 1834 1843 } 1835 1844 1836 //The total number of operations is 1131845 //The total number of operations is 2 1837 1846 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::eq(bitblock256_t arg1, bitblock256_t arg2) 1838 1847 { 1839 return simd_ or(simd_and(simd256<(2)>::himask(), simd256<(2)>::eq(simd_and(simd256<(2)>::himask(), arg1), simd_and(simd256<(2)>::himask(), arg2))), simd_and(simd256<(2)>::lomask(), simd256<(2)>::eq(simd_and(simd256<(2)>::lomask(), arg1), simd_and(simd256<(2)>::lomask(), arg2))));1840 } 1841 1842 //The total number of operations is 531848 return simd_not(simd_xor(arg1, arg2)); 1849 } 1850 1851 //The total number of operations is 18 1843 1852 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::eq(bitblock256_t arg1, bitblock256_t arg2) 1844 1853 { 1845 return simd_or(simd_and(simd256<(4)>::himask(), simd256<(4)>::eq(simd_and(simd256<(4)>::himask(), arg1), simd_and(simd256<(4)>::himask(), arg2))), simd_and(simd256<(4)>::lomask(), simd256<(4)>::eq(simd_and(simd256<(4)>::lomask(), arg1), simd_and(simd256<(4)>::lomask(), arg2)))); 1854 bitblock256_t tmpAns = simd256<(1)>::eq(arg1, arg2); 1855 bitblock256_t loMask = simd_and(tmpAns, simd256<2>::srli<(1)>(tmpAns)); 1856 bitblock256_t hiMask = simd256<2>::slli<(1)>(loMask); 1857 return simd_or(loMask, hiMask); 1846 1858 } 1847 1859 … … 2061 2073 } 2062 2074 2063 //The total number of operations is 1282064 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::abs(bitblock256_t arg1)2065 {2066 bitblock256_t gtMask = simd256<1>::gt(arg1, simd256<1>::constant<0>());2067 return simd256<1>::ifh(gtMask, arg1, simd256<1>::sub(gtMask, arg1));2068 }2069 2070 2075 //The total number of operations is 45 2071 2076 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::abs(bitblock256_t arg1) … … 2120 2125 } 2121 2126 2122 //The total number of operations is 6522127 //The total number of operations is 561 2123 2128 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<2>::umin_hl(bitblock256_t arg1, bitblock256_t arg2) 2124 2129 { … … 2126 2131 } 2127 2132 2128 //The total number of operations is 4 282133 //The total number of operations is 412 2129 2134 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<4>::umin_hl(bitblock256_t arg1, bitblock256_t arg2) 2130 2135 { … … 2216 2221 } 2217 2222 2218 //The total number of operations is 5342223 //The total number of operations is 414 2219 2224 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<2>::packss(bitblock256_t arg1, bitblock256_t arg2) 2220 2225 { … … 2408 2413 } 2409 2414 2410 //The total number of operations is 6552415 //The total number of operations is 561 2411 2416 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<2>::min_hl(bitblock256_t arg1, bitblock256_t arg2) 2412 2417 { … … 2414 2419 } 2415 2420 2416 //The total number of operations is 4 312421 //The total number of operations is 413 2417 2422 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<4>::min_hl(bitblock256_t arg1, bitblock256_t arg2) 2418 2423 { … … 2456 2461 } 2457 2462 2458 //The total number of operations is 4142463 //The total number of operations is 344 2459 2464 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<2>::packus(bitblock256_t arg1, bitblock256_t arg2) 2460 2465 {
Note: See TracChangeset
for help on using the changeset viewer.