Changeset 1573 for trunk


Ignore:
Timestamp:
Oct 22, 2011, 6:46:08 PM (8 years ago)
Author:
cameron
Message:

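FW=1 operations: implement the field-width-1 max, min, umax, umin, gt, lt, ugt, ult, eq and mult operations directly as bitwise logic (simd_and / simd_or / simd_andc / simd_not of simd_xor) instead of deriving them from the FW=2 versions, rebuild several FW=2 operations on top of the new FW=1 ones, and remove the FW=1 neg and abs specializations.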

Location:
trunk
Files:
8 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/idisa_cpp/idisa_avx.cpp

    r1570 r1573  
    197197template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::popcount(bitblock256_t arg1);
    198198template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::popcount(bitblock256_t arg1);
    199 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::neg(bitblock256_t arg1);
    200199template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::neg(bitblock256_t arg1);
    201200template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::neg(bitblock256_t arg1);
     
    318317template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::add(bitblock256_t arg1, bitblock256_t arg2);
    319318template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::add(bitblock256_t arg1, bitblock256_t arg2);
    320 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::abs(bitblock256_t arg1);
    321319template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::abs(bitblock256_t arg1);
    322320template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::abs(bitblock256_t arg1);
     
    573571}
    574572
    575 //The total number of operations is 95
     573//The total number of operations is 1
    576574template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::max(bitblock256_t arg1, bitblock256_t arg2)
    577575{
    578         bitblock256_t high_bit = simd256<1>::constant<(1)>();
    579         return simd_xor(simd256<1>::umax(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
    580 }
    581 
    582 //The total number of operations is 47
     576        return simd_and(arg1, arg2);
     577}
     578
     579//The total number of operations is 29
    583580template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::max(bitblock256_t arg1, bitblock256_t arg2)
    584581{
    585         bitblock256_t high_bit = simd256<2>::constant<(2)>();
    586         return simd_xor(simd256<2>::umax(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
     582        bitblock256_t hiAns = simd256<(1)>::max(arg1, arg2);
     583        bitblock256_t loAns = simd256<(1)>::umax(arg1, arg2);
     584        bitblock256_t eqMask1 = simd256<2>::srli<(1)>(simd256<(1)>::eq(hiAns, arg1));
     585        bitblock256_t eqMask2 = simd256<2>::srli<(1)>(simd256<(1)>::eq(hiAns, arg2));
     586        return simd256<1>::ifh(simd256<2>::himask(), hiAns, simd256<1>::ifh(eqMask1, simd256<1>::ifh(eqMask2, loAns, arg1), arg2));
    587587}
    588588
     
    638638}
    639639
    640 //The total number of operations is 216
     640//The total number of operations is 1
    641641template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::mult(bitblock256_t arg1, bitblock256_t arg2)
    642642{
    643         bitblock256_t loMask = simd256<(2)>::lomask();
    644         bitblock256_t tmpAns1 = simd256<(2)>::mult(simd_and(loMask, arg1), simd_and(loMask, arg2));
    645         bitblock256_t tmpAns2 = simd256<(2)>::mult(simd256<(2)>::srli<1>(arg1), simd256<(2)>::srli<1>(arg2));
    646         return simd256<1>::ifh(loMask, tmpAns1, simd256<(2)>::slli<1>(tmpAns2));
     643        return simd_and(arg1, arg2);
    647644}
    648645
     
    727724}
    728725
    729 //The total number of operations is 124
     726//The total number of operations is 1
    730727template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::gt(bitblock256_t arg1, bitblock256_t arg2)
    731728{
    732         bitblock256_t high_bit = simd256<1>::constant<(1)>();
    733         return simd256<1>::ugt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    734 }
    735 
    736 //The total number of operations is 60
     729        return simd_andc(arg2, arg1);
     730}
     731
     732//The total number of operations is 30
    737733template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::gt(bitblock256_t arg1, bitblock256_t arg2)
    738734{
    739         bitblock256_t high_bit = simd256<2>::constant<(2)>();
    740         return simd256<2>::ugt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
     735        bitblock256_t hiAns = simd256<(1)>::gt(arg1, arg2);
     736        bitblock256_t loAns = simd256<(1)>::ugt(arg1, arg2);
     737        bitblock256_t mask = simd_and(loAns, simd256<2>::srli<(1)>(simd256<(1)>::eq(arg1, arg2)));
     738        mask = simd_or(mask, simd256<2>::slli<(1)>(mask));
     739        return simd_or(simd256<2>::srai<(1)>(hiAns), mask);
    741740}
    742741
     
    879878}
    880879
    881 //The total number of operations is 182
     880//The total number of operations is 1
    882881template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::ult(bitblock256_t arg1, bitblock256_t arg2)
    883882{
    884         return simd256<1>::ifh(simd256<(2)>::himask(), simd256<(2)>::ult(arg1, simd_and(simd256<(2)>::himask(), arg2)), simd256<(2)>::ult(simd_andc(arg1, simd256<(2)>::himask()), simd_andc(arg2, simd256<(2)>::himask())));
    885 }
    886 
    887 //The total number of operations is 88
     883        return simd_andc(arg2, arg1);
     884}
     885
     886//The total number of operations is 29
    888887template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::ult(bitblock256_t arg1, bitblock256_t arg2)
    889888{
    890         return simd_and(simd256<2>::srai<(1)>(simd_or(simd_and(simd_not(arg1), arg2), simd_and(simd_not(simd_xor(arg1, arg2)), simd256<2>::sub(arg1, arg2)))), simd_not(simd256<2>::eq(arg1, arg2)));
     889        bitblock256_t tmpAns = simd256<(1)>::ult(arg1, arg2);
     890        bitblock256_t mask = simd_and(tmpAns, simd256<2>::srli<(1)>(simd256<(1)>::eq(arg1, arg2)));
     891        mask = simd_or(mask, simd256<2>::slli<(1)>(mask));
     892        return simd_or(simd256<2>::srai<(1)>(tmpAns), mask);
    891893}
    892894
     
    940942}
    941943
    942 //The total number of operations is 184
     944//The total number of operations is 1
    943945template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::lt(bitblock256_t arg1, bitblock256_t arg2)
    944946{
    945         bitblock256_t high_bit = simd256<1>::constant<(1)>();
    946         return simd256<1>::ult(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    947 }
    948 
    949 //The total number of operations is 90
     947        return simd_andc(arg1, arg2);
     948}
     949
     950//The total number of operations is 30
    950951template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::lt(bitblock256_t arg1, bitblock256_t arg2)
    951952{
    952         bitblock256_t high_bit = simd256<2>::constant<(2)>();
    953         return simd256<2>::ult(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
     953        bitblock256_t hiAns = simd256<(1)>::lt(arg1, arg2);
     954        bitblock256_t loAns = simd256<(1)>::ult(arg1, arg2);
     955        bitblock256_t mask = simd_and(loAns, simd256<2>::srli<(1)>(simd256<(1)>::eq(arg1, arg2)));
     956        mask = simd_or(mask, simd256<2>::slli<(1)>(mask));
     957        return simd_or(simd256<2>::srai<(1)>(hiAns), mask);
    954958}
    955959
     
    11071111}
    11081112
    1109 //The total number of operations is 122
     1113//The total number of operations is 1
    11101114template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::ugt(bitblock256_t arg1, bitblock256_t arg2)
    11111115{
    1112         return simd256<1>::ifh(simd256<(2)>::himask(), simd256<(2)>::ugt(simd_and(simd256<(2)>::himask(), arg1), arg2), simd256<(2)>::ugt(simd_andc(arg1, simd256<(2)>::himask()), simd_andc(arg2, simd256<(2)>::himask())));
    1113 }
    1114 
    1115 //The total number of operations is 58
     1116        return simd_andc(arg1, arg2);
     1117}
     1118
     1119//The total number of operations is 29
    11161120template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::ugt(bitblock256_t arg1, bitblock256_t arg2)
    11171121{
    1118         return simd256<1>::ifh(simd256<(4)>::himask(), simd256<(4)>::ugt(simd_and(simd256<(4)>::himask(), arg1), arg2), simd256<(4)>::ugt(simd_andc(arg1, simd256<(4)>::himask()), simd_andc(arg2, simd256<(4)>::himask())));
     1122        bitblock256_t tmpAns = simd256<(1)>::ugt(arg1, arg2);
     1123        bitblock256_t mask = simd_and(tmpAns, simd256<2>::srli<(1)>(simd256<(1)>::eq(arg1, arg2)));
     1124        mask = simd_or(mask, simd256<2>::slli<(1)>(mask));
     1125        return simd_or(simd256<2>::srai<(1)>(tmpAns), mask);
    11191126}
    11201127
     
    12731280        bitblock256_t tmpAns = simd256<(128)>::popcount(arg1);
    12741281        return simd256<(128)>::add(simd_and(tmpAns, simd256<256>::lomask()), simd256<256>::srli<(128)>(tmpAns));
    1275 }
    1276 
    1277 //The total number of operations is 1
    1278 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::neg(bitblock256_t arg1)
    1279 {
    1280         return simd256<1>::sub(simd256<1>::constant<0>(), arg1);
    12811282}
    12821283
     
    16471648}
    16481649
    1649 //The total number of operations is 95
     1650//The total number of operations is 1
    16501651template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::min(bitblock256_t arg1, bitblock256_t arg2)
    16511652{
    1652         bitblock256_t high_bit = simd256<1>::constant<(1)>();
    1653         return simd_xor(simd256<1>::umin(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
    1654 }
    1655 
    1656 //The total number of operations is 47
     1653        return simd_or(arg1, arg2);
     1654}
     1655
     1656//The total number of operations is 29
    16571657template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::min(bitblock256_t arg1, bitblock256_t arg2)
    16581658{
    1659         bitblock256_t high_bit = simd256<2>::constant<(2)>();
    1660         return simd_xor(simd256<2>::umin(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
     1659        bitblock256_t hiAns = simd256<(1)>::min(arg1, arg2);
     1660        bitblock256_t loAns = simd256<(1)>::umin(arg1, arg2);
     1661        bitblock256_t eqMask1 = simd256<2>::srli<(1)>(simd256<(1)>::eq(hiAns, arg1));
     1662        bitblock256_t eqMask2 = simd256<2>::srli<(1)>(simd256<(1)>::eq(hiAns, arg2));
     1663        return simd256<1>::ifh(simd256<2>::himask(), hiAns, simd256<1>::ifh(eqMask1, simd256<1>::ifh(eqMask2, loAns, arg1), arg2));
    16611664}
    16621665
     
    17121715}
    17131716
    1714 //The total number of operations is 92
     1717//The total number of operations is 1
    17151718template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::umin(bitblock256_t arg1, bitblock256_t arg2)
    17161719{
    1717         return simd_or(simd_and(simd256<(2)>::himask(), simd256<(2)>::umin(arg1, arg2)), simd256<(2)>::umin(simd_and(simd256<(2)>::lomask(), arg1), simd_and(simd256<(2)>::lomask(), arg2)));
    1718 }
    1719 
    1720 //The total number of operations is 44
     1720        return simd_and(arg1, arg2);
     1721}
     1722
     1723//The total number of operations is 28
    17211724template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::umin(bitblock256_t arg1, bitblock256_t arg2)
    17221725{
    1723         return simd_or(simd_and(simd256<(4)>::himask(), simd256<(4)>::umin(arg1, arg2)), simd256<(4)>::umin(simd_and(simd256<(4)>::lomask(), arg1), simd_and(simd256<(4)>::lomask(), arg2)));
     1726        bitblock256_t tmpAns = simd256<(1)>::umin(arg1, arg2);
     1727        bitblock256_t eqMask1 = simd256<2>::srli<(1)>(simd256<(1)>::eq(tmpAns, arg1));
     1728        bitblock256_t eqMask2 = simd256<2>::srli<(1)>(simd256<(1)>::eq(tmpAns, arg2));
     1729        return simd256<1>::ifh(simd256<2>::himask(), tmpAns, simd256<1>::ifh(eqMask1, simd256<1>::ifh(eqMask2, tmpAns, arg1), arg2));
    17241730}
    17251731
     
    17731779}
    17741780
    1775 //The total number of operations is 92
     1781//The total number of operations is 1
    17761782template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::umax(bitblock256_t arg1, bitblock256_t arg2)
    17771783{
    1778         return simd_or(simd_and(simd256<(2)>::himask(), simd256<(2)>::umax(arg1, arg2)), simd256<(2)>::umax(simd_and(simd256<(2)>::lomask(), arg1), simd_and(simd256<(2)>::lomask(), arg2)));
    1779 }
    1780 
    1781 //The total number of operations is 44
     1784        return simd_or(arg1, arg2);
     1785}
     1786
     1787//The total number of operations is 28
    17821788template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::umax(bitblock256_t arg1, bitblock256_t arg2)
    17831789{
    1784         return simd_or(simd_and(simd256<(4)>::himask(), simd256<(4)>::umax(arg1, arg2)), simd256<(4)>::umax(simd_and(simd256<(4)>::lomask(), arg1), simd_and(simd256<(4)>::lomask(), arg2)));
     1790        bitblock256_t tmpAns = simd256<(1)>::umax(arg1, arg2);
     1791        bitblock256_t eqMask1 = simd256<2>::srli<(1)>(simd256<(1)>::eq(tmpAns, arg1));
     1792        bitblock256_t eqMask2 = simd256<2>::srli<(1)>(simd256<(1)>::eq(tmpAns, arg2));
     1793        return simd256<1>::ifh(simd256<2>::himask(), tmpAns, simd256<1>::ifh(eqMask1, simd256<1>::ifh(eqMask2, tmpAns, arg1), arg2));
    17851794}
    17861795
     
    18341843}
    18351844
    1836 //The total number of operations is 113
     1845//The total number of operations is 2
    18371846template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::eq(bitblock256_t arg1, bitblock256_t arg2)
    18381847{
    1839         return simd_or(simd_and(simd256<(2)>::himask(), simd256<(2)>::eq(simd_and(simd256<(2)>::himask(), arg1), simd_and(simd256<(2)>::himask(), arg2))), simd_and(simd256<(2)>::lomask(), simd256<(2)>::eq(simd_and(simd256<(2)>::lomask(), arg1), simd_and(simd256<(2)>::lomask(), arg2))));
    1840 }
    1841 
    1842 //The total number of operations is 53
     1848        return simd_not(simd_xor(arg1, arg2));
     1849}
     1850
     1851//The total number of operations is 18
    18431852template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::eq(bitblock256_t arg1, bitblock256_t arg2)
    18441853{
    1845         return simd_or(simd_and(simd256<(4)>::himask(), simd256<(4)>::eq(simd_and(simd256<(4)>::himask(), arg1), simd_and(simd256<(4)>::himask(), arg2))), simd_and(simd256<(4)>::lomask(), simd256<(4)>::eq(simd_and(simd256<(4)>::lomask(), arg1), simd_and(simd256<(4)>::lomask(), arg2))));
     1854        bitblock256_t tmpAns = simd256<(1)>::eq(arg1, arg2);
     1855        bitblock256_t loMask = simd_and(tmpAns, simd256<2>::srli<(1)>(tmpAns));
     1856        bitblock256_t hiMask = simd256<2>::slli<(1)>(loMask);
     1857        return simd_or(loMask, hiMask);
    18461858}
    18471859
     
    20612073}
    20622074
    2063 //The total number of operations is 128
    2064 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::abs(bitblock256_t arg1)
    2065 {
    2066         bitblock256_t gtMask = simd256<1>::gt(arg1, simd256<1>::constant<0>());
    2067         return simd256<1>::ifh(gtMask, arg1, simd256<1>::sub(gtMask, arg1));
    2068 }
    2069 
    20702075//The total number of operations is 45
    20712076template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::abs(bitblock256_t arg1)
     
    21202125}
    21212126
    2122 //The total number of operations is 652
     2127//The total number of operations is 561
    21232128template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<2>::umin_hl(bitblock256_t arg1, bitblock256_t arg2)
    21242129{
     
    21262131}
    21272132
    2128 //The total number of operations is 428
     2133//The total number of operations is 412
    21292134template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<4>::umin_hl(bitblock256_t arg1, bitblock256_t arg2)
    21302135{
     
    22162221}
    22172222
    2218 //The total number of operations is 534
     2223//The total number of operations is 414
    22192224template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<2>::packss(bitblock256_t arg1, bitblock256_t arg2)
    22202225{
     
    24082413}
    24092414
    2410 //The total number of operations is 655
     2415//The total number of operations is 561
    24112416template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<2>::min_hl(bitblock256_t arg1, bitblock256_t arg2)
    24122417{
     
    24142419}
    24152420
    2416 //The total number of operations is 431
     2421//The total number of operations is 413
    24172422template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<4>::min_hl(bitblock256_t arg1, bitblock256_t arg2)
    24182423{
     
    24562461}
    24572462
    2458 //The total number of operations is 414
     2463//The total number of operations is 344
    24592464template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<2>::packus(bitblock256_t arg1, bitblock256_t arg2)
    24602465{
  • trunk/lib/idisa_cpp/idisa_sse2.cpp

    r1570 r1573  
    191191template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::popcount(bitblock128_t arg1);
    192192template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::popcount(bitblock128_t arg1);
    193 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::neg(bitblock128_t arg1);
    194193template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::neg(bitblock128_t arg1);
    195194template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::neg(bitblock128_t arg1);
     
    300299template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::add(bitblock128_t arg1, bitblock128_t arg2);
    301300template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::add(bitblock128_t arg1, bitblock128_t arg2);
    302 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::abs(bitblock128_t arg1);
    303301template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1);
    304302template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::abs(bitblock128_t arg1);
     
    528526}
    529527
    530 //The total number of operations is 39
     528//The total number of operations is 1
    531529template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::max(bitblock128_t arg1, bitblock128_t arg2)
    532530{
    533         bitblock128_t high_bit = simd128<1>::constant<(1)>();
    534         return simd_xor(simd128<1>::umax(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
    535 }
    536 
    537 //The total number of operations is 19
     531        return simd_and(arg1, arg2);
     532}
     533
     534//The total number of operations is 18
    538535template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::max(bitblock128_t arg1, bitblock128_t arg2)
    539536{
    540         return simd128<1>::ifh(simd128<2>::himask(), simd_and(arg1, arg2), simd_or(simd_and(arg2, simd128<128>::srli<1>(simd_or(arg1, simd_not(arg2)))), simd_and(arg1, simd128<128>::srli<1>(simd_or(simd_not(arg1), arg2)))));
     537        return simd128<1>::ifh(simd128<2>::lt(arg1, arg2), arg2, arg1);
    541538}
    542539
     
    582579}
    583580
    584 //The total number of operations is 57
     581//The total number of operations is 1
    585582template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::mult(bitblock128_t arg1, bitblock128_t arg2)
    586583{
    587         bitblock128_t loMask = simd128<(2)>::lomask();
    588         bitblock128_t tmpAns1 = simd128<(2)>::mult(simd_and(loMask, arg1), simd_and(loMask, arg2));
    589         bitblock128_t tmpAns2 = simd128<(2)>::mult(simd128<(2)>::srli<1>(arg1), simd128<(2)>::srli<1>(arg2));
    590         return simd128<1>::ifh(loMask, tmpAns1, simd128<(2)>::slli<1>(tmpAns2));
     584        return simd_and(arg1, arg2);
    591585}
    592586
     
    660654}
    661655
    662 //The total number of operations is 42
     656//The total number of operations is 1
    663657template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::gt(bitblock128_t arg1, bitblock128_t arg2)
    664658{
    665         bitblock128_t high_bit = simd128<1>::constant<(1)>();
    666         return simd128<1>::ugt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    667 }
    668 
    669 //The total number of operations is 18
     659        return simd_andc(arg2, arg1);
     660}
     661
     662//The total number of operations is 15
    670663template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::gt(bitblock128_t arg1, bitblock128_t arg2)
    671664{
    672         bitblock128_t tmp = simd_not(arg1);
    673         bitblock128_t tmpAns = simd_or(simd_and(tmp, arg2), simd_and(simd128<128>::slli<1>(simd_and(arg1, simd_not(arg2))), simd_or(tmp, arg2)));
    674         return simd128<1>::ifh(simd128<2>::himask(), tmpAns, simd128<128>::srli<1>(tmpAns));
     665        bitblock128_t hiAns = simd128<(1)>::gt(arg1, arg2);
     666        bitblock128_t loAns = simd128<(1)>::ugt(arg1, arg2);
     667        bitblock128_t mask = simd_and(loAns, simd128<2>::srli<(1)>(simd128<(1)>::eq(arg1, arg2)));
     668        mask = simd_or(mask, simd128<2>::slli<(1)>(mask));
     669        return simd_or(simd128<2>::srai<(1)>(hiAns), mask);
    675670}
    676671
     
    788783}
    789784
    790 //The total number of operations is 40
     785//The total number of operations is 1
    791786template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::ult(bitblock128_t arg1, bitblock128_t arg2)
    792787{
    793         return simd128<1>::ifh(simd128<(2)>::himask(), simd128<(2)>::ult(arg1, simd_and(simd128<(2)>::himask(), arg2)), simd128<(2)>::ult(simd_andc(arg1, simd128<(2)>::himask()), simd_andc(arg2, simd128<(2)>::himask())));
    794 }
    795 
    796 //The total number of operations is 17
     788        return simd_andc(arg2, arg1);
     789}
     790
     791//The total number of operations is 14
    797792template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::ult(bitblock128_t arg1, bitblock128_t arg2)
    798793{
    799         bitblock128_t tmp = simd_not(arg1);
    800         bitblock128_t tmpAns = simd_or(simd_and(tmp, arg2), simd_and(simd128<128>::slli<1>(simd_and(tmp, arg2)), simd_or(tmp, arg2)));
    801         return simd128<1>::ifh(simd128<2>::himask(), tmpAns, simd128<128>::srli<1>(tmpAns));
     794        bitblock128_t tmpAns = simd128<(1)>::ult(arg1, arg2);
     795        bitblock128_t mask = simd_and(tmpAns, simd128<2>::srli<(1)>(simd128<(1)>::eq(arg1, arg2)));
     796        mask = simd_or(mask, simd128<2>::slli<(1)>(mask));
     797        return simd_or(simd128<2>::srai<(1)>(tmpAns), mask);
    802798}
    803799
     
    844840}
    845841
    846 //The total number of operations is 42
     842//The total number of operations is 1
    847843template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::lt(bitblock128_t arg1, bitblock128_t arg2)
    848844{
    849         bitblock128_t high_bit = simd128<1>::constant<(1)>();
    850         return simd128<1>::ult(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    851 }
    852 
    853 //The total number of operations is 18
     845        return simd_andc(arg1, arg2);
     846}
     847
     848//The total number of operations is 15
    854849template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::lt(bitblock128_t arg1, bitblock128_t arg2)
    855850{
    856         bitblock128_t tmp = simd_not(arg2);
    857         bitblock128_t tmpAns = simd_or(simd_and(arg1, tmp), simd_and(simd128<128>::slli<1>(simd_and(simd_not(arg1), arg2)), simd_or(arg1, tmp)));
    858         return simd128<1>::ifh(simd128<2>::himask(), tmpAns, simd128<128>::srli<1>(tmpAns));
     851        bitblock128_t hiAns = simd128<(1)>::lt(arg1, arg2);
     852        bitblock128_t loAns = simd128<(1)>::ult(arg1, arg2);
     853        bitblock128_t mask = simd_and(loAns, simd128<2>::srli<(1)>(simd128<(1)>::eq(arg1, arg2)));
     854        mask = simd_or(mask, simd128<2>::slli<(1)>(mask));
     855        return simd_or(simd128<2>::srai<(1)>(hiAns), mask);
    859856}
    860857
     
    10031000}
    10041001
    1005 //The total number of operations is 40
     1002//The total number of operations is 1
    10061003template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::ugt(bitblock128_t arg1, bitblock128_t arg2)
    10071004{
    1008         return simd128<1>::ifh(simd128<(2)>::himask(), simd128<(2)>::ugt(simd_and(simd128<(2)>::himask(), arg1), arg2), simd128<(2)>::ugt(simd_andc(arg1, simd128<(2)>::himask()), simd_andc(arg2, simd128<(2)>::himask())));
    1009 }
    1010 
    1011 //The total number of operations is 17
     1005        return simd_andc(arg1, arg2);
     1006}
     1007
     1008//The total number of operations is 14
    10121009template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::ugt(bitblock128_t arg1, bitblock128_t arg2)
    10131010{
    1014         bitblock128_t tmp = simd_not(arg2);
    1015         bitblock128_t tmpAns = simd_or(simd_and(arg1, tmp), simd_and(simd128<128>::slli<1>(simd_and(arg1, tmp)), simd_or(arg1, tmp)));
    1016         return simd128<1>::ifh(simd128<2>::himask(), tmpAns, simd128<128>::srli<1>(tmpAns));
     1011        bitblock128_t tmpAns = simd128<(1)>::ugt(arg1, arg2);
     1012        bitblock128_t mask = simd_and(tmpAns, simd128<2>::srli<(1)>(simd128<(1)>::eq(arg1, arg2)));
     1013        mask = simd_or(mask, simd128<2>::slli<(1)>(mask));
     1014        return simd_or(simd128<2>::srai<(1)>(tmpAns), mask);
    10171015}
    10181016
     
    11531151}
    11541152
    1155 //The total number of operations is 1
    1156 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::neg(bitblock128_t arg1)
    1157 {
    1158         return simd128<1>::sub(simd128<1>::constant<0>(), arg1);
    1159 }
    1160 
    11611153//The total number of operations is 8
    11621154template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::neg(bitblock128_t arg1)
     
    14871479}
    14881480
    1489 //The total number of operations is 39
     1481//The total number of operations is 1
    14901482template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::min(bitblock128_t arg1, bitblock128_t arg2)
    14911483{
    1492         bitblock128_t high_bit = simd128<1>::constant<(1)>();
    1493         return simd_xor(simd128<1>::umin(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
    1494 }
    1495 
    1496 //The total number of operations is 19
     1484        return simd_or(arg1, arg2);
     1485}
     1486
     1487//The total number of operations is 18
    14971488template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::min(bitblock128_t arg1, bitblock128_t arg2)
    14981489{
    1499         bitblock128_t high_bit = simd128<2>::constant<(2)>();
    1500         return simd_xor(simd128<2>::umin(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
     1490        return simd128<1>::ifh(simd128<2>::lt(arg1, arg2), arg1, arg2);
    15011491}
    15021492
     
    15421532}
    15431533
    1544 //The total number of operations is 36
     1534//The total number of operations is 1
    15451535template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umin(bitblock128_t arg1, bitblock128_t arg2)
    15461536{
    1547         return simd_or(simd_and(simd128<(2)>::himask(), simd128<(2)>::umin(arg1, arg2)), simd128<(2)>::umin(simd_and(simd128<(2)>::lomask(), arg1), simd_and(simd128<(2)>::lomask(), arg2)));
     1537        return simd_and(arg1, arg2);
    15481538}
    15491539
     
    15981588}
    15991589
    1600 //The total number of operations is 36
     1590//The total number of operations is 1
    16011591template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umax(bitblock128_t arg1, bitblock128_t arg2)
    16021592{
    1603         return simd_or(simd_and(simd128<(2)>::himask(), simd128<(2)>::umax(arg1, arg2)), simd128<(2)>::umax(simd_and(simd128<(2)>::lomask(), arg1), simd_and(simd128<(2)>::lomask(), arg2)));
     1593        return simd_or(arg1, arg2);
    16041594}
    16051595
     
    16541644}
    16551645
    1656 //The total number of operations is 37
     1646//The total number of operations is 2
    16571647template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::eq(bitblock128_t arg1, bitblock128_t arg2)
    16581648{
    1659         return simd_or(simd_and(simd128<(2)>::himask(), simd128<(2)>::eq(simd_and(simd128<(2)>::himask(), arg1), simd_and(simd128<(2)>::himask(), arg2))), simd_and(simd128<(2)>::lomask(), simd128<(2)>::eq(simd_and(simd128<(2)>::lomask(), arg1), simd_and(simd128<(2)>::lomask(), arg2))));
    1660 }
    1661 
    1662 //The total number of operations is 15
     1649        return simd_not(simd_xor(arg1, arg2));
     1650}
     1651
     1652//The total number of operations is 8
    16631653template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::eq(bitblock128_t arg1, bitblock128_t arg2)
    16641654{
    1665         bitblock128_t tmp = simd_xor(arg1, arg2);
    1666         bitblock128_t tmpAns = simd_and(simd_not(simd128<128>::slli<1>(tmp)), simd_not(tmp));
    1667         return simd128<1>::ifh(simd128<2>::himask(), tmpAns, simd128<128>::srli<1>(tmpAns));
     1655        bitblock128_t tmpAns = simd128<(1)>::eq(arg1, arg2);
     1656        bitblock128_t loMask = simd_and(tmpAns, simd128<2>::srli<(1)>(tmpAns));
     1657        bitblock128_t hiMask = simd128<2>::slli<(1)>(loMask);
     1658        return simd_or(loMask, hiMask);
    16681659}
    16691660
     
    18501841}
    18511842
    1852 //The total number of operations is 46
    1853 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::abs(bitblock128_t arg1)
    1854 {
    1855         bitblock128_t gtMask = simd128<1>::gt(arg1, simd128<1>::constant<0>());
    1856         return simd128<1>::ifh(gtMask, arg1, simd128<1>::sub(gtMask, arg1));
    1857 }
    1858 
    18591843//The total number of operations is 9
    18601844template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1)
     
    19051889}
    19061890
    1907 //The total number of operations is 128
     1891//The total number of operations is 93
    19081892template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::umin_hl(bitblock128_t arg1, bitblock128_t arg2)
    19091893{
     
    19891973}
    19901974
    1991 //The total number of operations is 132
     1975//The total number of operations is 120
    19921976template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::packss(bitblock128_t arg1, bitblock128_t arg2)
    19931977{
     
    21632147}
    21642148
    2165 //The total number of operations is 131
     2149//The total number of operations is 93
    21662150template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::min_hl(bitblock128_t arg1, bitblock128_t arg2)
    21672151{
     
    21692153}
    21702154
    2171 //The total number of operations is 83
     2155//The total number of operations is 82
    21722156template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<4>::min_hl(bitblock128_t arg1, bitblock128_t arg2)
    21732157{
     
    22052189}
    22062190
    2207 //The total number of operations is 99
     2191//The total number of operations is 85
    22082192template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::packus(bitblock128_t arg1, bitblock128_t arg2)
    22092193{
  • trunk/lib/idisa_cpp/idisa_sse3.cpp

    r1570 r1573  
    191191template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::popcount(bitblock128_t arg1);
    192192template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::popcount(bitblock128_t arg1);
    193 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::neg(bitblock128_t arg1);
    194193template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::neg(bitblock128_t arg1);
    195194template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::neg(bitblock128_t arg1);
     
    300299template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::add(bitblock128_t arg1, bitblock128_t arg2);
    301300template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::add(bitblock128_t arg1, bitblock128_t arg2);
    302 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::abs(bitblock128_t arg1);
    303301template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1);
    304302template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::abs(bitblock128_t arg1);
     
    528526}
    529527
    530 //The total number of operations is 39
     528//The total number of operations is 1
    531529template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::max(bitblock128_t arg1, bitblock128_t arg2)
    532530{
    533         bitblock128_t high_bit = simd128<1>::constant<(1)>();
    534         return simd_xor(simd128<1>::umax(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
    535 }
    536 
    537 //The total number of operations is 19
     531        return simd_and(arg1, arg2);
     532}
     533
     534//The total number of operations is 18
    538535template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::max(bitblock128_t arg1, bitblock128_t arg2)
    539536{
    540         return simd128<1>::ifh(simd128<2>::himask(), simd_and(arg1, arg2), simd_or(simd_and(arg2, simd128<128>::srli<1>(simd_or(arg1, simd_not(arg2)))), simd_and(arg1, simd128<128>::srli<1>(simd_or(simd_not(arg1), arg2)))));
     537        return simd128<1>::ifh(simd128<2>::lt(arg1, arg2), arg2, arg1);
    541538}
    542539
     
    582579}
    583580
    584 //The total number of operations is 57
     581//The total number of operations is 1
    585582template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::mult(bitblock128_t arg1, bitblock128_t arg2)
    586583{
    587         bitblock128_t loMask = simd128<(2)>::lomask();
    588         bitblock128_t tmpAns1 = simd128<(2)>::mult(simd_and(loMask, arg1), simd_and(loMask, arg2));
    589         bitblock128_t tmpAns2 = simd128<(2)>::mult(simd128<(2)>::srli<1>(arg1), simd128<(2)>::srli<1>(arg2));
    590         return simd128<1>::ifh(loMask, tmpAns1, simd128<(2)>::slli<1>(tmpAns2));
     584        return simd_and(arg1, arg2);
    591585}
    592586
     
    660654}
    661655
    662 //The total number of operations is 42
     656//The total number of operations is 1
    663657template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::gt(bitblock128_t arg1, bitblock128_t arg2)
    664658{
    665         bitblock128_t high_bit = simd128<1>::constant<(1)>();
    666         return simd128<1>::ugt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    667 }
    668 
    669 //The total number of operations is 18
     659        return simd_andc(arg2, arg1);
     660}
     661
     662//The total number of operations is 15
    670663template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::gt(bitblock128_t arg1, bitblock128_t arg2)
    671664{
    672         bitblock128_t tmp = simd_not(arg1);
    673         bitblock128_t tmpAns = simd_or(simd_and(tmp, arg2), simd_and(simd128<128>::slli<1>(simd_and(arg1, simd_not(arg2))), simd_or(tmp, arg2)));
    674         return simd128<1>::ifh(simd128<2>::himask(), tmpAns, simd128<128>::srli<1>(tmpAns));
     665        bitblock128_t hiAns = simd128<(1)>::gt(arg1, arg2);
     666        bitblock128_t loAns = simd128<(1)>::ugt(arg1, arg2);
     667        bitblock128_t mask = simd_and(loAns, simd128<2>::srli<(1)>(simd128<(1)>::eq(arg1, arg2)));
     668        mask = simd_or(mask, simd128<2>::slli<(1)>(mask));
     669        return simd_or(simd128<2>::srai<(1)>(hiAns), mask);
    675670}
    676671
     
    788783}
    789784
    790 //The total number of operations is 40
     785//The total number of operations is 1
    791786template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::ult(bitblock128_t arg1, bitblock128_t arg2)
    792787{
    793         return simd128<1>::ifh(simd128<(2)>::himask(), simd128<(2)>::ult(arg1, simd_and(simd128<(2)>::himask(), arg2)), simd128<(2)>::ult(simd_andc(arg1, simd128<(2)>::himask()), simd_andc(arg2, simd128<(2)>::himask())));
    794 }
    795 
    796 //The total number of operations is 17
     788        return simd_andc(arg2, arg1);
     789}
     790
     791//The total number of operations is 14
    797792template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::ult(bitblock128_t arg1, bitblock128_t arg2)
    798793{
    799         bitblock128_t tmp = simd_not(arg1);
    800         bitblock128_t tmpAns = simd_or(simd_and(tmp, arg2), simd_and(simd128<128>::slli<1>(simd_and(tmp, arg2)), simd_or(tmp, arg2)));
    801         return simd128<1>::ifh(simd128<2>::himask(), tmpAns, simd128<128>::srli<1>(tmpAns));
     794        bitblock128_t tmpAns = simd128<(1)>::ult(arg1, arg2);
     795        bitblock128_t mask = simd_and(tmpAns, simd128<2>::srli<(1)>(simd128<(1)>::eq(arg1, arg2)));
     796        mask = simd_or(mask, simd128<2>::slli<(1)>(mask));
     797        return simd_or(simd128<2>::srai<(1)>(tmpAns), mask);
    802798}
    803799
     
    844840}
    845841
    846 //The total number of operations is 42
     842//The total number of operations is 1
    847843template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::lt(bitblock128_t arg1, bitblock128_t arg2)
    848844{
    849         bitblock128_t high_bit = simd128<1>::constant<(1)>();
    850         return simd128<1>::ult(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    851 }
    852 
    853 //The total number of operations is 18
     845        return simd_andc(arg1, arg2);
     846}
     847
     848//The total number of operations is 15
    854849template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::lt(bitblock128_t arg1, bitblock128_t arg2)
    855850{
    856         bitblock128_t tmp = simd_not(arg2);
    857         bitblock128_t tmpAns = simd_or(simd_and(arg1, tmp), simd_and(simd128<128>::slli<1>(simd_and(simd_not(arg1), arg2)), simd_or(arg1, tmp)));
    858         return simd128<1>::ifh(simd128<2>::himask(), tmpAns, simd128<128>::srli<1>(tmpAns));
     851        bitblock128_t hiAns = simd128<(1)>::lt(arg1, arg2);
     852        bitblock128_t loAns = simd128<(1)>::ult(arg1, arg2);
     853        bitblock128_t mask = simd_and(loAns, simd128<2>::srli<(1)>(simd128<(1)>::eq(arg1, arg2)));
     854        mask = simd_or(mask, simd128<2>::slli<(1)>(mask));
     855        return simd_or(simd128<2>::srai<(1)>(hiAns), mask);
    859856}
    860857
     
    10031000}
    10041001
    1005 //The total number of operations is 40
     1002//The total number of operations is 1
    10061003template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::ugt(bitblock128_t arg1, bitblock128_t arg2)
    10071004{
    1008         return simd128<1>::ifh(simd128<(2)>::himask(), simd128<(2)>::ugt(simd_and(simd128<(2)>::himask(), arg1), arg2), simd128<(2)>::ugt(simd_andc(arg1, simd128<(2)>::himask()), simd_andc(arg2, simd128<(2)>::himask())));
    1009 }
    1010 
    1011 //The total number of operations is 17
     1005        return simd_andc(arg1, arg2);
     1006}
     1007
     1008//The total number of operations is 14
    10121009template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::ugt(bitblock128_t arg1, bitblock128_t arg2)
    10131010{
    1014         bitblock128_t tmp = simd_not(arg2);
    1015         bitblock128_t tmpAns = simd_or(simd_and(arg1, tmp), simd_and(simd128<128>::slli<1>(simd_and(arg1, tmp)), simd_or(arg1, tmp)));
    1016         return simd128<1>::ifh(simd128<2>::himask(), tmpAns, simd128<128>::srli<1>(tmpAns));
     1011        bitblock128_t tmpAns = simd128<(1)>::ugt(arg1, arg2);
     1012        bitblock128_t mask = simd_and(tmpAns, simd128<2>::srli<(1)>(simd128<(1)>::eq(arg1, arg2)));
     1013        mask = simd_or(mask, simd128<2>::slli<(1)>(mask));
     1014        return simd_or(simd128<2>::srai<(1)>(tmpAns), mask);
    10171015}
    10181016
     
    11531151}
    11541152
    1155 //The total number of operations is 1
    1156 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::neg(bitblock128_t arg1)
    1157 {
    1158         return simd128<1>::sub(simd128<1>::constant<0>(), arg1);
    1159 }
    1160 
    11611153//The total number of operations is 8
    11621154template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::neg(bitblock128_t arg1)
     
    14871479}
    14881480
    1489 //The total number of operations is 39
     1481//The total number of operations is 1
    14901482template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::min(bitblock128_t arg1, bitblock128_t arg2)
    14911483{
    1492         bitblock128_t high_bit = simd128<1>::constant<(1)>();
    1493         return simd_xor(simd128<1>::umin(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
    1494 }
    1495 
    1496 //The total number of operations is 19
     1484        return simd_or(arg1, arg2);
     1485}
     1486
     1487//The total number of operations is 18
    14971488template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::min(bitblock128_t arg1, bitblock128_t arg2)
    14981489{
    1499         bitblock128_t high_bit = simd128<2>::constant<(2)>();
    1500         return simd_xor(simd128<2>::umin(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
     1490        return simd128<1>::ifh(simd128<2>::lt(arg1, arg2), arg1, arg2);
    15011491}
    15021492
     
    15421532}
    15431533
    1544 //The total number of operations is 36
     1534//The total number of operations is 1
    15451535template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umin(bitblock128_t arg1, bitblock128_t arg2)
    15461536{
    1547         return simd_or(simd_and(simd128<(2)>::himask(), simd128<(2)>::umin(arg1, arg2)), simd128<(2)>::umin(simd_and(simd128<(2)>::lomask(), arg1), simd_and(simd128<(2)>::lomask(), arg2)));
     1537        return simd_and(arg1, arg2);
    15481538}
    15491539
     
    15981588}
    15991589
    1600 //The total number of operations is 36
     1590//The total number of operations is 1
    16011591template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umax(bitblock128_t arg1, bitblock128_t arg2)
    16021592{
    1603         return simd_or(simd_and(simd128<(2)>::himask(), simd128<(2)>::umax(arg1, arg2)), simd128<(2)>::umax(simd_and(simd128<(2)>::lomask(), arg1), simd_and(simd128<(2)>::lomask(), arg2)));
     1593        return simd_or(arg1, arg2);
    16041594}
    16051595
     
    16541644}
    16551645
    1656 //The total number of operations is 37
     1646//The total number of operations is 2
    16571647template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::eq(bitblock128_t arg1, bitblock128_t arg2)
    16581648{
    1659         return simd_or(simd_and(simd128<(2)>::himask(), simd128<(2)>::eq(simd_and(simd128<(2)>::himask(), arg1), simd_and(simd128<(2)>::himask(), arg2))), simd_and(simd128<(2)>::lomask(), simd128<(2)>::eq(simd_and(simd128<(2)>::lomask(), arg1), simd_and(simd128<(2)>::lomask(), arg2))));
    1660 }
    1661 
    1662 //The total number of operations is 15
     1649        return simd_not(simd_xor(arg1, arg2));
     1650}
     1651
     1652//The total number of operations is 8
    16631653template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::eq(bitblock128_t arg1, bitblock128_t arg2)
    16641654{
    1665         bitblock128_t tmp = simd_xor(arg1, arg2);
    1666         bitblock128_t tmpAns = simd_and(simd_not(simd128<128>::slli<1>(tmp)), simd_not(tmp));
    1667         return simd128<1>::ifh(simd128<2>::himask(), tmpAns, simd128<128>::srli<1>(tmpAns));
     1655        bitblock128_t tmpAns = simd128<(1)>::eq(arg1, arg2);
     1656        bitblock128_t loMask = simd_and(tmpAns, simd128<2>::srli<(1)>(tmpAns));
     1657        bitblock128_t hiMask = simd128<2>::slli<(1)>(loMask);
     1658        return simd_or(loMask, hiMask);
    16681659}
    16691660
     
    18501841}
    18511842
    1852 //The total number of operations is 46
    1853 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::abs(bitblock128_t arg1)
    1854 {
    1855         bitblock128_t gtMask = simd128<1>::gt(arg1, simd128<1>::constant<0>());
    1856         return simd128<1>::ifh(gtMask, arg1, simd128<1>::sub(gtMask, arg1));
    1857 }
    1858 
    18591843//The total number of operations is 9
    18601844template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1)
     
    19051889}
    19061890
    1907 //The total number of operations is 128
     1891//The total number of operations is 93
    19081892template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::umin_hl(bitblock128_t arg1, bitblock128_t arg2)
    19091893{
     
    19891973}
    19901974
    1991 //The total number of operations is 132
     1975//The total number of operations is 120
    19921976template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::packss(bitblock128_t arg1, bitblock128_t arg2)
    19931977{
     
    21632147}
    21642148
    2165 //The total number of operations is 131
     2149//The total number of operations is 93
    21662150template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::min_hl(bitblock128_t arg1, bitblock128_t arg2)
    21672151{
     
    21692153}
    21702154
    2171 //The total number of operations is 83
     2155//The total number of operations is 82
    21722156template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<4>::min_hl(bitblock128_t arg1, bitblock128_t arg2)
    21732157{
     
    22052189}
    22062190
    2207 //The total number of operations is 99
     2191//The total number of operations is 85
    22082192template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::packus(bitblock128_t arg1, bitblock128_t arg2)
    22092193{
  • trunk/lib/idisa_cpp/idisa_sse4_1.cpp

    r1570 r1573  
    192192template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::popcount(bitblock128_t arg1);
    193193template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::popcount(bitblock128_t arg1);
    194 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::neg(bitblock128_t arg1);
    195194template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::neg(bitblock128_t arg1);
    196195template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::neg(bitblock128_t arg1);
     
    301300template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::add(bitblock128_t arg1, bitblock128_t arg2);
    302301template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::add(bitblock128_t arg1, bitblock128_t arg2);
    303 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::abs(bitblock128_t arg1);
    304302template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1);
    305303template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::abs(bitblock128_t arg1);
     
    533531}
    534532
    535 //The total number of operations is 39
     533//The total number of operations is 1
    536534template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::max(bitblock128_t arg1, bitblock128_t arg2)
    537535{
    538         bitblock128_t high_bit = simd128<1>::constant<(1)>();
    539         return simd_xor(simd128<1>::umax(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
    540 }
    541 
    542 //The total number of operations is 19
     536        return simd_and(arg1, arg2);
     537}
     538
     539//The total number of operations is 18
    543540template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::max(bitblock128_t arg1, bitblock128_t arg2)
    544541{
    545         return simd128<1>::ifh(simd128<2>::himask(), simd_and(arg1, arg2), simd_or(simd_and(arg2, simd128<128>::srli<1>(simd_or(arg1, simd_not(arg2)))), simd_and(arg1, simd128<128>::srli<1>(simd_or(simd_not(arg1), arg2)))));
     542        return simd128<1>::ifh(simd128<2>::lt(arg1, arg2), arg2, arg1);
    546543}
    547544
     
    591588}
    592589
    593 //The total number of operations is 57
     590//The total number of operations is 1
    594591template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::mult(bitblock128_t arg1, bitblock128_t arg2)
    595592{
    596         bitblock128_t loMask = simd128<(2)>::lomask();
    597         bitblock128_t tmpAns1 = simd128<(2)>::mult(simd_and(loMask, arg1), simd_and(loMask, arg2));
    598         bitblock128_t tmpAns2 = simd128<(2)>::mult(simd128<(2)>::srli<1>(arg1), simd128<(2)>::srli<1>(arg2));
    599         return simd128<1>::ifh(loMask, tmpAns1, simd128<(2)>::slli<1>(tmpAns2));
     593        return simd_and(arg1, arg2);
    600594}
    601595
     
    666660}
    667661
    668 //The total number of operations is 42
     662//The total number of operations is 1
    669663template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::gt(bitblock128_t arg1, bitblock128_t arg2)
    670664{
    671         bitblock128_t high_bit = simd128<1>::constant<(1)>();
    672         return simd128<1>::ugt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    673 }
    674 
    675 //The total number of operations is 18
     665        return simd_andc(arg2, arg1);
     666}
     667
     668//The total number of operations is 15
    676669template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::gt(bitblock128_t arg1, bitblock128_t arg2)
    677670{
    678         bitblock128_t tmp = simd_not(arg1);
    679         bitblock128_t tmpAns = simd_or(simd_and(tmp, arg2), simd_and(simd128<128>::slli<1>(simd_and(arg1, simd_not(arg2))), simd_or(tmp, arg2)));
    680         return simd128<1>::ifh(simd128<2>::himask(), tmpAns, simd128<128>::srli<1>(tmpAns));
     671        bitblock128_t hiAns = simd128<(1)>::gt(arg1, arg2);
     672        bitblock128_t loAns = simd128<(1)>::ugt(arg1, arg2);
     673        bitblock128_t mask = simd_and(loAns, simd128<2>::srli<(1)>(simd128<(1)>::eq(arg1, arg2)));
     674        mask = simd_or(mask, simd128<2>::slli<(1)>(mask));
     675        return simd_or(simd128<2>::srai<(1)>(hiAns), mask);
    681676}
    682677
     
    794789}
    795790
    796 //The total number of operations is 40
     791//The total number of operations is 1
    797792template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::ult(bitblock128_t arg1, bitblock128_t arg2)
    798793{
    799         return simd128<1>::ifh(simd128<(2)>::himask(), simd128<(2)>::ult(arg1, simd_and(simd128<(2)>::himask(), arg2)), simd128<(2)>::ult(simd_andc(arg1, simd128<(2)>::himask()), simd_andc(arg2, simd128<(2)>::himask())));
    800 }
    801 
    802 //The total number of operations is 17
     794        return simd_andc(arg2, arg1);
     795}
     796
     797//The total number of operations is 14
    803798template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::ult(bitblock128_t arg1, bitblock128_t arg2)
    804799{
    805         bitblock128_t tmp = simd_not(arg1);
    806         bitblock128_t tmpAns = simd_or(simd_and(tmp, arg2), simd_and(simd128<128>::slli<1>(simd_and(tmp, arg2)), simd_or(tmp, arg2)));
    807         return simd128<1>::ifh(simd128<2>::himask(), tmpAns, simd128<128>::srli<1>(tmpAns));
     800        bitblock128_t tmpAns = simd128<(1)>::ult(arg1, arg2);
     801        bitblock128_t mask = simd_and(tmpAns, simd128<2>::srli<(1)>(simd128<(1)>::eq(arg1, arg2)));
     802        mask = simd_or(mask, simd128<2>::slli<(1)>(mask));
     803        return simd_or(simd128<2>::srai<(1)>(tmpAns), mask);
    808804}
    809805
     
    850846}
    851847
    852 //The total number of operations is 42
     848//The total number of operations is 1
    853849template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::lt(bitblock128_t arg1, bitblock128_t arg2)
    854850{
    855         bitblock128_t high_bit = simd128<1>::constant<(1)>();
    856         return simd128<1>::ult(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    857 }
    858 
    859 //The total number of operations is 18
     851        return simd_andc(arg1, arg2);
     852}
     853
     854//The total number of operations is 15
    860855template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::lt(bitblock128_t arg1, bitblock128_t arg2)
    861856{
    862         bitblock128_t tmp = simd_not(arg2);
    863         bitblock128_t tmpAns = simd_or(simd_and(arg1, tmp), simd_and(simd128<128>::slli<1>(simd_and(simd_not(arg1), arg2)), simd_or(arg1, tmp)));
    864         return simd128<1>::ifh(simd128<2>::himask(), tmpAns, simd128<128>::srli<1>(tmpAns));
     857        bitblock128_t hiAns = simd128<(1)>::lt(arg1, arg2);
     858        bitblock128_t loAns = simd128<(1)>::ult(arg1, arg2);
     859        bitblock128_t mask = simd_and(loAns, simd128<2>::srli<(1)>(simd128<(1)>::eq(arg1, arg2)));
     860        mask = simd_or(mask, simd128<2>::slli<(1)>(mask));
     861        return simd_or(simd128<2>::srai<(1)>(hiAns), mask);
    865862}
    866863
     
    10091006}
    10101007
    1011 //The total number of operations is 40
     1008//The total number of operations is 1
    10121009template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::ugt(bitblock128_t arg1, bitblock128_t arg2)
    10131010{
    1014         return simd128<1>::ifh(simd128<(2)>::himask(), simd128<(2)>::ugt(simd_and(simd128<(2)>::himask(), arg1), arg2), simd128<(2)>::ugt(simd_andc(arg1, simd128<(2)>::himask()), simd_andc(arg2, simd128<(2)>::himask())));
    1015 }
    1016 
    1017 //The total number of operations is 17
     1011        return simd_andc(arg1, arg2);
     1012}
     1013
     1014//The total number of operations is 14
    10181015template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::ugt(bitblock128_t arg1, bitblock128_t arg2)
    10191016{
    1020         bitblock128_t tmp = simd_not(arg2);
    1021         bitblock128_t tmpAns = simd_or(simd_and(arg1, tmp), simd_and(simd128<128>::slli<1>(simd_and(arg1, tmp)), simd_or(arg1, tmp)));
    1022         return simd128<1>::ifh(simd128<2>::himask(), tmpAns, simd128<128>::srli<1>(tmpAns));
     1017        bitblock128_t tmpAns = simd128<(1)>::ugt(arg1, arg2);
     1018        bitblock128_t mask = simd_and(tmpAns, simd128<2>::srli<(1)>(simd128<(1)>::eq(arg1, arg2)));
     1019        mask = simd_or(mask, simd128<2>::slli<(1)>(mask));
     1020        return simd_or(simd128<2>::srai<(1)>(tmpAns), mask);
    10231021}
    10241022
     
    11591157}
    11601158
    1161 //The total number of operations is 1
    1162 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::neg(bitblock128_t arg1)
    1163 {
    1164         return simd128<1>::sub(simd128<1>::constant<0>(), arg1);
    1165 }
    1166 
    11671159//The total number of operations is 8
    11681160template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::neg(bitblock128_t arg1)
     
    14931485}
    14941486
    1495 //The total number of operations is 39
     1487//The total number of operations is 1
    14961488template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::min(bitblock128_t arg1, bitblock128_t arg2)
    14971489{
    1498         bitblock128_t high_bit = simd128<1>::constant<(1)>();
    1499         return simd_xor(simd128<1>::umin(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
    1500 }
    1501 
    1502 //The total number of operations is 19
     1490        return simd_or(arg1, arg2);
     1491}
     1492
     1493//The total number of operations is 18
    15031494template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::min(bitblock128_t arg1, bitblock128_t arg2)
    15041495{
    1505         bitblock128_t high_bit = simd128<2>::constant<(2)>();
    1506         return simd_xor(simd128<2>::umin(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
     1496        return simd128<1>::ifh(simd128<2>::lt(arg1, arg2), arg1, arg2);
    15071497}
    15081498
     
    15521542}
    15531543
    1554 //The total number of operations is 36
     1544//The total number of operations is 1
    15551545template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umin(bitblock128_t arg1, bitblock128_t arg2)
    15561546{
    1557         return simd_or(simd_and(simd128<(2)>::himask(), simd128<(2)>::umin(arg1, arg2)), simd128<(2)>::umin(simd_and(simd128<(2)>::lomask(), arg1), simd_and(simd128<(2)>::lomask(), arg2)));
     1547        return simd_and(arg1, arg2);
    15581548}
    15591549
     
    16061596}
    16071597
    1608 //The total number of operations is 36
     1598//The total number of operations is 1
    16091599template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umax(bitblock128_t arg1, bitblock128_t arg2)
    16101600{
    1611         return simd_or(simd_and(simd128<(2)>::himask(), simd128<(2)>::umax(arg1, arg2)), simd128<(2)>::umax(simd_and(simd128<(2)>::lomask(), arg1), simd_and(simd128<(2)>::lomask(), arg2)));
     1601        return simd_or(arg1, arg2);
    16121602}
    16131603
     
    16601650}
    16611651
    1662 //The total number of operations is 37
     1652//The total number of operations is 2
    16631653template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::eq(bitblock128_t arg1, bitblock128_t arg2)
    16641654{
    1665         return simd_or(simd_and(simd128<(2)>::himask(), simd128<(2)>::eq(simd_and(simd128<(2)>::himask(), arg1), simd_and(simd128<(2)>::himask(), arg2))), simd_and(simd128<(2)>::lomask(), simd128<(2)>::eq(simd_and(simd128<(2)>::lomask(), arg1), simd_and(simd128<(2)>::lomask(), arg2))));
    1666 }
    1667 
    1668 //The total number of operations is 15
     1655        return simd_not(simd_xor(arg1, arg2));
     1656}
     1657
     1658//The total number of operations is 8
    16691659template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::eq(bitblock128_t arg1, bitblock128_t arg2)
    16701660{
    1671         bitblock128_t tmp = simd_xor(arg1, arg2);
    1672         bitblock128_t tmpAns = simd_and(simd_not(simd128<128>::slli<1>(tmp)), simd_not(tmp));
    1673         return simd128<1>::ifh(simd128<2>::himask(), tmpAns, simd128<128>::srli<1>(tmpAns));
     1661        bitblock128_t tmpAns = simd128<(1)>::eq(arg1, arg2);
     1662        bitblock128_t loMask = simd_and(tmpAns, simd128<2>::srli<(1)>(tmpAns));
     1663        bitblock128_t hiMask = simd128<2>::slli<(1)>(loMask);
     1664        return simd_or(loMask, hiMask);
    16741665}
    16751666
     
    18531844}
    18541845
    1855 //The total number of operations is 46
    1856 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::abs(bitblock128_t arg1)
    1857 {
    1858         bitblock128_t gtMask = simd128<1>::gt(arg1, simd128<1>::constant<0>());
    1859         return simd128<1>::ifh(gtMask, arg1, simd128<1>::sub(gtMask, arg1));
    1860 }
    1861 
    18621846//The total number of operations is 9
    18631847template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1)
     
    19051889}
    19061890
    1907 //The total number of operations is 128
     1891//The total number of operations is 93
    19081892template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::umin_hl(bitblock128_t arg1, bitblock128_t arg2)
    19091893{
     
    19891973}
    19901974
    1991 //The total number of operations is 132
     1975//The total number of operations is 120
    19921976template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::packss(bitblock128_t arg1, bitblock128_t arg2)
    19931977{
     
    21632147}
    21642148
    2165 //The total number of operations is 131
     2149//The total number of operations is 93
    21662150template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::min_hl(bitblock128_t arg1, bitblock128_t arg2)
    21672151{
     
    21692153}
    21702154
    2171 //The total number of operations is 83
     2155//The total number of operations is 82
    21722156template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<4>::min_hl(bitblock128_t arg1, bitblock128_t arg2)
    21732157{
     
    22052189}
    22062190
    2207 //The total number of operations is 99
     2191//The total number of operations is 85
    22082192template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::packus(bitblock128_t arg1, bitblock128_t arg2)
    22092193{
  • trunk/lib/idisa_cpp/idisa_sse4_2.cpp

    r1570 r1573  
    192192template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::popcount(bitblock128_t arg1);
    193193template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::popcount(bitblock128_t arg1);
    194 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::neg(bitblock128_t arg1);
    195194template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::neg(bitblock128_t arg1);
    196195template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::neg(bitblock128_t arg1);
     
    301300template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::add(bitblock128_t arg1, bitblock128_t arg2);
    302301template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::add(bitblock128_t arg1, bitblock128_t arg2);
    303 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::abs(bitblock128_t arg1);
    304302template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1);
    305303template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::abs(bitblock128_t arg1);
     
    533531}
    534532
    535 //The total number of operations is 39
     533//The total number of operations is 1
    536534template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::max(bitblock128_t arg1, bitblock128_t arg2)
    537535{
    538         bitblock128_t high_bit = simd128<1>::constant<(1)>();
    539         return simd_xor(simd128<1>::umax(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
    540 }
    541 
    542 //The total number of operations is 19
     536        return simd_and(arg1, arg2);
     537}
     538
     539//The total number of operations is 18
    543540template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::max(bitblock128_t arg1, bitblock128_t arg2)
    544541{
    545         return simd128<1>::ifh(simd128<2>::himask(), simd_and(arg1, arg2), simd_or(simd_and(arg2, simd128<128>::srli<1>(simd_or(arg1, simd_not(arg2)))), simd_and(arg1, simd128<128>::srli<1>(simd_or(simd_not(arg1), arg2)))));
     542        return simd128<1>::ifh(simd128<2>::lt(arg1, arg2), arg2, arg1);
    546543}
    547544
     
    587584}
    588585
    589 //The total number of operations is 57
     586//The total number of operations is 1
    590587template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::mult(bitblock128_t arg1, bitblock128_t arg2)
    591588{
    592         bitblock128_t loMask = simd128<(2)>::lomask();
    593         bitblock128_t tmpAns1 = simd128<(2)>::mult(simd_and(loMask, arg1), simd_and(loMask, arg2));
    594         bitblock128_t tmpAns2 = simd128<(2)>::mult(simd128<(2)>::srli<1>(arg1), simd128<(2)>::srli<1>(arg2));
    595         return simd128<1>::ifh(loMask, tmpAns1, simd128<(2)>::slli<1>(tmpAns2));
     589        return simd_and(arg1, arg2);
    596590}
    597591
     
    662656}
    663657
    664 //The total number of operations is 42
     658//The total number of operations is 1
    665659template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::gt(bitblock128_t arg1, bitblock128_t arg2)
    666660{
    667         bitblock128_t high_bit = simd128<1>::constant<(1)>();
    668         return simd128<1>::ugt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    669 }
    670 
    671 //The total number of operations is 18
     661        return simd_andc(arg2, arg1);
     662}
     663
     664//The total number of operations is 15
    672665template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::gt(bitblock128_t arg1, bitblock128_t arg2)
    673666{
    674         bitblock128_t tmp = simd_not(arg1);
    675         bitblock128_t tmpAns = simd_or(simd_and(tmp, arg2), simd_and(simd128<128>::slli<1>(simd_and(arg1, simd_not(arg2))), simd_or(tmp, arg2)));
    676         return simd128<1>::ifh(simd128<2>::himask(), tmpAns, simd128<128>::srli<1>(tmpAns));
     667        bitblock128_t hiAns = simd128<(1)>::gt(arg1, arg2);
     668        bitblock128_t loAns = simd128<(1)>::ugt(arg1, arg2);
     669        bitblock128_t mask = simd_and(loAns, simd128<2>::srli<(1)>(simd128<(1)>::eq(arg1, arg2)));
     670        mask = simd_or(mask, simd128<2>::slli<(1)>(mask));
     671        return simd_or(simd128<2>::srai<(1)>(hiAns), mask);
    677672}
    678673
     
    786781}
    787782
    788 //The total number of operations is 40
     783//The total number of operations is 1
    789784template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::ult(bitblock128_t arg1, bitblock128_t arg2)
    790785{
    791         return simd128<1>::ifh(simd128<(2)>::himask(), simd128<(2)>::ult(arg1, simd_and(simd128<(2)>::himask(), arg2)), simd128<(2)>::ult(simd_andc(arg1, simd128<(2)>::himask()), simd_andc(arg2, simd128<(2)>::himask())));
    792 }
    793 
    794 //The total number of operations is 17
     786        return simd_andc(arg2, arg1);
     787}
     788
     789//The total number of operations is 14
    795790template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::ult(bitblock128_t arg1, bitblock128_t arg2)
    796791{
    797         bitblock128_t tmp = simd_not(arg1);
    798         bitblock128_t tmpAns = simd_or(simd_and(tmp, arg2), simd_and(simd128<128>::slli<1>(simd_and(tmp, arg2)), simd_or(tmp, arg2)));
    799         return simd128<1>::ifh(simd128<2>::himask(), tmpAns, simd128<128>::srli<1>(tmpAns));
     792        bitblock128_t tmpAns = simd128<(1)>::ult(arg1, arg2);
     793        bitblock128_t mask = simd_and(tmpAns, simd128<2>::srli<(1)>(simd128<(1)>::eq(arg1, arg2)));
     794        mask = simd_or(mask, simd128<2>::slli<(1)>(mask));
     795        return simd_or(simd128<2>::srai<(1)>(tmpAns), mask);
    800796}
    801797
     
    843839}
    844840
    845 //The total number of operations is 42
     841//The total number of operations is 1
    846842template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::lt(bitblock128_t arg1, bitblock128_t arg2)
    847843{
    848         bitblock128_t high_bit = simd128<1>::constant<(1)>();
    849         return simd128<1>::ult(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    850 }
    851 
    852 //The total number of operations is 18
     844        return simd_andc(arg1, arg2);
     845}
     846
     847//The total number of operations is 15
    853848template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::lt(bitblock128_t arg1, bitblock128_t arg2)
    854849{
    855         bitblock128_t tmp = simd_not(arg2);
    856         bitblock128_t tmpAns = simd_or(simd_and(arg1, tmp), simd_and(simd128<128>::slli<1>(simd_and(simd_not(arg1), arg2)), simd_or(arg1, tmp)));
    857         return simd128<1>::ifh(simd128<2>::himask(), tmpAns, simd128<128>::srli<1>(tmpAns));
     850        bitblock128_t hiAns = simd128<(1)>::lt(arg1, arg2);
     851        bitblock128_t loAns = simd128<(1)>::ult(arg1, arg2);
     852        bitblock128_t mask = simd_and(loAns, simd128<2>::srli<(1)>(simd128<(1)>::eq(arg1, arg2)));
     853        mask = simd_or(mask, simd128<2>::slli<(1)>(mask));
     854        return simd_or(simd128<2>::srai<(1)>(hiAns), mask);
    858855}
    859856
     
    1001998}
    1002999
    1003 //The total number of operations is 40
     1000//The total number of operations is 1
    10041001template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::ugt(bitblock128_t arg1, bitblock128_t arg2)
    10051002{
    1006         return simd128<1>::ifh(simd128<(2)>::himask(), simd128<(2)>::ugt(simd_and(simd128<(2)>::himask(), arg1), arg2), simd128<(2)>::ugt(simd_andc(arg1, simd128<(2)>::himask()), simd_andc(arg2, simd128<(2)>::himask())));
    1007 }
    1008 
    1009 //The total number of operations is 17
     1003        return simd_andc(arg1, arg2);
     1004}
     1005
     1006//The total number of operations is 14
    10101007template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::ugt(bitblock128_t arg1, bitblock128_t arg2)
    10111008{
    1012         bitblock128_t tmp = simd_not(arg2);
    1013         bitblock128_t tmpAns = simd_or(simd_and(arg1, tmp), simd_and(simd128<128>::slli<1>(simd_and(arg1, tmp)), simd_or(arg1, tmp)));
    1014         return simd128<1>::ifh(simd128<2>::himask(), tmpAns, simd128<128>::srli<1>(tmpAns));
     1009        bitblock128_t tmpAns = simd128<(1)>::ugt(arg1, arg2);
     1010        bitblock128_t mask = simd_and(tmpAns, simd128<2>::srli<(1)>(simd128<(1)>::eq(arg1, arg2)));
     1011        mask = simd_or(mask, simd128<2>::slli<(1)>(mask));
     1012        return simd_or(simd128<2>::srai<(1)>(tmpAns), mask);
    10151013}
    10161014
     
    11491147}
    11501148
    1151 //The total number of operations is 1
    1152 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::neg(bitblock128_t arg1)
    1153 {
    1154         return simd128<1>::sub(simd128<1>::constant<0>(), arg1);
    1155 }
    1156 
    11571149//The total number of operations is 8
    11581150template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::neg(bitblock128_t arg1)
     
    14831475}
    14841476
    1485 //The total number of operations is 39
     1477//The total number of operations is 1
    14861478template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::min(bitblock128_t arg1, bitblock128_t arg2)
    14871479{
    1488         bitblock128_t high_bit = simd128<1>::constant<(1)>();
    1489         return simd_xor(simd128<1>::umin(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
    1490 }
    1491 
    1492 //The total number of operations is 19
     1480        return simd_or(arg1, arg2);
     1481}
     1482
     1483//The total number of operations is 18
    14931484template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::min(bitblock128_t arg1, bitblock128_t arg2)
    14941485{
    1495         bitblock128_t high_bit = simd128<2>::constant<(2)>();
    1496         return simd_xor(simd128<2>::umin(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
     1486        return simd128<1>::ifh(simd128<2>::lt(arg1, arg2), arg1, arg2);
    14971487}
    14981488
     
    15381528}
    15391529
    1540 //The total number of operations is 36
     1530//The total number of operations is 1
    15411531template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umin(bitblock128_t arg1, bitblock128_t arg2)
    15421532{
    1543         return simd_or(simd_and(simd128<(2)>::himask(), simd128<(2)>::umin(arg1, arg2)), simd128<(2)>::umin(simd_and(simd128<(2)>::lomask(), arg1), simd_and(simd128<(2)>::lomask(), arg2)));
     1533        return simd_and(arg1, arg2);
    15441534}
    15451535
     
    15901580}
    15911581
    1592 //The total number of operations is 36
     1582//The total number of operations is 1
    15931583template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umax(bitblock128_t arg1, bitblock128_t arg2)
    15941584{
    1595         return simd_or(simd_and(simd128<(2)>::himask(), simd128<(2)>::umax(arg1, arg2)), simd128<(2)>::umax(simd_and(simd128<(2)>::lomask(), arg1), simd_and(simd128<(2)>::lomask(), arg2)));
     1585        return simd_or(arg1, arg2);
    15961586}
    15971587
     
    16421632}
    16431633
    1644 //The total number of operations is 37
     1634//The total number of operations is 2
    16451635template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::eq(bitblock128_t arg1, bitblock128_t arg2)
    16461636{
    1647         return simd_or(simd_and(simd128<(2)>::himask(), simd128<(2)>::eq(simd_and(simd128<(2)>::himask(), arg1), simd_and(simd128<(2)>::himask(), arg2))), simd_and(simd128<(2)>::lomask(), simd128<(2)>::eq(simd_and(simd128<(2)>::lomask(), arg1), simd_and(simd128<(2)>::lomask(), arg2))));
    1648 }
    1649 
    1650 //The total number of operations is 15
     1637        return simd_not(simd_xor(arg1, arg2));
     1638}
     1639
     1640//The total number of operations is 8
    16511641template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::eq(bitblock128_t arg1, bitblock128_t arg2)
    16521642{
    1653         bitblock128_t tmp = simd_xor(arg1, arg2);
    1654         bitblock128_t tmpAns = simd_and(simd_not(simd128<128>::slli<1>(tmp)), simd_not(tmp));
    1655         return simd128<1>::ifh(simd128<2>::himask(), tmpAns, simd128<128>::srli<1>(tmpAns));
     1643        bitblock128_t tmpAns = simd128<(1)>::eq(arg1, arg2);
     1644        bitblock128_t loMask = simd_and(tmpAns, simd128<2>::srli<(1)>(tmpAns));
     1645        bitblock128_t hiMask = simd128<2>::slli<(1)>(loMask);
     1646        return simd_or(loMask, hiMask);
    16561647}
    16571648
     
    18351826}
    18361827
    1837 //The total number of operations is 46
    1838 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::abs(bitblock128_t arg1)
    1839 {
    1840         bitblock128_t gtMask = simd128<1>::gt(arg1, simd128<1>::constant<0>());
    1841         return simd128<1>::ifh(gtMask, arg1, simd128<1>::sub(gtMask, arg1));
    1842 }
    1843 
    18441828//The total number of operations is 9
    18451829template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1)
     
    18871871}
    18881872
    1889 //The total number of operations is 128
     1873//The total number of operations is 93
    18901874template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::umin_hl(bitblock128_t arg1, bitblock128_t arg2)
    18911875{
     
    19711955}
    19721956
    1973 //The total number of operations is 132
     1957//The total number of operations is 120
    19741958template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::packss(bitblock128_t arg1, bitblock128_t arg2)
    19751959{
     
    21452129}
    21462130
    2147 //The total number of operations is 131
     2131//The total number of operations is 93
    21482132template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::min_hl(bitblock128_t arg1, bitblock128_t arg2)
    21492133{
     
    21512135}
    21522136
    2153 //The total number of operations is 83
     2137//The total number of operations is 82
    21542138template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<4>::min_hl(bitblock128_t arg1, bitblock128_t arg2)
    21552139{
     
    21872171}
    21882172
    2189 //The total number of operations is 99
     2173//The total number of operations is 85
    21902174template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::packus(bitblock128_t arg1, bitblock128_t arg2)
    21912175{
  • trunk/lib/idisa_cpp/idisa_ssse3.cpp

    r1570 r1573  
    192192template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::popcount(bitblock128_t arg1);
    193193template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::popcount(bitblock128_t arg1);
    194 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::neg(bitblock128_t arg1);
    195194template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::neg(bitblock128_t arg1);
    196195template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::neg(bitblock128_t arg1);
     
    301300template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::add(bitblock128_t arg1, bitblock128_t arg2);
    302301template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::add(bitblock128_t arg1, bitblock128_t arg2);
    303 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::abs(bitblock128_t arg1);
    304302template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1);
    305303template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::abs(bitblock128_t arg1);
     
    533531}
    534532
    535 //The total number of operations is 39
     533//The total number of operations is 1
    536534template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::max(bitblock128_t arg1, bitblock128_t arg2)
    537535{
    538         bitblock128_t high_bit = simd128<1>::constant<(1)>();
    539         return simd_xor(simd128<1>::umax(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
    540 }
    541 
    542 //The total number of operations is 19
     536        return simd_and(arg1, arg2);
     537}
     538
     539//The total number of operations is 18
    543540template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::max(bitblock128_t arg1, bitblock128_t arg2)
    544541{
    545         return simd128<1>::ifh(simd128<2>::himask(), simd_and(arg1, arg2), simd_or(simd_and(arg2, simd128<128>::srli<1>(simd_or(arg1, simd_not(arg2)))), simd_and(arg1, simd128<128>::srli<1>(simd_or(simd_not(arg1), arg2)))));
     542        return simd128<1>::ifh(simd128<2>::lt(arg1, arg2), arg2, arg1);
    546543}
    547544
     
    587584}
    588585
    589 //The total number of operations is 57
     586//The total number of operations is 1
    590587template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::mult(bitblock128_t arg1, bitblock128_t arg2)
    591588{
    592         bitblock128_t loMask = simd128<(2)>::lomask();
    593         bitblock128_t tmpAns1 = simd128<(2)>::mult(simd_and(loMask, arg1), simd_and(loMask, arg2));
    594         bitblock128_t tmpAns2 = simd128<(2)>::mult(simd128<(2)>::srli<1>(arg1), simd128<(2)>::srli<1>(arg2));
    595         return simd128<1>::ifh(loMask, tmpAns1, simd128<(2)>::slli<1>(tmpAns2));
     589        return simd_and(arg1, arg2);
    596590}
    597591
     
    665659}
    666660
    667 //The total number of operations is 42
     661//The total number of operations is 1
    668662template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::gt(bitblock128_t arg1, bitblock128_t arg2)
    669663{
    670         bitblock128_t high_bit = simd128<1>::constant<(1)>();
    671         return simd128<1>::ugt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    672 }
    673 
    674 //The total number of operations is 18
     664        return simd_andc(arg2, arg1);
     665}
     666
     667//The total number of operations is 15
    675668template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::gt(bitblock128_t arg1, bitblock128_t arg2)
    676669{
    677         bitblock128_t tmp = simd_not(arg1);
    678         bitblock128_t tmpAns = simd_or(simd_and(tmp, arg2), simd_and(simd128<128>::slli<1>(simd_and(arg1, simd_not(arg2))), simd_or(tmp, arg2)));
    679         return simd128<1>::ifh(simd128<2>::himask(), tmpAns, simd128<128>::srli<1>(tmpAns));
     670        bitblock128_t hiAns = simd128<(1)>::gt(arg1, arg2);
     671        bitblock128_t loAns = simd128<(1)>::ugt(arg1, arg2);
     672        bitblock128_t mask = simd_and(loAns, simd128<2>::srli<(1)>(simd128<(1)>::eq(arg1, arg2)));
     673        mask = simd_or(mask, simd128<2>::slli<(1)>(mask));
     674        return simd_or(simd128<2>::srai<(1)>(hiAns), mask);
    680675}
    681676
     
    793788}
    794789
    795 //The total number of operations is 40
     790//The total number of operations is 1
    796791template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::ult(bitblock128_t arg1, bitblock128_t arg2)
    797792{
    798         return simd128<1>::ifh(simd128<(2)>::himask(), simd128<(2)>::ult(arg1, simd_and(simd128<(2)>::himask(), arg2)), simd128<(2)>::ult(simd_andc(arg1, simd128<(2)>::himask()), simd_andc(arg2, simd128<(2)>::himask())));
    799 }
    800 
    801 //The total number of operations is 17
     793        return simd_andc(arg2, arg1);
     794}
     795
     796//The total number of operations is 14
    802797template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::ult(bitblock128_t arg1, bitblock128_t arg2)
    803798{
    804         bitblock128_t tmp = simd_not(arg1);
    805         bitblock128_t tmpAns = simd_or(simd_and(tmp, arg2), simd_and(simd128<128>::slli<1>(simd_and(tmp, arg2)), simd_or(tmp, arg2)));
    806         return simd128<1>::ifh(simd128<2>::himask(), tmpAns, simd128<128>::srli<1>(tmpAns));
     799        bitblock128_t tmpAns = simd128<(1)>::ult(arg1, arg2);
     800        bitblock128_t mask = simd_and(tmpAns, simd128<2>::srli<(1)>(simd128<(1)>::eq(arg1, arg2)));
     801        mask = simd_or(mask, simd128<2>::slli<(1)>(mask));
     802        return simd_or(simd128<2>::srai<(1)>(tmpAns), mask);
    807803}
    808804
     
    849845}
    850846
    851 //The total number of operations is 42
     847//The total number of operations is 1
    852848template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::lt(bitblock128_t arg1, bitblock128_t arg2)
    853849{
    854         bitblock128_t high_bit = simd128<1>::constant<(1)>();
    855         return simd128<1>::ult(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
    856 }
    857 
    858 //The total number of operations is 18
     850        return simd_andc(arg1, arg2);
     851}
     852
     853//The total number of operations is 15
    859854template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::lt(bitblock128_t arg1, bitblock128_t arg2)
    860855{
    861         bitblock128_t tmp = simd_not(arg2);
    862         bitblock128_t tmpAns = simd_or(simd_and(arg1, tmp), simd_and(simd128<128>::slli<1>(simd_and(simd_not(arg1), arg2)), simd_or(arg1, tmp)));
    863         return simd128<1>::ifh(simd128<2>::himask(), tmpAns, simd128<128>::srli<1>(tmpAns));
     856        bitblock128_t hiAns = simd128<(1)>::lt(arg1, arg2);
     857        bitblock128_t loAns = simd128<(1)>::ult(arg1, arg2);
     858        bitblock128_t mask = simd_and(loAns, simd128<2>::srli<(1)>(simd128<(1)>::eq(arg1, arg2)));
     859        mask = simd_or(mask, simd128<2>::slli<(1)>(mask));
     860        return simd_or(simd128<2>::srai<(1)>(hiAns), mask);
    864861}
    865862
     
    10081005}
    10091006
    1010 //The total number of operations is 40
     1007//The total number of operations is 1
    10111008template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::ugt(bitblock128_t arg1, bitblock128_t arg2)
    10121009{
    1013         return simd128<1>::ifh(simd128<(2)>::himask(), simd128<(2)>::ugt(simd_and(simd128<(2)>::himask(), arg1), arg2), simd128<(2)>::ugt(simd_andc(arg1, simd128<(2)>::himask()), simd_andc(arg2, simd128<(2)>::himask())));
    1014 }
    1015 
    1016 //The total number of operations is 17
     1010        return simd_andc(arg1, arg2);
     1011}
     1012
     1013//The total number of operations is 14
    10171014template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::ugt(bitblock128_t arg1, bitblock128_t arg2)
    10181015{
    1019         bitblock128_t tmp = simd_not(arg2);
    1020         bitblock128_t tmpAns = simd_or(simd_and(arg1, tmp), simd_and(simd128<128>::slli<1>(simd_and(arg1, tmp)), simd_or(arg1, tmp)));
    1021         return simd128<1>::ifh(simd128<2>::himask(), tmpAns, simd128<128>::srli<1>(tmpAns));
     1016        bitblock128_t tmpAns = simd128<(1)>::ugt(arg1, arg2);
     1017        bitblock128_t mask = simd_and(tmpAns, simd128<2>::srli<(1)>(simd128<(1)>::eq(arg1, arg2)));
     1018        mask = simd_or(mask, simd128<2>::slli<(1)>(mask));
     1019        return simd_or(simd128<2>::srai<(1)>(tmpAns), mask);
    10221020}
    10231021
     
    11581156}
    11591157
    1160 //The total number of operations is 1
    1161 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::neg(bitblock128_t arg1)
    1162 {
    1163         return simd128<1>::sub(simd128<1>::constant<0>(), arg1);
    1164 }
    1165 
    11661158//The total number of operations is 8
    11671159template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::neg(bitblock128_t arg1)
     
    14921484}
    14931485
    1494 //The total number of operations is 39
     1486//The total number of operations is 1
    14951487template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::min(bitblock128_t arg1, bitblock128_t arg2)
    14961488{
    1497         bitblock128_t high_bit = simd128<1>::constant<(1)>();
    1498         return simd_xor(simd128<1>::umin(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
    1499 }
    1500 
    1501 //The total number of operations is 19
     1489        return simd_or(arg1, arg2);
     1490}
     1491
     1492//The total number of operations is 18
    15021493template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::min(bitblock128_t arg1, bitblock128_t arg2)
    15031494{
    1504         bitblock128_t high_bit = simd128<2>::constant<(2)>();
    1505         return simd_xor(simd128<2>::umin(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
     1495        return simd128<1>::ifh(simd128<2>::lt(arg1, arg2), arg1, arg2);
    15061496}
    15071497
     
    15471537}
    15481538
    1549 //The total number of operations is 36
     1539//The total number of operations is 1
    15501540template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umin(bitblock128_t arg1, bitblock128_t arg2)
    15511541{
    1552         return simd_or(simd_and(simd128<(2)>::himask(), simd128<(2)>::umin(arg1, arg2)), simd128<(2)>::umin(simd_and(simd128<(2)>::lomask(), arg1), simd_and(simd128<(2)>::lomask(), arg2)));
     1542        return simd_and(arg1, arg2);
    15531543}
    15541544
     
    16031593}
    16041594
    1605 //The total number of operations is 36
     1595//The total number of operations is 1
    16061596template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umax(bitblock128_t arg1, bitblock128_t arg2)
    16071597{
    1608         return simd_or(simd_and(simd128<(2)>::himask(), simd128<(2)>::umax(arg1, arg2)), simd128<(2)>::umax(simd_and(simd128<(2)>::lomask(), arg1), simd_and(simd128<(2)>::lomask(), arg2)));
     1598        return simd_or(arg1, arg2);
    16091599}
    16101600
     
    16591649}
    16601650
    1661 //The total number of operations is 37
     1651//The total number of operations is 2
    16621652template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::eq(bitblock128_t arg1, bitblock128_t arg2)
    16631653{
    1664         return simd_or(simd_and(simd128<(2)>::himask(), simd128<(2)>::eq(simd_and(simd128<(2)>::himask(), arg1), simd_and(simd128<(2)>::himask(), arg2))), simd_and(simd128<(2)>::lomask(), simd128<(2)>::eq(simd_and(simd128<(2)>::lomask(), arg1), simd_and(simd128<(2)>::lomask(), arg2))));
    1665 }
    1666 
    1667 //The total number of operations is 15
     1654        return simd_not(simd_xor(arg1, arg2));
     1655}
     1656
     1657//The total number of operations is 8
    16681658template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::eq(bitblock128_t arg1, bitblock128_t arg2)
    16691659{
    1670         bitblock128_t tmp = simd_xor(arg1, arg2);
    1671         bitblock128_t tmpAns = simd_and(simd_not(simd128<128>::slli<1>(tmp)), simd_not(tmp));
    1672         return simd128<1>::ifh(simd128<2>::himask(), tmpAns, simd128<128>::srli<1>(tmpAns));
     1660        bitblock128_t tmpAns = simd128<(1)>::eq(arg1, arg2);
     1661        bitblock128_t loMask = simd_and(tmpAns, simd128<2>::srli<(1)>(tmpAns));
     1662        bitblock128_t hiMask = simd128<2>::slli<(1)>(loMask);
     1663        return simd_or(loMask, hiMask);
    16731664}
    16741665
     
    18551846}
    18561847
    1857 //The total number of operations is 46
    1858 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::abs(bitblock128_t arg1)
    1859 {
    1860         bitblock128_t gtMask = simd128<1>::gt(arg1, simd128<1>::constant<0>());
    1861         return simd128<1>::ifh(gtMask, arg1, simd128<1>::sub(gtMask, arg1));
    1862 }
    1863 
    18641848//The total number of operations is 9
    18651849template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1)
     
    19071891}
    19081892
    1909 //The total number of operations is 128
     1893//The total number of operations is 93
    19101894template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::umin_hl(bitblock128_t arg1, bitblock128_t arg2)
    19111895{
     
    19911975}
    19921976
    1993 //The total number of operations is 132
     1977//The total number of operations is 120
    19941978template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::packss(bitblock128_t arg1, bitblock128_t arg2)
    19951979{
     
    21652149}
    21662150
    2167 //The total number of operations is 131
     2151//The total number of operations is 93
    21682152template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::min_hl(bitblock128_t arg1, bitblock128_t arg2)
    21692153{
     
    21712155}
    21722156
    2173 //The total number of operations is 83
     2157//The total number of operations is 82
    21742158template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<4>::min_hl(bitblock128_t arg1, bitblock128_t arg2)
    21752159{
     
    22072191}
    22082192
    2209 //The total number of operations is 99
     2193//The total number of operations is 85
    22102194template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::packus(bitblock128_t arg1, bitblock128_t arg2)
    22112195{
  • trunk/libgen/Library_Generator/StrategyPool.py

    r1570 r1573  
    88                "body":r'''return simd_xor(arg1, arg2)''',
    99                "Ops":["simd_add", "simd_sub"],
     10                "Fws":[1],
     11                "Platforms":[configure.ALL],
     12                },
     13                "umin1":\
     14                {
     15                "body":r'''return simd_and(arg1, arg2)''',
     16                "Ops":["simd_max", "simd_umin", "simd_mult"],
     17                "Fws":[1],
     18                "Platforms":[configure.ALL],
     19                },
     20                "umax1":\
     21                {
     22                "body":r'''return simd_or(arg1, arg2)''',
     23                "Ops":["simd_min", "simd_umax"],
     24                "Fws":[1],
     25                "Platforms":[configure.ALL],
     26                },
     27                "ult1":\
     28                {
     29                "body":r'''return simd_andc(arg2, arg1)''',
     30                "Ops":["simd_ult", "simd_gt"],
     31                "Fws":[1],
     32                "Platforms":[configure.ALL],
     33                },
     34                "ugt1":\
     35                {
     36                "body":r'''return simd_andc(arg1, arg2)''',
     37                "Ops":["simd_lt", "simd_ugt"],
     38                "Fws":[1],
     39                "Platforms":[configure.ALL],
     40                },
     41
     42                "eq1":\
     43                {
     44                "body":r'''return simd_not(simd_xor(arg1, arg2))''',
     45                "Ops":["simd_eq"],
     46                "Fws":[1],
     47                "Platforms":[configure.ALL],
     48                },
     49                "ctz1":\
     50                {
     51                "body":r'''return simd_not(arg1)''',
     52                "Ops":["simd_ctz"],
    1053                "Fws":[1],
    1154                "Platforms":[configure.ALL],
     
    390433return simd_ifh(1, gtMask, arg1, simd_sub(fw, gtMask, arg1))''',
    391434                "Ops":["simd_abs"],
    392                 "Fws":[-1],
     435                "Fws":range(2, curRegSize+1),
    393436                "Platforms":[configure.ALL],
    394437                },
     
    400443return simd_ifh(1, eqMask, arg1, simd_sub(fw, eqMask, arg1))''',
    401444                "Ops":["simd_abs"],
    402                 "Fws":[-1],
     445                "Fws":range(2, curRegSize+1),
    403446                "Platforms":[configure.ALL],
    404447                },
     
    409452return simd_sub(fw, simd_constant(fw, 0), arg1)''',
    410453                "Ops":["simd_neg"],
    411                 "Fws":[-1],
     454                "Fws":range(2, curRegSize+1),
    412455                "Platforms":[configure.ALL],
    413456                },
  • trunk/libgen/Library_Tester/utility.h

    r1570 r1573  
    1 #include "idisa_sse4_2.h"
     1#include "idisa_sse2.h"
    22#define USE_SSE
    33typedef __m128i SIMD_type;
Note: See TracChangeset for help on using the changeset viewer.