Ignore:
Timestamp:
Oct 23, 2011, 9:43:33 AM (8 years ago)
Author:
cameron
Message:

bitblock::srl, sll, srli, slli implementations

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/idisa_cpp/idisa_sse4_1.cpp

    r1573 r1580  
    //Static member declarations; the enclosing class header lies outside this hunk.
    //In this changeset the lomask() declaration moves from after min() (old line 33)
    //to after srl() (new line 30); no signature is changed.
    2828        static IDISA_ALWAYS_INLINE bitblock128_t add_hl(bitblock128_t arg1);
    2929        static IDISA_ALWAYS_INLINE bitblock128_t srl(bitblock128_t arg1, bitblock128_t shift_mask);
     30        static IDISA_ALWAYS_INLINE bitblock128_t lomask();
    3031        static IDISA_ALWAYS_INLINE bitblock128_t umin(bitblock128_t arg1, bitblock128_t arg2);
    3132        template <uint64_t val> static IDISA_ALWAYS_INLINE bitblock128_t constant();
    3233        static IDISA_ALWAYS_INLINE bitblock128_t min(bitblock128_t arg1, bitblock128_t arg2);
    33         static IDISA_ALWAYS_INLINE bitblock128_t lomask();
    3434        static IDISA_ALWAYS_INLINE bitblock128_t umax(bitblock128_t arg1, bitblock128_t arg2);
    3535        static IDISA_ALWAYS_INLINE bitblock128_t abs(bitblock128_t arg1);
     
    8989{
    9090public:
     //Static member declarations of a class whose header lies outside this hunk.
     //NOTE(review): the commit message mentions bitblock::srl/sll/srli/slli, so this is
     //presumably the 128-bit bitblock class - confirm against the full file.
     //Added by this changeset: sll, srli<sh>, srl and slli<sh>; the rest is unchanged context.
     91        static IDISA_ALWAYS_INLINE bitblock128_t sll(bitblock128_t arg1, bitblock128_t arg2);
    9192        static IDISA_ALWAYS_INLINE bitblock128_t load_unaligned(bitblock128_t* arg1);
     93        template <uint64_t sh> static IDISA_ALWAYS_INLINE bitblock128_t srli(bitblock128_t arg1);
     94        static IDISA_ALWAYS_INLINE bitblock128_t srl(bitblock128_t arg1, bitblock128_t arg2);
    9295        static IDISA_ALWAYS_INLINE void store_aligned(bitblock128_t* arg1, bitblock128_t arg2);
    9396        static IDISA_ALWAYS_INLINE bool all(bitblock128_t arg1);
    9497        static IDISA_ALWAYS_INLINE bool any(bitblock128_t arg1);
    9598        static IDISA_ALWAYS_INLINE uint64_t popcount(bitblock128_t arg1);
     99        template <uint64_t sh> static IDISA_ALWAYS_INLINE bitblock128_t slli(bitblock128_t arg1);
    96100        static IDISA_ALWAYS_INLINE bitblock128_t load_aligned(bitblock128_t* arg1);
    97101        static IDISA_ALWAYS_INLINE void store_unaligned(bitblock128_t* arg1, bitblock128_t arg2);
     
    //Forward declarations of explicit specializations. The seven simd128<N>::lomask
    //declarations (N = 2..128) are removed from this position (old lines 233-239),
    //between the srl and constant declarations.
    231235template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srl(bitblock128_t arg1, bitblock128_t shift_mask);
    232236template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srl(bitblock128_t arg1, bitblock128_t shift_mask);
    233 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::lomask();
    234 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::lomask();
    235 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::lomask();
    236 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::lomask();
    237 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::lomask();
    238 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::lomask();
    239 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lomask();
    240237template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::constant();
    241238template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::constant();
     
    //Forward declarations of explicit specializations. The seven simd128<N>::lomask
    //declarations (N = 2..128) are added at this position (new lines 253-259),
    //between the min and umin declarations.
    254251template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::min(bitblock128_t arg1, bitblock128_t arg2);
    255252template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::min(bitblock128_t arg1, bitblock128_t arg2);
     253template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::lomask();
     254template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::lomask();
     255template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::lomask();
     256template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::lomask();
     257template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::lomask();
     258template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::lomask();
     259template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lomask();
    256260template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umin(bitblock128_t arg1, bitblock128_t arg2);
    257261template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::umin(bitblock128_t arg1, bitblock128_t arg2);
     
    //Forward declarations of explicit specializations. After the umin declarations, the eight
    //simd128<N>::umax declarations (old lines 264-271) are removed and the seven
    //simd128<N>::abs declarations (new lines 268-274) are added in their place.
    262266template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::umin(bitblock128_t arg1, bitblock128_t arg2);
    263267template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::umin(bitblock128_t arg1, bitblock128_t arg2);
    264 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umax(bitblock128_t arg1, bitblock128_t arg2);
    265 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::umax(bitblock128_t arg1, bitblock128_t arg2);
    266 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::umax(bitblock128_t arg1, bitblock128_t arg2);
    267 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::umax(bitblock128_t arg1, bitblock128_t arg2);
    268 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::umax(bitblock128_t arg1, bitblock128_t arg2);
    269 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::umax(bitblock128_t arg1, bitblock128_t arg2);
    270 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::umax(bitblock128_t arg1, bitblock128_t arg2);
    271 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::umax(bitblock128_t arg1, bitblock128_t arg2);
     268template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1);
     269template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::abs(bitblock128_t arg1);
     270template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::abs(bitblock128_t arg1);
     271template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::abs(bitblock128_t arg1);
     272template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::abs(bitblock128_t arg1);
     273template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::abs(bitblock128_t arg1);
     274template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::abs(bitblock128_t arg1);
    272275template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::eq(bitblock128_t arg1, bitblock128_t arg2);
    273276template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::eq(bitblock128_t arg1, bitblock128_t arg2);
     
    //Forward declarations of explicit specializations. After the add declarations, the seven
    //simd128<N>::abs declarations (old lines 302-308) are removed and the eight
    //simd128<N>::umax declarations (new lines 305-312) are added in their place.
    300303template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::add(bitblock128_t arg1, bitblock128_t arg2);
    301304template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::add(bitblock128_t arg1, bitblock128_t arg2);
    302 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1);
    303 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::abs(bitblock128_t arg1);
    304 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::abs(bitblock128_t arg1);
    305 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::abs(bitblock128_t arg1);
    306 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::abs(bitblock128_t arg1);
    307 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::abs(bitblock128_t arg1);
    308 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::abs(bitblock128_t arg1);
     305template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umax(bitblock128_t arg1, bitblock128_t arg2);
     306template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::umax(bitblock128_t arg1, bitblock128_t arg2);
     307template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::umax(bitblock128_t arg1, bitblock128_t arg2);
     308template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::umax(bitblock128_t arg1, bitblock128_t arg2);
     309template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::umax(bitblock128_t arg1, bitblock128_t arg2);
     310template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::umax(bitblock128_t arg1, bitblock128_t arg2);
     311template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::umax(bitblock128_t arg1, bitblock128_t arg2);
     312template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::umax(bitblock128_t arg1, bitblock128_t arg2);
    309313template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::umin_hl(bitblock128_t arg1, bitblock128_t arg2);
    310314template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<4>::umin_hl(bitblock128_t arg1, bitblock128_t arg2);
     
    //Forward declarations of explicit specializations. The seven mvmd128<N>::slli<sh>
    //declarations (old lines 447-453) are removed from this position, between the
    //fill16 and fill4 declarations.
    445449template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16);
    446450template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16);
    447 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::slli(bitblock128_t arg1);
    448 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::slli(bitblock128_t arg1);
    449 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::slli(bitblock128_t arg1);
    450 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::slli(bitblock128_t arg1);
    451 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::slli(bitblock128_t arg1);
    452 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::slli(bitblock128_t arg1);
    453 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::slli(bitblock128_t arg1);
    454451template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4);
    455452template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4);
     
    //Forward declarations of explicit specializations. The seven mvmd128<N>::slli<sh>
    //declarations (new lines 478-484) are added at this position, between the
    //dslli and fill8 declarations.
    479476template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::dslli(bitblock128_t arg1, bitblock128_t arg2);
    480477template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::dslli(bitblock128_t arg1, bitblock128_t arg2);
     478template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::slli(bitblock128_t arg1);
     479template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::slli(bitblock128_t arg1);
     480template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::slli(bitblock128_t arg1);
     481template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::slli(bitblock128_t arg1);
     482template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::slli(bitblock128_t arg1);
     483template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::slli(bitblock128_t arg1);
     484template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::slli(bitblock128_t arg1);
    481485template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8);
    482486template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8);
     
    13961400
    13971401//The total number of operations is 0
    1398 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::lomask()
         //lomask for 2-bit fields: forwards to simd128<2>::constant<1>() (1 = binary 01).
         //NOTE(review): constant<>() is defined outside this view; presumably it fills every
         //2-bit field with that value - confirm.
    1399 {
    1400         return simd128<2>::constant<(1)>();
    1401 }
    1402 
    1403 //The total number of operations is 0
         //lomask for 4-bit fields: forwards to simd128<4>::constant<3>() (3 = binary 0011).
         //NOTE(review): constant<>() is defined outside this view; presumably it fills every
         //4-bit field with that value - confirm.
    1404 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::lomask()
    1405 {
    1406         return simd128<4>::constant<(3)>();
    1407 }
    1408 
    1409 //The total number of operations is 0
         //lomask for 8-bit fields: forwards to simd128<8>::constant<15>() (15 = 0x0F).
         //NOTE(review): constant<>() is defined outside this view; presumably it fills every
         //8-bit field with that value - confirm.
    1410 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::lomask()
    1411 {
    1412         return simd128<8>::constant<(15)>();
    1413 }
    1414 
    1415 //The total number of operations is 0
         //lomask for 16-bit fields: forwards to simd128<16>::constant<255>() (255 = 0x00FF).
         //NOTE(review): constant<>() is defined outside this view; presumably it fills every
         //16-bit field with that value - confirm.
    1416 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::lomask()
    1417 {
    1418         return simd128<16>::constant<(255)>();
    1419 }
    1420 
    1421 //The total number of operations is 0
         //lomask for 32-bit fields: forwards to simd128<32>::constant<65535>() (65535 = 0x0000FFFF).
         //NOTE(review): constant<>() is defined outside this view; presumably it fills every
         //32-bit field with that value - confirm.
    1422 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::lomask()
    1423 {
    1424         return simd128<32>::constant<(65535)>();
    1425 }
    1426 
    1427 //The total number of operations is 0
         //lomask for 64-bit fields, built directly with _mm_set_epi32. Its arguments run from the
         //highest 32-bit lane to the lowest, so lanes 0 and 2 are all-ones: the low 32 bits of
         //each 64-bit half of the register are set.
    1428 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::lomask()
    1429 {
    1430         return _mm_set_epi32((int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1));
    1431 }
    1432 
    1433 //The total number of operations is 0
         //lomask for the single 128-bit field, built directly with _mm_set_epi32. Its arguments
         //run from the highest 32-bit lane to the lowest, so lanes 0 and 1 are all-ones: the low
         //64 bits of the register are set.
    1434 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lomask()
    1435 {
    1436         return _mm_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1));
    1437 }
    1438 
    1439 //The total number of operations is 0
    14401402template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::constant()
    14411403{
     
    15421504}
    15431505
     1506//The total number of operations is 0
         //lomask for 2-bit fields: forwards to simd128<2>::constant<1>() (1 = binary 01).
         //NOTE(review): constant<>() is defined outside this view; presumably it fills every
         //2-bit field with that value - confirm.
     1507template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::lomask()
     1508{
     1509        return simd128<2>::constant<(1)>();
     1510}
     1511
     1512//The total number of operations is 0
         //lomask for 4-bit fields: forwards to simd128<4>::constant<3>() (3 = binary 0011).
         //NOTE(review): constant<>() is defined outside this view; presumably it fills every
         //4-bit field with that value - confirm.
     1513template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::lomask()
     1514{
     1515        return simd128<4>::constant<(3)>();
     1516}
     1517
     1518//The total number of operations is 0
         //lomask for 8-bit fields: forwards to simd128<8>::constant<15>() (15 = 0x0F).
         //NOTE(review): constant<>() is defined outside this view; presumably it fills every
         //8-bit field with that value - confirm.
     1519template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::lomask()
     1520{
     1521        return simd128<8>::constant<(15)>();
     1522}
     1523
     1524//The total number of operations is 0
         //lomask for 16-bit fields: forwards to simd128<16>::constant<255>() (255 = 0x00FF).
         //NOTE(review): constant<>() is defined outside this view; presumably it fills every
         //16-bit field with that value - confirm.
     1525template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::lomask()
     1526{
     1527        return simd128<16>::constant<(255)>();
     1528}
     1529
     1530//The total number of operations is 0
         //lomask for 32-bit fields: forwards to simd128<32>::constant<65535>() (65535 = 0x0000FFFF).
         //NOTE(review): constant<>() is defined outside this view; presumably it fills every
         //32-bit field with that value - confirm.
     1531template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::lomask()
     1532{
     1533        return simd128<32>::constant<(65535)>();
     1534}
     1535
     1536//The total number of operations is 0
         //lomask for 64-bit fields, built directly with _mm_set_epi32. Its arguments run from the
         //highest 32-bit lane to the lowest, so lanes 0 and 2 are all-ones: the low 32 bits of
         //each 64-bit half of the register are set.
     1537template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::lomask()
     1538{
     1539        return _mm_set_epi32((int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1));
     1540}
     1541
     1542//The total number of operations is 0
         //lomask for the single 128-bit field, built directly with _mm_set_epi32. Its arguments
         //run from the highest 32-bit lane to the lowest, so lanes 0 and 1 are all-ones: the low
         //64 bits of the register are set.
     1543template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lomask()
     1544{
     1545        return _mm_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1));
     1546}
     1547
    15441548//The total number of operations is 1
    15451549template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umin(bitblock128_t arg1, bitblock128_t arg2)
     
    15961600}
    15971601
     1602//The total number of operations is 9
         //abs for 2-bit fields. Under simd128<2>::himask() the result is
         //arg1 & slli<1>(~arg1) (128-bit shift); elsewhere it is arg1, combined by simd128<1>::ifh.
         //NOTE(review): assuming ifh(m, a, b) takes a where m is set and b elsewhere, the high bit
         //of each field becomes hi & ~lo and the low bit is kept, so 0b11 maps to 0b01 and 0b10
         //stays 0b10 - confirm against the ifh/slli definitions, which are outside this view.
     1603template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1)
     1604{
     1605        return simd128<1>::ifh(simd128<2>::himask(), simd_and(arg1, simd128<128>::slli<1>(simd_not(arg1))), arg1);
     1606}
     1607
     1608//The total number of operations is 19
         //abs for 4-bit fields: compares arg1 against the zero constant with simd128<4>::gt, then
         //selects arg1 under that mask and simd128<4>::sub(gtMask, arg1) elsewhere.
         //NOTE(review): presumably gt yields all-ones in fields where arg1 > 0 and zero otherwise,
         //so the subtraction is only used where gtMask is 0 and amounts to 0 - arg1 - confirm.
     1609template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::abs(bitblock128_t arg1)
     1610{
     1611        bitblock128_t gtMask = simd128<4>::gt(arg1, simd128<4>::constant<0>());
     1612        return simd128<1>::ifh(gtMask, arg1, simd128<4>::sub(gtMask, arg1));
     1613}
     1614
     1615//The total number of operations is 1
         //abs for 8-bit fields: maps directly onto the _mm_abs_epi8 intrinsic
         //(absolute value of each packed signed 8-bit element).
     1616template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::abs(bitblock128_t arg1)
     1617{
     1618        return _mm_abs_epi8(arg1);
     1619}
     1620
     1621//The total number of operations is 1
         //abs for 16-bit fields: maps directly onto the _mm_abs_epi16 intrinsic
         //(absolute value of each packed signed 16-bit element).
     1622template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::abs(bitblock128_t arg1)
     1623{
     1624        return _mm_abs_epi16(arg1);
     1625}
     1626
     1627//The total number of operations is 1
         //abs for 32-bit fields: maps directly onto the _mm_abs_epi32 intrinsic
         //(absolute value of each packed signed 32-bit element).
     1628template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::abs(bitblock128_t arg1)
     1629{
     1630        return _mm_abs_epi32(arg1);
     1631}
     1632
     1633//The total number of operations is 9
         //abs for 64-bit fields, built from the 32-bit abs. eqMask is the 64-bit equality of arg1
         //with a copy whose bits under simd128<64>::himask() come from simd128<32>::abs(arg1).
         //The result keeps arg1 where eqMask is set and uses simd128<64>::sub(eqMask, arg1) elsewhere.
         //NOTE(review): presumably eqMask is all-ones for fields whose upper half is unchanged by
         //the 32-bit abs (non-negative fields) and zero otherwise, so the subtraction is 0 - arg1
         //for the remaining fields - confirm against ifh/sub, which are outside this view.
     1634template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::abs(bitblock128_t arg1)
     1635{
     1636        bitblock128_t eqMask = simd128<64>::eq(simd128<1>::ifh(simd128<64>::himask(), simd128<(32)>::abs(arg1), arg1), arg1);
     1637        return simd128<1>::ifh(eqMask, arg1, simd128<64>::sub(eqMask, arg1));
     1638}
     1639
     1640//The total number of operations is 37
         //abs for the 128-bit field, built from the 64-bit abs. eqMask is the 128-bit equality of
         //arg1 with a copy whose bits under simd128<128>::himask() come from simd128<64>::abs(arg1).
         //The result keeps arg1 where eqMask is set and uses simd128<128>::sub(eqMask, arg1) elsewhere.
         //NOTE(review): presumably eqMask is all-ones when the upper half is unchanged by the
         //64-bit abs (non-negative value) and zero otherwise, so the subtraction is 0 - arg1 in
         //the negative case - confirm against ifh/sub, which are outside this view.
     1641template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::abs(bitblock128_t arg1)
     1642{
     1643        bitblock128_t eqMask = simd128<128>::eq(simd128<1>::ifh(simd128<128>::himask(), simd128<(64)>::abs(arg1), arg1), arg1);
     1644        return simd128<1>::ifh(eqMask, arg1, simd128<128>::sub(eqMask, arg1));
     1645}
     1646
     1647//The total number of operations is 2
         //eq for 1-bit fields: simd_not(simd_xor(arg1, arg2)).
         //NOTE(review): simd_not/simd_xor are defined outside this view; presumably bitwise
         //NOT and XOR, which makes this a bitwise XNOR - confirm.
     1648template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::eq(bitblock128_t arg1, bitblock128_t arg2)
     1649{
     1650        return simd_not(simd_xor(arg1, arg2));
     1651}
     1652
     1653//The total number of operations is 8
         //eq for 2-bit fields, built from the 1-bit eq: tmpAns is the per-bit comparison, loMask
         //ANDs it with itself shifted right by 1 within 2-bit fields, hiMask shifts loMask back
         //left by 1, and the two masks are ORed.
         //NOTE(review): presumably this leaves both bits of a field set only when both bit
         //positions compared equal - confirm against srli/slli, which are outside this view.
     1654template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::eq(bitblock128_t arg1, bitblock128_t arg2)
     1655{
     1656        bitblock128_t tmpAns = simd128<(1)>::eq(arg1, arg2);
     1657        bitblock128_t loMask = simd_and(tmpAns, simd128<2>::srli<(1)>(tmpAns));
     1658        bitblock128_t hiMask = simd128<2>::slli<(1)>(loMask);
     1659        return simd_or(loMask, hiMask);
     1660}
     1661
     1662//The total number of operations is 9
         //eq for 4-bit fields, built from the 8-bit eq: the operands are masked with
         //simd128<8>::himask() and compared, then masked with simd128<8>::lomask() and compared;
         //each comparison result is ANDed with the same mask and the two halves are ORed.
     1663template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::eq(bitblock128_t arg1, bitblock128_t arg2)
     1664{
     1665        return simd_or(simd_and(simd128<(8)>::himask(), simd128<(8)>::eq(simd_and(simd128<(8)>::himask(), arg1), simd_and(simd128<(8)>::himask(), arg2))), simd_and(simd128<(8)>::lomask(), simd128<(8)>::eq(simd_and(simd128<(8)>::lomask(), arg1), simd_and(simd128<(8)>::lomask(), arg2))));
     1666}
     1667
     1668//The total number of operations is 1
         //eq for 8-bit fields: maps directly onto _mm_cmpeq_epi8, which yields all-ones in
         //each 8-bit element where the operands are equal and zero elsewhere.
     1669template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::eq(bitblock128_t arg1, bitblock128_t arg2)
     1670{
     1671        return _mm_cmpeq_epi8(arg1, arg2);
     1672}
     1673
     1674//The total number of operations is 1
         //eq for 16-bit fields: maps directly onto _mm_cmpeq_epi16, which yields all-ones in
         //each 16-bit element where the operands are equal and zero elsewhere.
     1675template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::eq(bitblock128_t arg1, bitblock128_t arg2)
     1676{
     1677        return _mm_cmpeq_epi16(arg1, arg2);
     1678}
     1679
     1680//The total number of operations is 1
         //eq for 32-bit fields: maps directly onto _mm_cmpeq_epi32, which yields all-ones in
         //each 32-bit element where the operands are equal and zero elsewhere.
     1681template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::eq(bitblock128_t arg1, bitblock128_t arg2)
     1682{
     1683        return _mm_cmpeq_epi32(arg1, arg2);
     1684}
     1685
     1686//The total number of operations is 1
         //eq for 64-bit fields: maps directly onto _mm_cmpeq_epi64 (an SSE4.1 intrinsic), which
         //yields all-ones in each 64-bit element where the operands are equal and zero elsewhere.
     1687template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::eq(bitblock128_t arg1, bitblock128_t arg2)
     1688{
     1689        return _mm_cmpeq_epi64(arg1, arg2);
     1690}
     1691
     1692//The total number of operations is 11
         //eq for the 128-bit field, built from the 64-bit eq: tmpAns is the per-half comparison,
         //loMask ANDs it with itself shifted right by 64, hiMask shifts loMask back left by 64,
         //and the two masks are ORed.
         //NOTE(review): presumably the result is all-ones only when both 64-bit halves compared
         //equal - confirm against simd128<128>::srli/slli, which are outside this view.
     1693template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::eq(bitblock128_t arg1, bitblock128_t arg2)
     1694{
     1695        bitblock128_t tmpAns = simd128<(64)>::eq(arg1, arg2);
     1696        bitblock128_t loMask = simd_and(tmpAns, simd128<128>::srli<(64)>(tmpAns));
     1697        bitblock128_t hiMask = simd128<128>::slli<(64)>(loMask);
     1698        return simd_or(loMask, hiMask);
     1699}
     1700
     1701//The total number of operations is 4
         //srai for 2-bit fields: sh == 0 returns arg1 unchanged; every other sh yields
         //(arg1 & simd128<2>::himask()) | simd128<2>::srli<1>(arg1), i.e. the bits under himask
         //are kept and ORed with arg1 shifted right by one within each 2-bit field.
     1702template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::srai(bitblock128_t arg1)
     1703{
     1704        return ((sh == 0) ? arg1 : simd_or(simd_and(simd128<2>::himask(), arg1), simd128<2>::srli<1>(arg1)));
     1705}
     1706
     1707//The total number of operations is 10
         //srai for 4-bit fields. The shift count is clamped to 3 when sh >= 4; tmp is the
         //simd128<4>::srli result for that count. The result ORs tmp with
         //0 - (tmp & constant<1 << (4 - count - 1)>), computed with simd128<4>::sub.
         //NOTE(review): sh is a uint64_t, so the `sh < 0` arm can never be taken.
         //NOTE(review): presumably the masked bit is the shifted-down sign bit and the
         //subtraction fills the bits above it - confirm against srli/sub/constant.
     1708template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::srai(bitblock128_t arg1)
     1709{
     1710        bitblock128_t tmp = simd128<4>::srli<((sh >= 4) ? (3) : ((sh < 0) ? 0 : sh))>(arg1);
     1711        return simd_or(tmp, simd128<4>::sub(simd128<4>::constant<0>(), simd_and(simd128<4>::constant<(1<<((4-((sh >= 4) ? (3) : ((sh < 0) ? 0 : sh)))-1))>(), tmp)));
     1712}
     1713
     1714//The total number of operations is 5
         //srai for 8-bit fields. The shift count is clamped to 7 when sh >= 8; tmp is the
         //simd128<8>::srli result for that count. The result ORs tmp with
         //0 - (tmp & constant<1 << (8 - count - 1)>), computed with simd128<8>::sub.
         //NOTE(review): sh is a uint64_t, so the `sh < 0` arm can never be taken.
         //NOTE(review): presumably the masked bit is the shifted-down sign bit and the
         //subtraction fills the bits above it - confirm against srli/sub/constant.
     1715template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::srai(bitblock128_t arg1)
     1716{
     1717        bitblock128_t tmp = simd128<8>::srli<((sh >= 8) ? (7) : ((sh < 0) ? 0 : sh))>(arg1);
     1718        return simd_or(tmp, simd128<8>::sub(simd128<8>::constant<0>(), simd_and(simd128<8>::constant<(1<<((8-((sh >= 8) ? (7) : ((sh < 0) ? 0 : sh)))-1))>(), tmp)));
     1719}
     1720
     1721//The total number of operations is 1
         //srai for 16-bit fields: maps directly onto _mm_srai_epi16 (arithmetic right shift of
         //each packed 16-bit element), passing sh cast to int32_t as the shift count.
     1722template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::srai(bitblock128_t arg1)
     1723{
     1724        return _mm_srai_epi16(arg1, (int32_t)(sh));
     1725}
     1726
     1727//The total number of operations is 1
         //srai for 32-bit fields: maps directly onto _mm_srai_epi32 (arithmetic right shift of
         //each packed 32-bit element), passing sh cast to int32_t as the shift count.
     1728template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::srai(bitblock128_t arg1)
     1729{
     1730        return _mm_srai_epi32(arg1, (int32_t)(sh));
     1731}
     1732
     1733//The total number of operations is 5
         //srai for 64-bit fields. The shift count is clamped to 63 when sh >= 64; tmp is the
         //simd128<64>::srli result for that count. The result ORs tmp with 0 - (tmp & bit), where
         //bit is simd128<64>::constant<1>() shifted left by (64 - count - 1) via simd128<64>::slli.
         //NOTE(review): sh is a uint64_t, so the `sh < 0` arm can never be taken.
         //NOTE(review): presumably the masked bit is the shifted-down sign bit and the
         //subtraction fills the bits above it - confirm against srli/slli/sub.
     1734template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srai(bitblock128_t arg1)
     1735{
     1736        bitblock128_t tmp = simd128<64>::srli<((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh))>(arg1);
     1737        return simd_or(tmp, simd128<64>::sub(simd128<64>::constant<0>(), simd_and(simd128<64>::slli<((64-((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh)))-1)>(simd128<64>::constant<1>()), tmp)));
     1738}
     1739
     1740//The total number of operations is 21
         //srai for the 128-bit field. The shift count is clamped to 127 when sh >= 128; tmp is the
         //simd128<128>::srli result for that count. The result ORs tmp with 0 - (tmp & bit), where
         //bit is simd128<128>::constant<1>() shifted left by (128 - count - 1) via simd128<128>::slli.
         //NOTE(review): sh is a uint64_t, so the `sh < 0` arm can never be taken.
         //NOTE(review): presumably the masked bit is the shifted-down sign bit and the
         //subtraction fills the bits above it - confirm against srli/slli/sub.
     1741template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srai(bitblock128_t arg1)
     1742{
     1743        bitblock128_t tmp = simd128<128>::srli<((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh))>(arg1);
     1744        return simd_or(tmp, simd128<128>::sub(simd128<128>::constant<0>(), simd_and(simd128<128>::slli<((128-((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh)))-1)>(simd128<128>::constant<1>()), tmp)));
     1745}
     1746
     1747//The total number of operations is 0
         //himask for 2-bit fields: forwards to simd128<2>::constant<2>() (2 = binary 10).
         //NOTE(review): constant<>() is defined outside this view; presumably it fills every
         //2-bit field with that value - confirm.
     1748template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::himask()
     1749{
     1750        return simd128<2>::constant<(2)>();
     1751}
     1752
     1753//The total number of operations is 0
         //himask for 4-bit fields: forwards to simd128<4>::constant<12>() (12 = binary 1100).
         //NOTE(review): constant<>() is defined outside this view; presumably it fills every
         //4-bit field with that value - confirm.
     1754template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::himask()
     1755{
     1756        return simd128<4>::constant<(12)>();
     1757}
     1758
     1759//The total number of operations is 0
         //himask for 8-bit fields: forwards to simd128<8>::constant<240>() (240 = 0xF0).
         //NOTE(review): constant<>() is defined outside this view; presumably it fills every
         //8-bit field with that value - confirm.
     1760template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::himask()
     1761{
     1762        return simd128<8>::constant<(240)>();
     1763}
     1764
     1765//The total number of operations is 0
         //himask for 16-bit fields: forwards to simd128<16>::constant<65280>() (65280 = 0xFF00).
         //NOTE(review): constant<>() is defined outside this view; presumably it fills every
         //16-bit field with that value - confirm.
     1766template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::himask()
     1767{
     1768        return simd128<16>::constant<(65280)>();
     1769}
     1770
     1771//The total number of operations is 0
         //himask for 32-bit fields: forwards to simd128<32>::constant<-65536>(); the low 32 bits
         //of -65536 in two's complement are 0xFFFF0000.
         //NOTE(review): constant<>() is declared in this file with a uint64_t template parameter,
         //so the negative literal relies on signed-to-unsigned conversion. Under C++11 and later
         //that is a narrowing conversion in a template argument and may be rejected by the
         //compiler; 4294901760 (0xFFFF0000) would avoid it, provided constant<>() only uses the
         //low 32 bits - confirm before changing.
     1772template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::himask()
     1773{
     1774        return simd128<32>::constant<-65536>();
     1775}
     1776
     1777//The total number of operations is 0
         //himask for 64-bit fields, built directly with _mm_set_epi32. Its arguments run from the
         //highest 32-bit lane to the lowest, so lanes 1 and 3 are all-ones: the high 32 bits of
         //each 64-bit half of the register are set.
     1778template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::himask()
     1779{
     1780        return _mm_set_epi32((int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0));
     1781}
     1782
     1783//The total number of operations is 0
         //himask for the single 128-bit field, built directly with _mm_set_epi32. Its arguments
         //run from the highest 32-bit lane to the lowest, so lanes 2 and 3 are all-ones: the high
         //64 bits of the register are set.
     1784template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::himask()
     1785{
     1786        return _mm_set_epi32((int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0));
     1787}
     1788
     1789//The total number of operations is 1
         //add for 1-bit fields: returns simd_xor(arg1, arg2).
         //NOTE(review): simd_xor is defined outside this view; presumably bitwise XOR, which is
         //addition modulo 2 on each bit - confirm.
     1790template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::add(bitblock128_t arg1, bitblock128_t arg2)
     1791{
     1792        return simd_xor(arg1, arg2);
     1793}
     1794
     1795//The total number of operations is 10
         //add for 2-bit fields: tmp = arg1 ^ arg2. Under simd128<2>::himask() the result is
         //tmp ^ slli<1>(arg1 & arg2) (128-bit shift); elsewhere it is tmp.
         //NOTE(review): presumably arg1 & arg2 is the carry out of each bit, and the himask
         //selection applies it only to the high bit of each field so no carry crosses a field
         //boundary - confirm against ifh/slli, which are outside this view.
     1796template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::add(bitblock128_t arg1, bitblock128_t arg2)
     1797{
     1798        bitblock128_t tmp = simd_xor(arg1, arg2);
     1799        return simd128<1>::ifh(simd128<2>::himask(), simd_xor(tmp, simd128<128>::slli<1>(simd_and(arg1, arg2))), tmp);
     1800}
     1801
     1802//The total number of operations is 6
         //add for 4-bit fields, built from the 8-bit add: under simd128<8>::himask() the result is
         //simd128<8>::add(arg1, arg2 & himask); elsewhere it is simd128<8>::add(arg1, arg2).
         //NOTE(review): presumably masking arg2 zeroes its low nibble for the high-nibble sum -
         //confirm the exact carry behavior against ifh, which is outside this view.
     1803template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::add(bitblock128_t arg1, bitblock128_t arg2)
     1804{
     1805        return simd128<1>::ifh(simd128<(8)>::himask(), simd128<(8)>::add(arg1, simd_and(simd128<(8)>::himask(), arg2)), simd128<(8)>::add(arg1, arg2));
     1806}
     1807
     1808//The total number of operations is 1
         //add for 8-bit fields: maps directly onto _mm_add_epi8
         //(wrap-around addition of packed 8-bit elements).
     1809template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::add(bitblock128_t arg1, bitblock128_t arg2)
     1810{
     1811        return _mm_add_epi8(arg1, arg2);
     1812}
     1813
     1814//The total number of operations is 1
         //add for 16-bit fields: maps directly onto _mm_add_epi16
         //(wrap-around addition of packed 16-bit elements).
     1815template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::add(bitblock128_t arg1, bitblock128_t arg2)
     1816{
     1817        return _mm_add_epi16(arg1, arg2);
     1818}
     1819
     1820//The total number of operations is 1
         //add for 32-bit fields: maps directly onto _mm_add_epi32
         //(wrap-around addition of packed 32-bit elements).
     1821template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::add(bitblock128_t arg1, bitblock128_t arg2)
     1822{
     1823        return _mm_add_epi32(arg1, arg2);
     1824}
     1825
     1826//The total number of operations is 1
         //add for 64-bit fields: maps directly onto _mm_add_epi64
         //(wrap-around addition of packed 64-bit elements).
     1827template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::add(bitblock128_t arg1, bitblock128_t arg2)
     1828{
     1829        return _mm_add_epi64(arg1, arg2);
     1830}
     1831
     1832//The total number of operations is 11
         //add for the 128-bit field, built from the 64-bit add:
         //  partial   = per-half 64-bit sum;
         //  carryMask = (arg1 & arg2) | simd_andc(arg1 ^ arg2, partial);
         //  carry     = carryMask shifted right by 63 within 64-bit fields, then shifted left by
         //              64 across the whole register;
         //  result    = 64-bit add of partial and carry.
         //NOTE(review): presumably simd_andc(x, y) is x & ~y, so bit 63 of carryMask is the
         //carry out of each half and the low half's carry is added into the high half - confirm
         //against simd_andc/srli/slli, which are outside this view.
     1833template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::add(bitblock128_t arg1, bitblock128_t arg2)
     1834{
     1835        bitblock128_t partial = simd128<(64)>::add(arg1, arg2);
     1836        bitblock128_t carryMask = simd_or(simd_and(arg1, arg2), simd_andc(simd_xor(arg1, arg2), partial));
     1837        bitblock128_t carry = simd128<128>::slli<(64)>(simd128<(64)>::srli<(63)>(carryMask));
     1838        return simd128<(64)>::add(partial, carry);
     1839}
     1840
    15981841//The total number of operations is 1
    15991842template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umax(bitblock128_t arg1, bitblock128_t arg2)
     
    16481891        bitblock128_t eqMask2 = simd128<128>::srli<(64)>(simd128<(64)>::eq(tmpAns, arg2));
    16491892        return simd128<1>::ifh(simd128<128>::himask(), tmpAns, simd128<1>::ifh(eqMask1, simd128<1>::ifh(eqMask2, tmpAns, arg1), arg2));
    1650 }
    1651 
    1652 //The total number of operations is 2
         //eq for 1-bit fields: simd_not(simd_xor(arg1, arg2)).
         //NOTE(review): simd_not/simd_xor are defined outside this view; presumably bitwise
         //NOT and XOR, which makes this a bitwise XNOR - confirm.
    1653 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::eq(bitblock128_t arg1, bitblock128_t arg2)
    1654 {
    1655         return simd_not(simd_xor(arg1, arg2));
    1656 }
    1657 
    1658 //The total number of operations is 8
         //eq for 2-bit fields, built from the 1-bit eq: tmpAns is the per-bit comparison, loMask
         //ANDs it with itself shifted right by 1 within 2-bit fields, hiMask shifts loMask back
         //left by 1, and the two masks are ORed.
         //NOTE(review): presumably this leaves both bits of a field set only when both bit
         //positions compared equal - confirm against srli/slli, which are outside this view.
    1659 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::eq(bitblock128_t arg1, bitblock128_t arg2)
    1660 {
    1661         bitblock128_t tmpAns = simd128<(1)>::eq(arg1, arg2);
    1662         bitblock128_t loMask = simd_and(tmpAns, simd128<2>::srli<(1)>(tmpAns));
    1663         bitblock128_t hiMask = simd128<2>::slli<(1)>(loMask);
    1664         return simd_or(loMask, hiMask);
    1665 }
    1666 
    1667 //The total number of operations is 9
         //eq for 4-bit fields, built from the 8-bit eq: the operands are masked with
         //simd128<8>::himask() and compared, then masked with simd128<8>::lomask() and compared;
         //each comparison result is ANDed with the same mask and the two halves are ORed.
    1668 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::eq(bitblock128_t arg1, bitblock128_t arg2)
    1669 {
    1670         return simd_or(simd_and(simd128<(8)>::himask(), simd128<(8)>::eq(simd_and(simd128<(8)>::himask(), arg1), simd_and(simd128<(8)>::himask(), arg2))), simd_and(simd128<(8)>::lomask(), simd128<(8)>::eq(simd_and(simd128<(8)>::lomask(), arg1), simd_and(simd128<(8)>::lomask(), arg2))));
    1671 }
    1672 
    1673 //The total number of operations is 1
         //eq for 8-bit fields: maps directly onto _mm_cmpeq_epi8, which yields all-ones in
         //each 8-bit element where the operands are equal and zero elsewhere.
    1674 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::eq(bitblock128_t arg1, bitblock128_t arg2)
    1675 {
    1676         return _mm_cmpeq_epi8(arg1, arg2);
    1677 }
    1678 
    1679 //The total number of operations is 1
         //eq for 16-bit fields: maps directly onto _mm_cmpeq_epi16, which yields all-ones in
         //each 16-bit element where the operands are equal and zero elsewhere.
    1680 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::eq(bitblock128_t arg1, bitblock128_t arg2)
    1681 {
    1682         return _mm_cmpeq_epi16(arg1, arg2);
    1683 }
    1684 
    1685 //The total number of operations is 1
         //eq for 32-bit fields: maps directly onto _mm_cmpeq_epi32, which yields all-ones in
         //each 32-bit element where the operands are equal and zero elsewhere.
    1686 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::eq(bitblock128_t arg1, bitblock128_t arg2)
    1687 {
    1688         return _mm_cmpeq_epi32(arg1, arg2);
    1689 }
    1690 
    1691 //The total number of operations is 1
         //eq for 64-bit fields: maps directly onto _mm_cmpeq_epi64 (an SSE4.1 intrinsic), which
         //yields all-ones in each 64-bit element where the operands are equal and zero elsewhere.
    1692 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::eq(bitblock128_t arg1, bitblock128_t arg2)
    1693 {
    1694         return _mm_cmpeq_epi64(arg1, arg2);
    1695 }
    1696 
    1697 //The total number of operations is 11
         //eq for the 128-bit field, built from the 64-bit eq: tmpAns is the per-half comparison,
         //loMask ANDs it with itself shifted right by 64, hiMask shifts loMask back left by 64,
         //and the two masks are ORed.
         //NOTE(review): presumably the result is all-ones only when both 64-bit halves compared
         //equal - confirm against simd128<128>::srli/slli, which are outside this view.
    1698 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::eq(bitblock128_t arg1, bitblock128_t arg2)
    1699 {
    1700         bitblock128_t tmpAns = simd128<(64)>::eq(arg1, arg2);
    1701         bitblock128_t loMask = simd_and(tmpAns, simd128<128>::srli<(64)>(tmpAns));
    1702         bitblock128_t hiMask = simd128<128>::slli<(64)>(loMask);
    1703         return simd_or(loMask, hiMask);
    1704 }
    1705 
    1706 //The total number of operations is 4
         //srai for 2-bit fields: sh == 0 returns arg1 unchanged; every other sh yields
         //(arg1 & simd128<2>::himask()) | simd128<2>::srli<1>(arg1), i.e. the bits under himask
         //are kept and ORed with arg1 shifted right by one within each 2-bit field.
    1707 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::srai(bitblock128_t arg1)
    1708 {
    1709         return ((sh == 0) ? arg1 : simd_or(simd_and(simd128<2>::himask(), arg1), simd128<2>::srli<1>(arg1)));
    1710 }
    1711 
    1712 //The total number of operations is 10
         //srai for 4-bit fields. The shift count is clamped to 3 when sh >= 4; tmp is the
         //simd128<4>::srli result for that count. The result ORs tmp with
         //0 - (tmp & constant<1 << (4 - count - 1)>), computed with simd128<4>::sub.
         //NOTE(review): sh is a uint64_t, so the `sh < 0` arm can never be taken.
         //NOTE(review): presumably the masked bit is the shifted-down sign bit and the
         //subtraction fills the bits above it - confirm against srli/sub/constant.
    1713 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::srai(bitblock128_t arg1)
    1714 {
    1715         bitblock128_t tmp = simd128<4>::srli<((sh >= 4) ? (3) : ((sh < 0) ? 0 : sh))>(arg1);
    1716         return simd_or(tmp, simd128<4>::sub(simd128<4>::constant<0>(), simd_and(simd128<4>::constant<(1<<((4-((sh >= 4) ? (3) : ((sh < 0) ? 0 : sh)))-1))>(), tmp)));
    1717 }
    1718 
    1719 //The total number of operations is 5
         //srai for 8-bit fields. The shift count is clamped to 7 when sh >= 8; tmp is the
         //simd128<8>::srli result for that count. The result ORs tmp with
         //0 - (tmp & constant<1 << (8 - count - 1)>), computed with simd128<8>::sub.
         //NOTE(review): sh is a uint64_t, so the `sh < 0` arm can never be taken.
         //NOTE(review): presumably the masked bit is the shifted-down sign bit and the
         //subtraction fills the bits above it - confirm against srli/sub/constant.
    1720 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::srai(bitblock128_t arg1)
    1721 {
    1722         bitblock128_t tmp = simd128<8>::srli<((sh >= 8) ? (7) : ((sh < 0) ? 0 : sh))>(arg1);
    1723         return simd_or(tmp, simd128<8>::sub(simd128<8>::constant<0>(), simd_and(simd128<8>::constant<(1<<((8-((sh >= 8) ? (7) : ((sh < 0) ? 0 : sh)))-1))>(), tmp)));
    1724 }
    1725 
    1726 //The total number of operations is 1
         //srai for 16-bit fields: maps directly onto _mm_srai_epi16 (arithmetic right shift of
         //each packed 16-bit element), passing sh cast to int32_t as the shift count.
    1727 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::srai(bitblock128_t arg1)
    1728 {
    1729         return _mm_srai_epi16(arg1, (int32_t)(sh));
    1730 }
    1731 
    1732 //The total number of operations is 1
         //srai for 32-bit fields: maps directly onto _mm_srai_epi32 (arithmetic right shift of
         //each packed 32-bit element), passing sh cast to int32_t as the shift count.
    1733 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::srai(bitblock128_t arg1)
    1734 {
    1735         return _mm_srai_epi32(arg1, (int32_t)(sh));
    1736 }
    1737 
    1738 //The total number of operations is 5
         //srai for 64-bit fields. The shift count is clamped to 63 when sh >= 64; tmp is the
         //simd128<64>::srli result for that count. The result ORs tmp with 0 - (tmp & bit), where
         //bit is simd128<64>::constant<1>() shifted left by (64 - count - 1) via simd128<64>::slli.
         //NOTE(review): sh is a uint64_t, so the `sh < 0` arm can never be taken.
         //NOTE(review): presumably the masked bit is the shifted-down sign bit and the
         //subtraction fills the bits above it - confirm against srli/slli/sub.
    1739 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srai(bitblock128_t arg1)
    1740 {
    1741         bitblock128_t tmp = simd128<64>::srli<((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh))>(arg1);
    1742         return simd_or(tmp, simd128<64>::sub(simd128<64>::constant<0>(), simd_and(simd128<64>::slli<((64-((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh)))-1)>(simd128<64>::constant<1>()), tmp)));
    1743 }
    1744 
    1745 //The total number of operations is 21
         //srai for the 128-bit field. The shift count is clamped to 127 when sh >= 128; tmp is the
         //simd128<128>::srli result for that count. The result ORs tmp with 0 - (tmp & bit), where
         //bit is simd128<128>::constant<1>() shifted left by (128 - count - 1) via simd128<128>::slli.
         //NOTE(review): sh is a uint64_t, so the `sh < 0` arm can never be taken.
         //NOTE(review): presumably the masked bit is the shifted-down sign bit and the
         //subtraction fills the bits above it - confirm against srli/slli/sub.
    1746 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srai(bitblock128_t arg1)
    1747 {
    1748         bitblock128_t tmp = simd128<128>::srli<((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh))>(arg1);
    1749         return simd_or(tmp, simd128<128>::sub(simd128<128>::constant<0>(), simd_and(simd128<128>::slli<((128-((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh)))-1)>(simd128<128>::constant<1>()), tmp)));
    1750 }
    1751 
    1752 //The total number of operations is 0
    1753 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::himask()
    1754 {
    1755         return simd128<2>::constant<(2)>();
    1756 }
    1757 
    1758 //The total number of operations is 0
    1759 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::himask()
    1760 {
    1761         return simd128<4>::constant<(12)>();
    1762 }
    1763 
    1764 //The total number of operations is 0
    1765 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::himask()
    1766 {
    1767         return simd128<8>::constant<(240)>();
    1768 }
    1769 
    1770 //The total number of operations is 0
    1771 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::himask()
    1772 {
    1773         return simd128<16>::constant<(65280)>();
    1774 }
    1775 
    1776 //The total number of operations is 0
    1777 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::himask()
    1778 {
    1779         return simd128<32>::constant<-65536>();
    1780 }
    1781 
    1782 //The total number of operations is 0
    1783 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::himask()
    1784 {
    1785         return _mm_set_epi32((int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0));
    1786 }
    1787 
    1788 //The total number of operations is 0
    1789 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::himask()
    1790 {
    1791         return _mm_set_epi32((int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0));
    1792 }
    1793 
    1794 //The total number of operations is 1
    1795 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::add(bitblock128_t arg1, bitblock128_t arg2)
    1796 {
    1797         return simd_xor(arg1, arg2);
    1798 }
    1799 
    1800 //The total number of operations is 10
    1801 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::add(bitblock128_t arg1, bitblock128_t arg2)
    1802 {
    1803         bitblock128_t tmp = simd_xor(arg1, arg2);
    1804         return simd128<1>::ifh(simd128<2>::himask(), simd_xor(tmp, simd128<128>::slli<1>(simd_and(arg1, arg2))), tmp);
    1805 }
    1806 
    1807 //The total number of operations is 6
    1808 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::add(bitblock128_t arg1, bitblock128_t arg2)
    1809 {
    1810         return simd128<1>::ifh(simd128<(8)>::himask(), simd128<(8)>::add(arg1, simd_and(simd128<(8)>::himask(), arg2)), simd128<(8)>::add(arg1, arg2));
    1811 }
    1812 
    1813 //The total number of operations is 1
    1814 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::add(bitblock128_t arg1, bitblock128_t arg2)
    1815 {
    1816         return _mm_add_epi8(arg1, arg2);
    1817 }
    1818 
    1819 //The total number of operations is 1
    1820 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::add(bitblock128_t arg1, bitblock128_t arg2)
    1821 {
    1822         return _mm_add_epi16(arg1, arg2);
    1823 }
    1824 
    1825 //The total number of operations is 1
    1826 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::add(bitblock128_t arg1, bitblock128_t arg2)
    1827 {
    1828         return _mm_add_epi32(arg1, arg2);
    1829 }
    1830 
    1831 //The total number of operations is 1
    1832 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::add(bitblock128_t arg1, bitblock128_t arg2)
    1833 {
    1834         return _mm_add_epi64(arg1, arg2);
    1835 }
    1836 
    1837 //The total number of operations is 11
    1838 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::add(bitblock128_t arg1, bitblock128_t arg2)
    1839 {
    1840         bitblock128_t partial = simd128<(64)>::add(arg1, arg2);
    1841         bitblock128_t carryMask = simd_or(simd_and(arg1, arg2), simd_andc(simd_xor(arg1, arg2), partial));
    1842         bitblock128_t carry = simd128<128>::slli<(64)>(simd128<(64)>::srli<(63)>(carryMask));
    1843         return simd128<(64)>::add(partial, carry);
    1844 }
    1845 
    1846 //The total number of operations is 9
    1847 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1)
    1848 {
    1849         return simd128<1>::ifh(simd128<2>::himask(), simd_and(arg1, simd128<128>::slli<1>(simd_not(arg1))), arg1);
    1850 }
    1851 
    1852 //The total number of operations is 19
    1853 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::abs(bitblock128_t arg1)
    1854 {
    1855         bitblock128_t gtMask = simd128<4>::gt(arg1, simd128<4>::constant<0>());
    1856         return simd128<1>::ifh(gtMask, arg1, simd128<4>::sub(gtMask, arg1));
    1857 }
    1858 
    1859 //The total number of operations is 1
    1860 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::abs(bitblock128_t arg1)
    1861 {
    1862         return _mm_abs_epi8(arg1);
    1863 }
    1864 
    1865 //The total number of operations is 1
    1866 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::abs(bitblock128_t arg1)
    1867 {
    1868         return _mm_abs_epi16(arg1);
    1869 }
    1870 
    1871 //The total number of operations is 1
    1872 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::abs(bitblock128_t arg1)
    1873 {
    1874         return _mm_abs_epi32(arg1);
    1875 }
    1876 
    1877 //The total number of operations is 9
    1878 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::abs(bitblock128_t arg1)
    1879 {
    1880         bitblock128_t eqMask = simd128<64>::eq(simd128<1>::ifh(simd128<64>::himask(), simd128<(32)>::abs(arg1), arg1), arg1);
    1881         return simd128<1>::ifh(eqMask, arg1, simd128<64>::sub(eqMask, arg1));
    1882 }
    1883 
    1884 //The total number of operations is 37
    1885 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::abs(bitblock128_t arg1)
    1886 {
    1887         bitblock128_t eqMask = simd128<128>::eq(simd128<1>::ifh(simd128<128>::himask(), simd128<(64)>::abs(arg1), arg1), arg1);
    1888         return simd128<1>::ifh(eqMask, arg1, simd128<128>::sub(eqMask, arg1));
    18891893}
    18901894
     
    27732777}
    27742778
    2775 //The total number of operations is 4
    2776 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::slli(bitblock128_t arg1)
    2777 {
    2778         return simd128<128>::slli<(sh*2)>(arg1);
    2779 }
    2780 
    2781 //The total number of operations is 4
    2782 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::slli(bitblock128_t arg1)
    2783 {
    2784         return simd128<128>::slli<(sh*4)>(arg1);
    2785 }
    2786 
    2787 //The total number of operations is 4
    2788 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::slli(bitblock128_t arg1)
    2789 {
    2790         return simd128<128>::slli<(sh*8)>(arg1);
    2791 }
    2792 
    2793 //The total number of operations is 4
    2794 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::slli(bitblock128_t arg1)
    2795 {
    2796         return simd128<128>::slli<(sh*16)>(arg1);
    2797 }
    2798 
    2799 //The total number of operations is 4
    2800 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::slli(bitblock128_t arg1)
    2801 {
    2802         return simd128<128>::slli<(sh*32)>(arg1);
    2803 }
    2804 
    2805 //The total number of operations is 4
    2806 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::slli(bitblock128_t arg1)
    2807 {
    2808         return simd128<128>::slli<(sh*64)>(arg1);
    2809 }
    2810 
    2811 //The total number of operations is 4
    2812 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::slli(bitblock128_t arg1)
    2813 {
    2814         return simd128<128>::slli<(sh*128)>(arg1);
    2815 }
    2816 
    28172779//The total number of operations is 5
    28182780template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4)
     
    29772939}
    29782940
     2941//The total number of operations is 4
     2942template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::slli(bitblock128_t arg1)
     2943{
     2944        return simd128<128>::slli<(sh*2)>(arg1);
     2945}
     2946
     2947//The total number of operations is 4
     2948template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::slli(bitblock128_t arg1)
     2949{
     2950        return simd128<128>::slli<(sh*4)>(arg1);
     2951}
     2952
     2953//The total number of operations is 4
     2954template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::slli(bitblock128_t arg1)
     2955{
     2956        return simd128<128>::slli<(sh*8)>(arg1);
     2957}
     2958
     2959//The total number of operations is 4
     2960template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::slli(bitblock128_t arg1)
     2961{
     2962        return simd128<128>::slli<(sh*16)>(arg1);
     2963}
     2964
     2965//The total number of operations is 4
     2966template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::slli(bitblock128_t arg1)
     2967{
     2968        return simd128<128>::slli<(sh*32)>(arg1);
     2969}
     2970
     2971//The total number of operations is 4
     2972template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::slli(bitblock128_t arg1)
     2973{
     2974        return simd128<128>::slli<(sh*64)>(arg1);
     2975}
     2976
     2977//The total number of operations is 4
     2978template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::slli(bitblock128_t arg1)
     2979{
     2980        return simd128<128>::slli<(sh*128)>(arg1);
     2981}
     2982
    29792983//The total number of operations is 13
    29802984template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8)
     
    30073011}
    30083012
     3013//The total number of operations is 11
     3014IDISA_ALWAYS_INLINE bitblock128_t bitblock128::sll(bitblock128_t arg1, bitblock128_t arg2)
     3015{
     3016        return simd128<128>::sll(arg1, arg2);
     3017}
     3018
    30093019//The total number of operations is 1
    30103020IDISA_ALWAYS_INLINE bitblock128_t bitblock128::load_unaligned(bitblock128_t* arg1)
    30113021{
    30123022        return _mm_loadu_si128((bitblock128_t*)(arg1));
     3023}
     3024
     3025//The total number of operations is 4
     3026template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t bitblock128::srli(bitblock128_t arg1)
     3027{
     3028        return simd128<128>::srli<sh>(arg1);
    30133029}
    30143030
     
    30313047}
    30323048
     3049//The total number of operations is 11
     3050IDISA_ALWAYS_INLINE bitblock128_t bitblock128::srl(bitblock128_t arg1, bitblock128_t arg2)
     3051{
     3052        return simd128<128>::srl(arg1, arg2);
     3053}
     3054
     3055//The total number of operations is 4
     3056template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t bitblock128::slli(bitblock128_t arg1)
     3057{
     3058        return simd128<128>::slli<sh>(arg1);
     3059}
     3060
    30333061//The total number of operations is 2
    30343062IDISA_ALWAYS_INLINE bool bitblock128::any(bitblock128_t arg1)
Note: See TracChangeset for help on using the changeset viewer.