Ignore:
Timestamp:
Oct 23, 2011, 9:43:33 AM (8 years ago)
Author:
cameron
Message:

bitblock::srl, sll, srli, slli implementations

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/idisa_cpp/idisa_ssse3.cpp

    r1573 r1580  
    2828        static IDISA_ALWAYS_INLINE bitblock128_t add_hl(bitblock128_t arg1);
    2929        static IDISA_ALWAYS_INLINE bitblock128_t srl(bitblock128_t arg1, bitblock128_t shift_mask);
     30        static IDISA_ALWAYS_INLINE bitblock128_t lomask();
    3031        static IDISA_ALWAYS_INLINE bitblock128_t umin(bitblock128_t arg1, bitblock128_t arg2);
    3132        template <uint64_t val> static IDISA_ALWAYS_INLINE bitblock128_t constant();
    3233        static IDISA_ALWAYS_INLINE bitblock128_t min(bitblock128_t arg1, bitblock128_t arg2);
    33         static IDISA_ALWAYS_INLINE bitblock128_t lomask();
    3434        static IDISA_ALWAYS_INLINE bitblock128_t umax(bitblock128_t arg1, bitblock128_t arg2);
    3535        static IDISA_ALWAYS_INLINE bitblock128_t abs(bitblock128_t arg1);
     
    8989{
    9090public:
     91        static IDISA_ALWAYS_INLINE bitblock128_t sll(bitblock128_t arg1, bitblock128_t arg2);
    9192        static IDISA_ALWAYS_INLINE bitblock128_t load_unaligned(bitblock128_t* arg1);
     93        template <uint64_t sh> static IDISA_ALWAYS_INLINE bitblock128_t srli(bitblock128_t arg1);
     94        static IDISA_ALWAYS_INLINE bitblock128_t srl(bitblock128_t arg1, bitblock128_t arg2);
    9295        static IDISA_ALWAYS_INLINE void store_aligned(bitblock128_t* arg1, bitblock128_t arg2);
    9396        static IDISA_ALWAYS_INLINE bool all(bitblock128_t arg1);
    9497        static IDISA_ALWAYS_INLINE bool any(bitblock128_t arg1);
    9598        static IDISA_ALWAYS_INLINE uint64_t popcount(bitblock128_t arg1);
     99        template <uint64_t sh> static IDISA_ALWAYS_INLINE bitblock128_t slli(bitblock128_t arg1);
    96100        static IDISA_ALWAYS_INLINE bitblock128_t load_aligned(bitblock128_t* arg1);
    97101        static IDISA_ALWAYS_INLINE void store_unaligned(bitblock128_t* arg1, bitblock128_t arg2);
     
    231235template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srl(bitblock128_t arg1, bitblock128_t shift_mask);
    232236template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srl(bitblock128_t arg1, bitblock128_t shift_mask);
    233 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::lomask();
    234 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::lomask();
    235 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::lomask();
    236 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::lomask();
    237 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::lomask();
    238 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::lomask();
    239 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lomask();
    240237template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::constant();
    241238template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::constant();
     
    254251template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::min(bitblock128_t arg1, bitblock128_t arg2);
    255252template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::min(bitblock128_t arg1, bitblock128_t arg2);
     253template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::lomask();
     254template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::lomask();
     255template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::lomask();
     256template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::lomask();
     257template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::lomask();
     258template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::lomask();
     259template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lomask();
    256260template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umin(bitblock128_t arg1, bitblock128_t arg2);
    257261template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::umin(bitblock128_t arg1, bitblock128_t arg2);
     
    262266template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::umin(bitblock128_t arg1, bitblock128_t arg2);
    263267template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::umin(bitblock128_t arg1, bitblock128_t arg2);
    264 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umax(bitblock128_t arg1, bitblock128_t arg2);
    265 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::umax(bitblock128_t arg1, bitblock128_t arg2);
    266 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::umax(bitblock128_t arg1, bitblock128_t arg2);
    267 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::umax(bitblock128_t arg1, bitblock128_t arg2);
    268 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::umax(bitblock128_t arg1, bitblock128_t arg2);
    269 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::umax(bitblock128_t arg1, bitblock128_t arg2);
    270 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::umax(bitblock128_t arg1, bitblock128_t arg2);
    271 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::umax(bitblock128_t arg1, bitblock128_t arg2);
     268template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1);
     269template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::abs(bitblock128_t arg1);
     270template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::abs(bitblock128_t arg1);
     271template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::abs(bitblock128_t arg1);
     272template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::abs(bitblock128_t arg1);
     273template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::abs(bitblock128_t arg1);
     274template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::abs(bitblock128_t arg1);
    272275template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::eq(bitblock128_t arg1, bitblock128_t arg2);
    273276template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::eq(bitblock128_t arg1, bitblock128_t arg2);
     
    300303template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::add(bitblock128_t arg1, bitblock128_t arg2);
    301304template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::add(bitblock128_t arg1, bitblock128_t arg2);
    302 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1);
    303 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::abs(bitblock128_t arg1);
    304 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::abs(bitblock128_t arg1);
    305 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::abs(bitblock128_t arg1);
    306 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::abs(bitblock128_t arg1);
    307 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::abs(bitblock128_t arg1);
    308 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::abs(bitblock128_t arg1);
     305template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umax(bitblock128_t arg1, bitblock128_t arg2);
     306template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::umax(bitblock128_t arg1, bitblock128_t arg2);
     307template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::umax(bitblock128_t arg1, bitblock128_t arg2);
     308template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::umax(bitblock128_t arg1, bitblock128_t arg2);
     309template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::umax(bitblock128_t arg1, bitblock128_t arg2);
     310template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::umax(bitblock128_t arg1, bitblock128_t arg2);
     311template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::umax(bitblock128_t arg1, bitblock128_t arg2);
     312template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::umax(bitblock128_t arg1, bitblock128_t arg2);
    309313template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::umin_hl(bitblock128_t arg1, bitblock128_t arg2);
    310314template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<4>::umin_hl(bitblock128_t arg1, bitblock128_t arg2);
     
    445449template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16);
    446450template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16);
    447 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::slli(bitblock128_t arg1);
    448 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::slli(bitblock128_t arg1);
    449 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::slli(bitblock128_t arg1);
    450 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::slli(bitblock128_t arg1);
    451 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::slli(bitblock128_t arg1);
    452 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::slli(bitblock128_t arg1);
    453 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::slli(bitblock128_t arg1);
    454451template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4);
    455452template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4);
     
    479476template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::dslli(bitblock128_t arg1, bitblock128_t arg2);
    480477template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::dslli(bitblock128_t arg1, bitblock128_t arg2);
     478template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::slli(bitblock128_t arg1);
     479template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::slli(bitblock128_t arg1);
     480template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::slli(bitblock128_t arg1);
     481template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::slli(bitblock128_t arg1);
     482template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::slli(bitblock128_t arg1);
     483template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::slli(bitblock128_t arg1);
     484template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::slli(bitblock128_t arg1);
    481485template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8);
    482486template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8);
     
    13951399
    13961400//The total number of operations is 0
    1397 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::lomask() // low-half mask for 2-bit fields
    1398 {
    1399         return simd128<2>::constant<(1)>(); // 1 = binary 01; presumably replicated into every 2-bit field by constant() -- confirm
    1400 }
    1401 
    1402 //lomask for 4-bit fields: constant<3>() is 3 = binary 0011, the low 2 bits set; presumably constant() replicates it into every field -- confirm. The total number of operations is 0
    1403 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::lomask()
    1404 {
    1405         return simd128<4>::constant<(3)>();
    1406 }
    1407 
    1408 //lomask for 8-bit fields: constant<15>() is 15 = 0x0F, the low 4 bits set; presumably constant() replicates it into every field -- confirm. The total number of operations is 0
    1409 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::lomask()
    1410 {
    1411         return simd128<8>::constant<(15)>();
    1412 }
    1413 
    1414 //lomask for 16-bit fields: constant<255>() is 255 = 0x00FF, the low 8 bits set; presumably constant() replicates it into every field -- confirm. The total number of operations is 0
    1415 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::lomask()
    1416 {
    1417         return simd128<16>::constant<(255)>();
    1418 }
    1419 
    1420 //lomask for 32-bit fields: constant<65535>() is 65535 = 0x0000FFFF, the low 16 bits set; presumably constant() replicates it into every field -- confirm. The total number of operations is 0
    1421 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::lomask()
    1422 {
    1423         return simd128<32>::constant<(65535)>();
    1424 }
    1425 
    1426 //lomask for 64-bit fields: _mm_set_epi32 takes its 32-bit lanes from most to least significant, so lanes 0 and 2 (the low half of each 64-bit field) are all ones and lanes 1 and 3 are zero. The total number of operations is 0
    1427 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::lomask()
    1428 {
    1429         return _mm_set_epi32((int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1));
    1430 }
    1431 
    1432 //lomask for the single 128-bit field: _mm_set_epi32 takes its 32-bit lanes from most to least significant, so the low 64 bits are all ones and the high 64 bits are zero. The total number of operations is 0
    1433 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lomask()
    1434 {
    1435         return _mm_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1));
    1436 }
    1437 
    1438 //The total number of operations is 0
    14391401template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::constant()
    14401402{
     
    15371499}
    15381500
     1501//lomask for 2-bit fields: constant<1>() is 1 = binary 01, the low bit set; presumably constant() replicates it into every field -- confirm. The total number of operations is 0
     1502template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::lomask()
     1503{
     1504        return simd128<2>::constant<(1)>();
     1505}
     1506
     1507//lomask for 4-bit fields: constant<3>() is 3 = binary 0011, the low 2 bits set; presumably constant() replicates it into every field -- confirm. The total number of operations is 0
     1508template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::lomask()
     1509{
     1510        return simd128<4>::constant<(3)>();
     1511}
     1512
     1513//lomask for 8-bit fields: constant<15>() is 15 = 0x0F, the low 4 bits set; presumably constant() replicates it into every field -- confirm. The total number of operations is 0
     1514template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::lomask()
     1515{
     1516        return simd128<8>::constant<(15)>();
     1517}
     1518
     1519//lomask for 16-bit fields: constant<255>() is 255 = 0x00FF, the low 8 bits set; presumably constant() replicates it into every field -- confirm. The total number of operations is 0
     1520template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::lomask()
     1521{
     1522        return simd128<16>::constant<(255)>();
     1523}
     1524
     1525//lomask for 32-bit fields: constant<65535>() is 65535 = 0x0000FFFF, the low 16 bits set; presumably constant() replicates it into every field -- confirm. The total number of operations is 0
     1526template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::lomask()
     1527{
     1528        return simd128<32>::constant<(65535)>();
     1529}
     1530
     1531//lomask for 64-bit fields: _mm_set_epi32 takes its 32-bit lanes from most to least significant, so lanes 0 and 2 (the low half of each 64-bit field) are all ones and lanes 1 and 3 are zero. The total number of operations is 0
     1532template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::lomask()
     1533{
     1534        return _mm_set_epi32((int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1));
     1535}
     1536
     1537//lomask for the single 128-bit field: _mm_set_epi32 takes its 32-bit lanes from most to least significant, so the low 64 bits are all ones and the high 64 bits are zero. The total number of operations is 0
     1538template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lomask()
     1539{
     1540        return _mm_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1));
     1541}
     1542
    15391543//The total number of operations is 1
    15401544template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umin(bitblock128_t arg1, bitblock128_t arg2)
     
    15931597}
    15941598
     1599//abs for signed 2-bit fields: the low bit is passed through; the high bit becomes (high AND NOT low), so 11 (-1) turns into 01 and 10 (-2) stays 10. Assumes ifh(mask, a, b) takes a where mask is set and b elsewhere -- ifh is defined outside this view. The total number of operations is 9
     1600template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1)
     1601{
     1602        return simd128<1>::ifh(simd128<2>::himask(), simd_and(arg1, simd128<128>::slli<1>(simd_not(arg1))), arg1); // the 1-bit left shift lines each inverted low bit up with the high bit of its own field
     1603}
     1604
     1605//abs for signed 4-bit fields: fields greater than zero keep arg1; all others get gtMask - arg1, which is 0 - arg1 there because gtMask is presumably zero for fields that are not greater than zero (gt is defined outside this view). The total number of operations is 19
     1606template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::abs(bitblock128_t arg1)
     1607{
     1608        bitblock128_t gtMask = simd128<4>::gt(arg1, simd128<4>::constant<0>()); // per-field "arg1 > 0" mask
     1609        return simd128<1>::ifh(gtMask, arg1, simd128<4>::sub(gtMask, arg1));
     1610}
     1611
     1612//abs for signed 8-bit fields: maps directly to the SSSE3 intrinsic _mm_abs_epi8. The total number of operations is 1
     1613template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::abs(bitblock128_t arg1)
     1614{
     1615        return _mm_abs_epi8(arg1);
     1616}
     1617
     1618//abs for signed 16-bit fields: maps directly to the SSSE3 intrinsic _mm_abs_epi16. The total number of operations is 1
     1619template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::abs(bitblock128_t arg1)
     1620{
     1621        return _mm_abs_epi16(arg1);
     1622}
     1623
     1624//abs for signed 32-bit fields: maps directly to the SSSE3 intrinsic _mm_abs_epi32. The total number of operations is 1
     1625template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::abs(bitblock128_t arg1)
     1626{
     1627        return _mm_abs_epi32(arg1);
     1628}
     1629
     1630//The total number of operations is 13
     1631template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::abs(bitblock128_t arg1)
     1632{
     1633        bitblock128_t eqMask = simd128<64>::eq(simd128<1>::ifh(simd128<64>::himask(), simd128<(32)>::abs(arg1), arg1), arg1);
     1634        return simd128<1>::ifh(eqMask, arg1, simd128<64>::sub(eqMask, arg1));
     1635}
     1636
     1637//The total number of operations is 45
     1638template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::abs(bitblock128_t arg1)
     1639{
     1640        bitblock128_t eqMask = simd128<128>::eq(simd128<1>::ifh(simd128<128>::himask(), simd128<(64)>::abs(arg1), arg1), arg1);
     1641        return simd128<1>::ifh(eqMask, arg1, simd128<128>::sub(eqMask, arg1));
     1642}
     1643
     1644//eq for 1-bit fields: NOT of XOR, so a result bit is 1 exactly where the corresponding bits of arg1 and arg2 agree. The total number of operations is 2
     1645template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::eq(bitblock128_t arg1, bitblock128_t arg2)
     1646{
     1647        return simd_not(simd_xor(arg1, arg2));
     1648}
     1649
     1650//eq for 2-bit fields: a field is all ones when both of its bits agree, otherwise zero. Built from the 1-bit comparison; assumes srli/slli on simd128<2> shift within each 2-bit field (defined outside this view). The total number of operations is 8
     1651template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::eq(bitblock128_t arg1, bitblock128_t arg2)
     1652{
     1653        bitblock128_t tmpAns = simd128<(1)>::eq(arg1, arg2); // bitwise agreement
     1654        bitblock128_t loMask = simd_and(tmpAns, simd128<2>::srli<(1)>(tmpAns)); // low bit = low agrees AND high agrees
     1655        bitblock128_t hiMask = simd128<2>::slli<(1)>(loMask); // copy that verdict into the high bit
     1656        return simd_or(loMask, hiMask);
     1657}
     1658
     1659//eq for 4-bit fields, built on the 8-bit compare: the high nibbles (low nibbles masked to zero) and the low nibbles (high nibbles masked to zero) of each byte are compared separately, each result is masked back to its own nibble, and the two are OR-ed together. The total number of operations is 9
     1660template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::eq(bitblock128_t arg1, bitblock128_t arg2)
     1661{
     1662        return simd_or(simd_and(simd128<(8)>::himask(), simd128<(8)>::eq(simd_and(simd128<(8)>::himask(), arg1), simd_and(simd128<(8)>::himask(), arg2))), simd_and(simd128<(8)>::lomask(), simd128<(8)>::eq(simd_and(simd128<(8)>::lomask(), arg1), simd_and(simd128<(8)>::lomask(), arg2))));
     1663}
     1664
     1665//eq for 8-bit fields: maps directly to the SSE2 intrinsic _mm_cmpeq_epi8 (equal fields become all ones, others zero). The total number of operations is 1
     1666template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::eq(bitblock128_t arg1, bitblock128_t arg2)
     1667{
     1668        return _mm_cmpeq_epi8(arg1, arg2);
     1669}
     1670
     1671//eq for 16-bit fields: maps directly to the SSE2 intrinsic _mm_cmpeq_epi16 (equal fields become all ones, others zero). The total number of operations is 1
     1672template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::eq(bitblock128_t arg1, bitblock128_t arg2)
     1673{
     1674        return _mm_cmpeq_epi16(arg1, arg2);
     1675}
     1676
     1677//eq for 32-bit fields: maps directly to the SSE2 intrinsic _mm_cmpeq_epi32 (equal fields become all ones, others zero). The total number of operations is 1
     1678template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::eq(bitblock128_t arg1, bitblock128_t arg2)
     1679{
     1680        return _mm_cmpeq_epi32(arg1, arg2);
     1681}
     1682
     1683//eq for 64-bit fields: compares the two 32-bit halves, ANDs the two half results together and spreads the verdict over the whole field. Assumes srli/slli on simd128<64> shift within each 64-bit field (defined outside this view). The total number of operations is 5
     1684template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::eq(bitblock128_t arg1, bitblock128_t arg2)
     1685{
     1686        bitblock128_t tmpAns = simd128<(32)>::eq(arg1, arg2); // per-32-bit equality
     1687        bitblock128_t loMask = simd_and(tmpAns, simd128<64>::srli<(32)>(tmpAns)); // low half = both halves equal
     1688        bitblock128_t hiMask = simd128<64>::slli<(32)>(loMask); // copy the verdict into the high half
     1689        return simd_or(loMask, hiMask);
     1690}
     1691
     1692//eq for the 128-bit field: compares the two 64-bit halves, ANDs the two half results together and spreads the verdict over the whole register. The total number of operations is 15
     1693template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::eq(bitblock128_t arg1, bitblock128_t arg2)
     1694{
     1695        bitblock128_t tmpAns = simd128<(64)>::eq(arg1, arg2); // per-64-bit equality
     1696        bitblock128_t loMask = simd_and(tmpAns, simd128<128>::srli<(64)>(tmpAns)); // low half = both halves equal
     1697        bitblock128_t hiMask = simd128<128>::slli<(64)>(loMask); // copy the verdict into the high half
     1698        return simd_or(loMask, hiMask);
     1699}
     1700
     1701//srai (arithmetic right shift by constant sh) for 2-bit fields: sh == 0 returns arg1 unchanged; every other sh keeps the high (sign) bit and ORs in arg1 shifted right by one, so all sh >= 1 give the same result. Presumably srli<1> shifts within each 2-bit field -- confirm. The total number of operations is 4
     1702template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::srai(bitblock128_t arg1)
     1703{
     1704        return ((sh == 0) ? arg1 : simd_or(simd_and(simd128<2>::himask(), arg1), simd128<2>::srli<1>(arg1)));
     1705}
     1706
     1707//srai (arithmetic right shift by constant sh) for 4-bit fields: logical shift by sh clamped to at most 3, then OR in (0 - shifted sign bit), which presumably sets every bit of the field from the sign position upward (sub/srli are defined outside this view). NOTE(review): sh is uint64_t, so the (sh < 0) test is always false. The total number of operations is 10
     1708template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::srai(bitblock128_t arg1)
     1709{
     1710        bitblock128_t tmp = simd128<4>::srli<((sh >= 4) ? (3) : ((sh < 0) ? 0 : sh))>(arg1); // logical shift by the clamped count
     1711        return simd_or(tmp, simd128<4>::sub(simd128<4>::constant<0>(), simd_and(simd128<4>::constant<(1<<((4-((sh >= 4) ? (3) : ((sh < 0) ? 0 : sh)))-1))>(), tmp))); // the constant selects the bit where the sign landed after the shift
     1712}
     1713
     1714//srai (arithmetic right shift by constant sh) for 8-bit fields: logical shift by sh clamped to at most 7, then OR in (0 - shifted sign bit), which presumably sets every bit of the field from the sign position upward (sub/srli are defined outside this view). NOTE(review): sh is uint64_t, so the (sh < 0) test is always false. The total number of operations is 5
     1715template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::srai(bitblock128_t arg1)
     1716{
     1717        bitblock128_t tmp = simd128<8>::srli<((sh >= 8) ? (7) : ((sh < 0) ? 0 : sh))>(arg1); // logical shift by the clamped count
     1718        return simd_or(tmp, simd128<8>::sub(simd128<8>::constant<0>(), simd_and(simd128<8>::constant<(1<<((8-((sh >= 8) ? (7) : ((sh < 0) ? 0 : sh)))-1))>(), tmp))); // the constant selects the bit where the sign landed after the shift
     1719}
     1720
     1721//srai for 16-bit fields: maps directly to the SSE2 intrinsic _mm_srai_epi16 with sh (cast to int32_t) as the shift count. The total number of operations is 1
     1722template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::srai(bitblock128_t arg1)
     1723{
     1724        return _mm_srai_epi16(arg1, (int32_t)(sh));
     1725}
     1726
     1727//srai for 32-bit fields: maps directly to the SSE2 intrinsic _mm_srai_epi32 with sh (cast to int32_t) as the shift count. The total number of operations is 1
     1728template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::srai(bitblock128_t arg1)
     1729{
     1730        return _mm_srai_epi32(arg1, (int32_t)(sh));
     1731}
     1732
     1733//srai (arithmetic right shift by constant sh) for 64-bit fields: logical shift by sh clamped to at most 63, then OR in (0 - shifted sign bit); the sign-position mask is built by shifting constant<1>() left instead of using a literal constant. NOTE(review): sh is uint64_t, so the (sh < 0) test is always false. The total number of operations is 5
     1734template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srai(bitblock128_t arg1)
     1735{
     1736        bitblock128_t tmp = simd128<64>::srli<((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh))>(arg1); // logical shift by the clamped count
     1737        return simd_or(tmp, simd128<64>::sub(simd128<64>::constant<0>(), simd_and(simd128<64>::slli<((64-((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh)))-1)>(simd128<64>::constant<1>()), tmp))); // the shifted 1 selects the bit where the sign landed
     1738}
     1739
     1740//srai (arithmetic right shift by constant sh) for the 128-bit field: logical shift by sh clamped to at most 127, then OR in (0 - shifted sign bit); the sign-position mask is built by shifting constant<1>() left. NOTE(review): sh is uint64_t, so the (sh < 0) test is always false. The total number of operations is 21
     1741template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srai(bitblock128_t arg1)
     1742{
     1743        bitblock128_t tmp = simd128<128>::srli<((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh))>(arg1); // logical shift by the clamped count
     1744        return simd_or(tmp, simd128<128>::sub(simd128<128>::constant<0>(), simd_and(simd128<128>::slli<((128-((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh)))-1)>(simd128<128>::constant<1>()), tmp))); // the shifted 1 selects the bit where the sign landed
     1745}
     1746
     1747//himask for 2-bit fields: constant<2>() is 2 = binary 10, the high bit set; presumably constant() replicates it into every field -- confirm. The total number of operations is 0
     1748template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::himask()
     1749{
     1750        return simd128<2>::constant<(2)>();
     1751}
     1752
     1753//himask for 4-bit fields: constant<12>() is 12 = binary 1100, the high 2 bits set; presumably constant() replicates it into every field -- confirm. The total number of operations is 0
     1754template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::himask()
     1755{
     1756        return simd128<4>::constant<(12)>();
     1757}
     1758
     1759//himask for 8-bit fields: constant<240>() is 240 = 0xF0, the high 4 bits set; presumably constant() replicates it into every field -- confirm. The total number of operations is 0
     1760template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::himask()
     1761{
     1762        return simd128<8>::constant<(240)>();
     1763}
     1764
     1765//himask for 16-bit fields: constant<65280>() is 65280 = 0xFF00, the high 8 bits set; presumably constant() replicates it into every field -- confirm. The total number of operations is 0
     1766template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::himask()
     1767{
     1768        return simd128<16>::constant<(65280)>();
     1769}
     1770
     1771//himask for 32-bit fields: -65536 has the low 32-bit pattern 0xFFFF0000, the high 16 bits set. NOTE(review): constant's parameter is uint64_t, so -65536 converts to 0xFFFFFFFFFFFF0000; C++11 and later treat that as a narrowing conversion in a template argument and reject it. Presumably constant() only uses the low 32 bits -- confirm before switching to an unsigned literal. The total number of operations is 0
     1772template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::himask()
     1773{
     1774        return simd128<32>::constant<-65536>();
     1775}
     1776
     1777//himask for 64-bit fields: _mm_set_epi32 takes its 32-bit lanes from most to least significant, so lanes 1 and 3 (the high half of each 64-bit field) are all ones and lanes 0 and 2 are zero. The total number of operations is 0
     1778template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::himask()
     1779{
     1780        return _mm_set_epi32((int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0));
     1781}
     1782
     1783//himask for the single 128-bit field: _mm_set_epi32 takes its 32-bit lanes from most to least significant, so the high 64 bits are all ones and the low 64 bits are zero. The total number of operations is 0
     1784template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::himask()
     1785{
     1786        return _mm_set_epi32((int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0));
     1787}
     1788
     1789//add for 1-bit fields: addition modulo 2 with no carry out of the field, which is XOR. The total number of operations is 1
     1790template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::add(bitblock128_t arg1, bitblock128_t arg2)
     1791{
     1792        return simd_xor(arg1, arg2);
     1793}
     1794
     1795//add for 2-bit fields: the low bit is arg1 XOR arg2; the high bit is that XOR further XOR-ed with the carry out of the low bit (arg1 AND arg2, moved up one position). Assumes ifh(mask, a, b) takes a where mask is set and b elsewhere -- ifh is defined outside this view. The total number of operations is 10
     1796template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::add(bitblock128_t arg1, bitblock128_t arg2)
     1797{
     1798        bitblock128_t tmp = simd_xor(arg1, arg2); // carry-less sum
     1799        return simd128<1>::ifh(simd128<2>::himask(), simd_xor(tmp, simd128<128>::slli<1>(simd_and(arg1, arg2))), tmp); // carries that land in a low-bit position are dropped by the himask select
     1800}
     1801
     1801//add for 4-bit fields using two 8-bit adds: the high nibble comes from arg1 + (arg2 with its low nibble cleared), so no carry can enter from the low nibble; the low nibble comes from the plain 8-bit sum. Assumes ifh(mask, a, b) takes a where mask is set and b elsewhere. The total number of operations is 6
     1802template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::add(bitblock128_t arg1, bitblock128_t arg2)
     1803{
     1804        return simd128<1>::ifh(simd128<(8)>::himask(), simd128<(8)>::add(arg1, simd_and(simd128<(8)>::himask(), arg2)), simd128<(8)>::add(arg1, arg2));
     1805}
     1807
     1808//add for 8-bit fields: maps directly to the SSE2 intrinsic _mm_add_epi8. The total number of operations is 1
     1809template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::add(bitblock128_t arg1, bitblock128_t arg2)
     1810{
     1811        return _mm_add_epi8(arg1, arg2);
     1812}
     1813
     1814//add for 16-bit fields: maps directly to the SSE2 intrinsic _mm_add_epi16. The total number of operations is 1
     1815template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::add(bitblock128_t arg1, bitblock128_t arg2)
     1816{
     1817        return _mm_add_epi16(arg1, arg2);
     1818}
     1819
     1820//add for 32-bit fields: maps directly to the SSE2 intrinsic _mm_add_epi32. The total number of operations is 1
     1821template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::add(bitblock128_t arg1, bitblock128_t arg2)
     1822{
     1823        return _mm_add_epi32(arg1, arg2);
     1824}
     1825
     1826//add for 64-bit fields: maps directly to the SSE2 intrinsic _mm_add_epi64. The total number of operations is 1
     1827template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::add(bitblock128_t arg1, bitblock128_t arg2)
     1828{
     1829        return _mm_add_epi64(arg1, arg2);
     1830}
     1831
     1832//add for the 128-bit field: adds the two 64-bit halves independently, then propagates the carry out of the low half into the high half with a second 64-bit add. Assumes simd_andc(a, b) computes a AND NOT b -- it is defined outside this view. The total number of operations is 11
     1833template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::add(bitblock128_t arg1, bitblock128_t arg2)
     1834{
     1835        bitblock128_t partial = simd128<(64)>::add(arg1, arg2); // two independent 64-bit sums
     1836        bitblock128_t carryMask = simd_or(simd_and(arg1, arg2), simd_andc(simd_xor(arg1, arg2), partial)); // top bit of each half = carry out of that half
     1837        bitblock128_t carry = simd128<128>::slli<(64)>(simd128<(64)>::srli<(63)>(carryMask)); // low half's carry becomes 1 in the high half; the high half's own carry is shifted out
     1838        return simd128<(64)>::add(partial, carry);
     1839}
     1840
    15951841//The total number of operations is 1
    15961842template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umax(bitblock128_t arg1, bitblock128_t arg2)
     
    16471893        bitblock128_t eqMask2 = simd128<128>::srli<(64)>(simd128<(64)>::eq(tmpAns, arg2));
    16481894        return simd128<1>::ifh(simd128<128>::himask(), tmpAns, simd128<1>::ifh(eqMask1, simd128<1>::ifh(eqMask2, tmpAns, arg1), arg2));
    1649 }
    1650 
    1651 //eq for 1-bit fields: NOT of XOR, so a result bit is 1 exactly where the corresponding bits of arg1 and arg2 agree. The total number of operations is 2
    1652 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::eq(bitblock128_t arg1, bitblock128_t arg2)
    1653 {
    1654         return simd_not(simd_xor(arg1, arg2));
    1655 }
    1656 
    1657 //eq for 2-bit fields: a field is all ones when both of its bits agree, otherwise zero. Built from the 1-bit comparison; assumes srli/slli on simd128<2> shift within each 2-bit field (defined outside this view). The total number of operations is 8
    1658 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::eq(bitblock128_t arg1, bitblock128_t arg2)
    1659 {
    1660         bitblock128_t tmpAns = simd128<(1)>::eq(arg1, arg2); // bitwise agreement
    1661         bitblock128_t loMask = simd_and(tmpAns, simd128<2>::srli<(1)>(tmpAns)); // low bit = low agrees AND high agrees
    1662         bitblock128_t hiMask = simd128<2>::slli<(1)>(loMask); // copy that verdict into the high bit
    1663         return simd_or(loMask, hiMask);
    1664 }
    1665 
    1666 //eq for 4-bit fields, built on the 8-bit compare: the high nibbles (low nibbles masked to zero) and the low nibbles (high nibbles masked to zero) of each byte are compared separately, each result is masked back to its own nibble, and the two are OR-ed together. The total number of operations is 9
    1667 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::eq(bitblock128_t arg1, bitblock128_t arg2)
    1668 {
    1669         return simd_or(simd_and(simd128<(8)>::himask(), simd128<(8)>::eq(simd_and(simd128<(8)>::himask(), arg1), simd_and(simd128<(8)>::himask(), arg2))), simd_and(simd128<(8)>::lomask(), simd128<(8)>::eq(simd_and(simd128<(8)>::lomask(), arg1), simd_and(simd128<(8)>::lomask(), arg2))));
    1670 }
    1671 
    1672 //eq for 8-bit fields: maps directly to the SSE2 intrinsic _mm_cmpeq_epi8 (equal fields become all ones, others zero). The total number of operations is 1
    1673 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::eq(bitblock128_t arg1, bitblock128_t arg2)
    1674 {
    1675         return _mm_cmpeq_epi8(arg1, arg2);
    1676 }
    1677 
    1678 //eq for 16-bit fields: maps directly to the SSE2 intrinsic _mm_cmpeq_epi16 (equal fields become all ones, others zero). The total number of operations is 1
    1679 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::eq(bitblock128_t arg1, bitblock128_t arg2)
    1680 {
    1681         return _mm_cmpeq_epi16(arg1, arg2);
    1682 }
    1683 
    1684 //eq for 32-bit fields: maps directly to the SSE2 intrinsic _mm_cmpeq_epi32 (equal fields become all ones, others zero). The total number of operations is 1
    1685 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::eq(bitblock128_t arg1, bitblock128_t arg2)
    1686 {
    1687         return _mm_cmpeq_epi32(arg1, arg2);
    1688 }
    1689 
    1690 //eq for 64-bit fields: compares the two 32-bit halves, ANDs the two half results together and spreads the verdict over the whole field. Assumes srli/slli on simd128<64> shift within each 64-bit field (defined outside this view). The total number of operations is 5
    1691 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::eq(bitblock128_t arg1, bitblock128_t arg2)
    1692 {
    1693         bitblock128_t tmpAns = simd128<(32)>::eq(arg1, arg2); // per-32-bit equality
    1694         bitblock128_t loMask = simd_and(tmpAns, simd128<64>::srli<(32)>(tmpAns)); // low half = both halves equal
    1695         bitblock128_t hiMask = simd128<64>::slli<(32)>(loMask); // copy the verdict into the high half
    1696         return simd_or(loMask, hiMask);
    1697 }
    1698 
    1699 //eq for the 128-bit field: compares the two 64-bit halves, ANDs the two half results together and spreads the verdict over the whole register. The total number of operations is 15
    1700 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::eq(bitblock128_t arg1, bitblock128_t arg2)
    1701 {
    1702         bitblock128_t tmpAns = simd128<(64)>::eq(arg1, arg2); // per-64-bit equality
    1703         bitblock128_t loMask = simd_and(tmpAns, simd128<128>::srli<(64)>(tmpAns)); // low half = both halves equal
    1704         bitblock128_t hiMask = simd128<128>::slli<(64)>(loMask); // copy the verdict into the high half
    1705         return simd_or(loMask, hiMask);
    1706 }
    1707 
    1708 //srai (arithmetic right shift by constant sh) for 2-bit fields: sh == 0 returns arg1 unchanged; every other sh keeps the high (sign) bit and ORs in arg1 shifted right by one, so all sh >= 1 give the same result. Presumably srli<1> shifts within each 2-bit field -- confirm. The total number of operations is 4
    1709 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::srai(bitblock128_t arg1)
    1710 {
    1711         return ((sh == 0) ? arg1 : simd_or(simd_and(simd128<2>::himask(), arg1), simd128<2>::srli<1>(arg1)));
    1712 }
    1713 
    1714 //srai (arithmetic right shift by constant sh) for 4-bit fields: logical shift by sh clamped to at most 3, then OR in (0 - shifted sign bit), which presumably sets every bit of the field from the sign position upward (sub/srli are defined outside this view). NOTE(review): sh is uint64_t, so the (sh < 0) test is always false. The total number of operations is 10
    1715 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::srai(bitblock128_t arg1)
    1716 {
    1717         bitblock128_t tmp = simd128<4>::srli<((sh >= 4) ? (3) : ((sh < 0) ? 0 : sh))>(arg1); // logical shift by the clamped count
    1718         return simd_or(tmp, simd128<4>::sub(simd128<4>::constant<0>(), simd_and(simd128<4>::constant<(1<<((4-((sh >= 4) ? (3) : ((sh < 0) ? 0 : sh)))-1))>(), tmp))); // the constant selects the bit where the sign landed after the shift
    1719 }
    1720 
    1721 //srai (arithmetic right shift by constant sh) for 8-bit fields: logical shift by sh clamped to at most 7, then OR in (0 - shifted sign bit), which presumably sets every bit of the field from the sign position upward (sub/srli are defined outside this view). NOTE(review): sh is uint64_t, so the (sh < 0) test is always false. The total number of operations is 5
    1722 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::srai(bitblock128_t arg1)
    1723 {
    1724         bitblock128_t tmp = simd128<8>::srli<((sh >= 8) ? (7) : ((sh < 0) ? 0 : sh))>(arg1); // logical shift by the clamped count
    1725         return simd_or(tmp, simd128<8>::sub(simd128<8>::constant<0>(), simd_and(simd128<8>::constant<(1<<((8-((sh >= 8) ? (7) : ((sh < 0) ? 0 : sh)))-1))>(), tmp))); // the constant selects the bit where the sign landed after the shift
    1726 }
    1727 
    1728 //srai for 16-bit fields: maps directly to the SSE2 intrinsic _mm_srai_epi16 with sh (cast to int32_t) as the shift count. The total number of operations is 1
    1729 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::srai(bitblock128_t arg1)
    1730 {
    1731         return _mm_srai_epi16(arg1, (int32_t)(sh));
    1732 }
    1733 
    1734 //srai for 32-bit fields: maps directly to the SSE2 intrinsic _mm_srai_epi32 with sh (cast to int32_t) as the shift count. The total number of operations is 1
    1735 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::srai(bitblock128_t arg1)
    1736 {
    1737         return _mm_srai_epi32(arg1, (int32_t)(sh));
    1738 }
    1739 
    1740 //srai (arithmetic right shift by constant sh) for 64-bit fields: logical shift by sh clamped to at most 63, then OR in (0 - shifted sign bit); the sign-position mask is built by shifting constant<1>() left instead of using a literal constant. NOTE(review): sh is uint64_t, so the (sh < 0) test is always false. The total number of operations is 5
    1741 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srai(bitblock128_t arg1)
    1742 {
    1743         bitblock128_t tmp = simd128<64>::srli<((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh))>(arg1); // logical shift by the clamped count
    1744         return simd_or(tmp, simd128<64>::sub(simd128<64>::constant<0>(), simd_and(simd128<64>::slli<((64-((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh)))-1)>(simd128<64>::constant<1>()), tmp))); // the shifted 1 selects the bit where the sign landed
    1745 }
    1746 
    1747 //The total number of operations is 21
    1748 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srai(bitblock128_t arg1)
    1749 {
    1750         bitblock128_t tmp = simd128<128>::srli<((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh))>(arg1);
    1751         return simd_or(tmp, simd128<128>::sub(simd128<128>::constant<0>(), simd_and(simd128<128>::slli<((128-((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh)))-1)>(simd128<128>::constant<1>()), tmp)));
    1752 }
    1753 
    1754 //The total number of operations is 0
    1755 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::himask()
    1756 {
    1757         return simd128<2>::constant<(2)>();
    1758 }
    1759 
    1760 //The total number of operations is 0
    1761 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::himask()
    1762 {
    1763         return simd128<4>::constant<(12)>();
    1764 }
    1765 
    1766 //The total number of operations is 0
    1767 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::himask()
    1768 {
    1769         return simd128<8>::constant<(240)>();
    1770 }
    1771 
    1772 //The total number of operations is 0
    1773 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::himask()
    1774 {
    1775         return simd128<16>::constant<(65280)>();
    1776 }
    1777 
    1778 //The total number of operations is 0
    1779 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::himask()
    1780 {
    1781         return simd128<32>::constant<-65536>();
    1782 }
    1783 
    1784 //The total number of operations is 0
    1785 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::himask()
    1786 {
    1787         return _mm_set_epi32((int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0));
    1788 }
    1789 
    1790 //The total number of operations is 0
    1791 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::himask()
    1792 {
    1793         return _mm_set_epi32((int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0));
    1794 }
    1795 
    1796 //The total number of operations is 1
    1797 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::add(bitblock128_t arg1, bitblock128_t arg2)
    1798 {
    1799         return simd_xor(arg1, arg2);
    1800 }
    1801 
    1802 //The total number of operations is 10
    1803 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::add(bitblock128_t arg1, bitblock128_t arg2)
    1804 {
    1805         bitblock128_t tmp = simd_xor(arg1, arg2);
    1806         return simd128<1>::ifh(simd128<2>::himask(), simd_xor(tmp, simd128<128>::slli<1>(simd_and(arg1, arg2))), tmp);
    1807 }
    1808 
    1809 //The total number of operations is 6
    1810 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::add(bitblock128_t arg1, bitblock128_t arg2)
    1811 {
    1812         return simd128<1>::ifh(simd128<(8)>::himask(), simd128<(8)>::add(arg1, simd_and(simd128<(8)>::himask(), arg2)), simd128<(8)>::add(arg1, arg2));
    1813 }
    1814 
    1815 //The total number of operations is 1
    1816 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::add(bitblock128_t arg1, bitblock128_t arg2)
    1817 {
    1818         return _mm_add_epi8(arg1, arg2);
    1819 }
    1820 
    1821 //The total number of operations is 1
    1822 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::add(bitblock128_t arg1, bitblock128_t arg2)
    1823 {
    1824         return _mm_add_epi16(arg1, arg2);
    1825 }
    1826 
    1827 //The total number of operations is 1
    1828 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::add(bitblock128_t arg1, bitblock128_t arg2)
    1829 {
    1830         return _mm_add_epi32(arg1, arg2);
    1831 }
    1832 
    1833 //The total number of operations is 1
    1834 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::add(bitblock128_t arg1, bitblock128_t arg2)
    1835 {
    1836         return _mm_add_epi64(arg1, arg2);
    1837 }
    1838 
    1839 //The total number of operations is 11
    1840 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::add(bitblock128_t arg1, bitblock128_t arg2)
    1841 {
    1842         bitblock128_t partial = simd128<(64)>::add(arg1, arg2);
    1843         bitblock128_t carryMask = simd_or(simd_and(arg1, arg2), simd_andc(simd_xor(arg1, arg2), partial));
    1844         bitblock128_t carry = simd128<128>::slli<(64)>(simd128<(64)>::srli<(63)>(carryMask));
    1845         return simd128<(64)>::add(partial, carry);
    1846 }
    1847 
    1848 //The total number of operations is 9
    1849 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1)
    1850 {
    1851         return simd128<1>::ifh(simd128<2>::himask(), simd_and(arg1, simd128<128>::slli<1>(simd_not(arg1))), arg1);
    1852 }
    1853 
    1854 //The total number of operations is 19
    1855 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::abs(bitblock128_t arg1)
    1856 {
    1857         bitblock128_t gtMask = simd128<4>::gt(arg1, simd128<4>::constant<0>());
    1858         return simd128<1>::ifh(gtMask, arg1, simd128<4>::sub(gtMask, arg1));
    1859 }
    1860 
    1861 //The total number of operations is 1
    1862 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::abs(bitblock128_t arg1)
    1863 {
    1864         return _mm_abs_epi8(arg1);
    1865 }
    1866 
    1867 //The total number of operations is 1
    1868 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::abs(bitblock128_t arg1)
    1869 {
    1870         return _mm_abs_epi16(arg1);
    1871 }
    1872 
    1873 //The total number of operations is 1
    1874 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::abs(bitblock128_t arg1)
    1875 {
    1876         return _mm_abs_epi32(arg1);
    1877 }
    1878 
    1879 //The total number of operations is 13
    1880 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::abs(bitblock128_t arg1)
    1881 {
    1882         bitblock128_t eqMask = simd128<64>::eq(simd128<1>::ifh(simd128<64>::himask(), simd128<(32)>::abs(arg1), arg1), arg1);
    1883         return simd128<1>::ifh(eqMask, arg1, simd128<64>::sub(eqMask, arg1));
    1884 }
    1885 
    1886 //The total number of operations is 45
    1887 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::abs(bitblock128_t arg1)
    1888 {
    1889         bitblock128_t eqMask = simd128<128>::eq(simd128<1>::ifh(simd128<128>::himask(), simd128<(64)>::abs(arg1), arg1), arg1);
    1890         return simd128<1>::ifh(eqMask, arg1, simd128<128>::sub(eqMask, arg1));
    18911895}
    18921896
     
    27762780}
    27772781
    2778 //The total number of operations is 4
    2779 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::slli(bitblock128_t arg1)
    2780 {
    2781         return simd128<128>::slli<(sh*2)>(arg1);
    2782 }
    2783 
    2784 //The total number of operations is 4
    2785 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::slli(bitblock128_t arg1)
    2786 {
    2787         return simd128<128>::slli<(sh*4)>(arg1);
    2788 }
    2789 
    2790 //The total number of operations is 4
    2791 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::slli(bitblock128_t arg1)
    2792 {
    2793         return simd128<128>::slli<(sh*8)>(arg1);
    2794 }
    2795 
    2796 //The total number of operations is 4
    2797 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::slli(bitblock128_t arg1)
    2798 {
    2799         return simd128<128>::slli<(sh*16)>(arg1);
    2800 }
    2801 
    2802 //The total number of operations is 4
    2803 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::slli(bitblock128_t arg1)
    2804 {
    2805         return simd128<128>::slli<(sh*32)>(arg1);
    2806 }
    2807 
    2808 //The total number of operations is 4
    2809 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::slli(bitblock128_t arg1)
    2810 {
    2811         return simd128<128>::slli<(sh*64)>(arg1);
    2812 }
    2813 
    2814 //The total number of operations is 4
    2815 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::slli(bitblock128_t arg1)
    2816 {
    2817         return simd128<128>::slli<(sh*128)>(arg1);
    2818 }
    2819 
    28202782//The total number of operations is 5
    28212783template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4)
     
    29802942}
    29812943
     2944//The total number of operations is 4
     2945template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::slli(bitblock128_t arg1)
     2946{
     2947        return simd128<128>::slli<(sh*2)>(arg1);
     2948}
     2949
     2950//The total number of operations is 4
     2951template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::slli(bitblock128_t arg1)
     2952{
     2953        return simd128<128>::slli<(sh*4)>(arg1);
     2954}
     2955
     2956//The total number of operations is 4
     2957template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::slli(bitblock128_t arg1)
     2958{
     2959        return simd128<128>::slli<(sh*8)>(arg1);
     2960}
     2961
     2962//The total number of operations is 4
     2963template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::slli(bitblock128_t arg1)
     2964{
     2965        return simd128<128>::slli<(sh*16)>(arg1);
     2966}
     2967
     2968//The total number of operations is 4
     2969template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::slli(bitblock128_t arg1)
     2970{
     2971        return simd128<128>::slli<(sh*32)>(arg1);
     2972}
     2973
     2974//The total number of operations is 4
     2975template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::slli(bitblock128_t arg1)
     2976{
     2977        return simd128<128>::slli<(sh*64)>(arg1);
     2978}
     2979
     2980//The total number of operations is 4
     2981template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::slli(bitblock128_t arg1)
     2982{
     2983        return simd128<128>::slli<(sh*128)>(arg1);
     2984}
     2985
    29822986//The total number of operations is 13
    29832987template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8)
     
    30103014}
    30113015
     3016//The total number of operations is 11
     3017IDISA_ALWAYS_INLINE bitblock128_t bitblock128::sll(bitblock128_t arg1, bitblock128_t arg2)
     3018{
     3019        return simd128<128>::sll(arg1, arg2);
     3020}
     3021
    30123022//The total number of operations is 1
    30133023IDISA_ALWAYS_INLINE bitblock128_t bitblock128::load_unaligned(bitblock128_t* arg1)
    30143024{
    30153025        return _mm_loadu_si128((bitblock128_t*)(arg1));
     3026}
     3027
     3028//The total number of operations is 4
     3029template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t bitblock128::srli(bitblock128_t arg1)
     3030{
     3031        return simd128<128>::srli<sh>(arg1);
    30163032}
    30173033
     
    30343050}
    30353051
     3052//The total number of operations is 11
     3053IDISA_ALWAYS_INLINE bitblock128_t bitblock128::srl(bitblock128_t arg1, bitblock128_t arg2)
     3054{
     3055        return simd128<128>::srl(arg1, arg2);
     3056}
     3057
     3058//The total number of operations is 4
     3059template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t bitblock128::slli(bitblock128_t arg1)
     3060{
     3061        return simd128<128>::slli<sh>(arg1);
     3062}
     3063
    30363064//The total number of operations is 2
    30373065IDISA_ALWAYS_INLINE bool bitblock128::any(bitblock128_t arg1)
Note: See TracChangeset for help on using the changeset viewer.