Ignore:
Timestamp:
Oct 23, 2011, 9:43:33 AM (8 years ago)
Author:
cameron
Message:

Add bitblock::srl, sll, srli, and slli implementations.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/idisa_cpp/idisa_sse4_2.cpp

    r1573 r1580  
    2828        static IDISA_ALWAYS_INLINE bitblock128_t add_hl(bitblock128_t arg1);
    2929        static IDISA_ALWAYS_INLINE bitblock128_t srl(bitblock128_t arg1, bitblock128_t shift_mask);
     30        static IDISA_ALWAYS_INLINE bitblock128_t lomask();
    3031        static IDISA_ALWAYS_INLINE bitblock128_t umin(bitblock128_t arg1, bitblock128_t arg2);
    3132        template <uint64_t val> static IDISA_ALWAYS_INLINE bitblock128_t constant();
    3233        static IDISA_ALWAYS_INLINE bitblock128_t min(bitblock128_t arg1, bitblock128_t arg2);
    33         static IDISA_ALWAYS_INLINE bitblock128_t lomask();
    3434        static IDISA_ALWAYS_INLINE bitblock128_t umax(bitblock128_t arg1, bitblock128_t arg2);
    3535        static IDISA_ALWAYS_INLINE bitblock128_t abs(bitblock128_t arg1);
     
    8989{
    9090public:
     91        static IDISA_ALWAYS_INLINE bitblock128_t sll(bitblock128_t arg1, bitblock128_t arg2);
    9192        static IDISA_ALWAYS_INLINE bitblock128_t load_unaligned(bitblock128_t* arg1);
     93        template <uint64_t sh> static IDISA_ALWAYS_INLINE bitblock128_t srli(bitblock128_t arg1);
     94        static IDISA_ALWAYS_INLINE bitblock128_t srl(bitblock128_t arg1, bitblock128_t arg2);
    9295        static IDISA_ALWAYS_INLINE void store_aligned(bitblock128_t* arg1, bitblock128_t arg2);
    9396        static IDISA_ALWAYS_INLINE bool all(bitblock128_t arg1);
    9497        static IDISA_ALWAYS_INLINE bool any(bitblock128_t arg1);
    9598        static IDISA_ALWAYS_INLINE uint64_t popcount(bitblock128_t arg1);
     99        template <uint64_t sh> static IDISA_ALWAYS_INLINE bitblock128_t slli(bitblock128_t arg1);
    96100        static IDISA_ALWAYS_INLINE bitblock128_t load_aligned(bitblock128_t* arg1);
    97101        static IDISA_ALWAYS_INLINE void store_unaligned(bitblock128_t* arg1, bitblock128_t arg2);
     
    231235template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srl(bitblock128_t arg1, bitblock128_t shift_mask);
    232236template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srl(bitblock128_t arg1, bitblock128_t shift_mask);
    233 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::lomask();
    234 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::lomask();
    235 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::lomask();
    236 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::lomask();
    237 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::lomask();
    238 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::lomask();
    239 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lomask();
    240237template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::constant();
    241238template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::constant();
     
    254251template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::min(bitblock128_t arg1, bitblock128_t arg2);
    255252template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::min(bitblock128_t arg1, bitblock128_t arg2);
     253template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::lomask();
     254template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::lomask();
     255template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::lomask();
     256template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::lomask();
     257template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::lomask();
     258template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::lomask();
     259template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lomask();
    256260template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umin(bitblock128_t arg1, bitblock128_t arg2);
    257261template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::umin(bitblock128_t arg1, bitblock128_t arg2);
     
    262266template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::umin(bitblock128_t arg1, bitblock128_t arg2);
    263267template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::umin(bitblock128_t arg1, bitblock128_t arg2);
    264 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umax(bitblock128_t arg1, bitblock128_t arg2);
    265 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::umax(bitblock128_t arg1, bitblock128_t arg2);
    266 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::umax(bitblock128_t arg1, bitblock128_t arg2);
    267 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::umax(bitblock128_t arg1, bitblock128_t arg2);
    268 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::umax(bitblock128_t arg1, bitblock128_t arg2);
    269 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::umax(bitblock128_t arg1, bitblock128_t arg2);
    270 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::umax(bitblock128_t arg1, bitblock128_t arg2);
    271 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::umax(bitblock128_t arg1, bitblock128_t arg2);
     268template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1);
     269template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::abs(bitblock128_t arg1);
     270template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::abs(bitblock128_t arg1);
     271template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::abs(bitblock128_t arg1);
     272template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::abs(bitblock128_t arg1);
     273template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::abs(bitblock128_t arg1);
     274template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::abs(bitblock128_t arg1);
    272275template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::eq(bitblock128_t arg1, bitblock128_t arg2);
    273276template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::eq(bitblock128_t arg1, bitblock128_t arg2);
     
    300303template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::add(bitblock128_t arg1, bitblock128_t arg2);
    301304template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::add(bitblock128_t arg1, bitblock128_t arg2);
    302 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1);
    303 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::abs(bitblock128_t arg1);
    304 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::abs(bitblock128_t arg1);
    305 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::abs(bitblock128_t arg1);
    306 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::abs(bitblock128_t arg1);
    307 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::abs(bitblock128_t arg1);
    308 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::abs(bitblock128_t arg1);
     305template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umax(bitblock128_t arg1, bitblock128_t arg2);
     306template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::umax(bitblock128_t arg1, bitblock128_t arg2);
     307template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::umax(bitblock128_t arg1, bitblock128_t arg2);
     308template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::umax(bitblock128_t arg1, bitblock128_t arg2);
     309template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::umax(bitblock128_t arg1, bitblock128_t arg2);
     310template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::umax(bitblock128_t arg1, bitblock128_t arg2);
     311template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::umax(bitblock128_t arg1, bitblock128_t arg2);
     312template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::umax(bitblock128_t arg1, bitblock128_t arg2);
    309313template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::umin_hl(bitblock128_t arg1, bitblock128_t arg2);
    310314template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<4>::umin_hl(bitblock128_t arg1, bitblock128_t arg2);
     
    445449template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16);
    446450template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16);
    447 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::slli(bitblock128_t arg1);
    448 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::slli(bitblock128_t arg1);
    449 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::slli(bitblock128_t arg1);
    450 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::slli(bitblock128_t arg1);
    451 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::slli(bitblock128_t arg1);
    452 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::slli(bitblock128_t arg1);
    453 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::slli(bitblock128_t arg1);
    454451template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4);
    455452template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4);
     
    479476template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::dslli(bitblock128_t arg1, bitblock128_t arg2);
    480477template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::dslli(bitblock128_t arg1, bitblock128_t arg2);
     478template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::slli(bitblock128_t arg1);
     479template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::slli(bitblock128_t arg1);
     480template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::slli(bitblock128_t arg1);
     481template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::slli(bitblock128_t arg1);
     482template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::slli(bitblock128_t arg1);
     483template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::slli(bitblock128_t arg1);
     484template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::slli(bitblock128_t arg1);
    481485template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8);
    482486template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8);
     
    13861390
    13871391//The total number of operations is 0
    1388 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::lomask()
    1389 {
    1390         return simd128<2>::constant<(1)>();
    1391 }
    1392 
    1393 //The total number of operations is 0
    1394 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::lomask()
    1395 {
    1396         return simd128<4>::constant<(3)>();
    1397 }
    1398 
    1399 //The total number of operations is 0
    1400 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::lomask()
    1401 {
    1402         return simd128<8>::constant<(15)>();
    1403 }
    1404 
    1405 //The total number of operations is 0
    1406 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::lomask()
    1407 {
    1408         return simd128<16>::constant<(255)>();
    1409 }
    1410 
    1411 //The total number of operations is 0
    1412 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::lomask()
    1413 {
    1414         return simd128<32>::constant<(65535)>();
    1415 }
    1416 
    1417 //The total number of operations is 0
    1418 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::lomask()
    1419 {
    1420         return _mm_set_epi32((int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1));
    1421 }
    1422 
    1423 //The total number of operations is 0
    1424 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lomask()
    1425 {
    1426         return _mm_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1));
    1427 }
    1428 
    1429 //The total number of operations is 0
    14301392template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::constant()
    14311393{
     
    15281490}
    15291491
     1492//The total number of operations is 0
     1493template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::lomask()
     1494{
     1495        return simd128<2>::constant<(1)>();
     1496}
     1497
     1498//The total number of operations is 0
     1499template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::lomask()
     1500{
     1501        return simd128<4>::constant<(3)>();
     1502}
     1503
     1504//The total number of operations is 0
     1505template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::lomask()
     1506{
     1507        return simd128<8>::constant<(15)>();
     1508}
     1509
     1510//The total number of operations is 0
     1511template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::lomask()
     1512{
     1513        return simd128<16>::constant<(255)>();
     1514}
     1515
     1516//The total number of operations is 0
     1517template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::lomask()
     1518{
     1519        return simd128<32>::constant<(65535)>();
     1520}
     1521
     1522//The total number of operations is 0
     1523template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::lomask()
     1524{
     1525        return _mm_set_epi32((int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1));
     1526}
     1527
     1528//The total number of operations is 0
     1529template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lomask()
     1530{
     1531        return _mm_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1));
     1532}
     1533
    15301534//The total number of operations is 1
    15311535template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umin(bitblock128_t arg1, bitblock128_t arg2)
     
    15801584}
    15811585
     1586//The total number of operations is 9
     1587template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1)
     1588{
     1589        return simd128<1>::ifh(simd128<2>::himask(), simd_and(arg1, simd128<128>::slli<1>(simd_not(arg1))), arg1);
     1590}
     1591
     1592//The total number of operations is 19
     1593template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::abs(bitblock128_t arg1)
     1594{
     1595        bitblock128_t gtMask = simd128<4>::gt(arg1, simd128<4>::constant<0>());
     1596        return simd128<1>::ifh(gtMask, arg1, simd128<4>::sub(gtMask, arg1));
     1597}
     1598
     1599//The total number of operations is 1
     1600template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::abs(bitblock128_t arg1)
     1601{
     1602        return _mm_abs_epi8(arg1);
     1603}
     1604
     1605//The total number of operations is 1
     1606template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::abs(bitblock128_t arg1)
     1607{
     1608        return _mm_abs_epi16(arg1);
     1609}
     1610
     1611//The total number of operations is 1
     1612template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::abs(bitblock128_t arg1)
     1613{
     1614        return _mm_abs_epi32(arg1);
     1615}
     1616
     1617//The total number of operations is 5
     1618template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::abs(bitblock128_t arg1)
     1619{
     1620        bitblock128_t gtMask = simd128<64>::gt(arg1, simd128<64>::constant<0>());
     1621        return simd128<1>::ifh(gtMask, arg1, simd128<64>::sub(gtMask, arg1));
     1622}
     1623
     1624//The total number of operations is 33
     1625template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::abs(bitblock128_t arg1)
     1626{
     1627        bitblock128_t eqMask = simd128<128>::eq(simd128<1>::ifh(simd128<128>::himask(), simd128<(64)>::abs(arg1), arg1), arg1);
     1628        return simd128<1>::ifh(eqMask, arg1, simd128<128>::sub(eqMask, arg1));
     1629}
     1630
     1631//The total number of operations is 2
     1632template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::eq(bitblock128_t arg1, bitblock128_t arg2)
     1633{
     1634        return simd_not(simd_xor(arg1, arg2));
     1635}
     1636
     1637//The total number of operations is 8
     1638template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::eq(bitblock128_t arg1, bitblock128_t arg2)
     1639{
     1640        bitblock128_t tmpAns = simd128<(1)>::eq(arg1, arg2);
     1641        bitblock128_t loMask = simd_and(tmpAns, simd128<2>::srli<(1)>(tmpAns));
     1642        bitblock128_t hiMask = simd128<2>::slli<(1)>(loMask);
     1643        return simd_or(loMask, hiMask);
     1644}
     1645
     1646//The total number of operations is 9
     1647template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::eq(bitblock128_t arg1, bitblock128_t arg2)
     1648{
     1649        return simd_or(simd_and(simd128<(8)>::himask(), simd128<(8)>::eq(simd_and(simd128<(8)>::himask(), arg1), simd_and(simd128<(8)>::himask(), arg2))), simd_and(simd128<(8)>::lomask(), simd128<(8)>::eq(simd_and(simd128<(8)>::lomask(), arg1), simd_and(simd128<(8)>::lomask(), arg2))));
     1650}
     1651
     1652//The total number of operations is 1
     1653template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::eq(bitblock128_t arg1, bitblock128_t arg2)
     1654{
     1655        return _mm_cmpeq_epi8(arg1, arg2);
     1656}
     1657
     1658//The total number of operations is 1
     1659template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::eq(bitblock128_t arg1, bitblock128_t arg2)
     1660{
     1661        return _mm_cmpeq_epi16(arg1, arg2);
     1662}
     1663
     1664//The total number of operations is 1
     1665template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::eq(bitblock128_t arg1, bitblock128_t arg2)
     1666{
     1667        return _mm_cmpeq_epi32(arg1, arg2);
     1668}
     1669
     1670//The total number of operations is 1
     1671template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::eq(bitblock128_t arg1, bitblock128_t arg2)
     1672{
     1673        return _mm_cmpeq_epi64(arg1, arg2);
     1674}
     1675
     1676//The total number of operations is 11
     1677template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::eq(bitblock128_t arg1, bitblock128_t arg2)
     1678{
     1679        bitblock128_t tmpAns = simd128<(64)>::eq(arg1, arg2);
     1680        bitblock128_t loMask = simd_and(tmpAns, simd128<128>::srli<(64)>(tmpAns));
     1681        bitblock128_t hiMask = simd128<128>::slli<(64)>(loMask);
     1682        return simd_or(loMask, hiMask);
     1683}
     1684
     1685//The total number of operations is 4
     1686template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::srai(bitblock128_t arg1)
     1687{
     1688        return ((sh == 0) ? arg1 : simd_or(simd_and(simd128<2>::himask(), arg1), simd128<2>::srli<1>(arg1)));
     1689}
     1690
     1691//The total number of operations is 10
     1692template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::srai(bitblock128_t arg1)
     1693{
     1694        bitblock128_t tmp = simd128<4>::srli<((sh >= 4) ? (3) : ((sh < 0) ? 0 : sh))>(arg1);
     1695        return simd_or(tmp, simd128<4>::sub(simd128<4>::constant<0>(), simd_and(simd128<4>::constant<(1<<((4-((sh >= 4) ? (3) : ((sh < 0) ? 0 : sh)))-1))>(), tmp)));
     1696}
     1697
     1698//The total number of operations is 5
     1699template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::srai(bitblock128_t arg1)
     1700{
     1701        bitblock128_t tmp = simd128<8>::srli<((sh >= 8) ? (7) : ((sh < 0) ? 0 : sh))>(arg1);
     1702        return simd_or(tmp, simd128<8>::sub(simd128<8>::constant<0>(), simd_and(simd128<8>::constant<(1<<((8-((sh >= 8) ? (7) : ((sh < 0) ? 0 : sh)))-1))>(), tmp)));
     1703}
     1704
     1705//The total number of operations is 1
     1706template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::srai(bitblock128_t arg1)
     1707{
     1708        return _mm_srai_epi16(arg1, (int32_t)(sh));
     1709}
     1710
     1711//The total number of operations is 1
     1712template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::srai(bitblock128_t arg1)
     1713{
     1714        return _mm_srai_epi32(arg1, (int32_t)(sh));
     1715}
     1716
     1717//The total number of operations is 5
     1718template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srai(bitblock128_t arg1)
     1719{
     1720        bitblock128_t tmp = simd128<64>::srli<((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh))>(arg1);
     1721        return simd_or(tmp, simd128<64>::sub(simd128<64>::constant<0>(), simd_and(simd128<64>::slli<((64-((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh)))-1)>(simd128<64>::constant<1>()), tmp)));
     1722}
     1723
     1724//The total number of operations is 21
     1725template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srai(bitblock128_t arg1)
     1726{
     1727        bitblock128_t tmp = simd128<128>::srli<((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh))>(arg1);
     1728        return simd_or(tmp, simd128<128>::sub(simd128<128>::constant<0>(), simd_and(simd128<128>::slli<((128-((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh)))-1)>(simd128<128>::constant<1>()), tmp)));
     1729}
     1730
     1731//The total number of operations is 0
     1732template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::himask()
     1733{
     1734        return simd128<2>::constant<(2)>();
     1735}
     1736
     1737//The total number of operations is 0
     1738template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::himask()
     1739{
     1740        return simd128<4>::constant<(12)>();
     1741}
     1742
     1743//The total number of operations is 0
     1744template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::himask()
     1745{
     1746        return simd128<8>::constant<(240)>();
     1747}
     1748
     1749//The total number of operations is 0
     1750template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::himask()
     1751{
     1752        return simd128<16>::constant<(65280)>();
     1753}
     1754
     1755//The total number of operations is 0
     1756template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::himask()
     1757{
     1758        return simd128<32>::constant<-65536>();
     1759}
     1760
     1761//The total number of operations is 0
     1762template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::himask()
     1763{
     1764        return _mm_set_epi32((int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0));
     1765}
     1766
     1767//The total number of operations is 0
     1768template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::himask()
     1769{
     1770        return _mm_set_epi32((int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0));
     1771}
     1772
     1773//The total number of operations is 1
     1774template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::add(bitblock128_t arg1, bitblock128_t arg2)
     1775{
     1776        return simd_xor(arg1, arg2);
     1777}
     1778
     1779//The total number of operations is 10
     1780template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::add(bitblock128_t arg1, bitblock128_t arg2)
     1781{
     1782        bitblock128_t tmp = simd_xor(arg1, arg2);
     1783        return simd128<1>::ifh(simd128<2>::himask(), simd_xor(tmp, simd128<128>::slli<1>(simd_and(arg1, arg2))), tmp);
     1784}
     1785
     1786//The total number of operations is 6
     1787template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::add(bitblock128_t arg1, bitblock128_t arg2)
     1788{
     1789        return simd128<1>::ifh(simd128<(8)>::himask(), simd128<(8)>::add(arg1, simd_and(simd128<(8)>::himask(), arg2)), simd128<(8)>::add(arg1, arg2));
     1790}
     1791
     1792//The total number of operations is 1
     1793template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::add(bitblock128_t arg1, bitblock128_t arg2)
     1794{
     1795        return _mm_add_epi8(arg1, arg2);
     1796}
     1797
     1798//The total number of operations is 1
     1799template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::add(bitblock128_t arg1, bitblock128_t arg2)
     1800{
     1801        return _mm_add_epi16(arg1, arg2);
     1802}
     1803
     1804//The total number of operations is 1
     1805template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::add(bitblock128_t arg1, bitblock128_t arg2)
     1806{
     1807        return _mm_add_epi32(arg1, arg2);
     1808}
     1809
     1810//The total number of operations is 1
     1811template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::add(bitblock128_t arg1, bitblock128_t arg2)
     1812{
     1813        return _mm_add_epi64(arg1, arg2);
     1814}
     1815
     1816//The total number of operations is 11
     1817template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::add(bitblock128_t arg1, bitblock128_t arg2)
     1818{
     1819        bitblock128_t partial = simd128<(64)>::add(arg1, arg2);
     1820        bitblock128_t carryMask = simd_or(simd_and(arg1, arg2), simd_andc(simd_xor(arg1, arg2), partial));
     1821        bitblock128_t carry = simd128<128>::slli<(64)>(simd128<(64)>::srli<(63)>(carryMask));
     1822        return simd128<(64)>::add(partial, carry);
     1823}
     1824
    15821825//The total number of operations is 1
    15831826template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umax(bitblock128_t arg1, bitblock128_t arg2)
     
    16301873        bitblock128_t eqMask2 = simd128<128>::srli<(64)>(simd128<(64)>::eq(tmpAns, arg2));
    16311874        return simd128<1>::ifh(simd128<128>::himask(), tmpAns, simd128<1>::ifh(eqMask1, simd128<1>::ifh(eqMask2, tmpAns, arg1), arg2));
    1632 }
    1633 
    1634 //The total number of operations is 2
    1635 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::eq(bitblock128_t arg1, bitblock128_t arg2)
    1636 {
    1637         return simd_not(simd_xor(arg1, arg2));
    1638 }
    1639 
    1640 //The total number of operations is 8
    1641 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::eq(bitblock128_t arg1, bitblock128_t arg2)
    1642 {
    1643         bitblock128_t tmpAns = simd128<(1)>::eq(arg1, arg2);
    1644         bitblock128_t loMask = simd_and(tmpAns, simd128<2>::srli<(1)>(tmpAns));
    1645         bitblock128_t hiMask = simd128<2>::slli<(1)>(loMask);
    1646         return simd_or(loMask, hiMask);
    1647 }
    1648 
    1649 //The total number of operations is 9
    1650 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::eq(bitblock128_t arg1, bitblock128_t arg2)
    1651 {
    1652         return simd_or(simd_and(simd128<(8)>::himask(), simd128<(8)>::eq(simd_and(simd128<(8)>::himask(), arg1), simd_and(simd128<(8)>::himask(), arg2))), simd_and(simd128<(8)>::lomask(), simd128<(8)>::eq(simd_and(simd128<(8)>::lomask(), arg1), simd_and(simd128<(8)>::lomask(), arg2))));
    1653 }
    1654 
    1655 //The total number of operations is 1
    1656 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::eq(bitblock128_t arg1, bitblock128_t arg2)
    1657 {
    1658         return _mm_cmpeq_epi8(arg1, arg2);
    1659 }
    1660 
    1661 //The total number of operations is 1
    1662 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::eq(bitblock128_t arg1, bitblock128_t arg2)
    1663 {
    1664         return _mm_cmpeq_epi16(arg1, arg2);
    1665 }
    1666 
    1667 //The total number of operations is 1
    1668 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::eq(bitblock128_t arg1, bitblock128_t arg2)
    1669 {
    1670         return _mm_cmpeq_epi32(arg1, arg2);
    1671 }
    1672 
    1673 //The total number of operations is 1
    1674 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::eq(bitblock128_t arg1, bitblock128_t arg2)
    1675 {
    1676         return _mm_cmpeq_epi64(arg1, arg2);
    1677 }
    1678 
    1679 //The total number of operations is 11
    1680 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::eq(bitblock128_t arg1, bitblock128_t arg2)
    1681 {
    1682         bitblock128_t tmpAns = simd128<(64)>::eq(arg1, arg2);
    1683         bitblock128_t loMask = simd_and(tmpAns, simd128<128>::srli<(64)>(tmpAns));
    1684         bitblock128_t hiMask = simd128<128>::slli<(64)>(loMask);
    1685         return simd_or(loMask, hiMask);
    1686 }
    1687 
    1688 //The total number of operations is 4
    1689 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::srai(bitblock128_t arg1)
    1690 {
    1691         return ((sh == 0) ? arg1 : simd_or(simd_and(simd128<2>::himask(), arg1), simd128<2>::srli<1>(arg1)));
    1692 }
    1693 
    1694 //The total number of operations is 10
    1695 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::srai(bitblock128_t arg1)
    1696 {
    1697         bitblock128_t tmp = simd128<4>::srli<((sh >= 4) ? (3) : ((sh < 0) ? 0 : sh))>(arg1);
    1698         return simd_or(tmp, simd128<4>::sub(simd128<4>::constant<0>(), simd_and(simd128<4>::constant<(1<<((4-((sh >= 4) ? (3) : ((sh < 0) ? 0 : sh)))-1))>(), tmp)));
    1699 }
    1700 
    1701 //The total number of operations is 5
    1702 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::srai(bitblock128_t arg1)
    1703 {
    1704         bitblock128_t tmp = simd128<8>::srli<((sh >= 8) ? (7) : ((sh < 0) ? 0 : sh))>(arg1);
    1705         return simd_or(tmp, simd128<8>::sub(simd128<8>::constant<0>(), simd_and(simd128<8>::constant<(1<<((8-((sh >= 8) ? (7) : ((sh < 0) ? 0 : sh)))-1))>(), tmp)));
    1706 }
    1707 
    1708 //The total number of operations is 1
    1709 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::srai(bitblock128_t arg1)
    1710 {
    1711         return _mm_srai_epi16(arg1, (int32_t)(sh));
    1712 }
    1713 
    1714 //The total number of operations is 1
    1715 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::srai(bitblock128_t arg1)
    1716 {
    1717         return _mm_srai_epi32(arg1, (int32_t)(sh));
    1718 }
    1719 
    1720 //The total number of operations is 5
    1721 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srai(bitblock128_t arg1)
    1722 {
    1723         bitblock128_t tmp = simd128<64>::srli<((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh))>(arg1);
    1724         return simd_or(tmp, simd128<64>::sub(simd128<64>::constant<0>(), simd_and(simd128<64>::slli<((64-((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh)))-1)>(simd128<64>::constant<1>()), tmp)));
    1725 }
    1726 
    1727 //The total number of operations is 21
    1728 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srai(bitblock128_t arg1)
    1729 {
    1730         bitblock128_t tmp = simd128<128>::srli<((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh))>(arg1);
    1731         return simd_or(tmp, simd128<128>::sub(simd128<128>::constant<0>(), simd_and(simd128<128>::slli<((128-((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh)))-1)>(simd128<128>::constant<1>()), tmp)));
    1732 }
    1733 
    1734 //The total number of operations is 0
    1735 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::himask()
    1736 {
    1737         return simd128<2>::constant<(2)>();
    1738 }
    1739 
    1740 //The total number of operations is 0
    1741 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::himask()
    1742 {
    1743         return simd128<4>::constant<(12)>();
    1744 }
    1745 
    1746 //The total number of operations is 0
    1747 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::himask()
    1748 {
    1749         return simd128<8>::constant<(240)>();
    1750 }
    1751 
    1752 //The total number of operations is 0
    1753 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::himask()
    1754 {
    1755         return simd128<16>::constant<(65280)>();
    1756 }
    1757 
    1758 //The total number of operations is 0
    1759 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::himask()
    1760 {
    1761         return simd128<32>::constant<-65536>();
    1762 }
    1763 
    1764 //The total number of operations is 0
    1765 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::himask()
    1766 {
    1767         return _mm_set_epi32((int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0));
    1768 }
    1769 
    1770 //The total number of operations is 0
    1771 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::himask()
    1772 {
    1773         return _mm_set_epi32((int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0));
    1774 }
    1775 
    1776 //The total number of operations is 1
    1777 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::add(bitblock128_t arg1, bitblock128_t arg2)
    1778 {
    1779         return simd_xor(arg1, arg2);
    1780 }
    1781 
    1782 //The total number of operations is 10
    1783 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::add(bitblock128_t arg1, bitblock128_t arg2)
    1784 {
    1785         bitblock128_t tmp = simd_xor(arg1, arg2);
    1786         return simd128<1>::ifh(simd128<2>::himask(), simd_xor(tmp, simd128<128>::slli<1>(simd_and(arg1, arg2))), tmp);
    1787 }
    1788 
    1789 //The total number of operations is 6
    1790 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::add(bitblock128_t arg1, bitblock128_t arg2)
    1791 {
    1792         return simd128<1>::ifh(simd128<(8)>::himask(), simd128<(8)>::add(arg1, simd_and(simd128<(8)>::himask(), arg2)), simd128<(8)>::add(arg1, arg2));
    1793 }
    1794 
    1795 //The total number of operations is 1
    1796 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::add(bitblock128_t arg1, bitblock128_t arg2)
    1797 {
    1798         return _mm_add_epi8(arg1, arg2);
    1799 }
    1800 
    1801 //The total number of operations is 1
    1802 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::add(bitblock128_t arg1, bitblock128_t arg2)
    1803 {
    1804         return _mm_add_epi16(arg1, arg2);
    1805 }
    1806 
    1807 //The total number of operations is 1
    1808 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::add(bitblock128_t arg1, bitblock128_t arg2)
    1809 {
    1810         return _mm_add_epi32(arg1, arg2);
    1811 }
    1812 
    1813 //The total number of operations is 1
    1814 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::add(bitblock128_t arg1, bitblock128_t arg2)
    1815 {
    1816         return _mm_add_epi64(arg1, arg2);
    1817 }
    1818 
    1819 //The total number of operations is 11
    1820 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::add(bitblock128_t arg1, bitblock128_t arg2)
    1821 {
    1822         bitblock128_t partial = simd128<(64)>::add(arg1, arg2);
    1823         bitblock128_t carryMask = simd_or(simd_and(arg1, arg2), simd_andc(simd_xor(arg1, arg2), partial));
    1824         bitblock128_t carry = simd128<128>::slli<(64)>(simd128<(64)>::srli<(63)>(carryMask));
    1825         return simd128<(64)>::add(partial, carry);
    1826 }
    1827 
    1828 //The total number of operations is 9
    1829 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1)
    1830 {
    1831         return simd128<1>::ifh(simd128<2>::himask(), simd_and(arg1, simd128<128>::slli<1>(simd_not(arg1))), arg1);
    1832 }
    1833 
    1834 //The total number of operations is 19
    1835 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::abs(bitblock128_t arg1)
    1836 {
    1837         bitblock128_t gtMask = simd128<4>::gt(arg1, simd128<4>::constant<0>());
    1838         return simd128<1>::ifh(gtMask, arg1, simd128<4>::sub(gtMask, arg1));
    1839 }
    1840 
    1841 //The total number of operations is 1
    1842 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::abs(bitblock128_t arg1)
    1843 {
    1844         return _mm_abs_epi8(arg1);
    1845 }
    1846 
    1847 //The total number of operations is 1
    1848 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::abs(bitblock128_t arg1)
    1849 {
    1850         return _mm_abs_epi16(arg1);
    1851 }
    1852 
    1853 //The total number of operations is 1
    1854 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::abs(bitblock128_t arg1)
    1855 {
    1856         return _mm_abs_epi32(arg1);
    1857 }
    1858 
    1859 //The total number of operations is 5
    1860 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::abs(bitblock128_t arg1)
    1861 {
    1862         bitblock128_t gtMask = simd128<64>::gt(arg1, simd128<64>::constant<0>());
    1863         return simd128<1>::ifh(gtMask, arg1, simd128<64>::sub(gtMask, arg1));
    1864 }
    1865 
    1866 //The total number of operations is 33
    1867 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::abs(bitblock128_t arg1)
    1868 {
    1869         bitblock128_t eqMask = simd128<128>::eq(simd128<1>::ifh(simd128<128>::himask(), simd128<(64)>::abs(arg1), arg1), arg1);
    1870         return simd128<1>::ifh(eqMask, arg1, simd128<128>::sub(eqMask, arg1));
    18711875}
    18721876
     
    27552759}
    27562760
    2757 //The total number of operations is 4
    2758 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::slli(bitblock128_t arg1)
    2759 {
    2760         return simd128<128>::slli<(sh*2)>(arg1);
    2761 }
    2762 
    2763 //The total number of operations is 4
    2764 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::slli(bitblock128_t arg1)
    2765 {
    2766         return simd128<128>::slli<(sh*4)>(arg1);
    2767 }
    2768 
    2769 //The total number of operations is 4
    2770 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::slli(bitblock128_t arg1)
    2771 {
    2772         return simd128<128>::slli<(sh*8)>(arg1);
    2773 }
    2774 
    2775 //The total number of operations is 4
    2776 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::slli(bitblock128_t arg1)
    2777 {
    2778         return simd128<128>::slli<(sh*16)>(arg1);
    2779 }
    2780 
    2781 //The total number of operations is 4
    2782 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::slli(bitblock128_t arg1)
    2783 {
    2784         return simd128<128>::slli<(sh*32)>(arg1);
    2785 }
    2786 
    2787 //The total number of operations is 4
    2788 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::slli(bitblock128_t arg1)
    2789 {
    2790         return simd128<128>::slli<(sh*64)>(arg1);
    2791 }
    2792 
    2793 //The total number of operations is 4
    2794 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::slli(bitblock128_t arg1)
    2795 {
    2796         return simd128<128>::slli<(sh*128)>(arg1);
    2797 }
    2798 
    27992761//The total number of operations is 5
    28002762template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4)
     
    29592921}
    29602922
     2923//The total number of operations is 4
     2924template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::slli(bitblock128_t arg1)
     2925{
     2926        return simd128<128>::slli<(sh*2)>(arg1);
     2927}
     2928
     2929//The total number of operations is 4
     2930template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::slli(bitblock128_t arg1)
     2931{
     2932        return simd128<128>::slli<(sh*4)>(arg1);
     2933}
     2934
     2935//The total number of operations is 4
     2936template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::slli(bitblock128_t arg1)
     2937{
     2938        return simd128<128>::slli<(sh*8)>(arg1);
     2939}
     2940
     2941//The total number of operations is 4
     2942template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::slli(bitblock128_t arg1)
     2943{
     2944        return simd128<128>::slli<(sh*16)>(arg1);
     2945}
     2946
     2947//The total number of operations is 4
     2948template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::slli(bitblock128_t arg1)
     2949{
     2950        return simd128<128>::slli<(sh*32)>(arg1);
     2951}
     2952
     2953//The total number of operations is 4
     2954template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::slli(bitblock128_t arg1)
     2955{
     2956        return simd128<128>::slli<(sh*64)>(arg1);
     2957}
     2958
     2959//The total number of operations is 4
     2960template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::slli(bitblock128_t arg1)
     2961{
     2962        return simd128<128>::slli<(sh*128)>(arg1);
     2963}
     2964
    29612965//The total number of operations is 13
    29622966template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8)
     
    29892993}
    29902994
     2995//The total number of operations is 11
     2996IDISA_ALWAYS_INLINE bitblock128_t bitblock128::sll(bitblock128_t arg1, bitblock128_t arg2)
     2997{
     2998        return simd128<128>::sll(arg1, arg2);
     2999}
     3000
    29913001//The total number of operations is 1
    29923002IDISA_ALWAYS_INLINE bitblock128_t bitblock128::load_unaligned(bitblock128_t* arg1)
    29933003{
    29943004        return _mm_loadu_si128((bitblock128_t*)(arg1));
     3005}
     3006
     3007//The total number of operations is 4
     3008template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t bitblock128::srli(bitblock128_t arg1)
     3009{
     3010        return simd128<128>::srli<sh>(arg1);
    29953011}
    29963012
     
    30133029}
    30143030
     3031//The total number of operations is 11
     3032IDISA_ALWAYS_INLINE bitblock128_t bitblock128::srl(bitblock128_t arg1, bitblock128_t arg2)
     3033{
     3034        return simd128<128>::srl(arg1, arg2);
     3035}
     3036
     3037//The total number of operations is 4
     3038template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t bitblock128::slli(bitblock128_t arg1)
     3039{
     3040        return simd128<128>::slli<sh>(arg1);
     3041}
     3042
    30153043//The total number of operations is 2
    30163044IDISA_ALWAYS_INLINE bool bitblock128::any(bitblock128_t arg1)
Note: See TracChangeset for help on using the changeset viewer.