Changeset 1580 for trunk/lib/idisa_cpp/idisa_sse4_1.cpp
 Timestamp:
 Oct 23, 2011, 9:43:33 AM (8 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

trunk/lib/idisa_cpp/idisa_sse4_1.cpp
r1573 r1580 28 28 static IDISA_ALWAYS_INLINE bitblock128_t add_hl(bitblock128_t arg1); 29 29 static IDISA_ALWAYS_INLINE bitblock128_t srl(bitblock128_t arg1, bitblock128_t shift_mask); 30 static IDISA_ALWAYS_INLINE bitblock128_t lomask(); 30 31 static IDISA_ALWAYS_INLINE bitblock128_t umin(bitblock128_t arg1, bitblock128_t arg2); 31 32 template <uint64_t val> static IDISA_ALWAYS_INLINE bitblock128_t constant(); 32 33 static IDISA_ALWAYS_INLINE bitblock128_t min(bitblock128_t arg1, bitblock128_t arg2); 33 static IDISA_ALWAYS_INLINE bitblock128_t lomask();34 34 static IDISA_ALWAYS_INLINE bitblock128_t umax(bitblock128_t arg1, bitblock128_t arg2); 35 35 static IDISA_ALWAYS_INLINE bitblock128_t abs(bitblock128_t arg1); … … 89 89 { 90 90 public: 91 static IDISA_ALWAYS_INLINE bitblock128_t sll(bitblock128_t arg1, bitblock128_t arg2); 91 92 static IDISA_ALWAYS_INLINE bitblock128_t load_unaligned(bitblock128_t* arg1); 93 template <uint64_t sh> static IDISA_ALWAYS_INLINE bitblock128_t srli(bitblock128_t arg1); 94 static IDISA_ALWAYS_INLINE bitblock128_t srl(bitblock128_t arg1, bitblock128_t arg2); 92 95 static IDISA_ALWAYS_INLINE void store_aligned(bitblock128_t* arg1, bitblock128_t arg2); 93 96 static IDISA_ALWAYS_INLINE bool all(bitblock128_t arg1); 94 97 static IDISA_ALWAYS_INLINE bool any(bitblock128_t arg1); 95 98 static IDISA_ALWAYS_INLINE uint64_t popcount(bitblock128_t arg1); 99 template <uint64_t sh> static IDISA_ALWAYS_INLINE bitblock128_t slli(bitblock128_t arg1); 96 100 static IDISA_ALWAYS_INLINE bitblock128_t load_aligned(bitblock128_t* arg1); 97 101 static IDISA_ALWAYS_INLINE void store_unaligned(bitblock128_t* arg1, bitblock128_t arg2); … … 231 235 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srl(bitblock128_t arg1, bitblock128_t shift_mask); 232 236 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srl(bitblock128_t arg1, bitblock128_t shift_mask); 233 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::lomask();234 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::lomask();235 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::lomask();236 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::lomask();237 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::lomask();238 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::lomask();239 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lomask();240 237 template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::constant(); 241 238 template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::constant(); … … 254 251 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::min(bitblock128_t arg1, bitblock128_t arg2); 255 252 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::min(bitblock128_t arg1, bitblock128_t arg2); 253 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::lomask(); 254 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::lomask(); 255 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::lomask(); 256 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::lomask(); 257 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::lomask(); 258 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::lomask(); 259 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lomask(); 256 260 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umin(bitblock128_t arg1, bitblock128_t arg2); 257 261 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::umin(bitblock128_t arg1, bitblock128_t arg2); … … 262 266 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::umin(bitblock128_t arg1, bitblock128_t arg2); 263 267 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::umin(bitblock128_t arg1, bitblock128_t arg2); 264 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umax(bitblock128_t arg1, bitblock128_t arg2); 265 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::umax(bitblock128_t arg1, bitblock128_t arg2); 266 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::umax(bitblock128_t arg1, bitblock128_t arg2); 267 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::umax(bitblock128_t arg1, bitblock128_t arg2); 268 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::umax(bitblock128_t arg1, bitblock128_t arg2); 269 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::umax(bitblock128_t arg1, bitblock128_t arg2); 270 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::umax(bitblock128_t arg1, bitblock128_t arg2); 271 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::umax(bitblock128_t arg1, bitblock128_t arg2); 268 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1); 269 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::abs(bitblock128_t arg1); 270 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::abs(bitblock128_t arg1); 271 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::abs(bitblock128_t arg1); 272 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::abs(bitblock128_t arg1); 273 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::abs(bitblock128_t arg1); 274 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::abs(bitblock128_t arg1); 272 275 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::eq(bitblock128_t arg1, bitblock128_t arg2); 273 276 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::eq(bitblock128_t arg1, bitblock128_t arg2); … … 300 303 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::add(bitblock128_t arg1, bitblock128_t arg2); 301 304 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::add(bitblock128_t arg1, bitblock128_t arg2); 302 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1); 303 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::abs(bitblock128_t arg1); 304 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::abs(bitblock128_t arg1); 305 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::abs(bitblock128_t arg1); 306 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::abs(bitblock128_t arg1); 307 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::abs(bitblock128_t arg1); 308 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::abs(bitblock128_t arg1); 305 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umax(bitblock128_t arg1, bitblock128_t arg2); 306 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::umax(bitblock128_t arg1, bitblock128_t arg2); 307 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::umax(bitblock128_t arg1, bitblock128_t arg2); 308 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::umax(bitblock128_t arg1, bitblock128_t arg2); 309 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::umax(bitblock128_t arg1, bitblock128_t arg2); 310 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::umax(bitblock128_t arg1, bitblock128_t arg2); 311 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::umax(bitblock128_t arg1, bitblock128_t arg2); 312 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::umax(bitblock128_t arg1, bitblock128_t arg2); 309 313 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::umin_hl(bitblock128_t arg1, bitblock128_t arg2); 310 314 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<4>::umin_hl(bitblock128_t arg1, bitblock128_t arg2); … … 445 449 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16); 446 450 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16); 447 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::slli(bitblock128_t arg1);448 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::slli(bitblock128_t arg1);449 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::slli(bitblock128_t arg1);450 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::slli(bitblock128_t arg1);451 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::slli(bitblock128_t arg1);452 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::slli(bitblock128_t arg1);453 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::slli(bitblock128_t arg1);454 451 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4); 455 452 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4); … … 479 476 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::dslli(bitblock128_t arg1, bitblock128_t arg2); 480 477 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::dslli(bitblock128_t arg1, bitblock128_t arg2); 478 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::slli(bitblock128_t arg1); 479 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::slli(bitblock128_t arg1); 480 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::slli(bitblock128_t arg1); 481 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::slli(bitblock128_t arg1); 482 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::slli(bitblock128_t arg1); 483 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::slli(bitblock128_t arg1); 484 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::slli(bitblock128_t arg1); 481 485 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8); 482 486 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8); … … 1396 1400 1397 1401 //The total number of operations is 0 1398 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::lomask()1399 {1400 return simd128<2>::constant<(1)>();1401 }1402 1403 //The total number of operations is 01404 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::lomask()1405 {1406 return simd128<4>::constant<(3)>();1407 }1408 1409 //The total number of operations is 01410 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::lomask()1411 {1412 return simd128<8>::constant<(15)>();1413 }1414 1415 //The total number of operations is 01416 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::lomask()1417 {1418 return simd128<16>::constant<(255)>();1419 }1420 1421 //The total number of operations is 01422 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::lomask()1423 {1424 return simd128<32>::constant<(65535)>();1425 }1426 1427 //The total number of operations is 01428 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::lomask()1429 {1430 return _mm_set_epi32((int32_t)(0), (int32_t)(1), (int32_t)(0), (int32_t)(1));1431 }1432 1433 //The total number of operations is 01434 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lomask()1435 {1436 return _mm_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(1), (int32_t)(1));1437 }1438 1439 //The total number of operations is 01440 1402 template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::constant() 1441 1403 { … … 1542 1504 } 1543 1505 1506 //The total number of operations is 0 1507 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::lomask() 1508 { 1509 return simd128<2>::constant<(1)>(); 1510 } 1511 1512 //The total number of operations is 0 1513 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::lomask() 1514 { 1515 return simd128<4>::constant<(3)>(); 1516 } 1517 1518 //The total number of operations is 0 1519 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::lomask() 1520 { 1521 return simd128<8>::constant<(15)>(); 1522 } 1523 1524 //The total number of operations is 0 1525 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::lomask() 1526 { 1527 return simd128<16>::constant<(255)>(); 1528 } 1529 1530 //The total number of operations is 0 1531 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::lomask() 1532 { 1533 return simd128<32>::constant<(65535)>(); 1534 } 1535 1536 //The total number of operations is 0 1537 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::lomask() 1538 { 1539 return _mm_set_epi32((int32_t)(0), (int32_t)(1), (int32_t)(0), (int32_t)(1)); 1540 } 1541 1542 //The total number of operations is 0 1543 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lomask() 1544 { 1545 return _mm_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(1), (int32_t)(1)); 1546 } 1547 1544 1548 //The total number of operations is 1 1545 1549 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umin(bitblock128_t arg1, bitblock128_t arg2) … … 1596 1600 } 1597 1601 1602 //The total number of operations is 9 1603 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1) 1604 { 1605 return simd128<1>::ifh(simd128<2>::himask(), simd_and(arg1, simd128<128>::slli<1>(simd_not(arg1))), arg1); 1606 } 1607 1608 //The total number of operations is 19 1609 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::abs(bitblock128_t arg1) 1610 { 1611 bitblock128_t gtMask = simd128<4>::gt(arg1, simd128<4>::constant<0>()); 1612 return simd128<1>::ifh(gtMask, arg1, simd128<4>::sub(gtMask, arg1)); 1613 } 1614 1615 //The total number of operations is 1 1616 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::abs(bitblock128_t arg1) 1617 { 1618 return _mm_abs_epi8(arg1); 1619 } 1620 1621 //The total number of operations is 1 1622 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::abs(bitblock128_t arg1) 1623 { 1624 return _mm_abs_epi16(arg1); 1625 } 1626 1627 //The total number of operations is 1 1628 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::abs(bitblock128_t arg1) 1629 { 1630 return _mm_abs_epi32(arg1); 1631 } 1632 1633 //The total number of operations is 9 1634 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::abs(bitblock128_t arg1) 1635 { 1636 bitblock128_t eqMask = simd128<64>::eq(simd128<1>::ifh(simd128<64>::himask(), simd128<(32)>::abs(arg1), arg1), arg1); 1637 return simd128<1>::ifh(eqMask, arg1, simd128<64>::sub(eqMask, arg1)); 1638 } 1639 1640 //The total number of operations is 37 1641 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::abs(bitblock128_t arg1) 1642 { 1643 bitblock128_t eqMask = simd128<128>::eq(simd128<1>::ifh(simd128<128>::himask(), simd128<(64)>::abs(arg1), arg1), arg1); 1644 return simd128<1>::ifh(eqMask, arg1, simd128<128>::sub(eqMask, arg1)); 1645 } 1646 1647 //The total number of operations is 2 1648 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::eq(bitblock128_t arg1, bitblock128_t arg2) 1649 { 1650 return simd_not(simd_xor(arg1, arg2)); 1651 } 1652 1653 //The total number of operations is 8 1654 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::eq(bitblock128_t arg1, bitblock128_t arg2) 1655 { 1656 bitblock128_t tmpAns = simd128<(1)>::eq(arg1, arg2); 1657 bitblock128_t loMask = simd_and(tmpAns, simd128<2>::srli<(1)>(tmpAns)); 1658 bitblock128_t hiMask = simd128<2>::slli<(1)>(loMask); 1659 return simd_or(loMask, hiMask); 1660 } 1661 1662 //The total number of operations is 9 1663 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::eq(bitblock128_t arg1, bitblock128_t arg2) 1664 { 1665 return simd_or(simd_and(simd128<(8)>::himask(), simd128<(8)>::eq(simd_and(simd128<(8)>::himask(), arg1), simd_and(simd128<(8)>::himask(), arg2))), simd_and(simd128<(8)>::lomask(), simd128<(8)>::eq(simd_and(simd128<(8)>::lomask(), arg1), simd_and(simd128<(8)>::lomask(), arg2)))); 1666 } 1667 1668 //The total number of operations is 1 1669 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::eq(bitblock128_t arg1, bitblock128_t arg2) 1670 { 1671 return _mm_cmpeq_epi8(arg1, arg2); 1672 } 1673 1674 //The total number of operations is 1 1675 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::eq(bitblock128_t arg1, bitblock128_t arg2) 1676 { 1677 return _mm_cmpeq_epi16(arg1, arg2); 1678 } 1679 1680 //The total number of operations is 1 1681 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::eq(bitblock128_t arg1, bitblock128_t arg2) 1682 { 1683 return _mm_cmpeq_epi32(arg1, arg2); 1684 } 1685 1686 //The total number of operations is 1 1687 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::eq(bitblock128_t arg1, bitblock128_t arg2) 1688 { 1689 return _mm_cmpeq_epi64(arg1, arg2); 1690 } 1691 1692 //The total number of operations is 11 1693 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::eq(bitblock128_t arg1, bitblock128_t arg2) 1694 { 1695 bitblock128_t tmpAns = simd128<(64)>::eq(arg1, arg2); 1696 bitblock128_t loMask = simd_and(tmpAns, simd128<128>::srli<(64)>(tmpAns)); 1697 bitblock128_t hiMask = simd128<128>::slli<(64)>(loMask); 1698 return simd_or(loMask, hiMask); 1699 } 1700 1701 //The total number of operations is 4 1702 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::srai(bitblock128_t arg1) 1703 { 1704 return ((sh == 0) ? arg1 : simd_or(simd_and(simd128<2>::himask(), arg1), simd128<2>::srli<1>(arg1))); 1705 } 1706 1707 //The total number of operations is 10 1708 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::srai(bitblock128_t arg1) 1709 { 1710 bitblock128_t tmp = simd128<4>::srli<((sh >= 4) ? (3) : ((sh < 0) ? 0 : sh))>(arg1); 1711 return simd_or(tmp, simd128<4>::sub(simd128<4>::constant<0>(), simd_and(simd128<4>::constant<(1<<((4((sh >= 4) ? (3) : ((sh < 0) ? 0 : sh)))1))>(), tmp))); 1712 } 1713 1714 //The total number of operations is 5 1715 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::srai(bitblock128_t arg1) 1716 { 1717 bitblock128_t tmp = simd128<8>::srli<((sh >= 8) ? (7) : ((sh < 0) ? 0 : sh))>(arg1); 1718 return simd_or(tmp, simd128<8>::sub(simd128<8>::constant<0>(), simd_and(simd128<8>::constant<(1<<((8((sh >= 8) ? (7) : ((sh < 0) ? 0 : sh)))1))>(), tmp))); 1719 } 1720 1721 //The total number of operations is 1 1722 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::srai(bitblock128_t arg1) 1723 { 1724 return _mm_srai_epi16(arg1, (int32_t)(sh)); 1725 } 1726 1727 //The total number of operations is 1 1728 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::srai(bitblock128_t arg1) 1729 { 1730 return _mm_srai_epi32(arg1, (int32_t)(sh)); 1731 } 1732 1733 //The total number of operations is 5 1734 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srai(bitblock128_t arg1) 1735 { 1736 bitblock128_t tmp = simd128<64>::srli<((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh))>(arg1); 1737 return simd_or(tmp, simd128<64>::sub(simd128<64>::constant<0>(), simd_and(simd128<64>::slli<((64((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh)))1)>(simd128<64>::constant<1>()), tmp))); 1738 } 1739 1740 //The total number of operations is 21 1741 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srai(bitblock128_t arg1) 1742 { 1743 bitblock128_t tmp = simd128<128>::srli<((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh))>(arg1); 1744 return simd_or(tmp, simd128<128>::sub(simd128<128>::constant<0>(), simd_and(simd128<128>::slli<((128((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh)))1)>(simd128<128>::constant<1>()), tmp))); 1745 } 1746 1747 //The total number of operations is 0 1748 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::himask() 1749 { 1750 return simd128<2>::constant<(2)>(); 1751 } 1752 1753 //The total number of operations is 0 1754 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::himask() 1755 { 1756 return simd128<4>::constant<(12)>(); 1757 } 1758 1759 //The total number of operations is 0 1760 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::himask() 1761 { 1762 return simd128<8>::constant<(240)>(); 1763 } 1764 1765 //The total number of operations is 0 1766 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::himask() 1767 { 1768 return simd128<16>::constant<(65280)>(); 1769 } 1770 1771 //The total number of operations is 0 1772 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::himask() 1773 { 1774 return simd128<32>::constant<65536>(); 1775 } 1776 1777 //The total number of operations is 0 1778 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::himask() 1779 { 1780 return _mm_set_epi32((int32_t)(1), (int32_t)(0), (int32_t)(1), (int32_t)(0)); 1781 } 1782 1783 //The total number of operations is 0 1784 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::himask() 1785 { 1786 return _mm_set_epi32((int32_t)(1), (int32_t)(1), (int32_t)(0), (int32_t)(0)); 1787 } 1788 1789 //The total number of operations is 1 1790 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::add(bitblock128_t arg1, bitblock128_t arg2) 1791 { 1792 return simd_xor(arg1, arg2); 1793 } 1794 1795 //The total number of operations is 10 1796 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::add(bitblock128_t arg1, bitblock128_t arg2) 1797 { 1798 bitblock128_t tmp = simd_xor(arg1, arg2); 1799 return simd128<1>::ifh(simd128<2>::himask(), simd_xor(tmp, simd128<128>::slli<1>(simd_and(arg1, arg2))), tmp); 1800 } 1801 1802 //The total number of operations is 6 1803 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::add(bitblock128_t arg1, bitblock128_t arg2) 1804 { 1805 return simd128<1>::ifh(simd128<(8)>::himask(), simd128<(8)>::add(arg1, simd_and(simd128<(8)>::himask(), arg2)), simd128<(8)>::add(arg1, arg2)); 1806 } 1807 1808 //The total number of operations is 1 1809 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::add(bitblock128_t arg1, bitblock128_t arg2) 1810 { 1811 return _mm_add_epi8(arg1, arg2); 1812 } 1813 1814 //The total number of operations is 1 1815 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::add(bitblock128_t arg1, bitblock128_t arg2) 1816 { 1817 return _mm_add_epi16(arg1, arg2); 1818 } 1819 1820 //The total number of operations is 1 1821 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::add(bitblock128_t arg1, bitblock128_t arg2) 1822 { 1823 return _mm_add_epi32(arg1, arg2); 1824 } 1825 1826 //The total number of operations is 1 1827 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::add(bitblock128_t arg1, bitblock128_t arg2) 1828 { 1829 return _mm_add_epi64(arg1, arg2); 1830 } 1831 1832 //The total number of operations is 11 1833 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::add(bitblock128_t arg1, bitblock128_t arg2) 1834 { 1835 bitblock128_t partial = simd128<(64)>::add(arg1, arg2); 1836 bitblock128_t carryMask = simd_or(simd_and(arg1, arg2), simd_andc(simd_xor(arg1, arg2), partial)); 1837 bitblock128_t carry = simd128<128>::slli<(64)>(simd128<(64)>::srli<(63)>(carryMask)); 1838 return simd128<(64)>::add(partial, carry); 1839 } 1840 1598 1841 //The total number of operations is 1 1599 1842 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umax(bitblock128_t arg1, bitblock128_t arg2) … … 1648 1891 bitblock128_t eqMask2 = simd128<128>::srli<(64)>(simd128<(64)>::eq(tmpAns, arg2)); 1649 1892 return simd128<1>::ifh(simd128<128>::himask(), tmpAns, simd128<1>::ifh(eqMask1, simd128<1>::ifh(eqMask2, tmpAns, arg1), arg2)); 1650 }1651 1652 //The total number of operations is 21653 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::eq(bitblock128_t arg1, bitblock128_t arg2)1654 {1655 return simd_not(simd_xor(arg1, arg2));1656 }1657 1658 //The total number of operations is 81659 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::eq(bitblock128_t arg1, bitblock128_t arg2)1660 {1661 bitblock128_t tmpAns = simd128<(1)>::eq(arg1, arg2);1662 bitblock128_t loMask = simd_and(tmpAns, simd128<2>::srli<(1)>(tmpAns));1663 bitblock128_t hiMask = simd128<2>::slli<(1)>(loMask);1664 return simd_or(loMask, hiMask);1665 }1666 1667 //The total number of operations is 91668 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::eq(bitblock128_t arg1, bitblock128_t arg2)1669 {1670 return simd_or(simd_and(simd128<(8)>::himask(), simd128<(8)>::eq(simd_and(simd128<(8)>::himask(), arg1), simd_and(simd128<(8)>::himask(), arg2))), simd_and(simd128<(8)>::lomask(), simd128<(8)>::eq(simd_and(simd128<(8)>::lomask(), arg1), simd_and(simd128<(8)>::lomask(), arg2))));1671 }1672 1673 //The total number of operations is 11674 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::eq(bitblock128_t arg1, bitblock128_t arg2)1675 {1676 return _mm_cmpeq_epi8(arg1, arg2);1677 }1678 1679 //The total number of operations is 11680 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::eq(bitblock128_t arg1, bitblock128_t arg2)1681 {1682 return _mm_cmpeq_epi16(arg1, arg2);1683 }1684 1685 //The total number of operations is 11686 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::eq(bitblock128_t arg1, bitblock128_t arg2)1687 {1688 return _mm_cmpeq_epi32(arg1, arg2);1689 }1690 1691 //The total number of operations is 11692 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::eq(bitblock128_t arg1, bitblock128_t arg2)1693 {1694 return _mm_cmpeq_epi64(arg1, arg2);1695 }1696 1697 //The total number of operations is 111698 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::eq(bitblock128_t arg1, bitblock128_t arg2)1699 {1700 bitblock128_t tmpAns = simd128<(64)>::eq(arg1, arg2);1701 bitblock128_t loMask = simd_and(tmpAns, simd128<128>::srli<(64)>(tmpAns));1702 bitblock128_t hiMask = simd128<128>::slli<(64)>(loMask);1703 return simd_or(loMask, hiMask);1704 }1705 1706 //The total number of operations is 41707 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::srai(bitblock128_t arg1)1708 {1709 return ((sh == 0) ? arg1 : simd_or(simd_and(simd128<2>::himask(), arg1), simd128<2>::srli<1>(arg1)));1710 }1711 1712 //The total number of operations is 101713 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::srai(bitblock128_t arg1)1714 {1715 bitblock128_t tmp = simd128<4>::srli<((sh >= 4) ? (3) : ((sh < 0) ? 0 : sh))>(arg1);1716 return simd_or(tmp, simd128<4>::sub(simd128<4>::constant<0>(), simd_and(simd128<4>::constant<(1<<((4((sh >= 4) ? (3) : ((sh < 0) ? 0 : sh)))1))>(), tmp)));1717 }1718 1719 //The total number of operations is 51720 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::srai(bitblock128_t arg1)1721 {1722 bitblock128_t tmp = simd128<8>::srli<((sh >= 8) ? (7) : ((sh < 0) ? 0 : sh))>(arg1);1723 return simd_or(tmp, simd128<8>::sub(simd128<8>::constant<0>(), simd_and(simd128<8>::constant<(1<<((8((sh >= 8) ? (7) : ((sh < 0) ? 0 : sh)))1))>(), tmp)));1724 }1725 1726 //The total number of operations is 11727 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::srai(bitblock128_t arg1)1728 {1729 return _mm_srai_epi16(arg1, (int32_t)(sh));1730 }1731 1732 //The total number of operations is 11733 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::srai(bitblock128_t arg1)1734 {1735 return _mm_srai_epi32(arg1, (int32_t)(sh));1736 }1737 1738 //The total number of operations is 51739 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srai(bitblock128_t arg1)1740 {1741 bitblock128_t tmp = simd128<64>::srli<((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh))>(arg1);1742 return simd_or(tmp, simd128<64>::sub(simd128<64>::constant<0>(), simd_and(simd128<64>::slli<((64((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh)))1)>(simd128<64>::constant<1>()), tmp)));1743 }1744 1745 //The total number of operations is 211746 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srai(bitblock128_t arg1)1747 {1748 bitblock128_t tmp = simd128<128>::srli<((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh))>(arg1);1749 return simd_or(tmp, simd128<128>::sub(simd128<128>::constant<0>(), simd_and(simd128<128>::slli<((128((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh)))1)>(simd128<128>::constant<1>()), tmp)));1750 }1751 1752 //The total number of operations is 01753 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::himask()1754 {1755 return simd128<2>::constant<(2)>();1756 }1757 1758 //The total number of operations is 01759 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::himask()1760 {1761 return simd128<4>::constant<(12)>();1762 }1763 1764 //The total number of operations is 01765 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::himask()1766 {1767 return simd128<8>::constant<(240)>();1768 }1769 1770 //The total number of operations is 01771 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::himask()1772 {1773 return simd128<16>::constant<(65280)>();1774 }1775 1776 //The total number of operations is 01777 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::himask()1778 {1779 return simd128<32>::constant<65536>();1780 }1781 1782 //The total number of operations is 01783 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::himask()1784 {1785 return _mm_set_epi32((int32_t)(1), (int32_t)(0), (int32_t)(1), (int32_t)(0));1786 }1787 1788 //The total number of operations is 01789 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::himask()1790 {1791 return _mm_set_epi32((int32_t)(1), (int32_t)(1), (int32_t)(0), (int32_t)(0));1792 }1793 1794 //The total number of operations is 11795 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::add(bitblock128_t arg1, bitblock128_t arg2)1796 {1797 return simd_xor(arg1, arg2);1798 }1799 1800 //The total number of operations is 101801 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::add(bitblock128_t arg1, bitblock128_t arg2)1802 {1803 bitblock128_t tmp = simd_xor(arg1, arg2);1804 return simd128<1>::ifh(simd128<2>::himask(), simd_xor(tmp, simd128<128>::slli<1>(simd_and(arg1, arg2))), tmp);1805 }1806 1807 //The total number of operations is 61808 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::add(bitblock128_t arg1, bitblock128_t arg2)1809 {1810 return simd128<1>::ifh(simd128<(8)>::himask(), simd128<(8)>::add(arg1, simd_and(simd128<(8)>::himask(), arg2)), simd128<(8)>::add(arg1, arg2));1811 }1812 1813 //The total number of operations is 11814 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::add(bitblock128_t arg1, bitblock128_t arg2)1815 {1816 return _mm_add_epi8(arg1, arg2);1817 }1818 1819 //The total number of operations is 11820 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::add(bitblock128_t arg1, bitblock128_t arg2)1821 {1822 return _mm_add_epi16(arg1, arg2);1823 }1824 1825 //The total number of operations is 11826 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::add(bitblock128_t arg1, bitblock128_t arg2)1827 {1828 return _mm_add_epi32(arg1, arg2);1829 }1830 1831 //The total number of operations is 11832 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::add(bitblock128_t arg1, bitblock128_t arg2)1833 {1834 return _mm_add_epi64(arg1, arg2);1835 }1836 1837 //The total number of operations is 111838 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::add(bitblock128_t arg1, bitblock128_t arg2)1839 {1840 bitblock128_t partial = simd128<(64)>::add(arg1, arg2);1841 bitblock128_t carryMask = simd_or(simd_and(arg1, arg2), simd_andc(simd_xor(arg1, arg2), partial));1842 bitblock128_t carry = simd128<128>::slli<(64)>(simd128<(64)>::srli<(63)>(carryMask));1843 return simd128<(64)>::add(partial, carry);1844 }1845 1846 //The total number of operations is 91847 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1)1848 {1849 return simd128<1>::ifh(simd128<2>::himask(), simd_and(arg1, simd128<128>::slli<1>(simd_not(arg1))), arg1);1850 }1851 1852 //The total number of operations is 191853 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::abs(bitblock128_t arg1)1854 {1855 bitblock128_t gtMask = simd128<4>::gt(arg1, simd128<4>::constant<0>());1856 return simd128<1>::ifh(gtMask, arg1, simd128<4>::sub(gtMask, arg1));1857 }1858 1859 //The total number of operations is 11860 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::abs(bitblock128_t arg1)1861 {1862 return _mm_abs_epi8(arg1);1863 }1864 1865 //The total number of operations is 11866 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::abs(bitblock128_t arg1)1867 {1868 return _mm_abs_epi16(arg1);1869 }1870 1871 //The total number of operations is 11872 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::abs(bitblock128_t arg1)1873 {1874 return _mm_abs_epi32(arg1);1875 }1876 1877 //The total number of operations is 91878 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::abs(bitblock128_t arg1)1879 {1880 bitblock128_t eqMask = simd128<64>::eq(simd128<1>::ifh(simd128<64>::himask(), simd128<(32)>::abs(arg1), arg1), arg1);1881 return simd128<1>::ifh(eqMask, arg1, simd128<64>::sub(eqMask, arg1));1882 }1883 1884 //The total number of operations is 371885 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::abs(bitblock128_t arg1)1886 {1887 bitblock128_t eqMask = simd128<128>::eq(simd128<1>::ifh(simd128<128>::himask(), simd128<(64)>::abs(arg1), arg1), arg1);1888 return simd128<1>::ifh(eqMask, arg1, simd128<128>::sub(eqMask, arg1));1889 1893 } 1890 1894 … … 2773 2777 } 2774 2778 2775 //The total number of operations is 42776 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::slli(bitblock128_t arg1)2777 {2778 return simd128<128>::slli<(sh*2)>(arg1);2779 }2780 2781 //The total number of operations is 42782 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::slli(bitblock128_t arg1)2783 {2784 return simd128<128>::slli<(sh*4)>(arg1);2785 }2786 2787 //The total number of operations is 42788 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::slli(bitblock128_t arg1)2789 {2790 return simd128<128>::slli<(sh*8)>(arg1);2791 }2792 2793 //The total number of operations is 42794 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::slli(bitblock128_t arg1)2795 {2796 return simd128<128>::slli<(sh*16)>(arg1);2797 }2798 2799 //The total number of operations is 42800 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::slli(bitblock128_t arg1)2801 {2802 return simd128<128>::slli<(sh*32)>(arg1);2803 }2804 2805 //The total number of operations is 42806 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::slli(bitblock128_t arg1)2807 {2808 return simd128<128>::slli<(sh*64)>(arg1);2809 }2810 2811 //The total number of operations is 42812 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::slli(bitblock128_t arg1)2813 {2814 return simd128<128>::slli<(sh*128)>(arg1);2815 }2816 2817 2779 //The total number of operations is 5 2818 2780 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4) … … 2977 2939 } 2978 2940 2941 //The total number of operations is 4 2942 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::slli(bitblock128_t arg1) 2943 { 2944 return simd128<128>::slli<(sh*2)>(arg1); 2945 } 2946 2947 //The total number of operations is 4 2948 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::slli(bitblock128_t arg1) 2949 { 2950 return simd128<128>::slli<(sh*4)>(arg1); 2951 } 2952 2953 //The total number of operations is 4 2954 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::slli(bitblock128_t arg1) 2955 { 2956 return simd128<128>::slli<(sh*8)>(arg1); 2957 } 2958 2959 //The total number of operations is 4 2960 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::slli(bitblock128_t arg1) 2961 { 2962 return simd128<128>::slli<(sh*16)>(arg1); 2963 } 2964 2965 //The total number of operations is 4 2966 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::slli(bitblock128_t arg1) 2967 { 2968 return simd128<128>::slli<(sh*32)>(arg1); 2969 } 2970 2971 //The total number of operations is 4 2972 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::slli(bitblock128_t arg1) 2973 { 2974 return simd128<128>::slli<(sh*64)>(arg1); 2975 } 2976 2977 //The total number of operations is 4 2978 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::slli(bitblock128_t arg1) 2979 { 2980 return simd128<128>::slli<(sh*128)>(arg1); 2981 } 2982 2979 2983 //The total number of operations is 13 2980 2984 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8) … … 3007 3011 } 3008 3012 3013 //The total number of operations is 11 3014 IDISA_ALWAYS_INLINE bitblock128_t bitblock128::sll(bitblock128_t arg1, bitblock128_t arg2) 3015 { 3016 return simd128<128>::sll(arg1, arg2); 3017 } 3018 3009 3019 //The total number of operations is 1 3010 3020 IDISA_ALWAYS_INLINE bitblock128_t bitblock128::load_unaligned(bitblock128_t* arg1) 3011 3021 { 3012 3022 return _mm_loadu_si128((bitblock128_t*)(arg1)); 3023 } 3024 3025 //The total number of operations is 4 3026 template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t bitblock128::srli(bitblock128_t arg1) 3027 { 3028 return simd128<128>::srli<sh>(arg1); 3013 3029 } 3014 3030 … … 3031 3047 } 3032 3048 3049 //The total number of operations is 11 3050 IDISA_ALWAYS_INLINE bitblock128_t bitblock128::srl(bitblock128_t arg1, bitblock128_t arg2) 3051 { 3052 return simd128<128>::srl(arg1, arg2); 3053 } 3054 3055 //The total number of operations is 4 3056 template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t bitblock128::slli(bitblock128_t arg1) 3057 { 3058 return simd128<128>::slli<sh>(arg1); 3059 } 3060 3033 3061 //The total number of operations is 2 3034 3062 IDISA_ALWAYS_INLINE bool bitblock128::any(bitblock128_t arg1)
Note: See TracChangeset
for help on using the changeset viewer.