Changeset 1580 for trunk/lib/idisa_cpp/idisa_sse4_2.cpp
 Timestamp:
 Oct 23, 2011, 9:43:33 AM (8 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

trunk/lib/idisa_cpp/idisa_sse4_2.cpp
r1573 r1580 28 28 static IDISA_ALWAYS_INLINE bitblock128_t add_hl(bitblock128_t arg1); 29 29 static IDISA_ALWAYS_INLINE bitblock128_t srl(bitblock128_t arg1, bitblock128_t shift_mask); 30 static IDISA_ALWAYS_INLINE bitblock128_t lomask(); 30 31 static IDISA_ALWAYS_INLINE bitblock128_t umin(bitblock128_t arg1, bitblock128_t arg2); 31 32 template <uint64_t val> static IDISA_ALWAYS_INLINE bitblock128_t constant(); 32 33 static IDISA_ALWAYS_INLINE bitblock128_t min(bitblock128_t arg1, bitblock128_t arg2); 33 static IDISA_ALWAYS_INLINE bitblock128_t lomask();34 34 static IDISA_ALWAYS_INLINE bitblock128_t umax(bitblock128_t arg1, bitblock128_t arg2); 35 35 static IDISA_ALWAYS_INLINE bitblock128_t abs(bitblock128_t arg1); … … 89 89 { 90 90 public: 91 static IDISA_ALWAYS_INLINE bitblock128_t sll(bitblock128_t arg1, bitblock128_t arg2); 91 92 static IDISA_ALWAYS_INLINE bitblock128_t load_unaligned(bitblock128_t* arg1); 93 template <uint64_t sh> static IDISA_ALWAYS_INLINE bitblock128_t srli(bitblock128_t arg1); 94 static IDISA_ALWAYS_INLINE bitblock128_t srl(bitblock128_t arg1, bitblock128_t arg2); 92 95 static IDISA_ALWAYS_INLINE void store_aligned(bitblock128_t* arg1, bitblock128_t arg2); 93 96 static IDISA_ALWAYS_INLINE bool all(bitblock128_t arg1); 94 97 static IDISA_ALWAYS_INLINE bool any(bitblock128_t arg1); 95 98 static IDISA_ALWAYS_INLINE uint64_t popcount(bitblock128_t arg1); 99 template <uint64_t sh> static IDISA_ALWAYS_INLINE bitblock128_t slli(bitblock128_t arg1); 96 100 static IDISA_ALWAYS_INLINE bitblock128_t load_aligned(bitblock128_t* arg1); 97 101 static IDISA_ALWAYS_INLINE void store_unaligned(bitblock128_t* arg1, bitblock128_t arg2); … … 231 235 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srl(bitblock128_t arg1, bitblock128_t shift_mask); 232 236 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srl(bitblock128_t arg1, bitblock128_t shift_mask); 233 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::lomask();234 template <> 
IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::lomask();235 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::lomask();236 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::lomask();237 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::lomask();238 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::lomask();239 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lomask();240 237 template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::constant(); 241 238 template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::constant(); … … 254 251 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::min(bitblock128_t arg1, bitblock128_t arg2); 255 252 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::min(bitblock128_t arg1, bitblock128_t arg2); 253 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::lomask(); 254 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::lomask(); 255 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::lomask(); 256 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::lomask(); 257 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::lomask(); 258 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::lomask(); 259 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lomask(); 256 260 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umin(bitblock128_t arg1, bitblock128_t arg2); 257 261 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::umin(bitblock128_t arg1, bitblock128_t arg2); … … 262 266 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::umin(bitblock128_t arg1, bitblock128_t arg2); 263 267 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::umin(bitblock128_t arg1, bitblock128_t arg2); 264 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umax(bitblock128_t arg1, bitblock128_t arg2); 265 template <> IDISA_ALWAYS_INLINE bitblock128_t 
simd128<2>::umax(bitblock128_t arg1, bitblock128_t arg2); 266 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::umax(bitblock128_t arg1, bitblock128_t arg2); 267 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::umax(bitblock128_t arg1, bitblock128_t arg2); 268 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::umax(bitblock128_t arg1, bitblock128_t arg2); 269 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::umax(bitblock128_t arg1, bitblock128_t arg2); 270 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::umax(bitblock128_t arg1, bitblock128_t arg2); 271 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::umax(bitblock128_t arg1, bitblock128_t arg2); 268 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1); 269 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::abs(bitblock128_t arg1); 270 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::abs(bitblock128_t arg1); 271 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::abs(bitblock128_t arg1); 272 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::abs(bitblock128_t arg1); 273 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::abs(bitblock128_t arg1); 274 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::abs(bitblock128_t arg1); 272 275 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::eq(bitblock128_t arg1, bitblock128_t arg2); 273 276 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::eq(bitblock128_t arg1, bitblock128_t arg2); … … 300 303 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::add(bitblock128_t arg1, bitblock128_t arg2); 301 304 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::add(bitblock128_t arg1, bitblock128_t arg2); 302 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1); 303 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::abs(bitblock128_t arg1); 304 template <> IDISA_ALWAYS_INLINE bitblock128_t 
simd128<8>::abs(bitblock128_t arg1); 305 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::abs(bitblock128_t arg1); 306 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::abs(bitblock128_t arg1); 307 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::abs(bitblock128_t arg1); 308 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::abs(bitblock128_t arg1); 305 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umax(bitblock128_t arg1, bitblock128_t arg2); 306 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::umax(bitblock128_t arg1, bitblock128_t arg2); 307 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::umax(bitblock128_t arg1, bitblock128_t arg2); 308 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::umax(bitblock128_t arg1, bitblock128_t arg2); 309 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::umax(bitblock128_t arg1, bitblock128_t arg2); 310 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::umax(bitblock128_t arg1, bitblock128_t arg2); 311 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::umax(bitblock128_t arg1, bitblock128_t arg2); 312 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::umax(bitblock128_t arg1, bitblock128_t arg2); 309 313 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::umin_hl(bitblock128_t arg1, bitblock128_t arg2); 310 314 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<4>::umin_hl(bitblock128_t arg1, bitblock128_t arg2); … … 445 449 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16); 446 450 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, 
uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16); 447 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::slli(bitblock128_t arg1);448 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::slli(bitblock128_t arg1);449 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::slli(bitblock128_t arg1);450 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::slli(bitblock128_t arg1);451 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::slli(bitblock128_t arg1);452 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::slli(bitblock128_t arg1);453 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::slli(bitblock128_t arg1);454 451 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4); 455 452 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4); … … 479 476 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::dslli(bitblock128_t arg1, bitblock128_t arg2); 480 477 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::dslli(bitblock128_t arg1, bitblock128_t arg2); 478 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::slli(bitblock128_t arg1); 479 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::slli(bitblock128_t arg1); 480 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::slli(bitblock128_t arg1); 481 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::slli(bitblock128_t arg1); 482 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t 
mvmd128<32>::slli(bitblock128_t arg1); 483 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::slli(bitblock128_t arg1); 484 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::slli(bitblock128_t arg1); 481 485 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8); 482 486 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8); … … 1386 1390 1387 1391 //The total number of operations is 0 1388 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::lomask()1389 {1390 return simd128<2>::constant<(1)>();1391 }1392 1393 //The total number of operations is 01394 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::lomask()1395 {1396 return simd128<4>::constant<(3)>();1397 }1398 1399 //The total number of operations is 01400 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::lomask()1401 {1402 return simd128<8>::constant<(15)>();1403 }1404 1405 //The total number of operations is 01406 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::lomask()1407 {1408 return simd128<16>::constant<(255)>();1409 }1410 1411 //The total number of operations is 01412 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::lomask()1413 {1414 return simd128<32>::constant<(65535)>();1415 }1416 1417 //The total number of operations is 01418 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::lomask()1419 {1420 return _mm_set_epi32((int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1));1421 }1422 1423 //The total number of operations is 01424 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lomask()1425 {1426 return _mm_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1));1427 }1428 1429 //The total number of operations is 
01430 1392 template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::constant() 1431 1393 { … … 1528 1490 } 1529 1491 1492 //The total number of operations is 0 1493 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::lomask() 1494 { 1495 return simd128<2>::constant<(1)>(); 1496 } 1497 1498 //The total number of operations is 0 1499 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::lomask() 1500 { 1501 return simd128<4>::constant<(3)>(); 1502 } 1503 1504 //The total number of operations is 0 1505 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::lomask() 1506 { 1507 return simd128<8>::constant<(15)>(); 1508 } 1509 1510 //The total number of operations is 0 1511 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::lomask() 1512 { 1513 return simd128<16>::constant<(255)>(); 1514 } 1515 1516 //The total number of operations is 0 1517 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::lomask() 1518 { 1519 return simd128<32>::constant<(65535)>(); 1520 } 1521 1522 //The total number of operations is 0 1523 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::lomask() 1524 { 1525 return _mm_set_epi32((int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1)); 1526 } 1527 1528 //The total number of operations is 0 1529 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lomask() 1530 { 1531 return _mm_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1)); 1532 } 1533 1530 1534 //The total number of operations is 1 1531 1535 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umin(bitblock128_t arg1, bitblock128_t arg2) … … 1580 1584 } 1581 1585 1586 //The total number of operations is 9 1587 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1) 1588 { 1589 return simd128<1>::ifh(simd128<2>::himask(), simd_and(arg1, simd128<128>::slli<1>(simd_not(arg1))), arg1); 1590 } 1591 1592 //The total number of operations is 19 1593 template <> IDISA_ALWAYS_INLINE 
bitblock128_t simd128<4>::abs(bitblock128_t arg1) 1594 { 1595 bitblock128_t gtMask = simd128<4>::gt(arg1, simd128<4>::constant<0>()); 1596 return simd128<1>::ifh(gtMask, arg1, simd128<4>::sub(gtMask, arg1)); 1597 } 1598 1599 //The total number of operations is 1 1600 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::abs(bitblock128_t arg1) 1601 { 1602 return _mm_abs_epi8(arg1); 1603 } 1604 1605 //The total number of operations is 1 1606 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::abs(bitblock128_t arg1) 1607 { 1608 return _mm_abs_epi16(arg1); 1609 } 1610 1611 //The total number of operations is 1 1612 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::abs(bitblock128_t arg1) 1613 { 1614 return _mm_abs_epi32(arg1); 1615 } 1616 1617 //The total number of operations is 5 1618 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::abs(bitblock128_t arg1) 1619 { 1620 bitblock128_t gtMask = simd128<64>::gt(arg1, simd128<64>::constant<0>()); 1621 return simd128<1>::ifh(gtMask, arg1, simd128<64>::sub(gtMask, arg1)); 1622 } 1623 1624 //The total number of operations is 33 1625 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::abs(bitblock128_t arg1) 1626 { 1627 bitblock128_t eqMask = simd128<128>::eq(simd128<1>::ifh(simd128<128>::himask(), simd128<(64)>::abs(arg1), arg1), arg1); 1628 return simd128<1>::ifh(eqMask, arg1, simd128<128>::sub(eqMask, arg1)); 1629 } 1630 1631 //The total number of operations is 2 1632 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::eq(bitblock128_t arg1, bitblock128_t arg2) 1633 { 1634 return simd_not(simd_xor(arg1, arg2)); 1635 } 1636 1637 //The total number of operations is 8 1638 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::eq(bitblock128_t arg1, bitblock128_t arg2) 1639 { 1640 bitblock128_t tmpAns = simd128<(1)>::eq(arg1, arg2); 1641 bitblock128_t loMask = simd_and(tmpAns, simd128<2>::srli<(1)>(tmpAns)); 1642 bitblock128_t hiMask = simd128<2>::slli<(1)>(loMask); 1643 return 
simd_or(loMask, hiMask); 1644 } 1645 1646 //The total number of operations is 9 1647 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::eq(bitblock128_t arg1, bitblock128_t arg2) 1648 { 1649 return simd_or(simd_and(simd128<(8)>::himask(), simd128<(8)>::eq(simd_and(simd128<(8)>::himask(), arg1), simd_and(simd128<(8)>::himask(), arg2))), simd_and(simd128<(8)>::lomask(), simd128<(8)>::eq(simd_and(simd128<(8)>::lomask(), arg1), simd_and(simd128<(8)>::lomask(), arg2)))); 1650 } 1651 1652 //The total number of operations is 1 1653 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::eq(bitblock128_t arg1, bitblock128_t arg2) 1654 { 1655 return _mm_cmpeq_epi8(arg1, arg2); 1656 } 1657 1658 //The total number of operations is 1 1659 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::eq(bitblock128_t arg1, bitblock128_t arg2) 1660 { 1661 return _mm_cmpeq_epi16(arg1, arg2); 1662 } 1663 1664 //The total number of operations is 1 1665 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::eq(bitblock128_t arg1, bitblock128_t arg2) 1666 { 1667 return _mm_cmpeq_epi32(arg1, arg2); 1668 } 1669 1670 //The total number of operations is 1 1671 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::eq(bitblock128_t arg1, bitblock128_t arg2) 1672 { 1673 return _mm_cmpeq_epi64(arg1, arg2); 1674 } 1675 1676 //The total number of operations is 11 1677 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::eq(bitblock128_t arg1, bitblock128_t arg2) 1678 { 1679 bitblock128_t tmpAns = simd128<(64)>::eq(arg1, arg2); 1680 bitblock128_t loMask = simd_and(tmpAns, simd128<128>::srli<(64)>(tmpAns)); 1681 bitblock128_t hiMask = simd128<128>::slli<(64)>(loMask); 1682 return simd_or(loMask, hiMask); 1683 } 1684 1685 //The total number of operations is 4 1686 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::srai(bitblock128_t arg1) 1687 { 1688 return ((sh == 0) ? 
arg1 : simd_or(simd_and(simd128<2>::himask(), arg1), simd128<2>::srli<1>(arg1))); 1689 } 1690 1691 //The total number of operations is 10 1692 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::srai(bitblock128_t arg1) 1693 { 1694 bitblock128_t tmp = simd128<4>::srli<((sh >= 4) ? (3) : ((sh < 0) ? 0 : sh))>(arg1); 1695 return simd_or(tmp, simd128<4>::sub(simd128<4>::constant<0>(), simd_and(simd128<4>::constant<(1<<((4-((sh >= 4) ? (3) : ((sh < 0) ? 0 : sh)))-1))>(), tmp))); 1696 } 1697 1698 //The total number of operations is 5 1699 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::srai(bitblock128_t arg1) 1700 { 1701 bitblock128_t tmp = simd128<8>::srli<((sh >= 8) ? (7) : ((sh < 0) ? 0 : sh))>(arg1); 1702 return simd_or(tmp, simd128<8>::sub(simd128<8>::constant<0>(), simd_and(simd128<8>::constant<(1<<((8-((sh >= 8) ? (7) : ((sh < 0) ? 0 : sh)))-1))>(), tmp))); 1703 } 1704 1705 //The total number of operations is 1 1706 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::srai(bitblock128_t arg1) 1707 { 1708 return _mm_srai_epi16(arg1, (int32_t)(sh)); 1709 } 1710 1711 //The total number of operations is 1 1712 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::srai(bitblock128_t arg1) 1713 { 1714 return _mm_srai_epi32(arg1, (int32_t)(sh)); 1715 } 1716 1717 //The total number of operations is 5 1718 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srai(bitblock128_t arg1) 1719 { 1720 bitblock128_t tmp = simd128<64>::srli<((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh))>(arg1); 1721 return simd_or(tmp, simd128<64>::sub(simd128<64>::constant<0>(), simd_and(simd128<64>::slli<((64-((sh >= 64) ? (63) : ((sh < 0) ? 
0 : sh)))-1)>(simd128<64>::constant<1>()), tmp))); 1722 } 1723 1724 //The total number of operations is 21 1725 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srai(bitblock128_t arg1) 1726 { 1727 bitblock128_t tmp = simd128<128>::srli<((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh))>(arg1); 1728 return simd_or(tmp, simd128<128>::sub(simd128<128>::constant<0>(), simd_and(simd128<128>::slli<((128-((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh)))-1)>(simd128<128>::constant<1>()), tmp))); 1729 } 1730 1731 //The total number of operations is 0 1732 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::himask() 1733 { 1734 return simd128<2>::constant<(2)>(); 1735 } 1736 1737 //The total number of operations is 0 1738 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::himask() 1739 { 1740 return simd128<4>::constant<(12)>(); 1741 } 1742 1743 //The total number of operations is 0 1744 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::himask() 1745 { 1746 return simd128<8>::constant<(240)>(); 1747 } 1748 1749 //The total number of operations is 0 1750 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::himask() 1751 { 1752 return simd128<16>::constant<(65280)>(); 1753 } 1754 1755 //The total number of operations is 0 1756 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::himask() 1757 { 1758 return simd128<32>::constant<-65536>(); 1759 } 1760 1761 //The total number of operations is 0 1762 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::himask() 1763 { 1764 return _mm_set_epi32((int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0)); 1765 } 1766 1767 //The total number of operations is 0 1768 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::himask() 1769 { 1770 return _mm_set_epi32((int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0)); 1771 } 1772 1773 //The total number of operations is 1 1774 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::add(bitblock128_t arg1, bitblock128_t arg2) 
1775 { 1776 return simd_xor(arg1, arg2); 1777 } 1778 1779 //The total number of operations is 10 1780 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::add(bitblock128_t arg1, bitblock128_t arg2) 1781 { 1782 bitblock128_t tmp = simd_xor(arg1, arg2); 1783 return simd128<1>::ifh(simd128<2>::himask(), simd_xor(tmp, simd128<128>::slli<1>(simd_and(arg1, arg2))), tmp); 1784 } 1785 1786 //The total number of operations is 6 1787 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::add(bitblock128_t arg1, bitblock128_t arg2) 1788 { 1789 return simd128<1>::ifh(simd128<(8)>::himask(), simd128<(8)>::add(arg1, simd_and(simd128<(8)>::himask(), arg2)), simd128<(8)>::add(arg1, arg2)); 1790 } 1791 1792 //The total number of operations is 1 1793 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::add(bitblock128_t arg1, bitblock128_t arg2) 1794 { 1795 return _mm_add_epi8(arg1, arg2); 1796 } 1797 1798 //The total number of operations is 1 1799 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::add(bitblock128_t arg1, bitblock128_t arg2) 1800 { 1801 return _mm_add_epi16(arg1, arg2); 1802 } 1803 1804 //The total number of operations is 1 1805 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::add(bitblock128_t arg1, bitblock128_t arg2) 1806 { 1807 return _mm_add_epi32(arg1, arg2); 1808 } 1809 1810 //The total number of operations is 1 1811 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::add(bitblock128_t arg1, bitblock128_t arg2) 1812 { 1813 return _mm_add_epi64(arg1, arg2); 1814 } 1815 1816 //The total number of operations is 11 1817 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::add(bitblock128_t arg1, bitblock128_t arg2) 1818 { 1819 bitblock128_t partial = simd128<(64)>::add(arg1, arg2); 1820 bitblock128_t carryMask = simd_or(simd_and(arg1, arg2), simd_andc(simd_xor(arg1, arg2), partial)); 1821 bitblock128_t carry = simd128<128>::slli<(64)>(simd128<(64)>::srli<(63)>(carryMask)); 1822 return simd128<(64)>::add(partial, 
carry); 1823 } 1824 1582 1825 //The total number of operations is 1 1583 1826 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umax(bitblock128_t arg1, bitblock128_t arg2) … … 1630 1873 bitblock128_t eqMask2 = simd128<128>::srli<(64)>(simd128<(64)>::eq(tmpAns, arg2)); 1631 1874 return simd128<1>::ifh(simd128<128>::himask(), tmpAns, simd128<1>::ifh(eqMask1, simd128<1>::ifh(eqMask2, tmpAns, arg1), arg2)); 1632 }1633 1634 //The total number of operations is 21635 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::eq(bitblock128_t arg1, bitblock128_t arg2)1636 {1637 return simd_not(simd_xor(arg1, arg2));1638 }1639 1640 //The total number of operations is 81641 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::eq(bitblock128_t arg1, bitblock128_t arg2)1642 {1643 bitblock128_t tmpAns = simd128<(1)>::eq(arg1, arg2);1644 bitblock128_t loMask = simd_and(tmpAns, simd128<2>::srli<(1)>(tmpAns));1645 bitblock128_t hiMask = simd128<2>::slli<(1)>(loMask);1646 return simd_or(loMask, hiMask);1647 }1648 1649 //The total number of operations is 91650 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::eq(bitblock128_t arg1, bitblock128_t arg2)1651 {1652 return simd_or(simd_and(simd128<(8)>::himask(), simd128<(8)>::eq(simd_and(simd128<(8)>::himask(), arg1), simd_and(simd128<(8)>::himask(), arg2))), simd_and(simd128<(8)>::lomask(), simd128<(8)>::eq(simd_and(simd128<(8)>::lomask(), arg1), simd_and(simd128<(8)>::lomask(), arg2))));1653 }1654 1655 //The total number of operations is 11656 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::eq(bitblock128_t arg1, bitblock128_t arg2)1657 {1658 return _mm_cmpeq_epi8(arg1, arg2);1659 }1660 1661 //The total number of operations is 11662 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::eq(bitblock128_t arg1, bitblock128_t arg2)1663 {1664 return _mm_cmpeq_epi16(arg1, arg2);1665 }1666 1667 //The total number of operations is 11668 template <> IDISA_ALWAYS_INLINE bitblock128_t 
simd128<32>::eq(bitblock128_t arg1, bitblock128_t arg2)1669 {1670 return _mm_cmpeq_epi32(arg1, arg2);1671 }1672 1673 //The total number of operations is 11674 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::eq(bitblock128_t arg1, bitblock128_t arg2)1675 {1676 return _mm_cmpeq_epi64(arg1, arg2);1677 }1678 1679 //The total number of operations is 111680 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::eq(bitblock128_t arg1, bitblock128_t arg2)1681 {1682 bitblock128_t tmpAns = simd128<(64)>::eq(arg1, arg2);1683 bitblock128_t loMask = simd_and(tmpAns, simd128<128>::srli<(64)>(tmpAns));1684 bitblock128_t hiMask = simd128<128>::slli<(64)>(loMask);1685 return simd_or(loMask, hiMask);1686 }1687 1688 //The total number of operations is 41689 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::srai(bitblock128_t arg1)1690 {1691 return ((sh == 0) ? arg1 : simd_or(simd_and(simd128<2>::himask(), arg1), simd128<2>::srli<1>(arg1)));1692 }1693 1694 //The total number of operations is 101695 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::srai(bitblock128_t arg1)1696 {1697 bitblock128_t tmp = simd128<4>::srli<((sh >= 4) ? (3) : ((sh < 0) ? 0 : sh))>(arg1);1698 return simd_or(tmp, simd128<4>::sub(simd128<4>::constant<0>(), simd_and(simd128<4>::constant<(1<<((4-((sh >= 4) ? (3) : ((sh < 0) ? 0 : sh)))-1))>(), tmp)));1699 }1700 1701 //The total number of operations is 51702 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::srai(bitblock128_t arg1)1703 {1704 bitblock128_t tmp = simd128<8>::srli<((sh >= 8) ? (7) : ((sh < 0) ? 0 : sh))>(arg1);1705 return simd_or(tmp, simd128<8>::sub(simd128<8>::constant<0>(), simd_and(simd128<8>::constant<(1<<((8-((sh >= 8) ? (7) : ((sh < 0) ? 
0 : sh)))-1))>(), tmp)));1706 }1707 1708 //The total number of operations is 11709 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::srai(bitblock128_t arg1)1710 {1711 return _mm_srai_epi16(arg1, (int32_t)(sh));1712 }1713 1714 //The total number of operations is 11715 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::srai(bitblock128_t arg1)1716 {1717 return _mm_srai_epi32(arg1, (int32_t)(sh));1718 }1719 1720 //The total number of operations is 51721 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srai(bitblock128_t arg1)1722 {1723 bitblock128_t tmp = simd128<64>::srli<((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh))>(arg1);1724 return simd_or(tmp, simd128<64>::sub(simd128<64>::constant<0>(), simd_and(simd128<64>::slli<((64-((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh)))-1)>(simd128<64>::constant<1>()), tmp)));1725 }1726 1727 //The total number of operations is 211728 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srai(bitblock128_t arg1)1729 {1730 bitblock128_t tmp = simd128<128>::srli<((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh))>(arg1);1731 return simd_or(tmp, simd128<128>::sub(simd128<128>::constant<0>(), simd_and(simd128<128>::slli<((128-((sh >= 128) ? (127) : ((sh < 0) ? 
0 : sh)))-1)>(simd128<128>::constant<1>()), tmp)));1732 }1733 1734 //The total number of operations is 01735 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::himask()1736 {1737 return simd128<2>::constant<(2)>();1738 }1739 1740 //The total number of operations is 01741 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::himask()1742 {1743 return simd128<4>::constant<(12)>();1744 }1745 1746 //The total number of operations is 01747 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::himask()1748 {1749 return simd128<8>::constant<(240)>();1750 }1751 1752 //The total number of operations is 01753 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::himask()1754 {1755 return simd128<16>::constant<(65280)>();1756 }1757 1758 //The total number of operations is 01759 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::himask()1760 {1761 return simd128<32>::constant<-65536>();1762 }1763 1764 //The total number of operations is 01765 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::himask()1766 {1767 return _mm_set_epi32((int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0));1768 }1769 1770 //The total number of operations is 01771 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::himask()1772 {1773 return _mm_set_epi32((int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0));1774 }1775 1776 //The total number of operations is 11777 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::add(bitblock128_t arg1, bitblock128_t arg2)1778 {1779 return simd_xor(arg1, arg2);1780 }1781 1782 //The total number of operations is 101783 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::add(bitblock128_t arg1, bitblock128_t arg2)1784 {1785 bitblock128_t tmp = simd_xor(arg1, arg2);1786 return simd128<1>::ifh(simd128<2>::himask(), simd_xor(tmp, simd128<128>::slli<1>(simd_and(arg1, arg2))), tmp);1787 }1788 1789 //The total number of operations is 61790 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::add(bitblock128_t 
arg1, bitblock128_t arg2)1791 {1792 return simd128<1>::ifh(simd128<(8)>::himask(), simd128<(8)>::add(arg1, simd_and(simd128<(8)>::himask(), arg2)), simd128<(8)>::add(arg1, arg2));1793 }1794 1795 //The total number of operations is 11796 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::add(bitblock128_t arg1, bitblock128_t arg2)1797 {1798 return _mm_add_epi8(arg1, arg2);1799 }1800 1801 //The total number of operations is 11802 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::add(bitblock128_t arg1, bitblock128_t arg2)1803 {1804 return _mm_add_epi16(arg1, arg2);1805 }1806 1807 //The total number of operations is 11808 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::add(bitblock128_t arg1, bitblock128_t arg2)1809 {1810 return _mm_add_epi32(arg1, arg2);1811 }1812 1813 //The total number of operations is 11814 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::add(bitblock128_t arg1, bitblock128_t arg2)1815 {1816 return _mm_add_epi64(arg1, arg2);1817 }1818 1819 //The total number of operations is 111820 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::add(bitblock128_t arg1, bitblock128_t arg2)1821 {1822 bitblock128_t partial = simd128<(64)>::add(arg1, arg2);1823 bitblock128_t carryMask = simd_or(simd_and(arg1, arg2), simd_andc(simd_xor(arg1, arg2), partial));1824 bitblock128_t carry = simd128<128>::slli<(64)>(simd128<(64)>::srli<(63)>(carryMask));1825 return simd128<(64)>::add(partial, carry);1826 }1827 1828 //The total number of operations is 91829 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1)1830 {1831 return simd128<1>::ifh(simd128<2>::himask(), simd_and(arg1, simd128<128>::slli<1>(simd_not(arg1))), arg1);1832 }1833 1834 //The total number of operations is 191835 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::abs(bitblock128_t arg1)1836 {1837 bitblock128_t gtMask = simd128<4>::gt(arg1, simd128<4>::constant<0>());1838 return simd128<1>::ifh(gtMask, arg1, 
simd128<4>::sub(gtMask, arg1));1839 }1840 1841 //The total number of operations is 11842 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::abs(bitblock128_t arg1)1843 {1844 return _mm_abs_epi8(arg1);1845 }1846 1847 //The total number of operations is 11848 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::abs(bitblock128_t arg1)1849 {1850 return _mm_abs_epi16(arg1);1851 }1852 1853 //The total number of operations is 11854 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::abs(bitblock128_t arg1)1855 {1856 return _mm_abs_epi32(arg1);1857 }1858 1859 //The total number of operations is 51860 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::abs(bitblock128_t arg1)1861 {1862 bitblock128_t gtMask = simd128<64>::gt(arg1, simd128<64>::constant<0>());1863 return simd128<1>::ifh(gtMask, arg1, simd128<64>::sub(gtMask, arg1));1864 }1865 1866 //The total number of operations is 331867 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::abs(bitblock128_t arg1)1868 {1869 bitblock128_t eqMask = simd128<128>::eq(simd128<1>::ifh(simd128<128>::himask(), simd128<(64)>::abs(arg1), arg1), arg1);1870 return simd128<1>::ifh(eqMask, arg1, simd128<128>::sub(eqMask, arg1));1871 1875 } 1872 1876 … … 2755 2759 } 2756 2760 2757 //The total number of operations is 42758 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::slli(bitblock128_t arg1)2759 {2760 return simd128<128>::slli<(sh*2)>(arg1);2761 }2762 2763 //The total number of operations is 42764 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::slli(bitblock128_t arg1)2765 {2766 return simd128<128>::slli<(sh*4)>(arg1);2767 }2768 2769 //The total number of operations is 42770 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::slli(bitblock128_t arg1)2771 {2772 return simd128<128>::slli<(sh*8)>(arg1);2773 }2774 2775 //The total number of operations is 42776 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE 
bitblock128_t mvmd128<16>::slli(bitblock128_t arg1)2777 {2778 return simd128<128>::slli<(sh*16)>(arg1);2779 }2780 2781 //The total number of operations is 42782 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::slli(bitblock128_t arg1)2783 {2784 return simd128<128>::slli<(sh*32)>(arg1);2785 }2786 2787 //The total number of operations is 42788 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::slli(bitblock128_t arg1)2789 {2790 return simd128<128>::slli<(sh*64)>(arg1);2791 }2792 2793 //The total number of operations is 42794 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::slli(bitblock128_t arg1)2795 {2796 return simd128<128>::slli<(sh*128)>(arg1);2797 }2798 2799 2761 //The total number of operations is 5 2800 2762 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4) … … 2959 2921 } 2960 2922 2923 //The total number of operations is 4 2924 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::slli(bitblock128_t arg1) 2925 { 2926 return simd128<128>::slli<(sh*2)>(arg1); 2927 } 2928 2929 //The total number of operations is 4 2930 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::slli(bitblock128_t arg1) 2931 { 2932 return simd128<128>::slli<(sh*4)>(arg1); 2933 } 2934 2935 //The total number of operations is 4 2936 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::slli(bitblock128_t arg1) 2937 { 2938 return simd128<128>::slli<(sh*8)>(arg1); 2939 } 2940 2941 //The total number of operations is 4 2942 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::slli(bitblock128_t arg1) 2943 { 2944 return simd128<128>::slli<(sh*16)>(arg1); 2945 } 2946 2947 //The total number of operations is 4 2948 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::slli(bitblock128_t 
arg1) 2949 { 2950 return simd128<128>::slli<(sh*32)>(arg1); 2951 } 2952 2953 //The total number of operations is 4 2954 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::slli(bitblock128_t arg1) 2955 { 2956 return simd128<128>::slli<(sh*64)>(arg1); 2957 } 2958 2959 //The total number of operations is 4 2960 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::slli(bitblock128_t arg1) 2961 { 2962 return simd128<128>::slli<(sh*128)>(arg1); 2963 } 2964 2961 2965 //The total number of operations is 13 2962 2966 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8) … … 2989 2993 } 2990 2994 2995 //The total number of operations is 11 2996 IDISA_ALWAYS_INLINE bitblock128_t bitblock128::sll(bitblock128_t arg1, bitblock128_t arg2) 2997 { 2998 return simd128<128>::sll(arg1, arg2); 2999 } 3000 2991 3001 //The total number of operations is 1 2992 3002 IDISA_ALWAYS_INLINE bitblock128_t bitblock128::load_unaligned(bitblock128_t* arg1) 2993 3003 { 2994 3004 return _mm_loadu_si128((bitblock128_t*)(arg1)); 3005 } 3006 3007 //The total number of operations is 4 3008 template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t bitblock128::srli(bitblock128_t arg1) 3009 { 3010 return simd128<128>::srli<sh>(arg1); 2995 3011 } 2996 3012 … … 3013 3029 } 3014 3030 3031 //The total number of operations is 11 3032 IDISA_ALWAYS_INLINE bitblock128_t bitblock128::srl(bitblock128_t arg1, bitblock128_t arg2) 3033 { 3034 return simd128<128>::srl(arg1, arg2); 3035 } 3036 3037 //The total number of operations is 4 3038 template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t bitblock128::slli(bitblock128_t arg1) 3039 { 3040 return simd128<128>::slli<sh>(arg1); 3041 } 3042 3015 3043 //The total number of operations is 2 3016 3044 IDISA_ALWAYS_INLINE bool bitblock128::any(bitblock128_t arg1)
Note: See TracChangeset
for help on using the changeset viewer.