Ignore:
Timestamp:
Oct 23, 2011, 9:43:33 AM (8 years ago)
Author:
cameron
Message:

bitblock::srl, sll, srli, slli implementations

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/idisa_cpp/idisa_avx.cpp

    r1573 r1580  
    2626        static IDISA_ALWAYS_INLINE bitblock256_t sub(bitblock256_t arg1, bitblock256_t arg2);
    2727        static IDISA_ALWAYS_INLINE bitblock256_t add_hl(bitblock256_t arg1);
     28        static IDISA_ALWAYS_INLINE bitblock256_t lomask();
    2829        static IDISA_ALWAYS_INLINE bitblock256_t umin(bitblock256_t arg1, bitblock256_t arg2);
    2930        template <uint64_t val> static IDISA_ALWAYS_INLINE bitblock256_t constant();
    3031        static IDISA_ALWAYS_INLINE bitblock256_t min(bitblock256_t arg1, bitblock256_t arg2);
    31         static IDISA_ALWAYS_INLINE bitblock256_t lomask();
    3232        static IDISA_ALWAYS_INLINE bitblock256_t umax(bitblock256_t arg1, bitblock256_t arg2);
    3333        static IDISA_ALWAYS_INLINE bitblock256_t abs(bitblock256_t arg1);
     
    8686public:
    8787        static IDISA_ALWAYS_INLINE bitblock256_t load_unaligned(bitblock256_t* arg1);
     88        template <uint64_t sh> static IDISA_ALWAYS_INLINE bitblock256_t srli(bitblock256_t arg1);
    8889        static IDISA_ALWAYS_INLINE void store_aligned(bitblock256_t* arg1, bitblock256_t arg2);
    8990        static IDISA_ALWAYS_INLINE bool all(bitblock256_t arg1);
    9091        static IDISA_ALWAYS_INLINE bool any(bitblock256_t arg1);
    9192        static IDISA_ALWAYS_INLINE uint64_t popcount(bitblock256_t arg1);
     93        template <uint64_t sh> static IDISA_ALWAYS_INLINE bitblock256_t slli(bitblock256_t arg1);
    9294        static IDISA_ALWAYS_INLINE bitblock256_t load_aligned(bitblock256_t* arg1);
    9395        static IDISA_ALWAYS_INLINE void store_unaligned(bitblock256_t* arg1, bitblock256_t arg2);
     
    239241template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::add_hl(bitblock256_t arg1);
    240242template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::add_hl(bitblock256_t arg1);
    241 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::lomask();
    242 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::lomask();
    243 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::lomask();
    244 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::lomask();
    245 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::lomask();
    246 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::lomask();
    247 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::lomask();
    248 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::lomask();
    249243template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::constant();
    250244template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::constant();
     
    265259template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::min(bitblock256_t arg1, bitblock256_t arg2);
    266260template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::min(bitblock256_t arg1, bitblock256_t arg2);
     261template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::lomask();
     262template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::lomask();
     263template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::lomask();
     264template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::lomask();
     265template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::lomask();
     266template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::lomask();
     267template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::lomask();
     268template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::lomask();
    267269template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::umin(bitblock256_t arg1, bitblock256_t arg2);
    268270template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::umin(bitblock256_t arg1, bitblock256_t arg2);
     
    274276template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::umin(bitblock256_t arg1, bitblock256_t arg2);
    275277template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::umin(bitblock256_t arg1, bitblock256_t arg2);
    276 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::umax(bitblock256_t arg1, bitblock256_t arg2);
    277 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::umax(bitblock256_t arg1, bitblock256_t arg2);
    278 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::umax(bitblock256_t arg1, bitblock256_t arg2);
    279 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::umax(bitblock256_t arg1, bitblock256_t arg2);
    280 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::umax(bitblock256_t arg1, bitblock256_t arg2);
    281 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::umax(bitblock256_t arg1, bitblock256_t arg2);
    282 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::umax(bitblock256_t arg1, bitblock256_t arg2);
    283 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::umax(bitblock256_t arg1, bitblock256_t arg2);
    284 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::umax(bitblock256_t arg1, bitblock256_t arg2);
     278template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::abs(bitblock256_t arg1);
     279template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::abs(bitblock256_t arg1);
     280template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::abs(bitblock256_t arg1);
     281template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::abs(bitblock256_t arg1);
     282template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::abs(bitblock256_t arg1);
     283template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::abs(bitblock256_t arg1);
     284template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::abs(bitblock256_t arg1);
     285template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::abs(bitblock256_t arg1);
    285286template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::eq(bitblock256_t arg1, bitblock256_t arg2);
    286287template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::eq(bitblock256_t arg1, bitblock256_t arg2);
     
    317318template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::add(bitblock256_t arg1, bitblock256_t arg2);
    318319template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::add(bitblock256_t arg1, bitblock256_t arg2);
    319 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::abs(bitblock256_t arg1);
    320 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::abs(bitblock256_t arg1);
    321 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::abs(bitblock256_t arg1);
    322 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::abs(bitblock256_t arg1);
    323 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::abs(bitblock256_t arg1);
    324 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::abs(bitblock256_t arg1);
    325 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::abs(bitblock256_t arg1);
    326 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::abs(bitblock256_t arg1);
     320template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::umax(bitblock256_t arg1, bitblock256_t arg2);
     321template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::umax(bitblock256_t arg1, bitblock256_t arg2);
     322template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::umax(bitblock256_t arg1, bitblock256_t arg2);
     323template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::umax(bitblock256_t arg1, bitblock256_t arg2);
     324template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::umax(bitblock256_t arg1, bitblock256_t arg2);
     325template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::umax(bitblock256_t arg1, bitblock256_t arg2);
     326template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::umax(bitblock256_t arg1, bitblock256_t arg2);
     327template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::umax(bitblock256_t arg1, bitblock256_t arg2);
     328template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::umax(bitblock256_t arg1, bitblock256_t arg2);
    327329template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<2>::umin_hl(bitblock256_t arg1, bitblock256_t arg2);
    328330template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<4>::umin_hl(bitblock256_t arg1, bitblock256_t arg2);
     
    470472template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16);
    471473template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16);
    472 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::slli(bitblock256_t arg1);
    473 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::slli(bitblock256_t arg1);
    474 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::slli(bitblock256_t arg1);
    475 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::slli(bitblock256_t arg1);
    476 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::slli(bitblock256_t arg1);
    477 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::slli(bitblock256_t arg1);
    478 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::slli(bitblock256_t arg1);
    479 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<256>::slli(bitblock256_t arg1);
    480474template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<1>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4);
    481475template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4);
     
    506500template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::dslli(bitblock256_t arg1, bitblock256_t arg2);
    507501template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<256>::dslli(bitblock256_t arg1, bitblock256_t arg2);
     502template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::slli(bitblock256_t arg1);
     503template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::slli(bitblock256_t arg1);
     504template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::slli(bitblock256_t arg1);
     505template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::slli(bitblock256_t arg1);
     506template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::slli(bitblock256_t arg1);
     507template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::slli(bitblock256_t arg1);
     508template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::slli(bitblock256_t arg1);
     509template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<256>::slli(bitblock256_t arg1);
    508510template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<1>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8);
    509511template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8);
     
    15471549
    15481550//The total number of operations is 0
    1549 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::lomask()
    1550 {
    1551         return simd256<2>::constant<(1)>();
    1552 }
    1553 
    1554 //The total number of operations is 0
    1555 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::lomask()
    1556 {
    1557         return simd256<4>::constant<(3)>();
    1558 }
    1559 
    1560 //The total number of operations is 0
    1561 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::lomask()
    1562 {
    1563         return simd256<8>::constant<(15)>();
    1564 }
    1565 
    1566 //The total number of operations is 0
    1567 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::lomask()
    1568 {
    1569         return simd256<16>::constant<(255)>();
    1570 }
    1571 
    1572 //The total number of operations is 0
    1573 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::lomask()
    1574 {
    1575         return simd256<32>::constant<(65535)>();
    1576 }
    1577 
    1578 //The total number of operations is 0
    1579 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::lomask()
    1580 {
    1581         return ((bitblock256_t)_mm256_set_epi32((int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1)));
    1582 }
    1583 
    1584 //The total number of operations is 0
    1585 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::lomask()
    1586 {
    1587         return ((bitblock256_t)_mm256_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1)));
    1588 }
    1589 
    1590 //The total number of operations is 0
    1591 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::lomask()
    1592 {
    1593         return ((bitblock256_t)_mm256_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1), (int32_t)(-1), (int32_t)(-1)));
    1594 }
    1595 
    1596 //The total number of operations is 0
    15971551template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::constant()
    15981552{
     
    17151669}
    17161670
     1671//The total number of operations is 0
     1672template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::lomask()
     1673{
     1674        return simd256<2>::constant<(1)>();
     1675}
     1676
     1677//The total number of operations is 0
     1678template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::lomask()
     1679{
     1680        return simd256<4>::constant<(3)>();
     1681}
     1682
     1683//The total number of operations is 0
     1684template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::lomask()
     1685{
     1686        return simd256<8>::constant<(15)>();
     1687}
     1688
     1689//The total number of operations is 0
     1690template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::lomask()
     1691{
     1692        return simd256<16>::constant<(255)>();
     1693}
     1694
     1695//The total number of operations is 0
     1696template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::lomask()
     1697{
     1698        return simd256<32>::constant<(65535)>();
     1699}
     1700
     1701//The total number of operations is 0
     1702template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::lomask()
     1703{
     1704        return ((bitblock256_t)_mm256_set_epi32((int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1)));
     1705}
     1706
     1707//The total number of operations is 0
     1708template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::lomask()
     1709{
     1710        return ((bitblock256_t)_mm256_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1)));
     1711}
     1712
     1713//The total number of operations is 0
     1714template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::lomask()
     1715{
     1716        return ((bitblock256_t)_mm256_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1), (int32_t)(-1), (int32_t)(-1)));
     1717}
     1718
    17171719//The total number of operations is 1
    17181720template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::umin(bitblock256_t arg1, bitblock256_t arg2)
     
    17791781}
    17801782
    1781 //The total number of operations is 1
    1782 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::umax(bitblock256_t arg1, bitblock256_t arg2)
    1783 {
    1784         return simd_or(arg1, arg2);
    1785 }
    1786 
    1787 //The total number of operations is 28
    1788 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::umax(bitblock256_t arg1, bitblock256_t arg2)
    1789 {
    1790         bitblock256_t tmpAns = simd256<(1)>::umax(arg1, arg2);
    1791         bitblock256_t eqMask1 = simd256<2>::srli<(1)>(simd256<(1)>::eq(tmpAns, arg1));
    1792         bitblock256_t eqMask2 = simd256<2>::srli<(1)>(simd256<(1)>::eq(tmpAns, arg2));
    1793         return simd256<1>::ifh(simd256<2>::himask(), tmpAns, simd256<1>::ifh(eqMask1, simd256<1>::ifh(eqMask2, tmpAns, arg1), arg2));
    1794 }
    1795 
    1796 //The total number of operations is 20
    1797 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::umax(bitblock256_t arg1, bitblock256_t arg2)
    1798 {
    1799         return simd_or(simd_and(simd256<(8)>::himask(), simd256<(8)>::umax(arg1, arg2)), simd256<(8)>::umax(simd_and(simd256<(8)>::lomask(), arg1), simd_and(simd256<(8)>::lomask(), arg2)));
    1800 }
    1801 
    1802 //The total number of operations is 8
    1803 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::umax(bitblock256_t arg1, bitblock256_t arg2)
    1804 {
    1805         return avx_general_combine256(_mm_max_epu8(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_max_epu8(avx_select_lo128(arg1), avx_select_lo128(arg2)));
    1806 }
    1807 
    1808 //The total number of operations is 8
    1809 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::umax(bitblock256_t arg1, bitblock256_t arg2)
    1810 {
    1811         return avx_general_combine256(_mm_max_epu16(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_max_epu16(avx_select_lo128(arg1), avx_select_lo128(arg2)));
    1812 }
    1813 
    1814 //The total number of operations is 8
    1815 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::umax(bitblock256_t arg1, bitblock256_t arg2)
    1816 {
    1817         return avx_general_combine256(_mm_max_epu32(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_max_epu32(avx_select_lo128(arg1), avx_select_lo128(arg2)));
    1818 }
    1819 
    1820 //The total number of operations is 14
    1821 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::umax(bitblock256_t arg1, bitblock256_t arg2)
    1822 {
    1823         bitblock256_t high_bit = simd256<64>::constant<(9223372036854775808UL)>();
    1824         return simd_xor(simd256<64>::max(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
    1825 }
    1826 
    1827 //The total number of operations is 77
    1828 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::umax(bitblock256_t arg1, bitblock256_t arg2)
    1829 {
    1830         bitblock256_t tmpAns = simd256<(64)>::umax(arg1, arg2);
    1831         bitblock256_t eqMask1 = simd256<128>::srli<(64)>(simd256<(64)>::eq(tmpAns, arg1));
    1832         bitblock256_t eqMask2 = simd256<128>::srli<(64)>(simd256<(64)>::eq(tmpAns, arg2));
    1833         return simd256<1>::ifh(simd256<128>::himask(), tmpAns, simd256<1>::ifh(eqMask1, simd256<1>::ifh(eqMask2, tmpAns, arg1), arg2));
    1834 }
    1835 
    1836 //The total number of operations is 264
    1837 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::umax(bitblock256_t arg1, bitblock256_t arg2)
    1838 {
    1839         bitblock256_t tmpAns = simd256<(128)>::umax(arg1, arg2);
    1840         bitblock256_t eqMask1 = simd256<256>::srli<(128)>(simd256<(128)>::eq(tmpAns, arg1));
    1841         bitblock256_t eqMask2 = simd256<256>::srli<(128)>(simd256<(128)>::eq(tmpAns, arg2));
    1842         return simd256<1>::ifh(simd256<256>::himask(), tmpAns, simd256<1>::ifh(eqMask1, simd256<1>::ifh(eqMask2, tmpAns, arg1), arg2));
     1783//The total number of operations is 45
     1784template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::abs(bitblock256_t arg1)
     1785{
     1786        return simd256<1>::ifh(simd256<2>::himask(), simd_and(arg1, simd256<256>::slli<1>(simd_not(arg1))), arg1);
     1787}
     1788
     1789//The total number of operations is 51
     1790template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::abs(bitblock256_t arg1)
     1791{
     1792        bitblock256_t gtMask = simd256<4>::gt(arg1, simd256<4>::constant<0>());
     1793        return simd256<1>::ifh(gtMask, arg1, simd256<4>::sub(gtMask, arg1));
     1794}
     1795
     1796//The total number of operations is 6
     1797template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::abs(bitblock256_t arg1)
     1798{
     1799        return avx_general_combine256(_mm_abs_epi8(avx_select_hi128(arg1)), _mm_abs_epi8(avx_select_lo128(arg1)));
     1800}
     1801
     1802//The total number of operations is 6
     1803template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::abs(bitblock256_t arg1)
     1804{
     1805        return avx_general_combine256(_mm_abs_epi16(avx_select_hi128(arg1)), _mm_abs_epi16(avx_select_lo128(arg1)));
     1806}
     1807
     1808//The total number of operations is 6
     1809template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::abs(bitblock256_t arg1)
     1810{
     1811        return avx_general_combine256(_mm_abs_epi32(avx_select_hi128(arg1)), _mm_abs_epi32(avx_select_lo128(arg1)));
     1812}
     1813
     1814//The total number of operations is 19
     1815template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::abs(bitblock256_t arg1)
     1816{
     1817        bitblock256_t gtMask = simd256<64>::gt(arg1, simd256<64>::constant<0>());
     1818        return simd256<1>::ifh(gtMask, arg1, simd256<64>::sub(gtMask, arg1));
     1819}
     1820
     1821//The total number of operations is 117
     1822template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::abs(bitblock256_t arg1)
     1823{
     1824        bitblock256_t eqMask = simd256<128>::eq(simd256<1>::ifh(simd256<128>::himask(), simd256<(64)>::abs(arg1), arg1), arg1);
     1825        return simd256<1>::ifh(eqMask, arg1, simd256<128>::sub(eqMask, arg1));
     1826}
     1827
     1828//The total number of operations is 391
     1829template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::abs(bitblock256_t arg1)
     1830{
     1831        bitblock256_t eqMask = simd256<256>::eq(simd256<1>::ifh(simd256<256>::himask(), simd256<(128)>::abs(arg1), arg1), arg1);
     1832        return simd256<1>::ifh(eqMask, arg1, simd256<256>::sub(eqMask, arg1));
    18431833}
    18441834
     
    20732063}
    20742064
    2075 //The total number of operations is 45
    2076 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::abs(bitblock256_t arg1)
    2077 {
    2078         return simd256<1>::ifh(simd256<2>::himask(), simd_and(arg1, simd256<256>::slli<1>(simd_not(arg1))), arg1);
    2079 }
    2080 
    2081 //The total number of operations is 51
    2082 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::abs(bitblock256_t arg1)
    2083 {
    2084         bitblock256_t gtMask = simd256<4>::gt(arg1, simd256<4>::constant<0>());
    2085         return simd256<1>::ifh(gtMask, arg1, simd256<4>::sub(gtMask, arg1));
    2086 }
    2087 
    2088 //The total number of operations is 6
    2089 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::abs(bitblock256_t arg1)
    2090 {
    2091         return avx_general_combine256(_mm_abs_epi8(avx_select_hi128(arg1)), _mm_abs_epi8(avx_select_lo128(arg1)));
    2092 }
    2093 
    2094 //The total number of operations is 6
    2095 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::abs(bitblock256_t arg1)
    2096 {
    2097         return avx_general_combine256(_mm_abs_epi16(avx_select_hi128(arg1)), _mm_abs_epi16(avx_select_lo128(arg1)));
    2098 }
    2099 
    2100 //The total number of operations is 6
    2101 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::abs(bitblock256_t arg1)
    2102 {
    2103         return avx_general_combine256(_mm_abs_epi32(avx_select_hi128(arg1)), _mm_abs_epi32(avx_select_lo128(arg1)));
    2104 }
    2105 
    2106 //The total number of operations is 19
    2107 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::abs(bitblock256_t arg1)
    2108 {
    2109         bitblock256_t gtMask = simd256<64>::gt(arg1, simd256<64>::constant<0>());
    2110         return simd256<1>::ifh(gtMask, arg1, simd256<64>::sub(gtMask, arg1));
    2111 }
    2112 
    2113 //The total number of operations is 117
    2114 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::abs(bitblock256_t arg1)
    2115 {
    2116         bitblock256_t eqMask = simd256<128>::eq(simd256<1>::ifh(simd256<128>::himask(), simd256<(64)>::abs(arg1), arg1), arg1);
    2117         return simd256<1>::ifh(eqMask, arg1, simd256<128>::sub(eqMask, arg1));
    2118 }
    2119 
    2120 //The total number of operations is 391
    2121 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::abs(bitblock256_t arg1)
    2122 {
    2123         bitblock256_t eqMask = simd256<256>::eq(simd256<1>::ifh(simd256<256>::himask(), simd256<(128)>::abs(arg1), arg1), arg1);
    2124         return simd256<1>::ifh(eqMask, arg1, simd256<256>::sub(eqMask, arg1));
     2065//The total number of operations is 1
     2066template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::umax(bitblock256_t arg1, bitblock256_t arg2)
     2067{
     2068        return simd_or(arg1, arg2);
     2069}
     2070
     2071//The total number of operations is 28
     2072template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::umax(bitblock256_t arg1, bitblock256_t arg2)
     2073{
     2074        bitblock256_t tmpAns = simd256<(1)>::umax(arg1, arg2);
     2075        bitblock256_t eqMask1 = simd256<2>::srli<(1)>(simd256<(1)>::eq(tmpAns, arg1));
     2076        bitblock256_t eqMask2 = simd256<2>::srli<(1)>(simd256<(1)>::eq(tmpAns, arg2));
     2077        return simd256<1>::ifh(simd256<2>::himask(), tmpAns, simd256<1>::ifh(eqMask1, simd256<1>::ifh(eqMask2, tmpAns, arg1), arg2));
     2078}
     2079
     2080//The total number of operations is 20
     2081template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::umax(bitblock256_t arg1, bitblock256_t arg2)
     2082{
     2083        return simd_or(simd_and(simd256<(8)>::himask(), simd256<(8)>::umax(arg1, arg2)), simd256<(8)>::umax(simd_and(simd256<(8)>::lomask(), arg1), simd_and(simd256<(8)>::lomask(), arg2)));
     2084}
     2085
     2086//The total number of operations is 8
     2087template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::umax(bitblock256_t arg1, bitblock256_t arg2)
     2088{
     2089        return avx_general_combine256(_mm_max_epu8(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_max_epu8(avx_select_lo128(arg1), avx_select_lo128(arg2)));
     2090}
     2091
     2092//The total number of operations is 8
     2093template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::umax(bitblock256_t arg1, bitblock256_t arg2)
     2094{
     2095        return avx_general_combine256(_mm_max_epu16(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_max_epu16(avx_select_lo128(arg1), avx_select_lo128(arg2)));
     2096}
     2097
     2098//The total number of operations is 8
     2099template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::umax(bitblock256_t arg1, bitblock256_t arg2)
     2100{
     2101        return avx_general_combine256(_mm_max_epu32(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_max_epu32(avx_select_lo128(arg1), avx_select_lo128(arg2)));
     2102}
     2103
     2104//The total number of operations is 14
     2105template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::umax(bitblock256_t arg1, bitblock256_t arg2)
     2106{
     2107        bitblock256_t high_bit = simd256<64>::constant<(9223372036854775808UL)>();
     2108        return simd_xor(simd256<64>::max(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
     2109}
     2110
     2111//The total number of operations is 77
     2112template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::umax(bitblock256_t arg1, bitblock256_t arg2)
     2113{
     2114        bitblock256_t tmpAns = simd256<(64)>::umax(arg1, arg2);
     2115        bitblock256_t eqMask1 = simd256<128>::srli<(64)>(simd256<(64)>::eq(tmpAns, arg1));
     2116        bitblock256_t eqMask2 = simd256<128>::srli<(64)>(simd256<(64)>::eq(tmpAns, arg2));
     2117        return simd256<1>::ifh(simd256<128>::himask(), tmpAns, simd256<1>::ifh(eqMask1, simd256<1>::ifh(eqMask2, tmpAns, arg1), arg2));
     2118}
     2119
     2120//The total number of operations is 264
     2121template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::umax(bitblock256_t arg1, bitblock256_t arg2)
     2122{
     2123        bitblock256_t tmpAns = simd256<(128)>::umax(arg1, arg2);
     2124        bitblock256_t eqMask1 = simd256<256>::srli<(128)>(simd256<(128)>::eq(tmpAns, arg1));
     2125        bitblock256_t eqMask2 = simd256<256>::srli<(128)>(simd256<(128)>::eq(tmpAns, arg2));
     2126        return simd256<1>::ifh(simd256<256>::himask(), tmpAns, simd256<1>::ifh(eqMask1, simd256<1>::ifh(eqMask2, tmpAns, arg1), arg2));
    21252127}
    21262128
     
    30503052}
    30513053
     3054//The total number of operations is 5
     3055template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<1>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4)
     3056{
     3057        return simd256<1>::ifh(simd256<(4)>::himask(), mvmd256<1>::fill2(val1, val2), mvmd256<1>::fill2(val3, val4));
     3058}
     3059
     3060//The total number of operations is 5
     3061template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4)
     3062{
     3063        return simd256<1>::ifh(simd256<(8)>::himask(), mvmd256<2>::fill2(val1, val2), mvmd256<2>::fill2(val3, val4));
     3064}
     3065
     3066//The total number of operations is 5
     3067template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4)
     3068{
     3069        return simd256<1>::ifh(simd256<(16)>::himask(), mvmd256<4>::fill2(val1, val2), mvmd256<4>::fill2(val3, val4));
     3070}
     3071
     3072//The total number of operations is 5
     3073template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4)
     3074{
     3075        return simd256<1>::ifh(simd256<(32)>::himask(), mvmd256<8>::fill2(val1, val2), mvmd256<8>::fill2(val3, val4));
     3076}
     3077
     3078//The total number of operations is 3
     3079template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4)
     3080{
     3081        return simd_or(mvmd256<(32)>::fill4((val1<<16), (val3<<16), (val1<<16), (val3<<16)), mvmd256<(32)>::fill4((val2&(65535)), (val4&(65535)), (val2&(65535)), (val4&(65535))));
     3082}
     3083
     3084//The total number of operations is 1
     3085template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4)
     3086{
     3087        return (bitblock256_t)_mm256_set_epi32((int32_t)(val1), (int32_t)(val2), (int32_t)(val3), (int32_t)(val4), (int32_t)(val1), (int32_t)(val2), (int32_t)(val3), (int32_t)(val4));
     3088}
     3089
     3090//The total number of operations is 41
         // Shifts arg1 by sh 2-bit fields: forwards to simd256<256>::srli with
         // the count converted to a bit count (sh*2).
     3091template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::srli(bitblock256_t arg1)
     3092{
     3093        return simd256<256>::srli<(sh*2)>(arg1);
     3094}
     3095
     3096//The total number of operations is 41
         // Shifts arg1 by sh 4-bit fields: forwards to simd256<256>::srli with
         // the count converted to a bit count (sh*4).
     3097template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::srli(bitblock256_t arg1)
     3098{
     3099        return simd256<256>::srli<(sh*4)>(arg1);
     3100}
     3101
     3102//The total number of operations is 41
         // Shifts arg1 by sh 8-bit fields: forwards to simd256<256>::srli with
         // the count converted to a bit count (sh*8).
     3103template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::srli(bitblock256_t arg1)
     3104{
     3105        return simd256<256>::srli<(sh*8)>(arg1);
     3106}
     3107
     3108//The total number of operations is 41
         // Shifts arg1 by sh 16-bit fields: forwards to simd256<256>::srli with
         // the count converted to a bit count (sh*16).
     3109template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::srli(bitblock256_t arg1)
     3110{
     3111        return simd256<256>::srli<(sh*16)>(arg1);
     3112}
     3113
     3114//The total number of operations is 41
         // Shifts arg1 by sh 32-bit fields: forwards to simd256<256>::srli with
         // the count converted to a bit count (sh*32).
     3115template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::srli(bitblock256_t arg1)
     3116{
     3117        return simd256<256>::srli<(sh*32)>(arg1);
     3118}
     3119
     3120//The total number of operations is 41
         // Shifts arg1 by sh 64-bit fields: forwards to simd256<256>::srli with
         // the count converted to a bit count (sh*64).
     3121template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::srli(bitblock256_t arg1)
     3122{
     3123        return simd256<256>::srli<(sh*64)>(arg1);
     3124}
     3125
     3126//The total number of operations is 41
         // Shifts arg1 by sh 128-bit fields: forwards to simd256<256>::srli with
         // the count converted to a bit count (sh*128).
     3127template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::srli(bitblock256_t arg1)
     3128{
     3129        return simd256<256>::srli<(sh*128)>(arg1);
     3130}
     3131
     3132//The total number of operations is 41
         // Forwards to simd256<256>::srli with a bit count of sh*256. Any
         // sh >= 1 therefore requests a shift of at least the full 256-bit
         // width; the result then depends on how simd256<256>::srli treats
         // such counts -- TODO confirm.
     3133template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<256>::srli(bitblock256_t arg1)
     3134{
     3135        return simd256<256>::srli<(sh*256)>(arg1);
     3136}
     3137
     3138//The total number of operations is 1
         // Combines the pair into one value, (val1<<1) | (val2 & 1), and
         // broadcasts it with mvmd256<(2)>::fill. val1 is not masked here, so
         // any bits above the 2-bit field rely on fill to discard them -- TODO
         // confirm.
     3139template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<1>::fill2(uint64_t val1, uint64_t val2)
     3140{
     3141        return mvmd256<(2)>::fill(((val1<<1)|(val2&(1))));
     3142}
     3143
     3144//The total number of operations is 1
         // Combines the pair into one value, (val1<<2) | (val2 & 3), and
         // broadcasts it with mvmd256<(4)>::fill. val1 is not masked here, so
         // any bits above the 4-bit field rely on fill to discard them -- TODO
         // confirm.
     3145template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::fill2(uint64_t val1, uint64_t val2)
     3146{
     3147        return mvmd256<(4)>::fill(((val1<<2)|(val2&(3))));
     3148}
     3149
     3150//The total number of operations is 1
         // Combines the pair into one value, (val1<<4) | (val2 & 15), and
         // broadcasts it with mvmd256<(8)>::fill. val1 is not masked here, so
         // any bits above the 8-bit field rely on fill to discard them -- TODO
         // confirm.
     3151template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::fill2(uint64_t val1, uint64_t val2)
     3152{
     3153        return mvmd256<(8)>::fill(((val1<<4)|(val2&(15))));
     3154}
     3155
     3156//The total number of operations is 1
         // Combines the pair into one value, (val1<<8) | (val2 & 255), and
         // broadcasts it with mvmd256<(16)>::fill. val1 is not masked here, so
         // any bits above the 16-bit field rely on fill to discard them -- TODO
         // confirm.
     3157template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::fill2(uint64_t val1, uint64_t val2)
     3158{
     3159        return mvmd256<(16)>::fill(((val1<<8)|(val2&(255))));
     3160}
     3161
     3162//The total number of operations is 1
         // Combines the pair into one value, (val1<<16) | (val2 & 65535), and
         // broadcasts it with mvmd256<(32)>::fill. val1 is not masked here, so
         // any bits above the 32-bit field rely on fill to discard them -- TODO
         // confirm.
     3163template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::fill2(uint64_t val1, uint64_t val2)
     3164{
     3165        return mvmd256<(32)>::fill(((val1<<16)|(val2&(65535))));
     3166}
     3167
     3168//The total number of operations is 5
         // Selects between mvmd256<32>::fill(val1) and mvmd256<32>::fill(val2)
         // with simd256<1>::ifh under simd256<(64)>::himask(). Presumably val1
         // ends up in the high 32 bits of each 64-bit field and val2 in the low
         // 32 bits -- confirm against ifh/himask semantics.
     3169template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::fill2(uint64_t val1, uint64_t val2)
     3170{
     3171        return simd256<1>::ifh(simd256<(64)>::himask(), mvmd256<32>::fill(val1), mvmd256<32>::fill(val2));
     3172}
     3173
     3174//The total number of operations is 82
         // Double shift: ORs mvmd256<2>::slli<sh>(arg1) with
         // mvmd256<2>::srli<(128)-sh>(arg2); 128 is the number of 2-bit fields
         // in 256 bits. sh is unsigned, so sh > 128 would wrap the srli count.
     3175template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::dslli(bitblock256_t arg1, bitblock256_t arg2)
     3176{
     3177        return simd_or(mvmd256<2>::slli<sh>(arg1), mvmd256<2>::srli<((128)-sh)>(arg2));
     3178}
     3179
     3180//The total number of operations is 82
         // Double shift: ORs mvmd256<4>::slli<sh>(arg1) with
         // mvmd256<4>::srli<(64)-sh>(arg2); 64 is the number of 4-bit fields
         // in 256 bits. sh is unsigned, so sh > 64 would wrap the srli count.
     3181template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::dslli(bitblock256_t arg1, bitblock256_t arg2)
     3182{
     3183        return simd_or(mvmd256<4>::slli<sh>(arg1), mvmd256<4>::srli<((64)-sh)>(arg2));
     3184}
     3185
     3186//The total number of operations is 82
         // Double shift: ORs mvmd256<8>::slli<sh>(arg1) with
         // mvmd256<8>::srli<(32)-sh>(arg2); 32 is the number of 8-bit fields
         // in 256 bits. sh is unsigned, so sh > 32 would wrap the srli count.
     3187template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::dslli(bitblock256_t arg1, bitblock256_t arg2)
     3188{
     3189        return simd_or(mvmd256<8>::slli<sh>(arg1), mvmd256<8>::srli<((32)-sh)>(arg2));
     3190}
     3191
     3192//The total number of operations is 82
         // Double shift: ORs mvmd256<16>::slli<sh>(arg1) with
         // mvmd256<16>::srli<(16)-sh>(arg2); 16 is the number of 16-bit fields
         // in 256 bits. sh is unsigned, so sh > 16 would wrap the srli count.
     3193template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::dslli(bitblock256_t arg1, bitblock256_t arg2)
     3194{
     3195        return simd_or(mvmd256<16>::slli<sh>(arg1), mvmd256<16>::srli<((16)-sh)>(arg2));
     3196}
     3197
     3198//The total number of operations is 82
         // Double shift: ORs mvmd256<32>::slli<sh>(arg1) with
         // mvmd256<32>::srli<(8)-sh>(arg2); 8 is the number of 32-bit fields
         // in 256 bits. sh is unsigned, so sh > 8 would wrap the srli count.
     3199template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::dslli(bitblock256_t arg1, bitblock256_t arg2)
     3200{
     3201        return simd_or(mvmd256<32>::slli<sh>(arg1), mvmd256<32>::srli<((8)-sh)>(arg2));
     3202}
     3203
     3204//The total number of operations is 82
         // Double shift: ORs mvmd256<64>::slli<sh>(arg1) with
         // mvmd256<64>::srli<(4)-sh>(arg2); 4 is the number of 64-bit fields
         // in 256 bits. sh is unsigned, so sh > 4 would wrap the srli count.
     3205template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::dslli(bitblock256_t arg1, bitblock256_t arg2)
     3206{
     3207        return simd_or(mvmd256<64>::slli<sh>(arg1), mvmd256<64>::srli<((4)-sh)>(arg2));
     3208}
     3209
     3210//The total number of operations is 82
         // Double shift: ORs mvmd256<128>::slli<sh>(arg1) with
         // mvmd256<128>::srli<(2)-sh>(arg2); 2 is the number of 128-bit fields
         // in 256 bits. sh is unsigned, so sh > 2 would wrap the srli count.
     3211template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::dslli(bitblock256_t arg1, bitblock256_t arg2)
     3212{
     3213        return simd_or(mvmd256<128>::slli<sh>(arg1), mvmd256<128>::srli<((2)-sh)>(arg2));
     3214}
     3215
     3216//The total number of operations is 82
         // Double shift: ORs mvmd256<256>::slli<sh>(arg1) with
         // mvmd256<256>::srli<(1)-sh>(arg2); the block holds a single 256-bit
         // field. sh is unsigned, so sh > 1 would wrap the srli count.
     3217template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<256>::dslli(bitblock256_t arg1, bitblock256_t arg2)
     3218{
     3219        return simd_or(mvmd256<256>::slli<sh>(arg1), mvmd256<256>::srli<((1)-sh)>(arg2));
     3220}
     3221
    30523222//The total number of operations is 40
    30533223template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::slli(bitblock256_t arg1)
     
    30983268}
    30993269
    3100 //The total number of operations is 5
         // Builds the block from two fill2 results: simd256<1>::ifh uses
         // simd256<(4)>::himask() to choose between fill2(val1, val2) and
         // fill2(val3, val4). Presumably the result repeats val1..val4 across
         // consecutive 1-bit fields -- confirm against ifh/himask semantics.
    3101 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<1>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4)
    3102 {
    3103         return simd256<1>::ifh(simd256<(4)>::himask(), mvmd256<1>::fill2(val1, val2), mvmd256<1>::fill2(val3, val4));
    3104 }
    3105 
    3106 //The total number of operations is 5
         // Builds the block from two fill2 results: simd256<1>::ifh uses
         // simd256<(8)>::himask() to choose between fill2(val1, val2) and
         // fill2(val3, val4). Presumably the result repeats val1..val4 across
         // consecutive 2-bit fields -- confirm against ifh/himask semantics.
    3107 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4)
    3108 {
    3109         return simd256<1>::ifh(simd256<(8)>::himask(), mvmd256<2>::fill2(val1, val2), mvmd256<2>::fill2(val3, val4));
    3110 }
    3111 
    3112 //The total number of operations is 5
         // Builds the block from two fill2 results: simd256<1>::ifh uses
         // simd256<(16)>::himask() to choose between fill2(val1, val2) and
         // fill2(val3, val4). Presumably the result repeats val1..val4 across
         // consecutive 4-bit fields -- confirm against ifh/himask semantics.
    3113 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4)
    3114 {
    3115         return simd256<1>::ifh(simd256<(16)>::himask(), mvmd256<4>::fill2(val1, val2), mvmd256<4>::fill2(val3, val4));
    3116 }
    3117 
    3118 //The total number of operations is 5
         // Builds the block from two fill2 results: simd256<1>::ifh uses
         // simd256<(32)>::himask() to choose between fill2(val1, val2) and
         // fill2(val3, val4). Presumably the result repeats val1..val4 across
         // consecutive 8-bit fields -- confirm against ifh/himask semantics.
    3119 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4)
    3120 {
    3121         return simd256<1>::ifh(simd256<(32)>::himask(), mvmd256<8>::fill2(val1, val2), mvmd256<8>::fill2(val3, val4));
    3122 }
    3123 
    3124 //The total number of operations is 3
         // ORs two mvmd256<(32)>::fill4 blocks: the first is filled from val1
         // and val3 shifted left by 16, the second from val2 and val4 masked to
         // their low 16 bits, so the upper and lower halves of each 32-bit
         // element come from different inputs.
    3125 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4)
    3126 {
    3127         return simd_or(mvmd256<(32)>::fill4((val1<<16), (val3<<16), (val1<<16), (val3<<16)), mvmd256<(32)>::fill4((val2&(65535)), (val4&(65535)), (val2&(65535)), (val4&(65535))));
    3128 }
    3129 
    3130 //The total number of operations is 1
         // Single _mm256_set_epi32 call: every value is narrowed to int32_t and
         // the sequence val1, val2, val3, val4 is passed twice to supply all
         // eight 32-bit arguments of the intrinsic.
    3131 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4)
    3132 {
    3133         return (bitblock256_t)_mm256_set_epi32((int32_t)(val1), (int32_t)(val2), (int32_t)(val3), (int32_t)(val4), (int32_t)(val1), (int32_t)(val2), (int32_t)(val3), (int32_t)(val4));
    3134 }
    3135 
    3136 //The total number of operations is 41
         // Shifts arg1 by sh 2-bit fields: forwards to simd256<256>::srli with
         // the count converted to a bit count (sh*2).
    3137 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::srli(bitblock256_t arg1)
    3138 {
    3139         return simd256<256>::srli<(sh*2)>(arg1);
    3140 }
    3141 
    3142 //The total number of operations is 41
         // Shifts arg1 by sh 4-bit fields: forwards to simd256<256>::srli with
         // the count converted to a bit count (sh*4).
    3143 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::srli(bitblock256_t arg1)
    3144 {
    3145         return simd256<256>::srli<(sh*4)>(arg1);
    3146 }
    3147 
    3148 //The total number of operations is 41
         // Shifts arg1 by sh 8-bit fields: forwards to simd256<256>::srli with
         // the count converted to a bit count (sh*8).
    3149 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::srli(bitblock256_t arg1)
    3150 {
    3151         return simd256<256>::srli<(sh*8)>(arg1);
    3152 }
    3153 
    3154 //The total number of operations is 41
         // Shifts arg1 by sh 16-bit fields: forwards to simd256<256>::srli with
         // the count converted to a bit count (sh*16).
    3155 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::srli(bitblock256_t arg1)
    3156 {
    3157         return simd256<256>::srli<(sh*16)>(arg1);
    3158 }
    3159 
    3160 //The total number of operations is 41
         // Shifts arg1 by sh 32-bit fields: forwards to simd256<256>::srli with
         // the count converted to a bit count (sh*32).
    3161 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::srli(bitblock256_t arg1)
    3162 {
    3163         return simd256<256>::srli<(sh*32)>(arg1);
    3164 }
    3165 
    3166 //The total number of operations is 41
         // Shifts arg1 by sh 64-bit fields: forwards to simd256<256>::srli with
         // the count converted to a bit count (sh*64).
    3167 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::srli(bitblock256_t arg1)
    3168 {
    3169         return simd256<256>::srli<(sh*64)>(arg1);
    3170 }
    3171 
    3172 //The total number of operations is 41
         // Shifts arg1 by sh 128-bit fields: forwards to simd256<256>::srli with
         // the count converted to a bit count (sh*128).
    3173 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::srli(bitblock256_t arg1)
    3174 {
    3175         return simd256<256>::srli<(sh*128)>(arg1);
    3176 }
    3177 
    3178 //The total number of operations is 41
         // Forwards to simd256<256>::srli with a bit count of sh*256. Any
         // sh >= 1 therefore requests a shift of at least the full 256-bit
         // width; the result then depends on how simd256<256>::srli treats
         // such counts -- TODO confirm.
    3179 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<256>::srli(bitblock256_t arg1)
    3180 {
    3181         return simd256<256>::srli<(sh*256)>(arg1);
    3182 }
    3183 
    3184 //The total number of operations is 1
         // Combines the pair into one value, (val1<<1) | (val2 & 1), and
         // broadcasts it with mvmd256<(2)>::fill. val1 is not masked here, so
         // any bits above the 2-bit field rely on fill to discard them -- TODO
         // confirm.
    3185 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<1>::fill2(uint64_t val1, uint64_t val2)
    3186 {
    3187         return mvmd256<(2)>::fill(((val1<<1)|(val2&(1))));
    3188 }
    3189 
    3190 //The total number of operations is 1
         // Combines the pair into one value, (val1<<2) | (val2 & 3), and
         // broadcasts it with mvmd256<(4)>::fill. val1 is not masked here, so
         // any bits above the 4-bit field rely on fill to discard them -- TODO
         // confirm.
    3191 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::fill2(uint64_t val1, uint64_t val2)
    3192 {
    3193         return mvmd256<(4)>::fill(((val1<<2)|(val2&(3))));
    3194 }
    3195 
    3196 //The total number of operations is 1
         // Combines the pair into one value, (val1<<4) | (val2 & 15), and
         // broadcasts it with mvmd256<(8)>::fill. val1 is not masked here, so
         // any bits above the 8-bit field rely on fill to discard them -- TODO
         // confirm.
    3197 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::fill2(uint64_t val1, uint64_t val2)
    3198 {
    3199         return mvmd256<(8)>::fill(((val1<<4)|(val2&(15))));
    3200 }
    3201 
    3202 //The total number of operations is 1
         // Combines the pair into one value, (val1<<8) | (val2 & 255), and
         // broadcasts it with mvmd256<(16)>::fill. val1 is not masked here, so
         // any bits above the 16-bit field rely on fill to discard them -- TODO
         // confirm.
    3203 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::fill2(uint64_t val1, uint64_t val2)
    3204 {
    3205         return mvmd256<(16)>::fill(((val1<<8)|(val2&(255))));
    3206 }
    3207 
    3208 //The total number of operations is 1
         // Combines the pair into one value, (val1<<16) | (val2 & 65535), and
         // broadcasts it with mvmd256<(32)>::fill. val1 is not masked here, so
         // any bits above the 32-bit field rely on fill to discard them -- TODO
         // confirm.
    3209 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::fill2(uint64_t val1, uint64_t val2)
    3210 {
    3211         return mvmd256<(32)>::fill(((val1<<16)|(val2&(65535))));
    3212 }
    3213 
    3214 //The total number of operations is 5
         // Selects between mvmd256<32>::fill(val1) and mvmd256<32>::fill(val2)
         // with simd256<1>::ifh under simd256<(64)>::himask(). Presumably val1
         // ends up in the high 32 bits of each 64-bit field and val2 in the low
         // 32 bits -- confirm against ifh/himask semantics.
    3215 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::fill2(uint64_t val1, uint64_t val2)
    3216 {
    3217         return simd256<1>::ifh(simd256<(64)>::himask(), mvmd256<32>::fill(val1), mvmd256<32>::fill(val2));
    3218 }
    3219 
    3220 //The total number of operations is 82
         // Double shift: ORs mvmd256<2>::slli<sh>(arg1) with
         // mvmd256<2>::srli<(128)-sh>(arg2); 128 is the number of 2-bit fields
         // in 256 bits. sh is unsigned, so sh > 128 would wrap the srli count.
    3221 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::dslli(bitblock256_t arg1, bitblock256_t arg2)
    3222 {
    3223         return simd_or(mvmd256<2>::slli<sh>(arg1), mvmd256<2>::srli<((128)-sh)>(arg2));
    3224 }
    3225 
    3226 //The total number of operations is 82
         // Double shift: ORs mvmd256<4>::slli<sh>(arg1) with
         // mvmd256<4>::srli<(64)-sh>(arg2); 64 is the number of 4-bit fields
         // in 256 bits. sh is unsigned, so sh > 64 would wrap the srli count.
    3227 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::dslli(bitblock256_t arg1, bitblock256_t arg2)
    3228 {
    3229         return simd_or(mvmd256<4>::slli<sh>(arg1), mvmd256<4>::srli<((64)-sh)>(arg2));
    3230 }
    3231 
    3232 //The total number of operations is 82
         // Double shift: ORs mvmd256<8>::slli<sh>(arg1) with
         // mvmd256<8>::srli<(32)-sh>(arg2); 32 is the number of 8-bit fields
         // in 256 bits. sh is unsigned, so sh > 32 would wrap the srli count.
    3233 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::dslli(bitblock256_t arg1, bitblock256_t arg2)
    3234 {
    3235         return simd_or(mvmd256<8>::slli<sh>(arg1), mvmd256<8>::srli<((32)-sh)>(arg2));
    3236 }
    3237 
    3238 //The total number of operations is 82
         // Double shift: ORs mvmd256<16>::slli<sh>(arg1) with
         // mvmd256<16>::srli<(16)-sh>(arg2); 16 is the number of 16-bit fields
         // in 256 bits. sh is unsigned, so sh > 16 would wrap the srli count.
    3239 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::dslli(bitblock256_t arg1, bitblock256_t arg2)
    3240 {
    3241         return simd_or(mvmd256<16>::slli<sh>(arg1), mvmd256<16>::srli<((16)-sh)>(arg2));
    3242 }
    3243 
    3244 //The total number of operations is 82
         // Double shift: ORs mvmd256<32>::slli<sh>(arg1) with
         // mvmd256<32>::srli<(8)-sh>(arg2); 8 is the number of 32-bit fields
         // in 256 bits. sh is unsigned, so sh > 8 would wrap the srli count.
    3245 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::dslli(bitblock256_t arg1, bitblock256_t arg2)
    3246 {
    3247         return simd_or(mvmd256<32>::slli<sh>(arg1), mvmd256<32>::srli<((8)-sh)>(arg2));
    3248 }
    3249 
    3250 //The total number of operations is 82
         // Double shift: ORs mvmd256<64>::slli<sh>(arg1) with
         // mvmd256<64>::srli<(4)-sh>(arg2); 4 is the number of 64-bit fields
         // in 256 bits. sh is unsigned, so sh > 4 would wrap the srli count.
    3251 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::dslli(bitblock256_t arg1, bitblock256_t arg2)
    3252 {
    3253         return simd_or(mvmd256<64>::slli<sh>(arg1), mvmd256<64>::srli<((4)-sh)>(arg2));
    3254 }
    3255 
    3256 //The total number of operations is 82
         // Double shift: ORs mvmd256<128>::slli<sh>(arg1) with
         // mvmd256<128>::srli<(2)-sh>(arg2); 2 is the number of 128-bit fields
         // in 256 bits. sh is unsigned, so sh > 2 would wrap the srli count.
    3257 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::dslli(bitblock256_t arg1, bitblock256_t arg2)
    3258 {
    3259         return simd_or(mvmd256<128>::slli<sh>(arg1), mvmd256<128>::srli<((2)-sh)>(arg2));
    3260 }
    3261 
    3262 //The total number of operations is 82
         // Double shift: ORs mvmd256<256>::slli<sh>(arg1) with
         // mvmd256<256>::srli<(1)-sh>(arg2); the block holds a single 256-bit
         // field. sh is unsigned, so sh > 1 would wrap the srli count.
    3263 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<256>::dslli(bitblock256_t arg1, bitblock256_t arg2)
    3264 {
    3265         return simd_or(mvmd256<256>::slli<sh>(arg1), mvmd256<256>::srli<((1)-sh)>(arg2));
    3266 }
    3267 
    32683270//The total number of operations is 13
    32693271template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<1>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8)
     
    33083310}
    33093311
     3312//The total number of operations is 41
         // Whole-block immediate shift with compile-time count sh: forwards
         // arg1 unchanged to simd256<256>::srli<sh> and returns its result.
     3313template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t bitblock256::srli(bitblock256_t arg1)
     3314{
     3315        return simd256<256>::srli<sh>(arg1);
     3316}
     3317
    33103318//The total number of operations is 1
    33113319IDISA_ALWAYS_INLINE void bitblock256::store_aligned(bitblock256_t* arg1, bitblock256_t arg2)
     
    33263334}
    33273335
     3336//The total number of operations is 40
         // Whole-block immediate shift with compile-time count sh: forwards
         // arg1 unchanged to simd256<256>::slli<sh> and returns its result.
     3337template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t bitblock256::slli(bitblock256_t arg1)
     3338{
     3339        return simd256<256>::slli<sh>(arg1);
     3340}
     3341
    33283342//The total number of operations is 1
    33293343IDISA_ALWAYS_INLINE bool bitblock256::any(bitblock256_t arg1)
Note: See TracChangeset for help on using the changeset viewer.