Ignore:
Timestamp:
Nov 29, 2013, 3:03:33 PM (5 years ago)
Author:
linmengl
Message:

Regenerate libraries, with negative-number constants eliminated.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/idisa_cpp/idisa_avx.cpp

    r3526 r3576  
    5555        static IDISA_ALWAYS_INLINE bitblock256_t add_hl(bitblock256_t arg1);
    5656        static IDISA_ALWAYS_INLINE bitblock256_t lomask();
     57        static IDISA_ALWAYS_INLINE bitblock256_t lt(bitblock256_t arg1, bitblock256_t arg2);
    5758        static IDISA_ALWAYS_INLINE bitblock256_t umin(bitblock256_t arg1, bitblock256_t arg2);
    5859        template <typename FieldType<fw>::T val> static IDISA_ALWAYS_INLINE bitblock256_t constant();
    5960        static IDISA_ALWAYS_INLINE bitblock256_t min(bitblock256_t arg1, bitblock256_t arg2);
    60         static IDISA_ALWAYS_INLINE bitblock256_t add(bitblock256_t arg1, bitblock256_t arg2);
    6161        static IDISA_ALWAYS_INLINE bitblock256_t umax(bitblock256_t arg1, bitblock256_t arg2);
    6262        static IDISA_ALWAYS_INLINE bitblock256_t abs(bitblock256_t arg1);
     
    6464        static IDISA_ALWAYS_INLINE bitblock256_t any(bitblock256_t arg1);
    6565        template <uint16_t sh> static IDISA_ALWAYS_INLINE bitblock256_t srai(bitblock256_t arg1);
    66         static IDISA_ALWAYS_INLINE bitblock256_t lt(bitblock256_t arg1, bitblock256_t arg2);
     66        static IDISA_ALWAYS_INLINE bitblock256_t add(bitblock256_t arg1, bitblock256_t arg2);
    6767        static IDISA_ALWAYS_INLINE bitblock256_t ugt(bitblock256_t arg1, bitblock256_t arg2);
    6868};
     
    128128IDISA_ALWAYS_INLINE bitblock256_t simd_nor(bitblock256_t arg1, bitblock256_t arg2);
    129129IDISA_ALWAYS_INLINE bitblock256_t simd_not(bitblock256_t arg1);
     130IDISA_ALWAYS_INLINE bitblock256_t simd_andc(bitblock256_t arg1, bitblock256_t arg2);
    130131IDISA_ALWAYS_INLINE bitblock256_t simd_or(bitblock256_t arg1, bitblock256_t arg2);
    131 IDISA_ALWAYS_INLINE bitblock256_t simd_andc(bitblock256_t arg1, bitblock256_t arg2);
    132132IDISA_ALWAYS_INLINE bitblock256_t simd_and(bitblock256_t arg1, bitblock256_t arg2);
    133133IDISA_ALWAYS_INLINE bitblock256_t simd_xor(bitblock256_t arg1, bitblock256_t arg2);
     
    269269template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::ifh(bitblock256_t arg1, bitblock256_t arg2, bitblock256_t arg3);
    270270template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::ifh(bitblock256_t arg1, bitblock256_t arg2, bitblock256_t arg3);
    271 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::srai(bitblock256_t arg1);
    272 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::srai(bitblock256_t arg1);
    273 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::srai(bitblock256_t arg1);
    274 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::srai(bitblock256_t arg1);
    275 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::srai(bitblock256_t arg1);
    276 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::srai(bitblock256_t arg1);
    277 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::srai(bitblock256_t arg1);
    278 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::srai(bitblock256_t arg1);
    279271template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::add_hl(bitblock256_t arg1);
    280272template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::add_hl(bitblock256_t arg1);
     
    285277template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::add_hl(bitblock256_t arg1);
    286278template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::add_hl(bitblock256_t arg1);
    287 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::lomask();
    288 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::lomask();
    289 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::lomask();
    290 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::lomask();
    291 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::lomask();
    292 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::lomask();
    293 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::lomask();
    294 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::lomask();
    295279template <> template <FieldType<1>::T val> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::constant();
    296280template <> template <FieldType<2>::T val> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::constant();
     
    311295template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::min(bitblock256_t arg1, bitblock256_t arg2);
    312296template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::min(bitblock256_t arg1, bitblock256_t arg2);
     297template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::lomask();
     298template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::lomask();
     299template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::lomask();
     300template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::lomask();
     301template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::lomask();
     302template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::lomask();
     303template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::lomask();
     304template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::lomask();
    313305template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::umin(bitblock256_t arg1, bitblock256_t arg2);
    314306template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::umin(bitblock256_t arg1, bitblock256_t arg2);
     
    320312template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::umin(bitblock256_t arg1, bitblock256_t arg2);
    321313template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::umin(bitblock256_t arg1, bitblock256_t arg2);
    322 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::umax(bitblock256_t arg1, bitblock256_t arg2);
    323 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::umax(bitblock256_t arg1, bitblock256_t arg2);
    324 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::umax(bitblock256_t arg1, bitblock256_t arg2);
    325 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::umax(bitblock256_t arg1, bitblock256_t arg2);
    326 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::umax(bitblock256_t arg1, bitblock256_t arg2);
    327 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::umax(bitblock256_t arg1, bitblock256_t arg2);
    328 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::umax(bitblock256_t arg1, bitblock256_t arg2);
    329 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::umax(bitblock256_t arg1, bitblock256_t arg2);
    330 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::umax(bitblock256_t arg1, bitblock256_t arg2);
     314template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::abs(bitblock256_t arg1);
     315template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::abs(bitblock256_t arg1);
     316template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::abs(bitblock256_t arg1);
     317template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::abs(bitblock256_t arg1);
     318template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::abs(bitblock256_t arg1);
     319template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::abs(bitblock256_t arg1);
     320template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::abs(bitblock256_t arg1);
     321template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::abs(bitblock256_t arg1);
     322template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::eq(bitblock256_t arg1, bitblock256_t arg2);
     323template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::eq(bitblock256_t arg1, bitblock256_t arg2);
     324template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::eq(bitblock256_t arg1, bitblock256_t arg2);
     325template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::eq(bitblock256_t arg1, bitblock256_t arg2);
     326template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::eq(bitblock256_t arg1, bitblock256_t arg2);
     327template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::eq(bitblock256_t arg1, bitblock256_t arg2);
     328template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::eq(bitblock256_t arg1, bitblock256_t arg2);
     329template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::eq(bitblock256_t arg1, bitblock256_t arg2);
     330template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::eq(bitblock256_t arg1, bitblock256_t arg2);
     331template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::srai(bitblock256_t arg1);
     332template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::srai(bitblock256_t arg1);
     333template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::srai(bitblock256_t arg1);
     334template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::srai(bitblock256_t arg1);
     335template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::srai(bitblock256_t arg1);
     336template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::srai(bitblock256_t arg1);
     337template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::srai(bitblock256_t arg1);
     338template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::srai(bitblock256_t arg1);
    331339template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::lt(bitblock256_t arg1, bitblock256_t arg2);
    332340template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::lt(bitblock256_t arg1, bitblock256_t arg2);
     
    338346template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::lt(bitblock256_t arg1, bitblock256_t arg2);
    339347template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::lt(bitblock256_t arg1, bitblock256_t arg2);
    340 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::eq(bitblock256_t arg1, bitblock256_t arg2);
    341 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::eq(bitblock256_t arg1, bitblock256_t arg2);
    342 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::eq(bitblock256_t arg1, bitblock256_t arg2);
    343 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::eq(bitblock256_t arg1, bitblock256_t arg2);
    344 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::eq(bitblock256_t arg1, bitblock256_t arg2);
    345 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::eq(bitblock256_t arg1, bitblock256_t arg2);
    346 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::eq(bitblock256_t arg1, bitblock256_t arg2);
    347 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::eq(bitblock256_t arg1, bitblock256_t arg2);
    348 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::eq(bitblock256_t arg1, bitblock256_t arg2);
    349348template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::himask();
    350349template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::himask();
     
    364363template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::add(bitblock256_t arg1, bitblock256_t arg2);
    365364template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::add(bitblock256_t arg1, bitblock256_t arg2);
    366 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::abs(bitblock256_t arg1);
    367 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::abs(bitblock256_t arg1);
    368 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::abs(bitblock256_t arg1);
    369 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::abs(bitblock256_t arg1);
    370 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::abs(bitblock256_t arg1);
    371 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::abs(bitblock256_t arg1);
    372 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::abs(bitblock256_t arg1);
    373 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::abs(bitblock256_t arg1);
     365template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::umax(bitblock256_t arg1, bitblock256_t arg2);
     366template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::umax(bitblock256_t arg1, bitblock256_t arg2);
     367template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::umax(bitblock256_t arg1, bitblock256_t arg2);
     368template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::umax(bitblock256_t arg1, bitblock256_t arg2);
     369template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::umax(bitblock256_t arg1, bitblock256_t arg2);
     370template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::umax(bitblock256_t arg1, bitblock256_t arg2);
     371template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::umax(bitblock256_t arg1, bitblock256_t arg2);
     372template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::umax(bitblock256_t arg1, bitblock256_t arg2);
     373template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::umax(bitblock256_t arg1, bitblock256_t arg2);
    374374template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<2>::umin_hl(bitblock256_t arg1, bitblock256_t arg2);
    375375template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<4>::umin_hl(bitblock256_t arg1, bitblock256_t arg2);
     
    597597IDISA_ALWAYS_INLINE bitblock256_t simd_not(bitblock256_t arg1)
    598598{
    599         return simd_xor(arg1, simd256<32>::constant<-1>());
     599        return simd_xor(arg1, simd256<32>::constant<4294967295ULL>());
     600}
     601
     602//The total number of operations is 1.0
     603IDISA_ALWAYS_INLINE bitblock256_t simd_andc(bitblock256_t arg1, bitblock256_t arg2)
     604{
     605        return _mm256_andnot_ps(arg2, arg1);
    600606}
    601607
     
    604610{
    605611        return _mm256_or_ps(arg1, arg2);
    606 }
    607 
    608 //The total number of operations is 1.0
    609 IDISA_ALWAYS_INLINE bitblock256_t simd_andc(bitblock256_t arg1, bitblock256_t arg2)
    610 {
    611         return _mm256_andnot_ps(arg2, arg1);
    612612}
    613613
     
    15851585}
    15861586
    1587 //The total number of operations is 7.0
    1588 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::srai(bitblock256_t arg1)
    1589 {
    1590         return ((sh == 0) ? arg1 : simd_or(simd_and(simd256<2>::himask(), arg1), simd256<2>::srli<1>(arg1)));
    1591 }
    1592 
    1593 //The total number of operations is 17.5
    1594 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::srai(bitblock256_t arg1)
    1595 {
    1596         return simd_or(simd_and(simd256<4>::himask(), simd256<(2)>::srai<((sh < (2)) ? sh : (2))>(arg1)), ((sh <= (2)) ? simd256<4>::srli<sh>(arg1) : simd256<(2)>::srai<(sh-(2))>(simd256<4>::srli<(2)>(arg1))));
    1597 }
    1598 
    1599 //The total number of operations is 12.0
    1600 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::srai(bitblock256_t arg1)
    1601 {
    1602         bitblock256_t tmp = simd256<8>::srli<((sh >= 8) ? (7) : ((sh < 0) ? 0 : sh))>(arg1);
    1603         return simd_or(tmp, simd256<8>::sub(simd256<8>::constant<0>(), simd_and(simd256<8>::constant<(1<<((8-((sh >= 8) ? (7) : ((sh < 0) ? 0 : sh)))-1))>(), tmp)));
    1604 }
    1605 
    1606 //The total number of operations is 4.0
    1607 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::srai(bitblock256_t arg1)
    1608 {
    1609         return avx_general_combine256(_mm_srai_epi16(avx_select_hi128(arg1), (int32_t)(sh)), _mm_srai_epi16(avx_select_lo128(arg1), (int32_t)(sh)));
    1610 }
    1611 
    1612 //The total number of operations is 4.0
    1613 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::srai(bitblock256_t arg1)
    1614 {
    1615         return avx_general_combine256(_mm_srai_epi32(avx_select_hi128(arg1), (int32_t)(sh)), _mm_srai_epi32(avx_select_lo128(arg1), (int32_t)(sh)));
    1616 }
    1617 
    1618 //The total number of operations is 12.0
    1619 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::srai(bitblock256_t arg1)
    1620 {
    1621         return simd_or(simd_and(simd256<64>::himask(), simd256<(32)>::srai<((sh < (32)) ? sh : (32))>(arg1)), ((sh <= (32)) ? simd256<64>::srli<sh>(arg1) : simd256<(32)>::srai<(sh-(32))>(simd256<64>::srli<(32)>(arg1))));
    1622 }
    1623 
    1624 //The total number of operations is 28.3333333333
    1625 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::srai(bitblock256_t arg1)
    1626 {
    1627         return simd_or(simd_and(simd256<128>::himask(), simd256<(64)>::srai<((sh < (64)) ? sh : (64))>(arg1)), ((sh <= (64)) ? simd256<128>::srli<sh>(arg1) : simd256<(64)>::srai<(sh-(64))>(simd256<128>::srli<(64)>(arg1))));
    1628 }
    1629 
    1630 //The total number of operations is 58.5
    1631 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::srai(bitblock256_t arg1)
    1632 {
    1633         return simd_or(simd_and(simd256<256>::himask(), simd256<(128)>::srai<((sh < (128)) ? sh : (128))>(arg1)), ((sh <= (128)) ? simd256<256>::srli<sh>(arg1) : simd256<(128)>::srai<(sh-(128))>(simd256<256>::srli<(128)>(arg1))));
    1634 }
    1635 
    16361587//The total number of operations is 10.0
    16371588template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::add_hl(bitblock256_t arg1)
     
    16831634
    16841635//The total number of operations is 0
    1685 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::lomask()
    1686 {
    1687         return simd256<2>::constant<(1)>();
    1688 }
    1689 
    1690 //The total number of operations is 0
    1691 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::lomask()
    1692 {
    1693         return simd256<4>::constant<(3)>();
    1694 }
    1695 
    1696 //The total number of operations is 0
    1697 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::lomask()
    1698 {
    1699         return simd256<8>::constant<(15)>();
    1700 }
    1701 
    1702 //The total number of operations is 0
    1703 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::lomask()
    1704 {
    1705         return simd256<16>::constant<(255)>();
    1706 }
    1707 
    1708 //The total number of operations is 0
    1709 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::lomask()
    1710 {
    1711         return simd256<32>::constant<(65535)>();
    1712 }
    1713 
    1714 //The total number of operations is 0
    1715 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::lomask()
    1716 {
    1717         return ((bitblock256_t)(_mm256_set_epi32((int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1))));
    1718 }
    1719 
    1720 //The total number of operations is 0
    1721 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::lomask()
    1722 {
    1723         return ((bitblock256_t)(_mm256_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1))));
    1724 }
    1725 
    1726 //The total number of operations is 0
    1727 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::lomask()
    1728 {
    1729         return ((bitblock256_t)(_mm256_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1), (int32_t)(-1), (int32_t)(-1))));
    1730 }
    1731 
    1732 //The total number of operations is 0
    17331636template <> template <FieldType<1>::T val> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::constant()
    17341637{
    1735         return simd256<32>::constant<(-1*val)>();
     1638        return simd256<2>::constant<((val+val)+val)>();
    17361639}
    17371640
     
    18511754}
    18521755
     1756//The total number of operations is 0
     1757template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::lomask()
     1758{
     1759        return simd256<2>::constant<(1)>();
     1760}
     1761
     1762//The total number of operations is 0
     1763template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::lomask()
     1764{
     1765        return simd256<4>::constant<(3)>();
     1766}
     1767
     1768//The total number of operations is 0
     1769template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::lomask()
     1770{
     1771        return simd256<8>::constant<(15)>();
     1772}
     1773
     1774//The total number of operations is 0
     1775template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::lomask()
     1776{
     1777        return simd256<16>::constant<(255)>();
     1778}
     1779
     1780//The total number of operations is 0
     1781template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::lomask()
     1782{
     1783        return simd256<32>::constant<(65535)>();
     1784}
     1785
     1786//The total number of operations is 0
     1787template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::lomask()
     1788{
     1789        return ((bitblock256_t)(_mm256_set_epi32((int32_t)(0), (int32_t)(((4294967296ULL)-1)), (int32_t)(0), (int32_t)(((4294967296ULL)-1)), (int32_t)(0), (int32_t)(((4294967296ULL)-1)), (int32_t)(0), (int32_t)(((4294967296ULL)-1)))));
     1790}
     1791
     1792//The total number of operations is 0
     1793template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::lomask()
     1794{
     1795        return ((bitblock256_t)(_mm256_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(((4294967296ULL)-1)), (int32_t)(((4294967296ULL)-1)), (int32_t)(0), (int32_t)(0), (int32_t)(((4294967296ULL)-1)), (int32_t)(((4294967296ULL)-1)))));
     1796}
     1797
     1798//The total number of operations is 0
     1799template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::lomask()
     1800{
     1801        return ((bitblock256_t)(_mm256_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(((4294967296ULL)-1)), (int32_t)(((4294967296ULL)-1)), (int32_t)(((4294967296ULL)-1)), (int32_t)(((4294967296ULL)-1)))));
     1802}
     1803
    18531804//The total number of operations is 1.0
    18541805template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::umin(bitblock256_t arg1, bitblock256_t arg2)
     
    19151866}
    19161867
    1917 //The total number of operations is 1.0
    1918 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::umax(bitblock256_t arg1, bitblock256_t arg2)
    1919 {
    1920         return simd_or(arg1, arg2);
    1921 }
    1922 
    1923 //The total number of operations is 24.0
    1924 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::umax(bitblock256_t arg1, bitblock256_t arg2)
    1925 {
    1926         bitblock256_t tmpAns = simd256<(1)>::umax(arg1, arg2);
    1927         bitblock256_t eqMask1 = simd256<2>::srli<(1)>(simd256<(1)>::eq(tmpAns, arg1));
    1928         bitblock256_t eqMask2 = simd256<2>::srli<(1)>(simd256<(1)>::eq(tmpAns, arg2));
    1929         return simd256<1>::ifh(simd256<2>::himask(), tmpAns, simd256<1>::ifh(eqMask1, simd256<1>::ifh(eqMask2, tmpAns, arg1), arg2));
     1868//The total number of operations is 19.0
     1869template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::abs(bitblock256_t arg1)
     1870{
     1871        return simd256<1>::ifh(simd256<2>::himask(), simd_and(arg1, simd256<256>::slli<1>(simd_not(arg1))), arg1);
     1872}
     1873
     1874//The total number of operations is 39.0
     1875template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::abs(bitblock256_t arg1)
     1876{
     1877        bitblock256_t gtMask = simd256<4>::gt(arg1, simd256<4>::constant<0>());
     1878        return simd256<1>::ifh(gtMask, arg1, simd256<4>::sub(gtMask, arg1));
     1879}
     1880
     1881//The total number of operations is 4.0
     1882template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::abs(bitblock256_t arg1)
     1883{
     1884        return avx_general_combine256(_mm_abs_epi8(avx_select_hi128(arg1)), _mm_abs_epi8(avx_select_lo128(arg1)));
     1885}
     1886
     1887//The total number of operations is 4.0
     1888template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::abs(bitblock256_t arg1)
     1889{
     1890        return avx_general_combine256(_mm_abs_epi16(avx_select_hi128(arg1)), _mm_abs_epi16(avx_select_lo128(arg1)));
     1891}
     1892
     1893//The total number of operations is 4.0
     1894template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::abs(bitblock256_t arg1)
     1895{
     1896        return avx_general_combine256(_mm_abs_epi32(avx_select_hi128(arg1)), _mm_abs_epi32(avx_select_lo128(arg1)));
     1897}
     1898
     1899//The total number of operations is 13.0
     1900template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::abs(bitblock256_t arg1)
     1901{
     1902        bitblock256_t gtMask = simd256<64>::gt(arg1, simd256<64>::constant<0>());
     1903        return simd256<1>::ifh(gtMask, arg1, simd256<64>::sub(gtMask, arg1));
     1904}
     1905
     1906//The total number of operations is 69.0
     1907template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::abs(bitblock256_t arg1)
     1908{
     1909        bitblock256_t eqMask = simd256<128>::eq(simd256<1>::ifh(simd256<128>::himask(), simd256<(64)>::abs(arg1), arg1), arg1);
     1910        return simd256<1>::ifh(eqMask, arg1, simd256<128>::sub(eqMask, arg1));
     1911}
     1912
     1913//The total number of operations is 204.333333333
     1914template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::abs(bitblock256_t arg1)
     1915{
     1916        bitblock256_t eqMask = simd256<256>::eq(simd256<1>::ifh(simd256<256>::himask(), simd256<(128)>::abs(arg1), arg1), arg1);
     1917        return simd256<1>::ifh(eqMask, arg1, simd256<256>::sub(eqMask, arg1));
     1918}
     1919
     1920//The total number of operations is 2.0
     1921template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::eq(bitblock256_t arg1, bitblock256_t arg2)
     1922{
     1923        return simd_not(simd_xor(arg1, arg2));
    19301924}
    19311925
    19321926//The total number of operations is 14.0
    1933 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::umax(bitblock256_t arg1, bitblock256_t arg2)
    1934 {
    1935         return simd_or(simd_and(simd256<(8)>::himask(), simd256<(8)>::umax(arg1, arg2)), simd256<(8)>::umax(simd_and(simd256<(8)>::lomask(), arg1), simd_and(simd256<(8)>::lomask(), arg2)));
    1936 }
    1937 
    1938 //The total number of operations is 5.0
    1939 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::umax(bitblock256_t arg1, bitblock256_t arg2)
    1940 {
    1941         return avx_general_combine256(_mm_max_epu8(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_max_epu8(avx_select_lo128(arg1), avx_select_lo128(arg2)));
    1942 }
    1943 
    1944 //The total number of operations is 5.0
    1945 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::umax(bitblock256_t arg1, bitblock256_t arg2)
    1946 {
    1947         return avx_general_combine256(_mm_max_epu16(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_max_epu16(avx_select_lo128(arg1), avx_select_lo128(arg2)));
    1948 }
    1949 
    1950 //The total number of operations is 5.0
    1951 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::umax(bitblock256_t arg1, bitblock256_t arg2)
    1952 {
    1953         return avx_general_combine256(_mm_max_epu32(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_max_epu32(avx_select_lo128(arg1), avx_select_lo128(arg2)));
    1954 }
    1955 
    1956 //The total number of operations is 11.0
    1957 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::umax(bitblock256_t arg1, bitblock256_t arg2)
    1958 {
    1959         bitblock256_t high_bit = simd256<64>::constant<(9223372036854775808ULL)>();
    1960         return simd_xor(simd256<64>::max(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
    1961 }
    1962 
    1963 //The total number of operations is 46.6666666667
    1964 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::umax(bitblock256_t arg1, bitblock256_t arg2)
    1965 {
    1966         bitblock256_t tmpAns = simd256<(64)>::umax(arg1, arg2);
    1967         bitblock256_t eqMask1 = simd256<128>::srli<(64)>(simd256<(64)>::eq(tmpAns, arg1));
    1968         bitblock256_t eqMask2 = simd256<128>::srli<(64)>(simd256<(64)>::eq(tmpAns, arg2));
    1969         return simd256<1>::ifh(simd256<128>::himask(), tmpAns, simd256<1>::ifh(eqMask1, simd256<1>::ifh(eqMask2, tmpAns, arg1), arg2));
    1970 }
    1971 
    1972 //The total number of operations is 131.0
    1973 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::umax(bitblock256_t arg1, bitblock256_t arg2)
    1974 {
    1975         bitblock256_t tmpAns = simd256<(128)>::umax(arg1, arg2);
    1976         bitblock256_t eqMask1 = simd256<256>::srli<(128)>(simd256<(128)>::eq(tmpAns, arg1));
    1977         bitblock256_t eqMask2 = simd256<256>::srli<(128)>(simd256<(128)>::eq(tmpAns, arg2));
    1978         return simd256<1>::ifh(simd256<256>::himask(), tmpAns, simd256<1>::ifh(eqMask1, simd256<1>::ifh(eqMask2, tmpAns, arg1), arg2));
     1927template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::eq(bitblock256_t arg1, bitblock256_t arg2)
     1928{
     1929        bitblock256_t tmpAns = simd256<(1)>::eq(arg1, arg2);
     1930        bitblock256_t loMask = simd_and(tmpAns, simd256<2>::srli<(1)>(tmpAns));
     1931        bitblock256_t hiMask = simd256<2>::slli<(1)>(loMask);
     1932        return simd_or(loMask, hiMask);
     1933}
     1934
     1935//The total number of operations is 17.0
     1936template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::eq(bitblock256_t arg1, bitblock256_t arg2)
     1937{
     1938        return simd_or(simd_and(simd256<(8)>::himask(), simd256<(8)>::eq(simd_and(simd256<(8)>::himask(), arg1), simd_and(simd256<(8)>::himask(), arg2))), simd_and(simd256<(8)>::lomask(), simd256<(8)>::eq(simd_and(simd256<(8)>::lomask(), arg1), simd_and(simd256<(8)>::lomask(), arg2))));
     1939}
     1940
     1941//The total number of operations is 5.0
     1942template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::eq(bitblock256_t arg1, bitblock256_t arg2)
     1943{
     1944        return avx_general_combine256(_mm_cmpeq_epi8(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_cmpeq_epi8(avx_select_lo128(arg1), avx_select_lo128(arg2)));
     1945}
     1946
     1947//The total number of operations is 5.0
     1948template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::eq(bitblock256_t arg1, bitblock256_t arg2)
     1949{
     1950        return avx_general_combine256(_mm_cmpeq_epi16(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_cmpeq_epi16(avx_select_lo128(arg1), avx_select_lo128(arg2)));
     1951}
     1952
     1953//The total number of operations is 5.0
     1954template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::eq(bitblock256_t arg1, bitblock256_t arg2)
     1955{
     1956        return avx_general_combine256(_mm_cmpeq_epi32(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_cmpeq_epi32(avx_select_lo128(arg1), avx_select_lo128(arg2)));
     1957}
     1958
     1959//The total number of operations is 5.0
     1960template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::eq(bitblock256_t arg1, bitblock256_t arg2)
     1961{
     1962        return avx_general_combine256(_mm_cmpeq_epi64(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_cmpeq_epi64(avx_select_lo128(arg1), avx_select_lo128(arg2)));
     1963}
     1964
     1965//The total number of operations is 23.6666666667
     // simd256<128>::eq -- built from simd256<64>::eq. tmpAns is the 64-bit compare
     // result; loMask = tmpAns & simd256<128>::srli<64>(tmpAns); the return value is
     // loMask | simd256<128>::slli<64>(loMask).
     // Presumably a 128-bit field ends up all ones only when both of its 64-bit
     // halves compared equal -- this relies on srli/slli being per-field logical
     // shifts, which are not defined in this excerpt.
     1966template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::eq(bitblock256_t arg1, bitblock256_t arg2)
     1967{
     1968        bitblock256_t tmpAns = simd256<(64)>::eq(arg1, arg2);
     1969        bitblock256_t loMask = simd_and(tmpAns, simd256<128>::srli<(64)>(tmpAns));
     1970        bitblock256_t hiMask = simd256<128>::slli<(64)>(loMask);
     1971        return simd_or(loMask, hiMask);
     1972}
     1973
     1974//The total number of operations is 53.6666666667
     // simd256<256>::eq -- same construction as the 128-bit version, one level up:
     // tmpAns is simd256<128>::eq, loMask = tmpAns & simd256<256>::srli<128>(tmpAns),
     // and the result is loMask | simd256<256>::slli<128>(loMask).
     // Presumably all ones only when both 128-bit halves compared equal (depends on
     // the srli/slli semantics, defined outside this excerpt).
     1975template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::eq(bitblock256_t arg1, bitblock256_t arg2)
     1976{
     1977        bitblock256_t tmpAns = simd256<(128)>::eq(arg1, arg2);
     1978        bitblock256_t loMask = simd_and(tmpAns, simd256<256>::srli<(128)>(tmpAns));
     1979        bitblock256_t hiMask = simd256<256>::slli<(128)>(loMask);
     1980        return simd_or(loMask, hiMask);
     1981}
     1982
     1983//The total number of operations is 7.0
     // simd256<2>::srai<sh> -- for sh == 0 returns arg1 unchanged; for every other
     // sh returns (himask() & arg1) | srli<1>(arg1), so the result does not depend
     // on sh beyond zero / non-zero.
     1984template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::srai(bitblock256_t arg1)
     1985{
     1986        return ((sh == 0) ? arg1 : simd_or(simd_and(simd256<2>::himask(), arg1), simd256<2>::srli<1>(arg1)));
     1987}
     1988
     1989//The total number of operations is 17.5
     // simd256<4>::srai<sh> -- composed from the 2-bit srai and the 4-bit srli:
     //   masked part: himask() & simd256<2>::srai<(sh < 2 ? sh : 2)>(arg1)
     //   other part:  srli<sh>(arg1) when sh <= 2,
     //                else simd256<2>::srai<sh-2>(srli<2>(arg1))
     // The two parts are OR-ed.
     // NOTE(review): ?: does not discard its unused arm, so srai<(sh-(2))> is named
     // even for sh < 2, where sh-(2) is negative before conversion to uint16_t --
     // confirm the target compilers accept that template argument.
     1990template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::srai(bitblock256_t arg1)
     1991{
     1992        return simd_or(simd_and(simd256<4>::himask(), simd256<(2)>::srai<((sh < (2)) ? sh : (2))>(arg1)), ((sh <= (2)) ? simd256<4>::srli<sh>(arg1) : simd256<(2)>::srai<(sh-(2))>(simd256<4>::srli<(2)>(arg1))));
     1993}
     1994
     1995//The total number of operations is 12.0
     // simd256<8>::srai<sh> -- shifts with srli using count sh (or 7 when sh >= 8),
     // then ANDs the shifted value with the constant 1 << (8 - count - 1), which is
     // the position the original top bit of each field moved to. That bit is
     // subtracted from zero and the difference is OR-ed into the shifted value,
     // presumably filling the vacated upper bits with copies of the sign bit.
     // NOTE(review): sh is uint16_t, so both (sh < 0) tests are always false.
     1996template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::srai(bitblock256_t arg1)
     1997{
     1998        bitblock256_t tmp = simd256<8>::srli<((sh >= 8) ? (7) : ((sh < 0) ? 0 : sh))>(arg1);
     1999        return simd_or(tmp, simd256<8>::sub(simd256<8>::constant<0>(), simd_and(simd256<8>::constant<(1<<((8-((sh >= 8) ? (7) : ((sh < 0) ? 0 : sh)))-1))>(), tmp)));
     2000}
     2001
     2002//The total number of operations is 4.0
     // simd256<16>::srai<sh> -- arithmetic right shift of 16-bit fields. Each half
     // from avx_select_hi128 / avx_select_lo128 is shifted with _mm_srai_epi16 using
     // the count (int32_t)(sh); avx_general_combine256 joins the two results.
     2003template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::srai(bitblock256_t arg1)
     2004{
     2005        return avx_general_combine256(_mm_srai_epi16(avx_select_hi128(arg1), (int32_t)(sh)), _mm_srai_epi16(avx_select_lo128(arg1), (int32_t)(sh)));
     2006}
     2007
     2008//The total number of operations is 4.0
     // simd256<32>::srai<sh> -- arithmetic right shift of 32-bit fields. Each half
     // from avx_select_hi128 / avx_select_lo128 is shifted with _mm_srai_epi32 using
     // the count (int32_t)(sh); avx_general_combine256 joins the two results.
     2009template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::srai(bitblock256_t arg1)
     2010{
     2011        return avx_general_combine256(_mm_srai_epi32(avx_select_hi128(arg1), (int32_t)(sh)), _mm_srai_epi32(avx_select_lo128(arg1), (int32_t)(sh)));
     2012}
     2013
     2014//The total number of operations is 12.0
     // simd256<64>::srai<sh> -- composed from the 32-bit srai and the 64-bit srli:
     //   masked part: himask() & simd256<32>::srai<(sh < 32 ? sh : 32)>(arg1)
     //   other part:  srli<sh>(arg1) when sh <= 32,
     //                else simd256<32>::srai<sh-32>(srli<32>(arg1))
     // The two parts are OR-ed.
     // NOTE(review): both ?: arms are named for every sh, so srai<(sh-(32))> is
     // formed with a negative value when sh < 32 -- confirm compilers accept it.
     2015template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::srai(bitblock256_t arg1)
     2016{
     2017        return simd_or(simd_and(simd256<64>::himask(), simd256<(32)>::srai<((sh < (32)) ? sh : (32))>(arg1)), ((sh <= (32)) ? simd256<64>::srli<sh>(arg1) : simd256<(32)>::srai<(sh-(32))>(simd256<64>::srli<(32)>(arg1))));
     2018}
     2019
     2020//The total number of operations is 28.3333333333
     // simd256<128>::srai<sh> -- composed from the 64-bit srai and the 128-bit srli:
     //   masked part: himask() & simd256<64>::srai<(sh < 64 ? sh : 64)>(arg1)
     //   other part:  srli<sh>(arg1) when sh <= 64,
     //                else simd256<64>::srai<sh-64>(srli<64>(arg1))
     // The two parts are OR-ed.
     // NOTE(review): both ?: arms are named for every sh, so srai<(sh-(64))> is
     // formed with a negative value when sh < 64 -- confirm compilers accept it.
     2021template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::srai(bitblock256_t arg1)
     2022{
     2023        return simd_or(simd_and(simd256<128>::himask(), simd256<(64)>::srai<((sh < (64)) ? sh : (64))>(arg1)), ((sh <= (64)) ? simd256<128>::srli<sh>(arg1) : simd256<(64)>::srai<(sh-(64))>(simd256<128>::srli<(64)>(arg1))));
     2024}
     2025
     2026//The total number of operations is 58.5
     // simd256<256>::srai<sh> -- composed from the 128-bit srai and the 256-bit srli:
     //   masked part: himask() & simd256<128>::srai<(sh < 128 ? sh : 128)>(arg1)
     //   other part:  srli<sh>(arg1) when sh <= 128,
     //                else simd256<128>::srai<sh-128>(srli<128>(arg1))
     // The two parts are OR-ed.
     // NOTE(review): both ?: arms are named for every sh, so srai<(sh-(128))> is
     // formed with a negative value when sh < 128 -- confirm compilers accept it.
     2027template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::srai(bitblock256_t arg1)
     2028{
     2029        return simd_or(simd_and(simd256<256>::himask(), simd256<(128)>::srai<((sh < (128)) ? sh : (128))>(arg1)), ((sh <= (128)) ? simd256<256>::srli<sh>(arg1) : simd256<(128)>::srai<(sh-(128))>(simd256<256>::srli<(128)>(arg1))));
    19792030}
    19802031
     
    20462097}
    20472098
    2048 //The total number of operations is 2.0
    // simd256<1>::eq -- bitwise equality: simd_not(simd_xor(arg1, arg2)), i.e. a
    // result bit is set wherever the two operand bits agree.
    2049 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::eq(bitblock256_t arg1, bitblock256_t arg2)
    2050 {
    2051         return simd_not(simd_xor(arg1, arg2));
    2052 }
    2053 
    2054 //The total number of operations is 14.0
    // simd256<2>::eq -- built from simd256<1>::eq. tmpAns is the bitwise compare
    // result; loMask = tmpAns & simd256<2>::srli<1>(tmpAns); the return value is
    // loMask | simd256<2>::slli<1>(loMask).
    // Presumably a 2-bit field ends up all ones only when both of its bits compared
    // equal (depends on srli/slli being per-field logical shifts -- confirm).
    2055 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::eq(bitblock256_t arg1, bitblock256_t arg2)
    2056 {
    2057         bitblock256_t tmpAns = simd256<(1)>::eq(arg1, arg2);
    2058         bitblock256_t loMask = simd_and(tmpAns, simd256<2>::srli<(1)>(tmpAns));
    2059         bitblock256_t hiMask = simd256<2>::slli<(1)>(loMask);
    2060         return simd_or(loMask, hiMask);
    2061 }
    2062 
    2063 //The total number of operations is 17.0
    // simd256<4>::eq -- equality on 4-bit fields, derived from simd256<8>::eq.
    // Each operand is masked with simd256<8>::himask() (resp. lomask()) before the
    // 8-bit compare, the compare result is masked again with the same mask, and the
    // two masked results are OR-ed together.
    // himask/lomask presumably select the upper/lower half of every 8-bit field;
    // lomask is not defined in this excerpt -- confirm.
    2064 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::eq(bitblock256_t arg1, bitblock256_t arg2)
    2065 {
    2066         return simd_or(simd_and(simd256<(8)>::himask(), simd256<(8)>::eq(simd_and(simd256<(8)>::himask(), arg1), simd_and(simd256<(8)>::himask(), arg2))), simd_and(simd256<(8)>::lomask(), simd256<(8)>::eq(simd_and(simd256<(8)>::lomask(), arg1), simd_and(simd256<(8)>::lomask(), arg2))));
    2067 }
    2068 
    2069 //The total number of operations is 5.0
    // simd256<8>::eq -- compares 8-bit fields: split with avx_select_hi128 /
    // avx_select_lo128, compare each half with _mm_cmpeq_epi8, recombine with
    // avx_general_combine256 (high-half result passed first).
    2070 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::eq(bitblock256_t arg1, bitblock256_t arg2)
    2071 {
    2072         return avx_general_combine256(_mm_cmpeq_epi8(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_cmpeq_epi8(avx_select_lo128(arg1), avx_select_lo128(arg2)));
    2073 }
    2074 
    2075 //The total number of operations is 5.0
    // simd256<16>::eq -- compares 16-bit fields: split with avx_select_hi128 /
    // avx_select_lo128, compare each half with _mm_cmpeq_epi16, recombine with
    // avx_general_combine256.
    2076 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::eq(bitblock256_t arg1, bitblock256_t arg2)
    2077 {
    2078         return avx_general_combine256(_mm_cmpeq_epi16(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_cmpeq_epi16(avx_select_lo128(arg1), avx_select_lo128(arg2)));
    2079 }
    2080 
    2081 //The total number of operations is 5.0
    // simd256<32>::eq -- compares 32-bit fields: split with avx_select_hi128 /
    // avx_select_lo128, compare each half with _mm_cmpeq_epi32, recombine with
    // avx_general_combine256.
    2082 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::eq(bitblock256_t arg1, bitblock256_t arg2)
    2083 {
    2084         return avx_general_combine256(_mm_cmpeq_epi32(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_cmpeq_epi32(avx_select_lo128(arg1), avx_select_lo128(arg2)));
    2085 }
    2086 
    2087 //The total number of operations is 5.0
    // simd256<64>::eq -- compares 64-bit fields: split with avx_select_hi128 /
    // avx_select_lo128, compare each half with _mm_cmpeq_epi64, recombine with
    // avx_general_combine256.
    2088 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::eq(bitblock256_t arg1, bitblock256_t arg2)
    2089 {
    2090         return avx_general_combine256(_mm_cmpeq_epi64(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_cmpeq_epi64(avx_select_lo128(arg1), avx_select_lo128(arg2)));
    2091 }
    2092 
    2093 //The total number of operations is 23.6666666667
    // simd256<128>::eq -- built from simd256<64>::eq. tmpAns is the 64-bit compare
    // result; loMask = tmpAns & simd256<128>::srli<64>(tmpAns); the return value is
    // loMask | simd256<128>::slli<64>(loMask).
    // Presumably all ones only when both 64-bit halves compared equal (depends on
    // the srli/slli semantics, defined outside this excerpt).
    2094 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::eq(bitblock256_t arg1, bitblock256_t arg2)
    2095 {
    2096         bitblock256_t tmpAns = simd256<(64)>::eq(arg1, arg2);
    2097         bitblock256_t loMask = simd_and(tmpAns, simd256<128>::srli<(64)>(tmpAns));
    2098         bitblock256_t hiMask = simd256<128>::slli<(64)>(loMask);
    2099         return simd_or(loMask, hiMask);
    2100 }
    2101 
    2102 //The total number of operations is 53.6666666667
    // simd256<256>::eq -- built from simd256<128>::eq. tmpAns is the 128-bit compare
    // result; loMask = tmpAns & simd256<256>::srli<128>(tmpAns); the return value is
    // loMask | simd256<256>::slli<128>(loMask).
    // Presumably all ones only when both 128-bit halves compared equal (depends on
    // the srli/slli semantics, defined outside this excerpt).
    2103 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::eq(bitblock256_t arg1, bitblock256_t arg2)
    2104 {
    2105         bitblock256_t tmpAns = simd256<(128)>::eq(arg1, arg2);
    2106         bitblock256_t loMask = simd_and(tmpAns, simd256<256>::srli<(128)>(tmpAns));
    2107         bitblock256_t hiMask = simd256<256>::slli<(128)>(loMask);
    2108         return simd_or(loMask, hiMask);
    2109 }
    2110 
    21112099//The total number of operations is 0
    21122100template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::himask()
     
    // simd256<32>::himask -- returns a simd256<32>::constant. Two return lines are
    // shown here: constant<-65536> (removed-side numbering only) and
    // constant<4294901760ULL> (added-side numbering only). Both values are the
    // 32-bit pattern 0xFFFF0000, i.e. the upper 16 bits of each 32-bit field.
    21362124template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::himask()
    21372125{
    2138         return simd256<32>::constant<-65536>();
     2126        return simd256<32>::constant<4294901760ULL>();
    21392127}
    21402128
     
    // simd256<64>::himask -- builds the mask with _mm256_set_epi32, whose arguments
    // run from the highest 32-bit element to the lowest. The pattern
    // (ones, 0, ones, 0, ...) therefore sets the upper 32 bits of each 64-bit field.
    // Two return lines are shown: the removed-side one writes the all-ones word as
    // (int32_t)(-1), the added-side one as (int32_t)(((4294967296ULL)-1)).
    21422130template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::himask()
    21432131{
    2144         return ((bitblock256_t)(_mm256_set_epi32((int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0))));
     2132        return ((bitblock256_t)(_mm256_set_epi32((int32_t)(((4294967296ULL)-1)), (int32_t)(0), (int32_t)(((4294967296ULL)-1)), (int32_t)(0), (int32_t)(((4294967296ULL)-1)), (int32_t)(0), (int32_t)(((4294967296ULL)-1)), (int32_t)(0))));
    21452133}
    21462134
     
    // simd256<128>::himask -- builds the mask with _mm256_set_epi32 (arguments run
    // from the highest 32-bit element to the lowest). The pattern
    // (ones, ones, 0, 0, ones, ones, 0, 0) sets the upper 64 bits of each 128-bit
    // field. Two return lines are shown: the removed-side one uses (int32_t)(-1),
    // the added-side one (int32_t)(((4294967296ULL)-1)) for the all-ones word.
    21482136template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::himask()
    21492137{
    2150         return ((bitblock256_t)(_mm256_set_epi32((int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0))));
     2138        return ((bitblock256_t)(_mm256_set_epi32((int32_t)(((4294967296ULL)-1)), (int32_t)(((4294967296ULL)-1)), (int32_t)(0), (int32_t)(0), (int32_t)(((4294967296ULL)-1)), (int32_t)(((4294967296ULL)-1)), (int32_t)(0), (int32_t)(0))));
    21512139}
    21522140
     
    // simd256<256>::himask -- builds the mask with _mm256_set_epi32 (arguments run
    // from the highest 32-bit element to the lowest). Four all-ones words followed
    // by four zeros set the upper 128 bits of the 256-bit block. Two return lines
    // are shown: the removed-side one uses (int32_t)(-1), the added-side one
    // (int32_t)(((4294967296ULL)-1)) for the all-ones word.
    21542142template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::himask()
    21552143{
    2156         return ((bitblock256_t)(_mm256_set_epi32((int32_t)(-1), (int32_t)(-1), (int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(0))));
     2144        return ((bitblock256_t)(_mm256_set_epi32((int32_t)(((4294967296ULL)-1)), (int32_t)(((4294967296ULL)-1)), (int32_t)(((4294967296ULL)-1)), (int32_t)(((4294967296ULL)-1)), (int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(0))));
    21572145}
    21582146
     
    22222210}
    22232211
    2224 //The total number of operations is 19.0
    // simd256<2>::abs -- returns
    //   ifh(himask(), arg1 & simd256<256>::slli<1>(simd_not(arg1)), arg1).
    // ifh presumably takes its second argument where the mask bits are set and its
    // third elsewhere -- its definition is not in this excerpt, confirm.
    2225 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::abs(bitblock256_t arg1)
    2226 {
    2227         return simd256<1>::ifh(simd256<2>::himask(), simd_and(arg1, simd256<256>::slli<1>(simd_not(arg1))), arg1);
    2228 }
    2229 
    2230 //The total number of operations is 39.0
    // simd256<4>::abs -- gtMask = gt(arg1, constant<0>()); the result is
    // ifh(gtMask, arg1, sub(gtMask, arg1)).
    // Where gtMask is zero, sub(gtMask, arg1) is 0 - arg1; presumably ifh keeps arg1
    // in fields where gtMask is set and takes that difference elsewhere (ifh/gt/sub
    // are defined outside this excerpt -- confirm).
    2231 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::abs(bitblock256_t arg1)
    2232 {
    2233         bitblock256_t gtMask = simd256<4>::gt(arg1, simd256<4>::constant<0>());
    2234         return simd256<1>::ifh(gtMask, arg1, simd256<4>::sub(gtMask, arg1));
    2235 }
    2236 
    2237 //The total number of operations is 4.0
    // simd256<8>::abs -- absolute value of 8-bit fields: _mm_abs_epi8 is applied to
    // each half from avx_select_hi128 / avx_select_lo128 and the results are joined
    // with avx_general_combine256.
    2238 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::abs(bitblock256_t arg1)
    2239 {
    2240         return avx_general_combine256(_mm_abs_epi8(avx_select_hi128(arg1)), _mm_abs_epi8(avx_select_lo128(arg1)));
    2241 }
    2242 
    2243 //The total number of operations is 4.0
    // simd256<16>::abs -- absolute value of 16-bit fields: _mm_abs_epi16 is applied
    // to each half from avx_select_hi128 / avx_select_lo128 and the results are
    // joined with avx_general_combine256.
    2244 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::abs(bitblock256_t arg1)
    2245 {
    2246         return avx_general_combine256(_mm_abs_epi16(avx_select_hi128(arg1)), _mm_abs_epi16(avx_select_lo128(arg1)));
    2247 }
    2248 
    2249 //The total number of operations is 4.0
    // simd256<32>::abs -- absolute value of 32-bit fields: _mm_abs_epi32 is applied
    // to each half from avx_select_hi128 / avx_select_lo128 and the results are
    // joined with avx_general_combine256.
    2250 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::abs(bitblock256_t arg1)
    2251 {
    2252         return avx_general_combine256(_mm_abs_epi32(avx_select_hi128(arg1)), _mm_abs_epi32(avx_select_lo128(arg1)));
    2253 }
    2254 
    2255 //The total number of operations is 13.0
    // simd256<64>::abs -- gtMask = gt(arg1, constant<0>()); the result is
    // ifh(gtMask, arg1, sub(gtMask, arg1)).
    // Where gtMask is zero, sub(gtMask, arg1) is 0 - arg1; presumably ifh keeps arg1
    // in fields where gtMask is set and takes that difference elsewhere (ifh/gt/sub
    // are defined outside this excerpt -- confirm).
    2256 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::abs(bitblock256_t arg1)
    2257 {
    2258         bitblock256_t gtMask = simd256<64>::gt(arg1, simd256<64>::constant<0>());
    2259         return simd256<1>::ifh(gtMask, arg1, simd256<64>::sub(gtMask, arg1));
    2260 }
    2261 
    2262 //The total number of operations is 69.0
    // simd256<128>::abs -- takes simd256<64>::abs(arg1) under himask() and arg1
    // elsewhere (via ifh), compares that value with arg1 using simd256<128>::eq to
    // get eqMask, and returns ifh(eqMask, arg1, sub(eqMask, arg1)).
    // Presumably eqMask marks fields the half-width abs left unchanged, and the
    // remaining fields (eqMask == 0) receive 0 - arg1 -- confirm against ifh/sub.
    2263 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::abs(bitblock256_t arg1)
    2264 {
    2265         bitblock256_t eqMask = simd256<128>::eq(simd256<1>::ifh(simd256<128>::himask(), simd256<(64)>::abs(arg1), arg1), arg1);
    2266         return simd256<1>::ifh(eqMask, arg1, simd256<128>::sub(eqMask, arg1));
    2267 }
    2268 
    2269 //The total number of operations is 204.333333333
    2270 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::abs(bitblock256_t arg1)
    2271 {
    2272         bitblock256_t eqMask = simd256<256>::eq(simd256<1>::ifh(simd256<256>::himask(), simd256<(128)>::abs(arg1), arg1), arg1);
    2273         return simd256<1>::ifh(eqMask, arg1, simd256<256>::sub(eqMask, arg1));
     2212//The total number of operations is 1.0
     // simd256<1>::umax -- maximum of 1-bit fields, computed as simd_or(arg1, arg2):
     // a result bit is set when either operand bit is set.
     2213template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::umax(bitblock256_t arg1, bitblock256_t arg2)
     2214{
     2215        return simd_or(arg1, arg2);
     2216}
     2217
     2218//The total number of operations is 24.0
     // simd256<2>::umax -- unsigned maximum of 2-bit fields built from the 1-bit
     // version. tmpAns is simd256<1>::umax; eqMask1 / eqMask2 are simd256<1>::eq of
     // tmpAns against arg1 / arg2, moved with simd256<2>::srli<1>. The result takes
     // tmpAns under himask() and elsewhere chooses among tmpAns, arg1 and arg2
     // through nested ifh calls keyed on eqMask1 and eqMask2.
     // ifh presumably selects its second argument where the mask is set -- confirm.
     2219template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::umax(bitblock256_t arg1, bitblock256_t arg2)
     2220{
     2221        bitblock256_t tmpAns = simd256<(1)>::umax(arg1, arg2);
     2222        bitblock256_t eqMask1 = simd256<2>::srli<(1)>(simd256<(1)>::eq(tmpAns, arg1));
     2223        bitblock256_t eqMask2 = simd256<2>::srli<(1)>(simd256<(1)>::eq(tmpAns, arg2));
     2224        return simd256<1>::ifh(simd256<2>::himask(), tmpAns, simd256<1>::ifh(eqMask1, simd256<1>::ifh(eqMask2, tmpAns, arg1), arg2));
     2225}
     2226
     2227//The total number of operations is 14.0
     // simd256<4>::umax -- derived from simd256<8>::umax. One term is the 8-bit umax
     // of the unmodified operands masked with simd256<8>::himask(); the other is the
     // 8-bit umax of both operands masked with simd256<8>::lomask(). The two terms
     // are OR-ed.
     2228template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::umax(bitblock256_t arg1, bitblock256_t arg2)
     2229{
     2230        return simd_or(simd_and(simd256<(8)>::himask(), simd256<(8)>::umax(arg1, arg2)), simd256<(8)>::umax(simd_and(simd256<(8)>::lomask(), arg1), simd_and(simd256<(8)>::lomask(), arg2)));
     2231}
     2232
     2233//The total number of operations is 5.0
     // simd256<8>::umax -- unsigned maximum of 8-bit fields: _mm_max_epu8 is applied
     // to the matching halves from avx_select_hi128 / avx_select_lo128 and the two
     // results are joined with avx_general_combine256.
     2234template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::umax(bitblock256_t arg1, bitblock256_t arg2)
     2235{
     2236        return avx_general_combine256(_mm_max_epu8(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_max_epu8(avx_select_lo128(arg1), avx_select_lo128(arg2)));
     2237}
     2238
     2239//The total number of operations is 5.0
     // simd256<16>::umax -- unsigned maximum of 16-bit fields: _mm_max_epu16 is
     // applied to the matching halves from avx_select_hi128 / avx_select_lo128 and
     // the two results are joined with avx_general_combine256.
     2240template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::umax(bitblock256_t arg1, bitblock256_t arg2)
     2241{
     2242        return avx_general_combine256(_mm_max_epu16(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_max_epu16(avx_select_lo128(arg1), avx_select_lo128(arg2)));
     2243}
     2244
     2245//The total number of operations is 5.0
     // simd256<32>::umax -- unsigned maximum of 32-bit fields: _mm_max_epu32 is
     // applied to the matching halves from avx_select_hi128 / avx_select_lo128 and
     // the two results are joined with avx_general_combine256.
     2246template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::umax(bitblock256_t arg1, bitblock256_t arg2)
     2247{
     2248        return avx_general_combine256(_mm_max_epu32(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_max_epu32(avx_select_lo128(arg1), avx_select_lo128(arg2)));
     2249}
     2250
     2251//The total number of operations is 11.0
     // simd256<64>::umax -- unsigned maximum obtained through simd256<64>::max: both
     // operands are XOR-ed with high_bit (the constant 9223372036854775808 = 2^63 in
     // each 64-bit field), passed to max, and the result is XOR-ed with high_bit
     // again. Flipping the top bit maps unsigned order onto signed order, assuming
     // simd256<64>::max is the signed maximum (not shown in this excerpt).
     2252template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::umax(bitblock256_t arg1, bitblock256_t arg2)
     2253{
     2254        bitblock256_t high_bit = simd256<64>::constant<(9223372036854775808ULL)>();
     2255        return simd_xor(simd256<64>::max(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
     2256}
     2257
     2258//The total number of operations is 46.6666666667
     // simd256<128>::umax -- unsigned maximum of 128-bit fields built from the 64-bit
     // version. tmpAns is simd256<64>::umax; eqMask1 / eqMask2 are simd256<64>::eq of
     // tmpAns against arg1 / arg2, moved with simd256<128>::srli<64>. The result
     // takes tmpAns under himask() and elsewhere chooses among tmpAns, arg1 and arg2
     // through nested ifh calls keyed on eqMask1 and eqMask2.
     // ifh presumably selects its second argument where the mask is set -- confirm.
     2259template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::umax(bitblock256_t arg1, bitblock256_t arg2)
     2260{
     2261        bitblock256_t tmpAns = simd256<(64)>::umax(arg1, arg2);
     2262        bitblock256_t eqMask1 = simd256<128>::srli<(64)>(simd256<(64)>::eq(tmpAns, arg1));
     2263        bitblock256_t eqMask2 = simd256<128>::srli<(64)>(simd256<(64)>::eq(tmpAns, arg2));
     2264        return simd256<1>::ifh(simd256<128>::himask(), tmpAns, simd256<1>::ifh(eqMask1, simd256<1>::ifh(eqMask2, tmpAns, arg1), arg2));
     2265}
     2266
     2267//The total number of operations is 131.0
     // simd256<256>::umax -- unsigned maximum of the 256-bit block built from the
     // 128-bit version. tmpAns is simd256<128>::umax; eqMask1 / eqMask2 are
     // simd256<128>::eq of tmpAns against arg1 / arg2, moved with
     // simd256<256>::srli<128>. The result takes tmpAns under himask() and elsewhere
     // chooses among tmpAns, arg1 and arg2 through nested ifh calls keyed on eqMask1
     // and eqMask2.
     // ifh presumably selects its second argument where the mask is set -- confirm.
     2268template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::umax(bitblock256_t arg1, bitblock256_t arg2)
     2269{
     2270        bitblock256_t tmpAns = simd256<(128)>::umax(arg1, arg2);
     2271        bitblock256_t eqMask1 = simd256<256>::srli<(128)>(simd256<(128)>::eq(tmpAns, arg1));
     2272        bitblock256_t eqMask2 = simd256<256>::srli<(128)>(simd256<(128)>::eq(tmpAns, arg2));
     2273        return simd256<1>::ifh(simd256<256>::himask(), tmpAns, simd256<1>::ifh(eqMask1, simd256<1>::ifh(eqMask2, tmpAns, arg1), arg2));
    22742274}
    22752275
     
    // bitblock256::all -- _mm256_testz_si256(a, b) yields 1 when (a & b) is all
    // zero. Here a = simd_not(arg1) and b = simd256<8>::constant<-1>(), so the
    // function returns true when simd_not(arg1) shares no set bit with that
    // constant (presumably an all-ones block, i.e. true when every bit of arg1 is
    // set -- confirm against constant<>).
    35023502IDISA_ALWAYS_INLINE bool bitblock256::all(bitblock256_t arg1)
    35033503{
    3504         return _mm256_testz_si256(((__m256i)(simd_not(arg1))), ((__m256i)(simd256<8>::constant<-1>()))) == 1;
    3505 }
    3506 
    3507 //The total number of operations is 1.0
    // bitblock256::any -- _mm256_testz_si256(arg1, arg1) yields 1 only when arg1 is
    // entirely zero, so comparing the result with 0 gives true when at least one
    // bit of arg1 is set.
    3508 IDISA_ALWAYS_INLINE bool bitblock256::any(bitblock256_t arg1)
    3509 {
    3510         return _mm256_testz_si256(((__m256i)(arg1)), ((__m256i)(arg1))) == 0;
    3511 }
    3512 
    3513 //The total number of operations is 1.0
    // bitblock256::load_aligned -- loads one block from arg1 with _mm256_load_ps,
    // which requires a 32-byte aligned address.
    // NOTE(review): the (float*) cast also drops the const qualifier of arg1.
    3514 IDISA_ALWAYS_INLINE bitblock256_t bitblock256::load_aligned(const bitblock256_t* arg1)
    3515 {
    3516         return _mm256_load_ps((float*)(arg1));
    3517 }
    3518 
    3519 //The total number of operations is 1.0
    // bitblock256::store_unaligned -- writes arg1 to the address arg2 with
    // _mm256_storeu_ps (no alignment requirement) after casting arg2 to float*.
    3520 IDISA_ALWAYS_INLINE void bitblock256::store_unaligned(bitblock256_t arg1, bitblock256_t* arg2)
    3521 {
    3522         _mm256_storeu_ps((float*)(arg2), arg1);
     // NOTE(review): the next line carries added-side numbering only (3504) and
     // differs from the removed bitblock256::all return solely in constant<255> vs
     // constant<-1>. It presumably belongs to bitblock256::all in the newer revision
     // and appears here only because of how the diff view interleaves the hunks.
     3504        return _mm256_testz_si256(((__m256i)(simd_not(arg1))), ((__m256i)(simd256<8>::constant<255>()))) == 1;
    35233505}
    35243506
     
    35303512
    35313513//The total number of operations is 1.0
     // bitblock256::any -- _mm256_testz_si256(arg1, arg1) yields 1 only when arg1 is
     // entirely zero, so comparing the result with 0 gives true when at least one
     // bit of arg1 is set.
     3514IDISA_ALWAYS_INLINE bool bitblock256::any(bitblock256_t arg1)
     3515{
     3516        return _mm256_testz_si256(((__m256i)(arg1)), ((__m256i)(arg1))) == 0;
     3517}
     3518
     3519//The total number of operations is 1.0
     // bitblock256::load_aligned -- loads one block from arg1 with _mm256_load_ps,
     // which requires a 32-byte aligned address.
     // NOTE(review): the (float*) cast also drops the const qualifier of arg1.
     3520IDISA_ALWAYS_INLINE bitblock256_t bitblock256::load_aligned(const bitblock256_t* arg1)
     3521{
     3522        return _mm256_load_ps((float*)(arg1));
     3523}
     3524
     3525//The total number of operations is 1.0
    35323526IDISA_ALWAYS_INLINE void bitblock256::store_aligned(bitblock256_t arg1, bitblock256_t* arg2)
    35333527{
     
    35353529}
    35363530
     3531//The total number of operations is 1.0
     // bitblock256::store_unaligned -- writes arg1 to the address arg2 with
     // _mm256_storeu_ps (no alignment requirement) after casting arg2 to float*.
     3532IDISA_ALWAYS_INLINE void bitblock256::store_unaligned(bitblock256_t arg1, bitblock256_t* arg2)
     3533{
     3534        _mm256_storeu_ps((float*)(arg2), arg1);
     3535}
     3536
    35373537#endif
Note: See TracChangeset for help on using the changeset viewer.