Ignore:
Timestamp:
Nov 29, 2013, 3:03:33 PM (6 years ago)
Author:
linmengl
Message:

Regenerate libraries, with negative-number constants eliminated.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/idisa_cpp/idisa_avx2.cpp

    r3526 r3576  
    5858        static IDISA_ALWAYS_INLINE bitblock256_t srl(bitblock256_t arg1, bitblock256_t shift_mask);
    5959        static IDISA_ALWAYS_INLINE bitblock256_t lomask();
     60        static IDISA_ALWAYS_INLINE bitblock256_t lt(bitblock256_t arg1, bitblock256_t arg2);
    6061        static IDISA_ALWAYS_INLINE bitblock256_t umin(bitblock256_t arg1, bitblock256_t arg2);
    6162        template <typename FieldType<fw>::T val> static IDISA_ALWAYS_INLINE bitblock256_t constant();
    6263        static IDISA_ALWAYS_INLINE bitblock256_t min(bitblock256_t arg1, bitblock256_t arg2);
    63         static IDISA_ALWAYS_INLINE bitblock256_t add(bitblock256_t arg1, bitblock256_t arg2);
    6464        static IDISA_ALWAYS_INLINE bitblock256_t umax(bitblock256_t arg1, bitblock256_t arg2);
    6565        static IDISA_ALWAYS_INLINE bitblock256_t abs(bitblock256_t arg1);
     
    6767        static IDISA_ALWAYS_INLINE bitblock256_t any(bitblock256_t arg1);
    6868        template <uint16_t sh> static IDISA_ALWAYS_INLINE bitblock256_t srai(bitblock256_t arg1);
    69         static IDISA_ALWAYS_INLINE bitblock256_t lt(bitblock256_t arg1, bitblock256_t arg2);
     69        static IDISA_ALWAYS_INLINE bitblock256_t add(bitblock256_t arg1, bitblock256_t arg2);
    7070        static IDISA_ALWAYS_INLINE bitblock256_t ugt(bitblock256_t arg1, bitblock256_t arg2);
    7171};
     
    134134IDISA_ALWAYS_INLINE bitblock256_t simd_nor(bitblock256_t arg1, bitblock256_t arg2);
    135135IDISA_ALWAYS_INLINE bitblock256_t simd_not(bitblock256_t arg1);
     136IDISA_ALWAYS_INLINE bitblock256_t simd_andc(bitblock256_t arg1, bitblock256_t arg2);
    136137IDISA_ALWAYS_INLINE bitblock256_t simd_or(bitblock256_t arg1, bitblock256_t arg2);
    137 IDISA_ALWAYS_INLINE bitblock256_t simd_andc(bitblock256_t arg1, bitblock256_t arg2);
    138138IDISA_ALWAYS_INLINE bitblock256_t simd_and(bitblock256_t arg1, bitblock256_t arg2);
    139139IDISA_ALWAYS_INLINE bitblock256_t simd_xor(bitblock256_t arg1, bitblock256_t arg2);
     
    281281template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::ifh(bitblock256_t arg1, bitblock256_t arg2, bitblock256_t arg3);
    282282template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::ifh(bitblock256_t arg1, bitblock256_t arg2, bitblock256_t arg3);
    283 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::srai(bitblock256_t arg1);
    284 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::srai(bitblock256_t arg1);
    285 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::srai(bitblock256_t arg1);
    286 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::srai(bitblock256_t arg1);
    287 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::srai(bitblock256_t arg1);
    288 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::srai(bitblock256_t arg1);
    289 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::srai(bitblock256_t arg1);
    290 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::srai(bitblock256_t arg1);
    291283template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::add_hl(bitblock256_t arg1);
    292284template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::add_hl(bitblock256_t arg1);
     
    301293template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::srl(bitblock256_t arg1, bitblock256_t shift_mask);
    302294template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::srl(bitblock256_t arg1, bitblock256_t shift_mask);
    303 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::lomask();
    304 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::lomask();
    305 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::lomask();
    306 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::lomask();
    307 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::lomask();
    308 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::lomask();
    309 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::lomask();
    310 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::lomask();
    311295template <> template <FieldType<1>::T val> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::constant();
    312296template <> template <FieldType<2>::T val> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::constant();
     
    327311template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::min(bitblock256_t arg1, bitblock256_t arg2);
    328312template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::min(bitblock256_t arg1, bitblock256_t arg2);
     313template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::lomask();
     314template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::lomask();
     315template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::lomask();
     316template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::lomask();
     317template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::lomask();
     318template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::lomask();
     319template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::lomask();
     320template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::lomask();
    329321template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::umin(bitblock256_t arg1, bitblock256_t arg2);
    330322template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::umin(bitblock256_t arg1, bitblock256_t arg2);
     
    336328template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::umin(bitblock256_t arg1, bitblock256_t arg2);
    337329template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::umin(bitblock256_t arg1, bitblock256_t arg2);
    338 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::umax(bitblock256_t arg1, bitblock256_t arg2);
    339 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::umax(bitblock256_t arg1, bitblock256_t arg2);
    340 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::umax(bitblock256_t arg1, bitblock256_t arg2);
    341 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::umax(bitblock256_t arg1, bitblock256_t arg2);
    342 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::umax(bitblock256_t arg1, bitblock256_t arg2);
    343 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::umax(bitblock256_t arg1, bitblock256_t arg2);
    344 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::umax(bitblock256_t arg1, bitblock256_t arg2);
    345 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::umax(bitblock256_t arg1, bitblock256_t arg2);
    346 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::umax(bitblock256_t arg1, bitblock256_t arg2);
     330template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::abs(bitblock256_t arg1);
     331template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::abs(bitblock256_t arg1);
     332template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::abs(bitblock256_t arg1);
     333template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::abs(bitblock256_t arg1);
     334template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::abs(bitblock256_t arg1);
     335template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::abs(bitblock256_t arg1);
     336template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::abs(bitblock256_t arg1);
     337template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::abs(bitblock256_t arg1);
     338template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::eq(bitblock256_t arg1, bitblock256_t arg2);
     339template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::eq(bitblock256_t arg1, bitblock256_t arg2);
     340template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::eq(bitblock256_t arg1, bitblock256_t arg2);
     341template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::eq(bitblock256_t arg1, bitblock256_t arg2);
     342template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::eq(bitblock256_t arg1, bitblock256_t arg2);
     343template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::eq(bitblock256_t arg1, bitblock256_t arg2);
     344template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::eq(bitblock256_t arg1, bitblock256_t arg2);
     345template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::eq(bitblock256_t arg1, bitblock256_t arg2);
     346template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::eq(bitblock256_t arg1, bitblock256_t arg2);
     347template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::srai(bitblock256_t arg1);
     348template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::srai(bitblock256_t arg1);
     349template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::srai(bitblock256_t arg1);
     350template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::srai(bitblock256_t arg1);
     351template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::srai(bitblock256_t arg1);
     352template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::srai(bitblock256_t arg1);
     353template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::srai(bitblock256_t arg1);
     354template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::srai(bitblock256_t arg1);
    347355template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::lt(bitblock256_t arg1, bitblock256_t arg2);
    348356template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::lt(bitblock256_t arg1, bitblock256_t arg2);
     
    354362template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::lt(bitblock256_t arg1, bitblock256_t arg2);
    355363template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::lt(bitblock256_t arg1, bitblock256_t arg2);
    356 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::eq(bitblock256_t arg1, bitblock256_t arg2);
    357 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::eq(bitblock256_t arg1, bitblock256_t arg2);
    358 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::eq(bitblock256_t arg1, bitblock256_t arg2);
    359 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::eq(bitblock256_t arg1, bitblock256_t arg2);
    360 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::eq(bitblock256_t arg1, bitblock256_t arg2);
    361 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::eq(bitblock256_t arg1, bitblock256_t arg2);
    362 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::eq(bitblock256_t arg1, bitblock256_t arg2);
    363 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::eq(bitblock256_t arg1, bitblock256_t arg2);
    364 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::eq(bitblock256_t arg1, bitblock256_t arg2);
    365364template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::himask();
    366365template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::himask();
     
    380379template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::add(bitblock256_t arg1, bitblock256_t arg2);
    381380template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::add(bitblock256_t arg1, bitblock256_t arg2);
    382 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::abs(bitblock256_t arg1);
    383 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::abs(bitblock256_t arg1);
    384 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::abs(bitblock256_t arg1);
    385 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::abs(bitblock256_t arg1);
    386 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::abs(bitblock256_t arg1);
    387 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::abs(bitblock256_t arg1);
    388 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::abs(bitblock256_t arg1);
    389 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::abs(bitblock256_t arg1);
     381template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::umax(bitblock256_t arg1, bitblock256_t arg2);
     382template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::umax(bitblock256_t arg1, bitblock256_t arg2);
     383template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::umax(bitblock256_t arg1, bitblock256_t arg2);
     384template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::umax(bitblock256_t arg1, bitblock256_t arg2);
     385template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::umax(bitblock256_t arg1, bitblock256_t arg2);
     386template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::umax(bitblock256_t arg1, bitblock256_t arg2);
     387template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::umax(bitblock256_t arg1, bitblock256_t arg2);
     388template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::umax(bitblock256_t arg1, bitblock256_t arg2);
     389template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::umax(bitblock256_t arg1, bitblock256_t arg2);
    390390template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<2>::umin_hl(bitblock256_t arg1, bitblock256_t arg2);
    391391template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<4>::umin_hl(bitblock256_t arg1, bitblock256_t arg2);
     
    619619IDISA_ALWAYS_INLINE bitblock256_t simd_not(bitblock256_t arg1)
    620620{
    621         return simd_xor(arg1, simd256<32>::constant<-1>());
     621        return simd_xor(arg1, simd256<32>::constant<4294967295ULL>());
     622}
     623
     624//The total number of operations is 1.0
     625IDISA_ALWAYS_INLINE bitblock256_t simd_andc(bitblock256_t arg1, bitblock256_t arg2)
     626{
     627        return _mm256_andnot_si256(arg2, arg1);
    622628}
    623629
     
    626632{
    627633        return _mm256_or_si256(arg1, arg2);
    628 }
    629 
    630 //The total number of operations is 1.0
    631 IDISA_ALWAYS_INLINE bitblock256_t simd_andc(bitblock256_t arg1, bitblock256_t arg2)
    632 {
    633         return _mm256_andnot_si256(arg2, arg1);
    634634}
    635635
     
    16401640}
    16411641
    1642 //The total number of operations is 4.0
    1643 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::srai(bitblock256_t arg1)
    1644 {
    1645         return ((sh == 0) ? arg1 : simd_or(simd_and(simd256<2>::himask(), arg1), simd256<2>::srli<1>(arg1)));
    1646 }
    1647 
    1648 //The total number of operations is 10.0
    1649 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::srai(bitblock256_t arg1)
    1650 {
    1651         bitblock256_t tmp = simd256<4>::srli<((sh >= 4) ? (3) : ((sh < 0) ? 0 : sh))>(arg1);
    1652         return simd_or(tmp, simd256<4>::sub(simd256<4>::constant<0>(), simd_and(simd256<4>::constant<(1<<((4-((sh >= 4) ? (3) : ((sh < 0) ? 0 : sh)))-1))>(), tmp)));
    1653 }
    1654 
    1655 //The total number of operations is 5.0
    1656 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::srai(bitblock256_t arg1)
    1657 {
    1658         bitblock256_t tmp = simd256<8>::srli<((sh >= 8) ? (7) : ((sh < 0) ? 0 : sh))>(arg1);
    1659         return simd_or(tmp, simd256<8>::sub(simd256<8>::constant<0>(), simd_and(simd256<8>::constant<(1<<((8-((sh >= 8) ? (7) : ((sh < 0) ? 0 : sh)))-1))>(), tmp)));
    1660 }
    1661 
    1662 //The total number of operations is 1.0
    1663 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::srai(bitblock256_t arg1)
    1664 {
    1665         return _mm256_srai_epi16(arg1, (int32_t)(sh));
    1666 }
    1667 
    1668 //The total number of operations is 1.0
    1669 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::srai(bitblock256_t arg1)
    1670 {
    1671         return _mm256_srai_epi32(arg1, (int32_t)(sh));
    1672 }
    1673 
    1674 //The total number of operations is 4.5
    1675 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::srai(bitblock256_t arg1)
    1676 {
    1677         return simd_or(simd_and(simd256<64>::himask(), simd256<(32)>::srai<((sh < (32)) ? sh : (32))>(arg1)), ((sh <= (32)) ? simd256<64>::srli<sh>(arg1) : simd256<(32)>::srai<(sh-(32))>(simd256<64>::srli<(32)>(arg1))));
    1678 }
    1679 
    1680 //The total number of operations is 14.0833333333
    1681 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::srai(bitblock256_t arg1)
    1682 {
    1683         return simd_or(simd_and(simd256<128>::himask(), simd256<(64)>::srai<((sh < (64)) ? sh : (64))>(arg1)), ((sh <= (64)) ? simd256<128>::srli<sh>(arg1) : simd256<(64)>::srai<(sh-(64))>(simd256<128>::srli<(64)>(arg1))));
    1684 }
    1685 
    1686 //The total number of operations is 32.625
    1687 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::srai(bitblock256_t arg1)
    1688 {
    1689         return simd_or(simd_and(simd256<256>::himask(), simd256<(128)>::srai<((sh < (128)) ? sh : (128))>(arg1)), ((sh <= (128)) ? simd256<256>::srli<sh>(arg1) : simd256<(128)>::srai<(sh-(128))>(simd256<256>::srli<(128)>(arg1))));
    1690 }
    1691 
    16921642//The total number of operations is 3.0
    16931643template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::add_hl(bitblock256_t arg1)
     
    17661716
    17671717//The total number of operations is 0
    1768 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::lomask()
    1769 {
    1770         return simd256<2>::constant<(1)>();
    1771 }
    1772 
    1773 //The total number of operations is 0
    1774 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::lomask()
    1775 {
    1776         return simd256<4>::constant<(3)>();
    1777 }
    1778 
    1779 //The total number of operations is 0
    1780 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::lomask()
    1781 {
    1782         return simd256<8>::constant<(15)>();
    1783 }
    1784 
    1785 //The total number of operations is 0
    1786 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::lomask()
    1787 {
    1788         return simd256<16>::constant<(255)>();
    1789 }
    1790 
    1791 //The total number of operations is 0
    1792 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::lomask()
    1793 {
    1794         return simd256<32>::constant<(65535)>();
    1795 }
    1796 
    1797 //The total number of operations is 0
    1798 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::lomask()
    1799 {
    1800         return ((bitblock256_t)(_mm256_set_epi32((int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1))));
    1801 }
    1802 
    1803 //The total number of operations is 0
    1804 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::lomask()
    1805 {
    1806         return ((bitblock256_t)(_mm256_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1))));
    1807 }
    1808 
    1809 //The total number of operations is 0
    1810 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::lomask()
    1811 {
    1812         return ((bitblock256_t)(_mm256_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1), (int32_t)(-1), (int32_t)(-1))));
    1813 }
    1814 
    1815 //The total number of operations is 0
    18161718template <> template <FieldType<1>::T val> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::constant()
    18171719{
    1818         return simd256<32>::constant<(-1*val)>();
     1720        return simd256<2>::constant<((val+val)+val)>();
    18191721}
    18201722
     
    19301832}
    19311833
     1834//The total number of operations is 0
     1835template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::lomask()
     1836{
     1837        return simd256<2>::constant<(1)>();
     1838}
     1839
     1840//The total number of operations is 0
     1841template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::lomask()
     1842{
     1843        return simd256<4>::constant<(3)>();
     1844}
     1845
     1846//The total number of operations is 0
     1847template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::lomask()
     1848{
     1849        return simd256<8>::constant<(15)>();
     1850}
     1851
     1852//The total number of operations is 0
     1853template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::lomask()
     1854{
     1855        return simd256<16>::constant<(255)>();
     1856}
     1857
     1858//The total number of operations is 0
     1859template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::lomask()
     1860{
     1861        return simd256<32>::constant<(65535)>();
     1862}
     1863
     1864//The total number of operations is 0
     1865template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::lomask()
     1866{
     1867        return ((bitblock256_t)(_mm256_set_epi32((int32_t)(0), (int32_t)(((4294967296ULL)-1)), (int32_t)(0), (int32_t)(((4294967296ULL)-1)), (int32_t)(0), (int32_t)(((4294967296ULL)-1)), (int32_t)(0), (int32_t)(((4294967296ULL)-1)))));
     1868}
     1869
     1870//The total number of operations is 0
     1871template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::lomask()
     1872{
     1873        return ((bitblock256_t)(_mm256_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(((4294967296ULL)-1)), (int32_t)(((4294967296ULL)-1)), (int32_t)(0), (int32_t)(0), (int32_t)(((4294967296ULL)-1)), (int32_t)(((4294967296ULL)-1)))));
     1874}
     1875
     1876//The total number of operations is 0
     1877template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::lomask()
     1878{
     1879        return ((bitblock256_t)(_mm256_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(((4294967296ULL)-1)), (int32_t)(((4294967296ULL)-1)), (int32_t)(((4294967296ULL)-1)), (int32_t)(((4294967296ULL)-1)))));
     1880}
     1881
    19321882//The total number of operations is 1.0
    19331883template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::umin(bitblock256_t arg1, bitblock256_t arg2)
     
    19911941}
    19921942
    1993 //The total number of operations is 1.0
    1994 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::umax(bitblock256_t arg1, bitblock256_t arg2)
    1995 {
    1996         return simd_or(arg1, arg2);
    1997 }
    1998 
    1999 //The total number of operations is 16.0
    2000 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::umax(bitblock256_t arg1, bitblock256_t arg2)
    2001 {
    2002         return simd_or(simd_and(simd256<(4)>::himask(), simd256<(4)>::umax(arg1, arg2)), simd256<(4)>::umax(simd_and(simd256<(4)>::lomask(), arg1), simd_and(simd256<(4)>::lomask(), arg2)));
    2003 }
    2004 
    2005 //The total number of operations is 6.0
    2006 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::umax(bitblock256_t arg1, bitblock256_t arg2)
    2007 {
    2008         return simd_or(simd_and(simd256<(8)>::himask(), simd256<(8)>::umax(arg1, arg2)), simd256<(8)>::umax(simd_and(simd256<(8)>::lomask(), arg1), simd_and(simd256<(8)>::lomask(), arg2)));
    2009 }
    2010 
    2011 //The total number of operations is 1.0
    2012 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::umax(bitblock256_t arg1, bitblock256_t arg2)
    2013 {
    2014         return _mm256_max_epu8(arg1, arg2);
    2015 }
    2016 
    2017 //The total number of operations is 1.0
    2018 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::umax(bitblock256_t arg1, bitblock256_t arg2)
    2019 {
    2020         return _mm256_max_epu16(arg1, arg2);
    2021 }
    2022 
    2023 //The total number of operations is 1.0
    2024 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::umax(bitblock256_t arg1, bitblock256_t arg2)
    2025 {
    2026         return _mm256_max_epu32(arg1, arg2);
    2027 }
    2028 
    2029 //The total number of operations is 7.0
    2030 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::umax(bitblock256_t arg1, bitblock256_t arg2)
    2031 {
    2032         bitblock256_t high_bit = simd256<64>::constant<(9223372036854775808ULL)>();
    2033         return simd_xor(simd256<64>::max(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
    2034 }
    2035 
    2036 //The total number of operations is 28.6666666667
    2037 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::umax(bitblock256_t arg1, bitblock256_t arg2)
    2038 {
    2039         bitblock256_t tmpAns = simd256<(64)>::umax(arg1, arg2);
    2040         bitblock256_t eqMask1 = simd256<128>::srli<(64)>(simd256<(64)>::eq(tmpAns, arg1));
    2041         bitblock256_t eqMask2 = simd256<128>::srli<(64)>(simd256<(64)>::eq(tmpAns, arg2));
    2042         return simd256<1>::ifh(simd256<128>::himask(), tmpAns, simd256<1>::ifh(eqMask1, simd256<1>::ifh(eqMask2, tmpAns, arg1), arg2));
    2043 }
    2044 
    2045 //The total number of operations is 84.0
    2046 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::umax(bitblock256_t arg1, bitblock256_t arg2)
    2047 {
    2048         bitblock256_t tmpAns = simd256<(128)>::umax(arg1, arg2);
    2049         bitblock256_t eqMask1 = simd256<256>::srli<(128)>(simd256<(128)>::eq(tmpAns, arg1));
    2050         bitblock256_t eqMask2 = simd256<256>::srli<(128)>(simd256<(128)>::eq(tmpAns, arg2));
    2051         return simd256<1>::ifh(simd256<256>::himask(), tmpAns, simd256<1>::ifh(eqMask1, simd256<1>::ifh(eqMask2, tmpAns, arg1), arg2));
     1943//The total number of operations is 14.5
     1944template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::abs(bitblock256_t arg1)
     1945{
     1946        return simd256<1>::ifh(simd256<2>::himask(), simd_and(arg1, simd256<256>::slli<1>(simd_not(arg1))), arg1);
     1947}
     1948
     1949//The total number of operations is 19.0
     1950template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::abs(bitblock256_t arg1)
     1951{
     1952        bitblock256_t gtMask = simd256<4>::gt(arg1, simd256<4>::constant<0>());
     1953        return simd256<1>::ifh(gtMask, arg1, simd256<4>::sub(gtMask, arg1));
     1954}
     1955
     1956//The total number of operations is 1.0
     1957template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::abs(bitblock256_t arg1)
     1958{
     1959        return _mm256_abs_epi8(arg1);
     1960}
     1961
     1962//The total number of operations is 1.0
     1963template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::abs(bitblock256_t arg1)
     1964{
     1965        return _mm256_abs_epi16(arg1);
     1966}
     1967
     1968//The total number of operations is 1.0
     1969template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::abs(bitblock256_t arg1)
     1970{
     1971        return _mm256_abs_epi32(arg1);
     1972}
     1973
     1974//The total number of operations is 5.0
     1975template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::abs(bitblock256_t arg1)
     1976{
     1977        bitblock256_t gtMask = simd256<64>::gt(arg1, simd256<64>::constant<0>());
     1978        return simd256<1>::ifh(gtMask, arg1, simd256<64>::sub(gtMask, arg1));
     1979}
     1980
     1981//The total number of operations is 37.0
     1982template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::abs(bitblock256_t arg1)
     1983{
     1984        bitblock256_t eqMask = simd256<128>::eq(simd256<1>::ifh(simd256<128>::himask(), simd256<(64)>::abs(arg1), arg1), arg1);
     1985        return simd256<1>::ifh(eqMask, arg1, simd256<128>::sub(eqMask, arg1));
     1986}
     1987
     1988//The total number of operations is 120.833333333
     1989template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::abs(bitblock256_t arg1)
     1990{
     1991        bitblock256_t eqMask = simd256<256>::eq(simd256<1>::ifh(simd256<256>::himask(), simd256<(128)>::abs(arg1), arg1), arg1);
     1992        return simd256<1>::ifh(eqMask, arg1, simd256<256>::sub(eqMask, arg1));
     1993}
     1994
     1995//The total number of operations is 2.0
     1996template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::eq(bitblock256_t arg1, bitblock256_t arg2)
     1997{
     1998        return simd_not(simd_xor(arg1, arg2));
     1999}
     2000
     2001//The total number of operations is 8.0
     2002template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::eq(bitblock256_t arg1, bitblock256_t arg2)
     2003{
     2004        bitblock256_t tmpAns = simd256<(1)>::eq(arg1, arg2);
     2005        bitblock256_t loMask = simd_and(tmpAns, simd256<2>::srli<(1)>(tmpAns));
     2006        bitblock256_t hiMask = simd256<2>::slli<(1)>(loMask);
     2007        return simd_or(loMask, hiMask);
     2008}
     2009
     2010//The total number of operations is 9.0
     2011template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::eq(bitblock256_t arg1, bitblock256_t arg2)
     2012{
     2013        return simd_or(simd_and(simd256<(8)>::himask(), simd256<(8)>::eq(simd_and(simd256<(8)>::himask(), arg1), simd_and(simd256<(8)>::himask(), arg2))), simd_and(simd256<(8)>::lomask(), simd256<(8)>::eq(simd_and(simd256<(8)>::lomask(), arg1), simd_and(simd256<(8)>::lomask(), arg2))));
     2014}
     2015
     2016//The total number of operations is 1.0
     2017template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::eq(bitblock256_t arg1, bitblock256_t arg2)
     2018{
     2019        return _mm256_cmpeq_epi8(arg1, arg2);
     2020}
     2021
     2022//The total number of operations is 1.0
     2023template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::eq(bitblock256_t arg1, bitblock256_t arg2)
     2024{
     2025        return _mm256_cmpeq_epi16(arg1, arg2);
     2026}
     2027
     2028//The total number of operations is 1.0
     2029template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::eq(bitblock256_t arg1, bitblock256_t arg2)
     2030{
     2031        return _mm256_cmpeq_epi32(arg1, arg2);
     2032}
     2033
     2034//The total number of operations is 1.0
     2035template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::eq(bitblock256_t arg1, bitblock256_t arg2)
     2036{
     2037        return _mm256_cmpeq_epi64(arg1, arg2);
     2038}
     2039
     2040//The total number of operations is 13.6666666667
     2041template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::eq(bitblock256_t arg1, bitblock256_t arg2)
     2042{
     2043        bitblock256_t tmpAns = simd256<(64)>::eq(arg1, arg2);
     2044        bitblock256_t loMask = simd_and(tmpAns, simd256<128>::srli<(64)>(tmpAns));
     2045        bitblock256_t hiMask = simd256<128>::slli<(64)>(loMask);
     2046        return simd_or(loMask, hiMask);
     2047}
     2048
     2049//The total number of operations is 34.6666666667
     2050template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::eq(bitblock256_t arg1, bitblock256_t arg2)
     2051{
     2052        bitblock256_t tmpAns = simd256<(128)>::eq(arg1, arg2);
     2053        bitblock256_t loMask = simd_and(tmpAns, simd256<256>::srli<(128)>(tmpAns));
     2054        bitblock256_t hiMask = simd256<256>::slli<(128)>(loMask);
     2055        return simd_or(loMask, hiMask);
     2056}
     2057
     2058//The total number of operations is 4.0
     2059template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::srai(bitblock256_t arg1)
     2060{
     2061        return ((sh == 0) ? arg1 : simd_or(simd_and(simd256<2>::himask(), arg1), simd256<2>::srli<1>(arg1)));
     2062}
     2063
     2064//The total number of operations is 10.0
     //simd256<4>::srai: arithmetic right shift on 4-bit fields, emulated with a logical shift followed by
     //sign extension. The effective shift is sh clamped to at most 3; the (sh < 0) arm can never be taken
     //because sh is a uint16_t.
     2065template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::srai(bitblock256_t arg1)
     2066{
     //Logical shift by the clamped amount.
     2067        bitblock256_t tmp = simd256<4>::srli<((sh >= 4) ? (3) : ((sh < 0) ? 0 : sh))>(arg1);
     //constant<1 << (4 - shift - 1)> marks where the original sign bit landed; subtracting that isolated bit
     //from zero (assuming field-wise sub) sets it and every higher bit of the field, and the OR merges them in.
     2068        return simd_or(tmp, simd256<4>::sub(simd256<4>::constant<0>(), simd_and(simd256<4>::constant<(1<<((4-((sh >= 4) ? (3) : ((sh < 0) ? 0 : sh)))-1))>(), tmp)));
     2069}
     2070
     2071//The total number of operations is 5.0
     //simd256<8>::srai: arithmetic right shift on 8-bit fields, emulated with a logical shift followed by
     //sign extension. The effective shift is sh clamped to at most 7; the (sh < 0) arm can never be taken
     //because sh is a uint16_t.
     2072template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::srai(bitblock256_t arg1)
     2073{
     //Logical shift by the clamped amount.
     2074        bitblock256_t tmp = simd256<8>::srli<((sh >= 8) ? (7) : ((sh < 0) ? 0 : sh))>(arg1);
     //constant<1 << (8 - shift - 1)> marks where the original sign bit landed; subtracting that isolated bit
     //from zero (assuming field-wise sub) sets it and every higher bit of the field, and the OR merges them in.
     2075        return simd_or(tmp, simd256<8>::sub(simd256<8>::constant<0>(), simd_and(simd256<8>::constant<(1<<((8-((sh >= 8) ? (7) : ((sh < 0) ? 0 : sh)))-1))>(), tmp)));
     2076}
     2077
     2078//The total number of operations is 1.0
     //simd256<16>::srai: arithmetic right shift on 16-bit fields; forwards to _mm256_srai_epi16 with the
     //compile-time shift count sh converted to int32_t.
     2079template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::srai(bitblock256_t arg1)
     2080{
     2081        return _mm256_srai_epi16(arg1, (int32_t)(sh));
     2082}
     2083
     2084//The total number of operations is 1.0
     //simd256<32>::srai: arithmetic right shift on 32-bit fields; forwards to _mm256_srai_epi32 with the
     //compile-time shift count sh converted to int32_t.
     2085template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::srai(bitblock256_t arg1)
     2086{
     2087        return _mm256_srai_epi32(arg1, (int32_t)(sh));
     2088}
     2089
     2090//The total number of operations is 4.5
     //simd256<64>::srai: arithmetic right shift on 64-bit fields, composed from 32-bit pieces.
     //  - himask-selected part: 32-bit srai by min(sh, 32), which supplies the sign-filled upper half.
     //  - remaining part: a 64-bit logical shift by sh when sh <= 32, otherwise the upper half is first moved
     //    down by 32 and then arithmetically shifted by the remaining sh-32.
     //NOTE(review): both arms of the ?: are compiled, so srai<(sh-(32))> is also named when sh < 32, where
     //sh-(32) is negative before conversion to uint16_t - confirm the target compilers accept this.
     2091template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::srai(bitblock256_t arg1)
     2092{
     2093        return simd_or(simd_and(simd256<64>::himask(), simd256<(32)>::srai<((sh < (32)) ? sh : (32))>(arg1)), ((sh <= (32)) ? simd256<64>::srli<sh>(arg1) : simd256<(32)>::srai<(sh-(32))>(simd256<64>::srli<(32)>(arg1))));
     2094}
     2095
     2096//The total number of operations is 14.0833333333
     //simd256<128>::srai: arithmetic right shift on 128-bit fields, composed from 64-bit pieces.
     //  - himask-selected part: 64-bit srai by min(sh, 64), which supplies the sign-filled upper half.
     //  - remaining part: a 128-bit logical shift by sh when sh <= 64, otherwise the upper half is first moved
     //    down by 64 and then arithmetically shifted by the remaining sh-64.
     //NOTE(review): both arms of the ?: are compiled, so srai<(sh-(64))> is also named when sh < 64 - confirm
     //the target compilers accept the resulting negative-to-uint16_t template argument.
     2097template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::srai(bitblock256_t arg1)
     2098{
     2099        return simd_or(simd_and(simd256<128>::himask(), simd256<(64)>::srai<((sh < (64)) ? sh : (64))>(arg1)), ((sh <= (64)) ? simd256<128>::srli<sh>(arg1) : simd256<(64)>::srai<(sh-(64))>(simd256<128>::srli<(64)>(arg1))));
     2100}
     2101
     2102//The total number of operations is 32.625
     //simd256<256>::srai: arithmetic right shift of the whole 256-bit value, composed from 128-bit pieces.
     //  - himask-selected part: 128-bit srai by min(sh, 128), which supplies the sign-filled upper half.
     //  - remaining part: a 256-bit logical shift by sh when sh <= 128, otherwise the upper half is first moved
     //    down by 128 and then arithmetically shifted by the remaining sh-128.
     //NOTE(review): both arms of the ?: are compiled, so srai<(sh-(128))> is also named when sh < 128 - confirm
     //the target compilers accept the resulting negative-to-uint16_t template argument.
     2103template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::srai(bitblock256_t arg1)
     2104{
     2105        return simd_or(simd_and(simd256<256>::himask(), simd256<(128)>::srai<((sh < (128)) ? sh : (128))>(arg1)), ((sh <= (128)) ? simd256<256>::srli<sh>(arg1) : simd256<(128)>::srai<(sh-(128))>(simd256<256>::srli<(128)>(arg1))));
     20522106}
    20532107
     
    21182172}
    21192173
    2120 //The total number of operations is 2.0
    //simd256<1>::eq (removed-side copy): bitwise equality, computed as NOT(arg1 XOR arg2), so a result bit
    //is set exactly where the two inputs agree.
    2121 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::eq(bitblock256_t arg1, bitblock256_t arg2)
    2122 {
    2123         return simd_not(simd_xor(arg1, arg2));
    2124 }
    2125 
    2126 //The total number of operations is 8.0
    //simd256<2>::eq (removed-side copy): builds 2-bit equality from the 1-bit comparison; a 2-bit field is
    //equal only when both of its bits compared equal.
    //Assumes simd256<2>::srli/slli shift within each 2-bit field and fill with zeros - TODO confirm.
    2127 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::eq(bitblock256_t arg1, bitblock256_t arg2)
    2128 {
    2129         bitblock256_t tmpAns = simd256<(1)>::eq(arg1, arg2);
    //Low bit of each field becomes (low-bit result AND high-bit result).
    2130         bitblock256_t loMask = simd_and(tmpAns, simd256<2>::srli<(1)>(tmpAns));
    //Mirror the combined answer into the high bit.
    2131         bitblock256_t hiMask = simd256<2>::slli<(1)>(loMask);
    2132         return simd_or(loMask, hiMask);
    2133 }
    2134 
    2135 //The total number of operations is 9.0
    //simd256<4>::eq (removed-side copy): derives 4-bit equality from simd256<8>::eq. Each operand is masked
    //with simd256<8>::himask() (resp. lomask()) so that only one half of every 8-bit field takes part in the
    //byte comparison; the comparison result is masked the same way and the two halves are ORed together.
    2136 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::eq(bitblock256_t arg1, bitblock256_t arg2)
    2137 {
    2138         return simd_or(simd_and(simd256<(8)>::himask(), simd256<(8)>::eq(simd_and(simd256<(8)>::himask(), arg1), simd_and(simd256<(8)>::himask(), arg2))), simd_and(simd256<(8)>::lomask(), simd256<(8)>::eq(simd_and(simd256<(8)>::lomask(), arg1), simd_and(simd256<(8)>::lomask(), arg2))));
    2139 }
    2140 
    2141 //The total number of operations is 1.0
    //simd256<8>::eq (removed-side copy): field-wise equality on 8-bit fields via _mm256_cmpeq_epi8, which
    //yields all ones in a field where the operands match and zero where they differ.
    2142 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::eq(bitblock256_t arg1, bitblock256_t arg2)
    2143 {
    2144         return _mm256_cmpeq_epi8(arg1, arg2);
    2145 }
    2146 
    2147 //The total number of operations is 1.0
    //simd256<16>::eq (removed-side copy): field-wise equality on 16-bit fields via _mm256_cmpeq_epi16, which
    //yields all ones in a field where the operands match and zero where they differ.
    2148 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::eq(bitblock256_t arg1, bitblock256_t arg2)
    2149 {
    2150         return _mm256_cmpeq_epi16(arg1, arg2);
    2151 }
    2152 
    2153 //The total number of operations is 1.0
    //simd256<32>::eq (removed-side copy): field-wise equality on 32-bit fields via _mm256_cmpeq_epi32, which
    //yields all ones in a field where the operands match and zero where they differ.
    2154 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::eq(bitblock256_t arg1, bitblock256_t arg2)
    2155 {
    2156         return _mm256_cmpeq_epi32(arg1, arg2);
    2157 }
    2158 
    2159 //The total number of operations is 1.0
    //simd256<64>::eq (removed-side copy): field-wise equality on 64-bit fields via _mm256_cmpeq_epi64, which
    //yields all ones in a field where the operands match and zero where they differ.
    2160 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::eq(bitblock256_t arg1, bitblock256_t arg2)
    2161 {
    2162         return _mm256_cmpeq_epi64(arg1, arg2);
    2163 }
    2164 
    2165 //The total number of operations is 13.6666666667
    //simd256<128>::eq (removed-side copy): builds 128-bit equality from the 64-bit comparison; a 128-bit
    //field is equal only when both of its 64-bit halves compared equal.
    //Assumes simd256<128>::srli/slli shift within each 128-bit field and fill with zeros - TODO confirm.
    2166 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::eq(bitblock256_t arg1, bitblock256_t arg2)
    2167 {
    2168         bitblock256_t tmpAns = simd256<(64)>::eq(arg1, arg2);
    //Low half becomes (low-half result AND high-half result).
    2169         bitblock256_t loMask = simd_and(tmpAns, simd256<128>::srli<(64)>(tmpAns));
    //Mirror the combined answer into the high half.
    2170         bitblock256_t hiMask = simd256<128>::slli<(64)>(loMask);
    2171         return simd_or(loMask, hiMask);
    2172 }
    2173 
    2174 //The total number of operations is 34.6666666667
    //simd256<256>::eq (removed-side copy): builds whole-register equality from the 128-bit comparison using
    //the same shift-and-combine scheme one level up.
    //Assumes simd256<256>::srli/slli shift the full 256-bit value and fill with zeros - TODO confirm.
    2175 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::eq(bitblock256_t arg1, bitblock256_t arg2)
    2176 {
    2177         bitblock256_t tmpAns = simd256<(128)>::eq(arg1, arg2);
    //Low 128 bits become (low-half result AND high-half result).
    2178         bitblock256_t loMask = simd_and(tmpAns, simd256<256>::srli<(128)>(tmpAns));
    //Mirror the combined answer into the high 128 bits.
    2179         bitblock256_t hiMask = simd256<256>::slli<(128)>(loMask);
    2180         return simd_or(loMask, hiMask);
    2181 }
    2182 
    21832174//The total number of operations is 0
    21842175template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::himask()
     
     22082199template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::himask()
     22092200{
     //Mask with the upper 16 bits of every 32-bit field set. The old line spelled the value as -65536 and the
     //new line as 4294901760ULL; both denote the 32-bit pattern 0xFFFF0000 (4294967296 - 65536 = 4294901760).
    2210         return simd256<32>::constant<-65536>();
     2201        return simd256<32>::constant<4294901760ULL>();
     22112202}
    22122203
     
     22142205template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::himask()
     22152206{
     //_mm256_set_epi32 takes its arguments from the highest 32-bit element down to the lowest, so the
     //alternating (ones, 0) pattern sets the upper 32 bits of every 64-bit field.
     //The new line writes all-ones as (4294967296ULL)-1 == 0xFFFFFFFF instead of -1; the (int32_t) cast is
     //expected to give the same all-ones pattern on two's-complement targets.
    2216         return ((bitblock256_t)(_mm256_set_epi32((int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0))));
     2207        return ((bitblock256_t)(_mm256_set_epi32((int32_t)(((4294967296ULL)-1)), (int32_t)(0), (int32_t)(((4294967296ULL)-1)), (int32_t)(0), (int32_t)(((4294967296ULL)-1)), (int32_t)(0), (int32_t)(((4294967296ULL)-1)), (int32_t)(0))));
     22172208}
    22182209
     
     22202211template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::himask()
     22212212{
     //_mm256_set_epi32 takes its arguments from the highest 32-bit element down to the lowest, so the
     //(ones, ones, 0, 0) pattern repeated twice sets the upper 64 bits of every 128-bit field.
     //The new line writes all-ones as (4294967296ULL)-1 == 0xFFFFFFFF instead of -1.
    2222         return ((bitblock256_t)(_mm256_set_epi32((int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0))));
     2213        return ((bitblock256_t)(_mm256_set_epi32((int32_t)(((4294967296ULL)-1)), (int32_t)(((4294967296ULL)-1)), (int32_t)(0), (int32_t)(0), (int32_t)(((4294967296ULL)-1)), (int32_t)(((4294967296ULL)-1)), (int32_t)(0), (int32_t)(0))));
     22232214}
    22242215
     
     22262217template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::himask()
     22272218{
     //_mm256_set_epi32 takes its arguments from the highest 32-bit element down to the lowest, so four
     //all-ones elements followed by four zeros set the upper 128 bits of the 256-bit value.
     //The new line writes all-ones as (4294967296ULL)-1 == 0xFFFFFFFF instead of -1.
    2228         return ((bitblock256_t)(_mm256_set_epi32((int32_t)(-1), (int32_t)(-1), (int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(0))));
     2219        return ((bitblock256_t)(_mm256_set_epi32((int32_t)(((4294967296ULL)-1)), (int32_t)(((4294967296ULL)-1)), (int32_t)(((4294967296ULL)-1)), (int32_t)(((4294967296ULL)-1)), (int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(0))));
     22292220}
    22302221
     
    22942285}
    22952286
    2296 //The total number of operations is 14.5
    //simd256<2>::abs (removed-side copy): absolute value on 2-bit fields, written as one bit-level select.
    //Under the 2-bit himask the result takes arg1 AND (NOT arg1 shifted left by one across the whole
    //register); elsewhere it takes arg1 itself.
    //simd256<1>::ifh presumably picks its second argument where the mask bit is set and its third
    //elsewhere - confirm against its definition.
    2297 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::abs(bitblock256_t arg1)
    2298 {
    2299         return simd256<1>::ifh(simd256<2>::himask(), simd_and(arg1, simd256<256>::slli<1>(simd_not(arg1))), arg1);
    2300 }
    2301 
    2302 //The total number of operations is 19.0
    //simd256<4>::abs (removed-side copy): absolute value on 4-bit fields via compare-and-select.
    //simd256<1>::ifh presumably picks its second argument where the mask bit is set and its third
    //elsewhere - confirm against its definition.
    2303 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::abs(bitblock256_t arg1)
    2304 {
    //Mask of the fields that compare greater than zero.
    2305         bitblock256_t gtMask = simd256<4>::gt(arg1, simd256<4>::constant<0>());
    //Where the mask is set keep arg1; elsewhere the mask is zero, so sub(gtMask, arg1) is 0 - arg1.
    2306         return simd256<1>::ifh(gtMask, arg1, simd256<4>::sub(gtMask, arg1));
    2307 }
    2308 
    2309 //The total number of operations is 1.0
    //simd256<8>::abs (removed-side copy): absolute value on 8-bit fields; forwards to _mm256_abs_epi8.
    2310 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::abs(bitblock256_t arg1)
    2311 {
    2312         return _mm256_abs_epi8(arg1);
    2313 }
    2314 
    2315 //The total number of operations is 1.0
    //simd256<16>::abs (removed-side copy): absolute value on 16-bit fields; forwards to _mm256_abs_epi16.
    2316 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::abs(bitblock256_t arg1)
    2317 {
    2318         return _mm256_abs_epi16(arg1);
    2319 }
    2320 
    2321 //The total number of operations is 1.0
    //simd256<32>::abs (removed-side copy): absolute value on 32-bit fields; forwards to _mm256_abs_epi32.
    2322 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::abs(bitblock256_t arg1)
    2323 {
    2324         return _mm256_abs_epi32(arg1);
    2325 }
    2326 
    2327 //The total number of operations is 5.0
    //simd256<64>::abs (removed-side copy): absolute value on 64-bit fields via compare-and-select.
    //simd256<1>::ifh presumably picks its second argument where the mask bit is set and its third
    //elsewhere - confirm against its definition.
    2328 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::abs(bitblock256_t arg1)
    2329 {
    //Mask of the fields that compare greater than zero.
    2330         bitblock256_t gtMask = simd256<64>::gt(arg1, simd256<64>::constant<0>());
    //Where the mask is set keep arg1; elsewhere the mask is zero, so sub(gtMask, arg1) is 0 - arg1.
    2331         return simd256<1>::ifh(gtMask, arg1, simd256<64>::sub(gtMask, arg1));
    2332 }
    2333 
    2334 //The total number of operations is 37.0
    //simd256<128>::abs (removed-side copy): absolute value on 128-bit fields.
    //simd256<1>::ifh presumably picks its second argument where the mask bit is set and its third
    //elsewhere - confirm against its definition.
    2335 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::abs(bitblock256_t arg1)
    2336 {
    //Replace the upper 64-bit half by its 64-bit abs and test whether the 128-bit field is unchanged;
    //presumably this marks the fields whose upper half (and thus sign) was already non-negative.
    2337         bitblock256_t eqMask = simd256<128>::eq(simd256<1>::ifh(simd256<128>::himask(), simd256<(64)>::abs(arg1), arg1), arg1);
    //Unchanged fields keep arg1; in the others eqMask is zero, so sub(eqMask, arg1) is 0 - arg1.
    2338         return simd256<1>::ifh(eqMask, arg1, simd256<128>::sub(eqMask, arg1));
    2339 }
    2340 
    2341 //The total number of operations is 120.833333333
    //simd256<256>::abs (removed-side copy): absolute value of the whole 256-bit value, using the same scheme
    //as the 128-bit version one level up.
    //simd256<1>::ifh presumably picks its second argument where the mask bit is set and its third
    //elsewhere - confirm against its definition.
    2342 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::abs(bitblock256_t arg1)
    2343 {
    //Replace the upper 128-bit half by its 128-bit abs and test whether the value is unchanged.
    2344         bitblock256_t eqMask = simd256<256>::eq(simd256<1>::ifh(simd256<256>::himask(), simd256<(128)>::abs(arg1), arg1), arg1);
    //Unchanged value keeps arg1; otherwise eqMask is zero, so sub(eqMask, arg1) is 0 - arg1.
    2345         return simd256<1>::ifh(eqMask, arg1, simd256<256>::sub(eqMask, arg1));
     2287//The total number of operations is 1.0
     //simd256<1>::umax: unsigned maximum on 1-bit fields, which is simply the bitwise OR of the operands.
     2288template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::umax(bitblock256_t arg1, bitblock256_t arg2)
     2289{
     2290        return simd_or(arg1, arg2);
     2291}
     2292
     2293//The total number of operations is 16.0
     //simd256<2>::umax: unsigned maximum on 2-bit fields, derived from the 4-bit version.
     //  - upper 2-bit field of each 4-bit field: taken from umax<4>(arg1, arg2) under simd256<4>::himask();
     //    the top bits of the larger 4-bit value are also the larger top 2-bit value.
     //  - lower 2-bit field: umax<4> of the operands after masking them with simd256<4>::lomask().
     2294template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::umax(bitblock256_t arg1, bitblock256_t arg2)
     2295{
     2296        return simd_or(simd_and(simd256<(4)>::himask(), simd256<(4)>::umax(arg1, arg2)), simd256<(4)>::umax(simd_and(simd256<(4)>::lomask(), arg1), simd_and(simd256<(4)>::lomask(), arg2)));
     2297}
     2298
     2299//The total number of operations is 6.0
     //simd256<4>::umax: unsigned maximum on 4-bit fields, derived from the 8-bit version.
     //  - upper nibble of each byte: taken from umax<8>(arg1, arg2) under simd256<8>::himask();
     //    the top bits of the larger byte are also the larger upper nibble.
     //  - lower nibble: umax<8> of the operands after masking them with simd256<8>::lomask().
     2300template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::umax(bitblock256_t arg1, bitblock256_t arg2)
     2301{
     2302        return simd_or(simd_and(simd256<(8)>::himask(), simd256<(8)>::umax(arg1, arg2)), simd256<(8)>::umax(simd_and(simd256<(8)>::lomask(), arg1), simd_and(simd256<(8)>::lomask(), arg2)));
     2303}
     2304
     2305//The total number of operations is 1.0
     //simd256<8>::umax: unsigned maximum on 8-bit fields; forwards to _mm256_max_epu8.
     2306template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::umax(bitblock256_t arg1, bitblock256_t arg2)
     2307{
     2308        return _mm256_max_epu8(arg1, arg2);
     2309}
     2310
     2311//The total number of operations is 1.0
     //simd256<16>::umax: unsigned maximum on 16-bit fields; forwards to _mm256_max_epu16.
     2312template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::umax(bitblock256_t arg1, bitblock256_t arg2)
     2313{
     2314        return _mm256_max_epu16(arg1, arg2);
     2315}
     2316
     2317//The total number of operations is 1.0
     //simd256<32>::umax: unsigned maximum on 32-bit fields; forwards to _mm256_max_epu32.
     2318template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::umax(bitblock256_t arg1, bitblock256_t arg2)
     2319{
     2320        return _mm256_max_epu32(arg1, arg2);
     2321}
     2322
     2323//The total number of operations is 7.0
     //simd256<64>::umax: unsigned maximum on 64-bit fields, built on simd256<64>::max
     //(presumably the signed maximum - confirm against its definition).
     2324template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::umax(bitblock256_t arg1, bitblock256_t arg2)
     2325{
     //9223372036854775808 is 2^63, i.e. only the top bit of each 64-bit field.
     2326        bitblock256_t high_bit = simd256<64>::constant<(9223372036854775808ULL)>();
     //Flipping the top bit of both operands maps unsigned order onto signed order; flipping it again on the
     //result restores the original encoding.
     2327        return simd_xor(simd256<64>::max(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
     2328}
     2329
     2330//The total number of operations is 28.6666666667
     //simd256<128>::umax: unsigned maximum on 128-bit fields, built from the 64-bit version.
     //simd256<1>::ifh presumably picks its second argument where the mask bit is set and its third
     //elsewhere - confirm against its definition.
     2331template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::umax(bitblock256_t arg1, bitblock256_t arg2)
     2332{
     //Independent maxima of the two 64-bit halves.
     2333        bitblock256_t tmpAns = simd256<(64)>::umax(arg1, arg2);
     //Shifted into the low half: did the upper-half maximum equal arg1's (resp. arg2's) upper half?
     2334        bitblock256_t eqMask1 = simd256<128>::srli<(64)>(simd256<(64)>::eq(tmpAns, arg1));
     2335        bitblock256_t eqMask2 = simd256<128>::srli<(64)>(simd256<(64)>::eq(tmpAns, arg2));
     //Upper half: tmpAns. Lower half: from the operand whose upper half won, or tmpAns (the larger lower
     //half) when the upper halves tie.
     2336        return simd256<1>::ifh(simd256<128>::himask(), tmpAns, simd256<1>::ifh(eqMask1, simd256<1>::ifh(eqMask2, tmpAns, arg1), arg2));
     2337}
     2338
     2339//The total number of operations is 84.0
     //simd256<256>::umax: unsigned maximum of the whole 256-bit value, built from the 128-bit version with
     //the same tie-breaking scheme one level up.
     //simd256<1>::ifh presumably picks its second argument where the mask bit is set and its third
     //elsewhere - confirm against its definition.
     2340template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::umax(bitblock256_t arg1, bitblock256_t arg2)
     2341{
     //Independent maxima of the two 128-bit halves.
     2342        bitblock256_t tmpAns = simd256<(128)>::umax(arg1, arg2);
     //Shifted into the low half: did the upper-half maximum equal arg1's (resp. arg2's) upper half?
     2343        bitblock256_t eqMask1 = simd256<256>::srli<(128)>(simd256<(128)>::eq(tmpAns, arg1));
     2344        bitblock256_t eqMask2 = simd256<256>::srli<(128)>(simd256<(128)>::eq(tmpAns, arg2));
     //Upper half: tmpAns. Lower half: from the operand whose upper half won, or tmpAns when they tie.
     2345        return simd256<1>::ifh(simd256<256>::himask(), tmpAns, simd256<1>::ifh(eqMask1, simd256<1>::ifh(eqMask2, tmpAns, arg1), arg2));
     23462346}
    23472347
     
     34123412template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::srli(bitblock256_t arg1)
     34133413{
     //Moves 64-bit elements towards the low end by sh positions. For sh = 1..3 the elements are rearranged with
     //_mm256_permute4x64_epi64 (immediates 57, 14, 3) and the vacated upper elements are cleared by ANDing
     //with a _mm256_set_epi64x mask (arguments run from highest to lowest element). sh == 0 returns arg1;
     //any other sh returns the zero constant.
     //The new line spells the all-ones element as 18446744073709551615ULL (2^64-1) instead of -1.
    3414         return ((sh == 3) ? simd_and(_mm256_set_epi64x((int64_t)(0), (int64_t)(0), (int64_t)(0), (int64_t)(-1)), _mm256_permute4x64_epi64(arg1, (int32_t)(3))) : ((sh == 2) ? simd_and(_mm256_set_epi64x((int64_t)(0), (int64_t)(0), (int64_t)(-1), (int64_t)(-1)), _mm256_permute4x64_epi64(arg1, (int32_t)(14))) : ((sh == 1) ? simd_and(_mm256_set_epi64x((int64_t)(0), (int64_t)(-1), (int64_t)(-1), (int64_t)(-1)), _mm256_permute4x64_epi64(arg1, (int32_t)(57))) : ((sh == 0) ? arg1 : simd256<32>::constant<0>()))));
     3414        return ((sh == 3) ? simd_and(_mm256_set_epi64x((int64_t)(0), (int64_t)(0), (int64_t)(0), (int64_t)(18446744073709551615ULL)), _mm256_permute4x64_epi64(arg1, (int32_t)(3))) : ((sh == 2) ? simd_and(_mm256_set_epi64x((int64_t)(0), (int64_t)(0), (int64_t)(18446744073709551615ULL), (int64_t)(18446744073709551615ULL)), _mm256_permute4x64_epi64(arg1, (int32_t)(14))) : ((sh == 1) ? simd_and(_mm256_set_epi64x((int64_t)(0), (int64_t)(18446744073709551615ULL), (int64_t)(18446744073709551615ULL), (int64_t)(18446744073709551615ULL)), _mm256_permute4x64_epi64(arg1, (int32_t)(57))) : ((sh == 0) ? arg1 : simd256<32>::constant<0>()))));
     34153415}
    34163416
     
     35563556template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::slli(bitblock256_t arg1)
     35573557{
     //Moves 64-bit elements towards the high end by sh positions. For sh = 1..3 the elements are rearranged
     //with _mm256_permute4x64_epi64 (immediates 144, 64, 0) and the vacated lower elements are cleared by
     //ANDing with a _mm256_set_epi64x mask (arguments run from highest to lowest element). sh == 0 returns
     //arg1; any other sh returns the zero constant.
     //The new line spells the all-ones element as 18446744073709551615ULL (2^64-1) instead of -1.
    3558         return ((sh == 1) ? simd_and(_mm256_set_epi64x((int64_t)(-1), (int64_t)(-1), (int64_t)(-1), (int64_t)(0)), _mm256_permute4x64_epi64(arg1, (int32_t)((144)))) : ((sh == 2) ? simd_and(_mm256_set_epi64x((int64_t)(-1), (int64_t)(-1), (int64_t)(0), (int64_t)(0)), _mm256_permute4x64_epi64(arg1, (int32_t)(64))) : ((sh == 3) ? simd_and(_mm256_set_epi64x((int64_t)(-1), (int64_t)(0), (int64_t)(0), (int64_t)(0)), _mm256_permute4x64_epi64(arg1, (int32_t)(0))) : ((sh == 0) ? arg1 : simd256<32>::constant<0>()))));
     3558        return ((sh == 1) ? simd_and(_mm256_set_epi64x((int64_t)(18446744073709551615ULL), (int64_t)(18446744073709551615ULL), (int64_t)(18446744073709551615ULL), (int64_t)(0)), _mm256_permute4x64_epi64(arg1, (int32_t)((144)))) : ((sh == 2) ? simd_and(_mm256_set_epi64x((int64_t)(18446744073709551615ULL), (int64_t)(18446744073709551615ULL), (int64_t)(0), (int64_t)(0)), _mm256_permute4x64_epi64(arg1, (int32_t)(64))) : ((sh == 3) ? simd_and(_mm256_set_epi64x((int64_t)(18446744073709551615ULL), (int64_t)(0), (int64_t)(0), (int64_t)(0)), _mm256_permute4x64_epi64(arg1, (int32_t)(0))) : ((sh == 0) ? arg1 : simd256<32>::constant<0>()))));
     35593559}
    35603560
     
     36343634IDISA_ALWAYS_INLINE bool bitblock256::all(bitblock256_t arg1)
     36353635{
     //True when every bit of arg1 is set: _mm256_testz_si256 returns 1 when the AND of its operands is zero,
     //so the test succeeds exactly when simd_not(arg1) has no bit in common with the all-ones constant.
     //The new line spells the all-ones byte as 255 instead of -1.
    3636         return _mm256_testz_si256(((__m256i)(simd_not(arg1))), ((__m256i)(simd256<8>::constant<-1>()))) == 1;
     3636        return _mm256_testz_si256(((__m256i)(simd_not(arg1))), ((__m256i)(simd256<8>::constant<255>()))) == 1;
     36373637}
    36383638
     
    36433643}
    36443644
    3645 //The total number of operations is 1.0
    //bitblock256::any (removed-side copy): true when at least one bit of arg1 is set. _mm256_testz_si256
    //returns 1 only when arg1 AND arg1 is zero, so comparing with 0 reports a non-zero block.
    3646 IDISA_ALWAYS_INLINE bool bitblock256::any(bitblock256_t arg1)
    3647 {
    3648         return _mm256_testz_si256(((__m256i)(arg1)), ((__m256i)(arg1))) == 0;
    3649 }
    3650 
    3651 //The total number of operations is 1.0
    //bitblock256::load_aligned (removed-side copy): loads one block from arg1 with _mm256_load_si256, the
    //aligned-load intrinsic (it requires a 32-byte aligned address). The cast drops the const qualifier of arg1.
    3652 IDISA_ALWAYS_INLINE bitblock256_t bitblock256::load_aligned(const bitblock256_t* arg1)
    3653 {
    3654         return _mm256_load_si256((bitblock256_t*)(arg1));
    3655 }
    3656 
    3657 //The total number of operations is 1.0
    //bitblock256::store_unaligned (removed-side copy): writes arg1 to the address arg2 with
    //_mm256_storeu_si256, the store intrinsic that has no alignment requirement.
    3658 IDISA_ALWAYS_INLINE void bitblock256::store_unaligned(bitblock256_t arg1, bitblock256_t* arg2)
    3659 {
    3660         _mm256_storeu_si256((bitblock256_t*)(arg2), arg1);
    3661 }
    3662 
    36633645//The total number of operations is 9.5
    36643646template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock256_t bitblock256::slli(bitblock256_t arg1)
     
    36683650
     36693651//The total number of operations is 1.0
     //bitblock256::any: true when at least one bit of arg1 is set. _mm256_testz_si256 returns 1 only when
     //arg1 AND arg1 is zero, so comparing with 0 reports a non-zero block.
     3652IDISA_ALWAYS_INLINE bool bitblock256::any(bitblock256_t arg1)
     3653{
     3654        return _mm256_testz_si256(((__m256i)(arg1)), ((__m256i)(arg1))) == 0;
     3655}
     3656
     3657//The total number of operations is 1.0
     //bitblock256::load_aligned: loads one block from arg1 with _mm256_load_si256, the aligned-load
     //intrinsic (it requires a 32-byte aligned address). The cast drops the const qualifier of arg1.
     3658IDISA_ALWAYS_INLINE bitblock256_t bitblock256::load_aligned(const bitblock256_t* arg1)
     3659{
     3660        return _mm256_load_si256((bitblock256_t*)(arg1));
     3661}
     3662
     3663//The total number of operations is 1.0
    36703664IDISA_ALWAYS_INLINE void bitblock256::store_aligned(bitblock256_t arg1, bitblock256_t* arg2)
    36713665{
     
    36733667}
    36743668
     3669//The total number of operations is 1.0
     //bitblock256::store_unaligned: writes arg1 to the address arg2 with _mm256_storeu_si256, the store
     //intrinsic that has no alignment requirement.
     3670IDISA_ALWAYS_INLINE void bitblock256::store_unaligned(bitblock256_t arg1, bitblock256_t* arg2)
     3671{
     3672        _mm256_storeu_si256((bitblock256_t*)(arg2), arg1);
     3673}
     3674
    36753675#endif
Note: See TracChangeset for help on using the changeset viewer.