Ignore:
Timestamp:
Nov 29, 2013, 3:03:33 PM (6 years ago)
Author:
linmengl
Message:

regenerate libraries, with negative number constants eliminated.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/idisa_cpp/idisa_ssse3.cpp

    r3526 r3576  
    5656        static IDISA_ALWAYS_INLINE bitblock128_t srl(bitblock128_t arg1, bitblock128_t shift_mask);
    5757        static IDISA_ALWAYS_INLINE bitblock128_t lomask();
     58        static IDISA_ALWAYS_INLINE bitblock128_t lt(bitblock128_t arg1, bitblock128_t arg2);
    5859        static IDISA_ALWAYS_INLINE bitblock128_t vsll(bitblock128_t arg1, bitblock128_t shift_mask);
    5960        static IDISA_ALWAYS_INLINE bitblock128_t umin(bitblock128_t arg1, bitblock128_t arg2);
    6061        template <typename FieldType<fw>::T val> static IDISA_ALWAYS_INLINE bitblock128_t constant();
    6162        static IDISA_ALWAYS_INLINE bitblock128_t min(bitblock128_t arg1, bitblock128_t arg2);
    62         static IDISA_ALWAYS_INLINE bitblock128_t add(bitblock128_t arg1, bitblock128_t arg2);
    6363        static IDISA_ALWAYS_INLINE bitblock128_t umax(bitblock128_t arg1, bitblock128_t arg2);
    6464        static IDISA_ALWAYS_INLINE bitblock128_t abs(bitblock128_t arg1);
     
    6666        static IDISA_ALWAYS_INLINE bitblock128_t any(bitblock128_t arg1);
    6767        template <uint16_t sh> static IDISA_ALWAYS_INLINE bitblock128_t srai(bitblock128_t arg1);
    68         static IDISA_ALWAYS_INLINE bitblock128_t lt(bitblock128_t arg1, bitblock128_t arg2);
     68        static IDISA_ALWAYS_INLINE bitblock128_t add(bitblock128_t arg1, bitblock128_t arg2);
    6969        static IDISA_ALWAYS_INLINE bitblock128_t ugt(bitblock128_t arg1, bitblock128_t arg2);
    7070};
     
    135135IDISA_ALWAYS_INLINE bitblock128_t simd_nor(bitblock128_t arg1, bitblock128_t arg2);
    136136IDISA_ALWAYS_INLINE bitblock128_t simd_not(bitblock128_t arg1);
     137IDISA_ALWAYS_INLINE bitblock128_t simd_andc(bitblock128_t arg1, bitblock128_t arg2);
    137138IDISA_ALWAYS_INLINE bitblock128_t simd_or(bitblock128_t arg1, bitblock128_t arg2);
    138 IDISA_ALWAYS_INLINE bitblock128_t simd_andc(bitblock128_t arg1, bitblock128_t arg2);
    139139IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2);
    140140IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2);
     
    261261template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::ifh(bitblock128_t arg1, bitblock128_t arg2, bitblock128_t arg3);
    262262template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::ifh(bitblock128_t arg1, bitblock128_t arg2, bitblock128_t arg3);
    263 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::srai(bitblock128_t arg1);
    264 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::srai(bitblock128_t arg1);
    265 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::srai(bitblock128_t arg1);
    266 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::srai(bitblock128_t arg1);
    267 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::srai(bitblock128_t arg1);
    268 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srai(bitblock128_t arg1);
    269 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srai(bitblock128_t arg1);
    270263template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::vsrl(bitblock128_t arg1, bitblock128_t shift_mask);
    271264template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::vsrl(bitblock128_t arg1, bitblock128_t shift_mask);
     
    278271template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::add_hl(bitblock128_t arg1);
    279272template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srl(bitblock128_t arg1, bitblock128_t shift_mask);
    280 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::lomask();
    281 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::lomask();
    282 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::lomask();
    283 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::lomask();
    284 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::lomask();
    285 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::lomask();
    286 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lomask();
    287273template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::vsll(bitblock128_t arg1, bitblock128_t shift_mask);
    288274template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::vsll(bitblock128_t arg1, bitblock128_t shift_mask);
     
    303289template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::min(bitblock128_t arg1, bitblock128_t arg2);
    304290template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::min(bitblock128_t arg1, bitblock128_t arg2);
     291template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::lomask();
     292template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::lomask();
     293template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::lomask();
     294template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::lomask();
     295template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::lomask();
     296template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::lomask();
     297template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lomask();
    305298template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umin(bitblock128_t arg1, bitblock128_t arg2);
    306299template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::umin(bitblock128_t arg1, bitblock128_t arg2);
     
    311304template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::umin(bitblock128_t arg1, bitblock128_t arg2);
    312305template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::umin(bitblock128_t arg1, bitblock128_t arg2);
    313 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umax(bitblock128_t arg1, bitblock128_t arg2);
    314 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::umax(bitblock128_t arg1, bitblock128_t arg2);
    315 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::umax(bitblock128_t arg1, bitblock128_t arg2);
    316 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::umax(bitblock128_t arg1, bitblock128_t arg2);
    317 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::umax(bitblock128_t arg1, bitblock128_t arg2);
    318 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::umax(bitblock128_t arg1, bitblock128_t arg2);
    319 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::umax(bitblock128_t arg1, bitblock128_t arg2);
    320 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::umax(bitblock128_t arg1, bitblock128_t arg2);
     306template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1);
     307template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::abs(bitblock128_t arg1);
     308template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::abs(bitblock128_t arg1);
     309template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::abs(bitblock128_t arg1);
     310template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::abs(bitblock128_t arg1);
     311template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::abs(bitblock128_t arg1);
     312template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::abs(bitblock128_t arg1);
     313template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::eq(bitblock128_t arg1, bitblock128_t arg2);
     314template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::eq(bitblock128_t arg1, bitblock128_t arg2);
     315template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::eq(bitblock128_t arg1, bitblock128_t arg2);
     316template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::eq(bitblock128_t arg1, bitblock128_t arg2);
     317template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::eq(bitblock128_t arg1, bitblock128_t arg2);
     318template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::eq(bitblock128_t arg1, bitblock128_t arg2);
     319template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::eq(bitblock128_t arg1, bitblock128_t arg2);
     320template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::eq(bitblock128_t arg1, bitblock128_t arg2);
     321template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::srai(bitblock128_t arg1);
     322template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::srai(bitblock128_t arg1);
     323template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::srai(bitblock128_t arg1);
     324template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::srai(bitblock128_t arg1);
     325template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::srai(bitblock128_t arg1);
     326template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srai(bitblock128_t arg1);
     327template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srai(bitblock128_t arg1);
    321328template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::lt(bitblock128_t arg1, bitblock128_t arg2);
    322329template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::lt(bitblock128_t arg1, bitblock128_t arg2);
     
    327334template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::lt(bitblock128_t arg1, bitblock128_t arg2);
    328335template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lt(bitblock128_t arg1, bitblock128_t arg2);
    329 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::eq(bitblock128_t arg1, bitblock128_t arg2);
    330 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::eq(bitblock128_t arg1, bitblock128_t arg2);
    331 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::eq(bitblock128_t arg1, bitblock128_t arg2);
    332 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::eq(bitblock128_t arg1, bitblock128_t arg2);
    333 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::eq(bitblock128_t arg1, bitblock128_t arg2);
    334 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::eq(bitblock128_t arg1, bitblock128_t arg2);
    335 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::eq(bitblock128_t arg1, bitblock128_t arg2);
    336 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::eq(bitblock128_t arg1, bitblock128_t arg2);
    337336template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::himask();
    338337template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::himask();
     
    350349template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::add(bitblock128_t arg1, bitblock128_t arg2);
    351350template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::add(bitblock128_t arg1, bitblock128_t arg2);
    352 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1);
    353 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::abs(bitblock128_t arg1);
    354 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::abs(bitblock128_t arg1);
    355 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::abs(bitblock128_t arg1);
    356 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::abs(bitblock128_t arg1);
    357 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::abs(bitblock128_t arg1);
    358 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::abs(bitblock128_t arg1);
     351template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umax(bitblock128_t arg1, bitblock128_t arg2);
     352template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::umax(bitblock128_t arg1, bitblock128_t arg2);
     353template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::umax(bitblock128_t arg1, bitblock128_t arg2);
     354template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::umax(bitblock128_t arg1, bitblock128_t arg2);
     355template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::umax(bitblock128_t arg1, bitblock128_t arg2);
     356template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::umax(bitblock128_t arg1, bitblock128_t arg2);
     357template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::umax(bitblock128_t arg1, bitblock128_t arg2);
     358template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::umax(bitblock128_t arg1, bitblock128_t arg2);
    359359template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::umin_hl(bitblock128_t arg1, bitblock128_t arg2);
    360360template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<4>::umin_hl(bitblock128_t arg1, bitblock128_t arg2);
     
    560560IDISA_ALWAYS_INLINE bitblock128_t simd_not(bitblock128_t arg1)
    561561{
    562         return simd_xor(arg1, simd128<32>::constant<-1>());
     562        return simd_xor(arg1, simd128<32>::constant<4294967295ULL>());
     563}
     564
     565//The total number of operations is 1.0
     566IDISA_ALWAYS_INLINE bitblock128_t simd_andc(bitblock128_t arg1, bitblock128_t arg2)
     567{
     568        return _mm_andnot_si128(arg2, arg1);
    563569}
    564570
     
    567573{
    568574        return _mm_or_si128(arg1, arg2);
    569 }
    570 
    571 //The total number of operations is 1.0
    572 IDISA_ALWAYS_INLINE bitblock128_t simd_andc(bitblock128_t arg1, bitblock128_t arg2)
    573 {
    574         return _mm_andnot_si128(arg2, arg1);
    575575}
    576576
     
    13081308template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::neg(bitblock128_t arg1)
    13091309{
    1310         return _mm_sign_epi32(arg1, simd128<32>::constant<-1>());
     1310        return _mm_sign_epi32(arg1, simd128<32>::constant<((4294967296ULL)-1)>());
    13111311}
    13121312
     
    14131413}
    14141414
    1415 //The total number of operations is 4.0
    1416 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::srai(bitblock128_t arg1)
    1417 {
    1418         return ((sh == 0) ? arg1 : simd_or(simd_and(simd128<2>::himask(), arg1), simd128<2>::srli<1>(arg1)));
    1419 }
    1420 
    1421 //The total number of operations is 10.0
    1422 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::srai(bitblock128_t arg1)
    1423 {
    1424         bitblock128_t tmp = simd128<4>::srli<((sh >= 4) ? (3) : ((sh < 0) ? 0 : sh))>(arg1);
    1425         return simd_or(tmp, simd128<4>::sub(simd128<4>::constant<0>(), simd_and(simd128<4>::constant<(1<<((4-((sh >= 4) ? (3) : ((sh < 0) ? 0 : sh)))-1))>(), tmp)));
    1426 }
    1427 
    1428 //The total number of operations is 5.0
    1429 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::srai(bitblock128_t arg1)
    1430 {
    1431         bitblock128_t tmp = simd128<8>::srli<((sh >= 8) ? (7) : ((sh < 0) ? 0 : sh))>(arg1);
    1432         return simd_or(tmp, simd128<8>::sub(simd128<8>::constant<0>(), simd_and(simd128<8>::constant<(1<<((8-((sh >= 8) ? (7) : ((sh < 0) ? 0 : sh)))-1))>(), tmp)));
    1433 }
    1434 
    1435 //The total number of operations is 1.0
    1436 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::srai(bitblock128_t arg1)
    1437 {
    1438         return _mm_srai_epi16(arg1, (int32_t)(sh));
    1439 }
    1440 
    1441 //The total number of operations is 1.0
    1442 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::srai(bitblock128_t arg1)
    1443 {
    1444         return _mm_srai_epi32(arg1, (int32_t)(sh));
    1445 }
    1446 
    1447 //The total number of operations is 4.5
    1448 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srai(bitblock128_t arg1)
    1449 {
    1450         return simd_or(simd_and(simd128<64>::himask(), simd128<(32)>::srai<((sh < (32)) ? sh : (32))>(arg1)), ((sh <= (32)) ? simd128<64>::srli<sh>(arg1) : simd128<(32)>::srai<(sh-(32))>(simd128<64>::srli<(32)>(arg1))));
    1451 }
    1452 
    1453 //The total number of operations is 11.0833333333
    1454 template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srai(bitblock128_t arg1)
    1455 {
    1456         return simd_or(simd_and(simd128<128>::himask(), simd128<(64)>::srai<((sh < (64)) ? sh : (64))>(arg1)), ((sh <= (64)) ? simd128<128>::srli<sh>(arg1) : simd128<(64)>::srai<(sh-(64))>(simd128<128>::srli<(64)>(arg1))));
    1457 }
    1458 
    14591415//The total number of operations is 10.0
    14601416template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::vsrl(bitblock128_t arg1, bitblock128_t shift_mask)
     
    15191475}
    15201476
    1521 //The total number of operations is 0
    1522 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::lomask()
    1523 {
    1524         return simd128<2>::constant<(1)>();
    1525 }
    1526 
    1527 //The total number of operations is 0
    1528 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::lomask()
    1529 {
    1530         return simd128<4>::constant<(3)>();
    1531 }
    1532 
    1533 //The total number of operations is 0
    1534 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::lomask()
    1535 {
    1536         return simd128<8>::constant<(15)>();
    1537 }
    1538 
    1539 //The total number of operations is 0
    1540 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::lomask()
    1541 {
    1542         return simd128<16>::constant<(255)>();
    1543 }
    1544 
    1545 //The total number of operations is 0
    1546 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::lomask()
    1547 {
    1548         return simd128<32>::constant<(65535)>();
    1549 }
    1550 
    1551 //The total number of operations is 0
    1552 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::lomask()
    1553 {
    1554         return _mm_set_epi32((int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1));
    1555 }
    1556 
    1557 //The total number of operations is 0
    1558 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lomask()
    1559 {
    1560         return _mm_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1));
    1561 }
    1562 
    15631477//The total number of operations is 10.0
    15641478template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::vsll(bitblock128_t arg1, bitblock128_t shift_mask)
     
    15771491template <> template <FieldType<1>::T val> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::constant()
    15781492{
    1579         return simd128<32>::constant<(-1*val)>();
     1493        return simd128<2>::constant<((val+val)+val)>();
    15801494}
    15811495
     
    16721586{
    16731587        return simd128<1>::ifh(simd128<128>::gt(arg1, arg2), arg2, arg1);
     1588}
     1589
     1590//The total number of operations is 0
     1591template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::lomask()
     1592{
     1593        return simd128<2>::constant<(1)>();
     1594}
     1595
     1596//The total number of operations is 0
     1597template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::lomask()
     1598{
     1599        return simd128<4>::constant<(3)>();
     1600}
     1601
     1602//The total number of operations is 0
     1603template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::lomask()
     1604{
     1605        return simd128<8>::constant<(15)>();
     1606}
     1607
     1608//The total number of operations is 0
     1609template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::lomask()
     1610{
     1611        return simd128<16>::constant<(255)>();
     1612}
     1613
     1614//The total number of operations is 0
     1615template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::lomask()
     1616{
     1617        return simd128<32>::constant<(65535)>();
     1618}
     1619
     1620//The total number of operations is 0
     1621template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::lomask()
     1622{
     1623        return _mm_set_epi32((int32_t)(0), (int32_t)(((4294967296ULL)-1)), (int32_t)(0), (int32_t)(((4294967296ULL)-1)));
     1624}
     1625
     1626//The total number of operations is 0
     1627template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lomask()
     1628{
     1629        return _mm_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(((4294967296ULL)-1)), (int32_t)(((4294967296ULL)-1)));
    16741630}
    16751631
     
    17301686}
    17311687
    1732 //The total number of operations is 1.0
    1733 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umax(bitblock128_t arg1, bitblock128_t arg2)
    1734 {
    1735         return simd_or(arg1, arg2);
    1736 }
    1737 
    1738 //The total number of operations is 15.6666666667
    1739 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::umax(bitblock128_t arg1, bitblock128_t arg2)
    1740 {
    1741         return simd128<1>::ifh(simd128<2>::himask(), simd_or(arg1, arg2), simd_or(simd_and(arg2, simd128<128>::srli<1>(simd_or(simd_not(arg1), arg2))), simd_and(arg1, simd128<128>::srli<1>(simd_or(arg1, simd_not(arg2))))));
    1742 }
    1743 
    1744 //The total number of operations is 6.0
    1745 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::umax(bitblock128_t arg1, bitblock128_t arg2)
    1746 {
    1747         return simd_or(simd_and(simd128<(8)>::himask(), simd128<(8)>::umax(arg1, arg2)), simd128<(8)>::umax(simd_and(simd128<(8)>::lomask(), arg1), simd_and(simd128<(8)>::lomask(), arg2)));
    1748 }
    1749 
    1750 //The total number of operations is 1.0
    1751 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::umax(bitblock128_t arg1, bitblock128_t arg2)
    1752 {
    1753         return _mm_max_epu8(arg1, arg2);
     1688//The total number of operations is 7.33333333333
     1689template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1)
     1690{
     1691        return simd128<1>::ifh(simd128<2>::himask(), simd_and(arg1, simd128<128>::slli<1>(simd_not(arg1))), arg1);
     1692}
     1693
     1694//The total number of operations is 19.0
     1695template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::abs(bitblock128_t arg1)
     1696{
     1697        bitblock128_t gtMask = simd128<4>::gt(arg1, simd128<4>::constant<0>());
     1698        return simd128<1>::ifh(gtMask, arg1, simd128<4>::sub(gtMask, arg1));
     1699}
     1700
     1701//The total number of operations is 1.0
     1702template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::abs(bitblock128_t arg1)
     1703{
     1704        return _mm_abs_epi8(arg1);
     1705}
     1706
     1707//The total number of operations is 1.0
     1708template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::abs(bitblock128_t arg1)
     1709{
     1710        return _mm_abs_epi16(arg1);
     1711}
     1712
     1713//The total number of operations is 1.0
     1714template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::abs(bitblock128_t arg1)
     1715{
     1716        return _mm_abs_epi32(arg1);
     1717}
     1718
     1719//The total number of operations is 13.0
     1720template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::abs(bitblock128_t arg1)
     1721{
     1722        bitblock128_t eqMask = simd128<64>::eq(simd128<1>::ifh(simd128<64>::himask(), simd128<(32)>::abs(arg1), arg1), arg1);
     1723        return simd128<1>::ifh(eqMask, arg1, simd128<64>::sub(eqMask, arg1));
     1724}
     1725
     1726//The total number of operations is 40.0
     1727template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::abs(bitblock128_t arg1)
     1728{
     1729        bitblock128_t eqMask = simd128<128>::eq(simd128<1>::ifh(simd128<128>::himask(), simd128<(64)>::abs(arg1), arg1), arg1);
     1730        return simd128<1>::ifh(eqMask, arg1, simd128<128>::sub(eqMask, arg1));
     1731}
     1732
     1733//The total number of operations is 2.0
     1734template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::eq(bitblock128_t arg1, bitblock128_t arg2)
     1735{
     1736        return simd_not(simd_xor(arg1, arg2));
     1737}
     1738
     1739//The total number of operations is 8.0
     1740template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::eq(bitblock128_t arg1, bitblock128_t arg2)
     1741{
     1742        bitblock128_t tmpAns = simd128<(1)>::eq(arg1, arg2);
     1743        bitblock128_t loMask = simd_and(tmpAns, simd128<2>::srli<(1)>(tmpAns));
     1744        bitblock128_t hiMask = simd128<2>::slli<(1)>(loMask);
     1745        return simd_or(loMask, hiMask);
     1746}
     1747
     1748//The total number of operations is 9.0
     1749template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::eq(bitblock128_t arg1, bitblock128_t arg2)
     1750{
     1751        return simd_or(simd_and(simd128<(8)>::himask(), simd128<(8)>::eq(simd_and(simd128<(8)>::himask(), arg1), simd_and(simd128<(8)>::himask(), arg2))), simd_and(simd128<(8)>::lomask(), simd128<(8)>::eq(simd_and(simd128<(8)>::lomask(), arg1), simd_and(simd128<(8)>::lomask(), arg2))));
     1752}
     1753
     1754//The total number of operations is 1.0
     1755template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::eq(bitblock128_t arg1, bitblock128_t arg2)
     1756{
     1757        return _mm_cmpeq_epi8(arg1, arg2);
     1758}
     1759
     1760//The total number of operations is 1.0
     1761template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::eq(bitblock128_t arg1, bitblock128_t arg2)
     1762{
     1763        return _mm_cmpeq_epi16(arg1, arg2);
     1764}
     1765
     1766//The total number of operations is 1.0
     1767template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::eq(bitblock128_t arg1, bitblock128_t arg2)
     1768{
     1769        return _mm_cmpeq_epi32(arg1, arg2);
     1770}
     1771
     1772//The total number of operations is 5.0
     1773template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::eq(bitblock128_t arg1, bitblock128_t arg2)
     1774{
     1775        bitblock128_t tmpAns = simd128<(32)>::eq(arg1, arg2);
     1776        bitblock128_t loMask = simd_and(tmpAns, simd128<64>::srli<(32)>(tmpAns));
     1777        bitblock128_t hiMask = simd128<64>::slli<(32)>(loMask);
     1778        return simd_or(loMask, hiMask);
     1779}
     1780
     1781//The total number of operations is 11.6666666667
     1782template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::eq(bitblock128_t arg1, bitblock128_t arg2)
     1783{
     1784        bitblock128_t tmpAns = simd128<(64)>::eq(arg1, arg2);
     1785        bitblock128_t loMask = simd_and(tmpAns, simd128<128>::srli<(64)>(tmpAns));
     1786        bitblock128_t hiMask = simd128<128>::slli<(64)>(loMask);
     1787        return simd_or(loMask, hiMask);
    17541788}
    17551789
    17561790//The total number of operations is 4.0
    1757 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::umax(bitblock128_t arg1, bitblock128_t arg2)
    1758 {
    1759         bitblock128_t high_bit = simd128<16>::constant<(32768)>();
    1760         return simd_xor(simd128<16>::max(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
    1761 }
    1762 
    1763 //The total number of operations is 7.0
    1764 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::umax(bitblock128_t arg1, bitblock128_t arg2)
    1765 {
    1766         bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>();
    1767         return simd_xor(simd128<32>::max(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
    1768 }
    1769 
    1770 //The total number of operations is 20.0
    1771 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::umax(bitblock128_t arg1, bitblock128_t arg2)
    1772 {
    1773         bitblock128_t tmpAns = simd128<(32)>::umax(arg1, arg2);
    1774         bitblock128_t eqMask1 = simd128<64>::srli<(32)>(simd128<(32)>::eq(tmpAns, arg1));
    1775         bitblock128_t eqMask2 = simd128<64>::srli<(32)>(simd128<(32)>::eq(tmpAns, arg2));
    1776         return simd128<1>::ifh(simd128<64>::himask(), tmpAns, simd128<1>::ifh(eqMask1, simd128<1>::ifh(eqMask2, tmpAns, arg1), arg2));
    1777 }
    1778 
    1779 //The total number of operations is 43.6666666667
    1780 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::umax(bitblock128_t arg1, bitblock128_t arg2)
    1781 {
    1782         bitblock128_t tmpAns = simd128<(64)>::umax(arg1, arg2);
    1783         bitblock128_t eqMask1 = simd128<128>::srli<(64)>(simd128<(64)>::eq(tmpAns, arg1));
    1784         bitblock128_t eqMask2 = simd128<128>::srli<(64)>(simd128<(64)>::eq(tmpAns, arg2));
    1785         return simd128<1>::ifh(simd128<128>::himask(), tmpAns, simd128<1>::ifh(eqMask1, simd128<1>::ifh(eqMask2, tmpAns, arg1), arg2));
     1791template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::srai(bitblock128_t arg1)
     1792{
     1793        return ((sh == 0) ? arg1 : simd_or(simd_and(simd128<2>::himask(), arg1), simd128<2>::srli<1>(arg1)));
     1794}
     1795
     1796//The total number of operations is 10.0
     1797template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::srai(bitblock128_t arg1)
     1798{
     1799        bitblock128_t tmp = simd128<4>::srli<((sh >= 4) ? (3) : ((sh < 0) ? 0 : sh))>(arg1);
     1800        return simd_or(tmp, simd128<4>::sub(simd128<4>::constant<0>(), simd_and(simd128<4>::constant<(1<<((4-((sh >= 4) ? (3) : ((sh < 0) ? 0 : sh)))-1))>(), tmp)));
     1801}
     1802
     1803//The total number of operations is 5.0
     1804template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::srai(bitblock128_t arg1)
     1805{
     1806        bitblock128_t tmp = simd128<8>::srli<((sh >= 8) ? (7) : ((sh < 0) ? 0 : sh))>(arg1);
     1807        return simd_or(tmp, simd128<8>::sub(simd128<8>::constant<0>(), simd_and(simd128<8>::constant<(1<<((8-((sh >= 8) ? (7) : ((sh < 0) ? 0 : sh)))-1))>(), tmp)));
     1808}
     1809
     1810//The total number of operations is 1.0
     1811template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::srai(bitblock128_t arg1)
     1812{
     1813        return _mm_srai_epi16(arg1, (int32_t)(sh));
     1814}
     1815
     1816//The total number of operations is 1.0
     1817template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::srai(bitblock128_t arg1)
     1818{
     1819        return _mm_srai_epi32(arg1, (int32_t)(sh));
     1820}
     1821
     1822//The total number of operations is 4.5
     1823template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srai(bitblock128_t arg1)
     1824{
     1825        return simd_or(simd_and(simd128<64>::himask(), simd128<(32)>::srai<((sh < (32)) ? sh : (32))>(arg1)), ((sh <= (32)) ? simd128<64>::srli<sh>(arg1) : simd128<(32)>::srai<(sh-(32))>(simd128<64>::srli<(32)>(arg1))));
     1826}
     1827
     1828//The total number of operations is 11.0833333333
     1829template <> template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srai(bitblock128_t arg1)
     1830{
     1831        return simd_or(simd_and(simd128<128>::himask(), simd128<(64)>::srai<((sh < (64)) ? sh : (64))>(arg1)), ((sh <= (64)) ? simd128<128>::srli<sh>(arg1) : simd128<(64)>::srai<(sh-(64))>(simd128<128>::srli<(64)>(arg1))));
    17861832}
    17871833
     
    18411887}
    18421888
    1843 //The total number of operations is 2.0
    1844 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::eq(bitblock128_t arg1, bitblock128_t arg2)
    1845 {
    1846         return simd_not(simd_xor(arg1, arg2));
    1847 }
    1848 
    1849 //The total number of operations is 8.0
         //Equality of 2-bit fields, built from the 1-bit comparison.
         //Assumes simd128<2>::srli/slli shift within each 2-bit field (zero fill) -- TODO confirm.
    1850 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::eq(bitblock128_t arg1, bitblock128_t arg2)
    1851 {
                 // Per-bit equality of the operands.
    1852         bitblock128_t tmpAns = simd128<(1)>::eq(arg1, arg2);
                 // Low bit of each field := (low bit equal) AND (high bit equal).
    1853         bitblock128_t loMask = simd_and(tmpAns, simd128<2>::srli<(1)>(tmpAns));
                 // Copy that verdict into the high bit of the field.
    1854         bitblock128_t hiMask = simd128<2>::slli<(1)>(loMask);
                 // Both bits of a field now carry the same verdict.
    1855         return simd_or(loMask, hiMask);
    1856 }
    1857 
    1858 //The total number of operations is 9.0
         //Equality of 4-bit fields via two 8-bit comparisons: the high nibbles (selected with
         //simd128<8>::himask()) and the low nibbles (selected with simd128<8>::lomask()) of
         //both operands are compared separately with simd128<8>::eq. Each byte-wide result is
         //masked back to its own nibble and the two halves are ORed together.
    1859 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::eq(bitblock128_t arg1, bitblock128_t arg2)
    1860 {
    1861         return simd_or(simd_and(simd128<(8)>::himask(), simd128<(8)>::eq(simd_and(simd128<(8)>::himask(), arg1), simd_and(simd128<(8)>::himask(), arg2))), simd_and(simd128<(8)>::lomask(), simd128<(8)>::eq(simd_and(simd128<(8)>::lomask(), arg1), simd_and(simd128<(8)>::lomask(), arg2))));
    1862 }
    1863 
    1864 //The total number of operations is 1.0
         //Equality of 8-bit fields; maps directly onto the SSE2 intrinsic _mm_cmpeq_epi8
         //(a byte becomes 0xFF where the operands match, 0x00 otherwise).
    1865 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::eq(bitblock128_t arg1, bitblock128_t arg2)
    1866 {
    1867         return _mm_cmpeq_epi8(arg1, arg2);
    1868 }
    1869 
    1870 //The total number of operations is 1.0
         //Equality of 16-bit fields; maps directly onto the SSE2 intrinsic _mm_cmpeq_epi16
         //(a field becomes all ones where the operands match, all zeros otherwise).
    1871 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::eq(bitblock128_t arg1, bitblock128_t arg2)
    1872 {
    1873         return _mm_cmpeq_epi16(arg1, arg2);
    1874 }
    1875 
    1876 //The total number of operations is 1.0
         //Equality of 32-bit fields; maps directly onto the SSE2 intrinsic _mm_cmpeq_epi32
         //(a field becomes all ones where the operands match, all zeros otherwise).
    1877 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::eq(bitblock128_t arg1, bitblock128_t arg2)
    1878 {
    1879         return _mm_cmpeq_epi32(arg1, arg2);
    1880 }
    1881 
    1882 //The total number of operations is 5.0
         //Equality of 64-bit fields, combined from the two 32-bit comparisons of each field.
         //Assumes simd128<64>::srli/slli shift within each 64-bit field (zero fill) -- TODO confirm.
    1883 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::eq(bitblock128_t arg1, bitblock128_t arg2)
    1884 {
                 // Equality verdict for every 32-bit half.
    1885         bitblock128_t tmpAns = simd128<(32)>::eq(arg1, arg2);
                 // Low half := (low halves equal) AND (high halves equal).
    1886         bitblock128_t loMask = simd_and(tmpAns, simd128<64>::srli<(32)>(tmpAns));
                 // Replicate the combined verdict into the high half.
    1887         bitblock128_t hiMask = simd128<64>::slli<(32)>(loMask);
    1888         return simd_or(loMask, hiMask);
    1889 }
    1890 
    1891 //The total number of operations is 11.6666666667
         //Equality of the full 128-bit block, combined from the two 64-bit comparisons.
         //Assumes simd128<128>::srli/slli are zero-filling shifts of the whole block -- TODO confirm.
    1892 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::eq(bitblock128_t arg1, bitblock128_t arg2)
    1893 {
                 // Equality verdict for every 64-bit half.
    1894         bitblock128_t tmpAns = simd128<(64)>::eq(arg1, arg2);
                 // Low half := (low halves equal) AND (high halves equal).
    1895         bitblock128_t loMask = simd_and(tmpAns, simd128<128>::srli<(64)>(tmpAns));
                 // Replicate the combined verdict into the high half.
    1896         bitblock128_t hiMask = simd128<128>::slli<(64)>(loMask);
    1897         return simd_or(loMask, hiMask);
    1898 }
    1899 
    19001889//The total number of operations is 0
    19011890template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::himask()
     
    19251914template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::himask()
    19261915{
             // Mask with the upper 16 bits of every 32-bit field set (0xFFFF0000).
             // -65536 and 4294901760 denote the same 32-bit pattern; the second form avoids
             // passing a negative value as the template argument.
    1927         return simd128<32>::constant<-65536>();
     1916        return simd128<32>::constant<4294901760ULL>();
    19281917}
    19291918
     
    19311920template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::himask()
    19321921{
             // Mask with the upper 32 bits of every 64-bit field set. _mm_set_epi32 takes its
             // arguments from the most significant 32-bit lane to the least significant one,
             // so the lanes are (ones, zero, ones, zero).
             // (4294967296ULL)-1 is 0xFFFFFFFF, the unsigned spelling of the -1 used before.
    1933         return _mm_set_epi32((int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0));
     1922        return _mm_set_epi32((int32_t)(((4294967296ULL)-1)), (int32_t)(0), (int32_t)(((4294967296ULL)-1)), (int32_t)(0));
    19341923}
    19351924
     
    19371926template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::himask()
    19381927{
             // Mask with the upper 64 bits of the 128-bit block set. _mm_set_epi32 takes its
             // arguments from the most significant 32-bit lane to the least significant one,
             // so the lanes are (ones, ones, zero, zero).
             // (4294967296ULL)-1 is 0xFFFFFFFF, the unsigned spelling of the -1 used before.
    1939         return _mm_set_epi32((int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0));
     1928        return _mm_set_epi32((int32_t)(((4294967296ULL)-1)), (int32_t)(((4294967296ULL)-1)), (int32_t)(0), (int32_t)(0));
    19401929}
    19411930
     
    19921981}
    19931982
    1994 //The total number of operations is 7.33333333333
         //Absolute value of 2-bit signed fields using bit logic only.
         //Presumably simd128<1>::ifh(m, a, b) takes a where m is set and b elsewhere -- TODO confirm.
         //Under that reading the high bit of each field becomes (high AND NOT low): the
         //128-bit slli<1> moves the inverted low bit up into the high position. The low bit
         //is taken unchanged from arg1, so 11 (-1) becomes 01 and 10 (-2) stays 10.
    1995 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1)
    1996 {
    1997         return simd128<1>::ifh(simd128<2>::himask(), simd_and(arg1, simd128<128>::slli<1>(simd_not(arg1))), arg1);
    1998 }
    1999 
    2000 //The total number of operations is 19.0
         //Absolute value of 4-bit signed fields.
         //Presumably gt yields an all-ones field where arg1 > 0 and a zero field otherwise,
         //and ifh(m, a, b) takes a where m is set and b elsewhere -- TODO confirm.
    2001 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::abs(bitblock128_t arg1)
    2002 {
                 // Fields holding a strictly positive value.
    2003         bitblock128_t gtMask = simd128<4>::gt(arg1, simd128<4>::constant<0>());
                 // Positive fields keep arg1; in the remaining fields gtMask is 0, so the
                 // subtraction evaluates 0 - arg1.
    2004         return simd128<1>::ifh(gtMask, arg1, simd128<4>::sub(gtMask, arg1));
    2005 }
    2006 
    2007 //The total number of operations is 1.0
         //Absolute value of 8-bit signed fields; maps directly onto the SSSE3 intrinsic _mm_abs_epi8.
    2008 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::abs(bitblock128_t arg1)
    2009 {
    2010         return _mm_abs_epi8(arg1);
    2011 }
    2012 
    2013 //The total number of operations is 1.0
         //Absolute value of 16-bit signed fields; maps directly onto the SSSE3 intrinsic _mm_abs_epi16.
    2014 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::abs(bitblock128_t arg1)
    2015 {
    2016         return _mm_abs_epi16(arg1);
    2017 }
    2018 
    2019 //The total number of operations is 1.0
         //Absolute value of 32-bit signed fields; maps directly onto the SSSE3 intrinsic _mm_abs_epi32.
    2020 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::abs(bitblock128_t arg1)
    2021 {
    2022         return _mm_abs_epi32(arg1);
    2023 }
    2024 
    2025 //The total number of operations is 13.0
         //Absolute value of 64-bit signed fields, derived from the 32-bit abs.
         //Presumably ifh(m, a, b) takes a where m is set and b elsewhere -- TODO confirm.
    2026 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::abs(bitblock128_t arg1)
    2027 {
                 // Replace only the high 32 bits of each field by their 32-bit abs and compare
                 // with the original: equality is used as the "field is non-negative" test.
                 // NOTE(review): _mm_abs_epi32 leaves 0x80000000 unchanged, so a field whose high
                 // half is 0x80000000 also compares equal and looks to be returned unnegated even
                 // when its low half is non-zero -- confirm against the intended semantics.
    2028         bitblock128_t eqMask = simd128<64>::eq(simd128<1>::ifh(simd128<64>::himask(), simd128<(32)>::abs(arg1), arg1), arg1);
                 // Fields flagged by eqMask keep arg1; elsewhere eqMask is 0, giving 0 - arg1.
    2029         return simd128<1>::ifh(eqMask, arg1, simd128<64>::sub(eqMask, arg1));
    2030 }
    2031 
    2032 //The total number of operations is 40.0
         //Absolute value of the block as one 128-bit signed field, derived from the 64-bit abs.
         //Presumably ifh(m, a, b) takes a where m is set and b elsewhere -- TODO confirm.
    2033 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::abs(bitblock128_t arg1)
    2034 {
                 // Replace only the high 64 bits by their 64-bit abs and compare with the
                 // original: equality is used as the "value is non-negative" test.
                 // NOTE(review): if the 64-bit abs leaves the most negative value unchanged, a
                 // block whose high half is 0x8000000000000000 also compares equal and looks to
                 // be returned unnegated -- confirm against the intended semantics.
    2035         bitblock128_t eqMask = simd128<128>::eq(simd128<1>::ifh(simd128<128>::himask(), simd128<(64)>::abs(arg1), arg1), arg1);
                 // Where eqMask is set keep arg1; otherwise eqMask is 0, giving 0 - arg1.
    2036         return simd128<1>::ifh(eqMask, arg1, simd128<128>::sub(eqMask, arg1));
     1983//The total number of operations is 1.0
         //Unsigned maximum of 1-bit fields: the larger of two bits is their OR.
     1984template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umax(bitblock128_t arg1, bitblock128_t arg2)
     1985{
     1986        return simd_or(arg1, arg2);
     1987}
     1988
     1989//The total number of operations is 15.6666666667
         //Unsigned maximum of 2-bit fields using bit logic only.
         //Presumably ifh(m, a, b) takes a where m is set and b elsewhere -- TODO confirm.
         //High bit of each field: OR of the operands' high bits.
         //Low bit of each field: arg2's low bit when (NOT arg1.high OR arg2.high), ORed with
         //arg1's low bit when (arg1.high OR NOT arg2.high). The 128-bit srli<1> moves each
         //high-bit condition down into the low-bit position of its field.
     1990template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::umax(bitblock128_t arg1, bitblock128_t arg2)
     1991{
     1992        return simd128<1>::ifh(simd128<2>::himask(), simd_or(arg1, arg2), simd_or(simd_and(arg2, simd128<128>::srli<1>(simd_or(simd_not(arg1), arg2))), simd_and(arg1, simd128<128>::srli<1>(simd_or(arg1, simd_not(arg2))))));
     1993}
     1994
     1995//The total number of operations is 6.0
         //Unsigned maximum of 4-bit fields via two 8-bit maxima.
         //High nibble: taken (through simd128<8>::himask()) from the byte-wise maximum of the
         //unmodified operands. Low nibble: byte-wise maximum of the operands with their high
         //nibbles cleared by simd128<8>::lomask().
     1996template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::umax(bitblock128_t arg1, bitblock128_t arg2)
     1997{
     1998        return simd_or(simd_and(simd128<(8)>::himask(), simd128<(8)>::umax(arg1, arg2)), simd128<(8)>::umax(simd_and(simd128<(8)>::lomask(), arg1), simd_and(simd128<(8)>::lomask(), arg2)));
     1999}
     2000
     2001//The total number of operations is 1.0
         //Unsigned maximum of 8-bit fields; maps directly onto the SSE2 intrinsic _mm_max_epu8.
     2002template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::umax(bitblock128_t arg1, bitblock128_t arg2)
     2003{
     2004        return _mm_max_epu8(arg1, arg2);
     2005}
     2006
     2007//The total number of operations is 4.0
         //Unsigned maximum of 16-bit fields, expressed through simd128<16>::max.
     2008template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::umax(bitblock128_t arg1, bitblock128_t arg2)
     2009{
                 // 32768 == 0x8000: the top bit of every 16-bit field.
     2010        bitblock128_t high_bit = simd128<16>::constant<(32768)>();
                 // Flip the top bit of both operands, take the max of the flipped values, then
                 // flip the top bit of the result back.
     2011        return simd_xor(simd128<16>::max(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
     2012}
     2013
     2014//The total number of operations is 7.0
         //Unsigned maximum of 32-bit fields, expressed through simd128<32>::max.
     2015template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::umax(bitblock128_t arg1, bitblock128_t arg2)
     2016{
                 // 2147483648 == 0x80000000: the top bit of every 32-bit field.
     2017        bitblock128_t high_bit = simd128<32>::constant<(2147483648ULL)>();
                 // Flip the top bit of both operands, take the max of the flipped values, then
                 // flip the top bit of the result back.
     2018        return simd_xor(simd128<32>::max(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
     2019}
     2020
     2021//The total number of operations is 20.0
         //Unsigned maximum of 64-bit fields, derived from the 32-bit umax and eq.
         //Presumably ifh(m, a, b) takes a where m is set and b elsewhere -- TODO confirm.
     2022template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::umax(bitblock128_t arg1, bitblock128_t arg2)
     2023{
                 // Independent maximum of every 32-bit half.
     2024        bitblock128_t tmpAns = simd128<(32)>::umax(arg1, arg2);
                 // Moved into the low half of each field: "high half of arg1 / arg2 equals the
                 // winning high half".
     2025        bitblock128_t eqMask1 = simd128<64>::srli<(32)>(simd128<(32)>::eq(tmpAns, arg1));
     2026        bitblock128_t eqMask2 = simd128<64>::srli<(32)>(simd128<(32)>::eq(tmpAns, arg2));
                 // High half: always the 32-bit maximum. Low half: tmpAns when both high halves
                 // tie, arg1's low half when only arg1 holds the winning high half, else arg2's.
     2027        return simd128<1>::ifh(simd128<64>::himask(), tmpAns, simd128<1>::ifh(eqMask1, simd128<1>::ifh(eqMask2, tmpAns, arg1), arg2));
     2028}
     2029
     2030//The total number of operations is 43.6666666667
         //Unsigned maximum of the block as one 128-bit field, derived from the 64-bit umax and eq.
         //Presumably ifh(m, a, b) takes a where m is set and b elsewhere -- TODO confirm.
     2031template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::umax(bitblock128_t arg1, bitblock128_t arg2)
     2032{
                 // Independent maximum of each 64-bit half.
     2033        bitblock128_t tmpAns = simd128<(64)>::umax(arg1, arg2);
                 // Moved into the low half: "high half of arg1 / arg2 equals the winning high half".
     2034        bitblock128_t eqMask1 = simd128<128>::srli<(64)>(simd128<(64)>::eq(tmpAns, arg1));
     2035        bitblock128_t eqMask2 = simd128<128>::srli<(64)>(simd128<(64)>::eq(tmpAns, arg2));
                 // High half: always the 64-bit maximum. Low half: tmpAns when both high halves
                 // tie, arg1's low half when only arg1 holds the winning high half, else arg2's.
     2036        return simd128<1>::ifh(simd128<128>::himask(), tmpAns, simd128<1>::ifh(eqMask1, simd128<1>::ifh(eqMask2, tmpAns, arg1), arg2));
    20372037}
    20382038
     
    32043204IDISA_ALWAYS_INLINE bool bitblock128::all(bitblock128_t arg1)
    32053205{
             // Compares every byte of arg1 with the all-ones byte and tests the value returned
             // by hsimd128<8>::signmask against 65535 (0xFFFF, one bit per byte of the block).
             // Presumably signmask gathers the top bit of each byte -- TODO confirm.
             // constant<-1> and constant<255> name the same 8-bit pattern; 255 avoids passing
             // a negative value as the template argument.
    3206         return hsimd128<8>::signmask(simd128<8>::eq(arg1, simd128<8>::constant<-1>())) == 65535;
     3206        return hsimd128<8>::signmask(simd128<8>::eq(arg1, simd128<8>::constant<255>())) == 65535;
    32073207}
    32083208
     
    32133213}
    32143214
     3215//The total number of operations is 2.33333333333
         //Left shift of the whole block by the compile-time count sh; forwards to
         //simd128<128>::slli<sh>, i.e. the block is treated as a single 128-bit field.
     3216template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock128_t bitblock128::slli(bitblock128_t arg1)
     3217{
     3218        return simd128<128>::slli<sh>(arg1);
     3219}
     3220
    32153221//The total number of operations is 2.0
    32163222IDISA_ALWAYS_INLINE bool bitblock128::any(bitblock128_t arg1)
     
    32263232
    32273233//The total number of operations is 1.0
            //Stores the block arg1 to the address arg2 with the SSE2 intrinsic _mm_store_si128,
            //which requires the destination to be 16-byte aligned.
     3234IDISA_ALWAYS_INLINE void bitblock128::store_aligned(bitblock128_t arg1, bitblock128_t* arg2)
     3235{
     3236        _mm_store_si128((bitblock128_t*)(arg2), arg1);
     3237}
     3238
     3239//The total number of operations is 1.0
    32283240IDISA_ALWAYS_INLINE void bitblock128::store_unaligned(bitblock128_t arg1, bitblock128_t* arg2)
    32293241{
     
    32313243}
    32323244
    3233 //The total number of operations is 2.33333333333
         //Left shift of the whole block by the compile-time count sh; forwards to
         //simd128<128>::slli<sh>, i.e. the block is treated as a single 128-bit field.
    3234 template <uint16_t sh> IDISA_ALWAYS_INLINE bitblock128_t bitblock128::slli(bitblock128_t arg1)
    3235 {
    3236         return simd128<128>::slli<sh>(arg1);
    3237 }
    3238 
    3239 //The total number of operations is 1.0
         //Stores the block arg1 to the address arg2 with the SSE2 intrinsic _mm_store_si128,
         //which requires the destination to be 16-byte aligned.
    3240 IDISA_ALWAYS_INLINE void bitblock128::store_aligned(bitblock128_t arg1, bitblock128_t* arg2)
    3241 {
    3242         _mm_store_si128((bitblock128_t*)(arg2), arg1);
    3243 }
    3244 
    32453245#endif
Note: See TracChangeset for help on using the changeset viewer.