 Timestamp:
 Dec 12, 2011, 12:00:15 PM (8 years ago)
 Location:
 trunk
 Files:

 3 added
 2 deleted
 116 edited
Legend:
 Unmodified
 Added
 Removed

trunk/lib/idisa.hpp
r1580  r1767
       1
       2      /* Copyright (c) 2011, Hua Huang and Robert D. Cameron.
       3      Licensed under the Academic Free License 3.0.
       4      This file is generated by the IDISA+ generator;
       5      modifications should be made only by changing the
       6      generator configuration and data files. */
       7
1      8      #ifndef IDISA_HPP
2      9      #define IDISA_HPP
trunk/lib/idisa128.hpp
r1740  r1767
       1
       2      /* Copyright (c) 2011, Hua Huang and Robert D. Cameron.
       3      Licensed under the Academic Free License 3.0.
       4      This file is generated by the IDISA+ generator;
       5      modifications should be made only by changing the
       6      generator configuration and data files. */
       7
1      8      #ifndef IDISA128_HPP
2      9      #define IDISA128_HPP
trunk/lib/idisa256.hpp
r1548  r1767
       1
       2      /* Copyright (c) 2011, Hua Huang and Robert D. Cameron.
       3      Licensed under the Academic Free License 3.0.
       4      This file is generated by the IDISA+ generator;
       5      modifications should be made only by changing the
       6      generator configuration and data files. */
       7
1      8      #ifndef IDISA256_HPP
2      9      #define IDISA256_HPP
trunk/lib/idisa_cpp/idisa_avx.cpp
r1661 r1767 1 2 /* Copyright (c) 2011, Hua Huang and Robert D. Cameron. 3 Licensed under the Academic Free License 3.0. 4 This file is generated by the IDISA+ generator; 5 modifications should be made only by changing the 6 generator configuration and data files. */ 7 1 8 #ifndef IDISA_AVX_CPP 2 9 #define IDISA_AVX_CPP … … 101 108 IDISA_ALWAYS_INLINE bitblock256_t simd_andc(bitblock256_t arg1, bitblock256_t arg2); 102 109 IDISA_ALWAYS_INLINE bitblock256_t simd_or(bitblock256_t arg1, bitblock256_t arg2); 110 IDISA_ALWAYS_INLINE bitblock256_t simd_and(bitblock256_t arg1, bitblock256_t arg2); 103 111 IDISA_ALWAYS_INLINE bitblock256_t simd_xor(bitblock256_t arg1, bitblock256_t arg2); 104 IDISA_ALWAYS_INLINE bitblock256_t simd_and(bitblock256_t arg1, bitblock256_t arg2);105 112 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::max(bitblock256_t arg1, bitblock256_t arg2); 106 113 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::max(bitblock256_t arg1, bitblock256_t arg2); … … 537 544 #define avx_general_combine256(x, y) \ 538 545 (_mm256_insertf128_ps(_mm256_castps128_ps256((__m128) y), (__m128) x, 1)) 539 //The total number of operations is 2 546 //The total number of operations is 2.0 540 547 IDISA_ALWAYS_INLINE bitblock256_t simd_nor(bitblock256_t arg1, bitblock256_t arg2) 541 548 { … … 543 550 } 544 551 545 //The total number of operations is 1 552 //The total number of operations is 1.0 546 553 IDISA_ALWAYS_INLINE bitblock256_t simd_not(bitblock256_t arg1) 547 554 { … … 549 556 } 550 557 551 //The total number of operations is 1 558 //The total number of operations is 1.0 552 559 IDISA_ALWAYS_INLINE bitblock256_t simd_andc(bitblock256_t arg1, bitblock256_t arg2) 553 560 { … … 555 562 } 556 563 557 //The total number of operations is 1 564 //The total number of operations is 1.0 558 565 IDISA_ALWAYS_INLINE bitblock256_t simd_or(bitblock256_t arg1, bitblock256_t arg2) 559 566 { … … 561 568 } 562 569 563 //The total number of operations is 1 570 //The 
total number of operations is 1.0 571 IDISA_ALWAYS_INLINE bitblock256_t simd_and(bitblock256_t arg1, bitblock256_t arg2) 572 { 573 return _mm256_and_ps(arg1, arg2); 574 } 575 576 //The total number of operations is 1.0 564 577 IDISA_ALWAYS_INLINE bitblock256_t simd_xor(bitblock256_t arg1, bitblock256_t arg2) 565 578 { … … 567 580 } 568 581 569 //The total number of operations is 1 570 IDISA_ALWAYS_INLINE bitblock256_t simd_and(bitblock256_t arg1, bitblock256_t arg2) 571 { 572 return _mm256_and_ps(arg1, arg2); 573 } 574 575 //The total number of operations is 1 582 //The total number of operations is 1.0 576 583 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::max(bitblock256_t arg1, bitblock256_t arg2) 577 584 { … … 579 586 } 580 587 581 //The total number of operations is 2 9588 //The total number of operations is 25.0 582 589 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::max(bitblock256_t arg1, bitblock256_t arg2) 583 590 { … … 589 596 } 590 597 591 //The total number of operations is 23598 //The total number of operations is 17.0 592 599 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::max(bitblock256_t arg1, bitblock256_t arg2) 593 600 { … … 596 603 } 597 604 598 //The total number of operations is 8605 //The total number of operations is 5.0 599 606 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::max(bitblock256_t arg1, bitblock256_t arg2) 600 607 { … … 602 609 } 603 610 604 //The total number of operations is 8611 //The total number of operations is 5.0 605 612 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::max(bitblock256_t arg1, bitblock256_t arg2) 606 613 { … … 608 615 } 609 616 610 //The total number of operations is 8617 //The total number of operations is 5.0 611 618 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::max(bitblock256_t arg1, bitblock256_t arg2) 612 619 { … … 614 621 } 615 622 616 //The total number of operations is 11623 //The total number of operations is 8.0 617 624 template 
<> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::max(bitblock256_t arg1, bitblock256_t arg2) 618 625 { … … 620 627 } 621 628 622 //The total number of operations is 88629 //The total number of operations is 54.6666666667 623 630 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::max(bitblock256_t arg1, bitblock256_t arg2) 624 631 { … … 630 637 } 631 638 632 //The total number of operations is 352639 //The total number of operations is 186.666666667 633 640 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::max(bitblock256_t arg1, bitblock256_t arg2) 634 641 { … … 640 647 } 641 648 642 //The total number of operations is 1 649 //The total number of operations is 1.0 643 650 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::mult(bitblock256_t arg1, bitblock256_t arg2) 644 651 { … … 646 653 } 647 654 648 //The total number of operations is 95655 //The total number of operations is 43.0 649 656 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::mult(bitblock256_t arg1, bitblock256_t arg2) 650 657 { … … 654 661 } 655 662 656 //The total number of operations is 104663 //The total number of operations is 74.0 657 664 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::mult(bitblock256_t arg1, bitblock256_t arg2) 658 665 { … … 663 670 } 664 671 665 //The total number of operations is 39672 //The total number of operations is 27.0 666 673 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::mult(bitblock256_t arg1, bitblock256_t arg2) 667 674 { … … 672 679 } 673 680 674 //The total number of operations is 8681 //The total number of operations is 5.0 675 682 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::mult(bitblock256_t arg1, bitblock256_t arg2) 676 683 { … … 678 685 } 679 686 680 //The total number of operations is 8687 //The total number of operations is 5.0 681 688 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::mult(bitblock256_t arg1, bitblock256_t arg2) 682 689 { … … 684 691 } 685 692 686 //The 
total number of operations is 66693 //The total number of operations is 43.0 687 694 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::mult(bitblock256_t arg1, bitblock256_t arg2) 688 695 { … … 698 705 } 699 706 700 //The total number of operations is 877707 //The total number of operations is 496.0 701 708 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::mult(bitblock256_t arg1, bitblock256_t arg2) 702 709 { … … 712 719 } 713 720 714 //The total number of operations is 5001721 //The total number of operations is 2711.33333333 715 722 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::mult(bitblock256_t arg1, bitblock256_t arg2) 716 723 { … … 726 733 } 727 734 728 //The total number of operations is 1 735 //The total number of operations is 1.0 729 736 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::gt(bitblock256_t arg1, bitblock256_t arg2) 730 737 { … … 732 739 } 733 740 734 //The total number of operations is 30741 //The total number of operations is 24.0 735 742 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::gt(bitblock256_t arg1, bitblock256_t arg2) 736 743 { … … 742 749 } 743 750 744 //The total number of operations is 2 8751 //The total number of operations is 22.0 745 752 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::gt(bitblock256_t arg1, bitblock256_t arg2) 746 753 { … … 749 756 } 750 757 751 //The total number of operations is 8758 //The total number of operations is 5.0 752 759 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::gt(bitblock256_t arg1, bitblock256_t arg2) 753 760 { … … 755 762 } 756 763 757 //The total number of operations is 8764 //The total number of operations is 5.0 758 765 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::gt(bitblock256_t arg1, bitblock256_t arg2) 759 766 { … … 761 768 } 762 769 763 //The total number of operations is 8770 //The total number of operations is 5.0 764 771 template <> IDISA_ALWAYS_INLINE bitblock256_t 
simd256<32>::gt(bitblock256_t arg1, bitblock256_t arg2) 765 772 { … … 767 774 } 768 775 769 //The total number of operations is 8776 //The total number of operations is 5.0 770 777 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::gt(bitblock256_t arg1, bitblock256_t arg2) 771 778 { … … 773 780 } 774 781 775 //The total number of operations is 151782 //The total number of operations is 65.0 776 783 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::gt(bitblock256_t arg1, bitblock256_t arg2) 777 784 { … … 783 790 } 784 791 785 //The total number of operations is 646792 //The total number of operations is 239.166666667 786 793 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::gt(bitblock256_t arg1, bitblock256_t arg2) 787 794 { … … 793 800 } 794 801 795 //The total number of operations is 978802 //The total number of operations is 696.0 796 803 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::umult(bitblock256_t arg1, bitblock256_t arg2) 797 804 { … … 802 809 } 803 810 804 //The total number of operations is 476811 //The total number of operations is 338.0 805 812 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::umult(bitblock256_t arg1, bitblock256_t arg2) 806 813 { … … 811 818 } 812 819 813 //The total number of operations is 225820 //The total number of operations is 159.0 814 821 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::umult(bitblock256_t arg1, bitblock256_t arg2) 815 822 { … … 820 827 } 821 828 822 //The total number of operations is 101829 //The total number of operations is 71.0 823 830 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::umult(bitblock256_t arg1, bitblock256_t arg2) 824 831 { … … 829 836 } 830 837 831 //The total number of operations is 39838 //The total number of operations is 27.0 832 839 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::umult(bitblock256_t arg1, bitblock256_t arg2) 833 840 { … … 838 845 } 839 846 840 //The total number of operations is 8847 //The 
total number of operations is 5.0 841 848 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::umult(bitblock256_t arg1, bitblock256_t arg2) 842 849 { … … 844 851 } 845 852 846 //The total number of operations is 237853 //The total number of operations is 136.0 847 854 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::umult(bitblock256_t arg1, bitblock256_t arg2) 848 855 { … … 862 869 } 863 870 864 //The total number of operations is 1521871 //The total number of operations is 833.666666667 865 872 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::umult(bitblock256_t arg1, bitblock256_t arg2) 866 873 { … … 880 887 } 881 888 882 //The total number of operations is 1 889 //The total number of operations is 1.0 883 890 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::ult(bitblock256_t arg1, bitblock256_t arg2) 884 891 { … … 886 893 } 887 894 888 //The total number of operations is 2 9895 //The total number of operations is 23.0 889 896 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::ult(bitblock256_t arg1, bitblock256_t arg2) 890 897 { … … 895 902 } 896 903 897 //The total number of operations is 48904 //The total number of operations is 36.0 898 905 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::ult(bitblock256_t arg1, bitblock256_t arg2) 899 906 { … … 901 908 } 902 909 903 //The total number of operations is 21910 //The total number of operations is 15.0 904 911 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::ult(bitblock256_t arg1, bitblock256_t arg2) 905 912 { … … 908 915 } 909 916 910 //The total number of operations is 21917 //The total number of operations is 15.0 911 918 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::ult(bitblock256_t arg1, bitblock256_t arg2) 912 919 { … … 915 922 } 916 923 917 //The total number of operations is 21924 //The total number of operations is 15.0 918 925 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::ult(bitblock256_t arg1, bitblock256_t 
arg2) 919 926 { 920 bitblock256_t high_bit = simd256<32>::constant<(2147483648UL )>();927 bitblock256_t high_bit = simd256<32>::constant<(2147483648ULL)>(); 921 928 return simd256<32>::lt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)); 922 929 } 923 930 924 //The total number of operations is 21931 //The total number of operations is 15.0 925 932 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::ult(bitblock256_t arg1, bitblock256_t arg2) 926 933 { 927 bitblock256_t high_bit = simd256<64>::constant<(9223372036854775808UL )>();934 bitblock256_t high_bit = simd256<64>::constant<(9223372036854775808ULL)>(); 928 935 return simd256<64>::lt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)); 929 936 } 930 937 931 //The total number of operations is 154938 //The total number of operations is 68.0 932 939 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::ult(bitblock256_t arg1, bitblock256_t arg2) 933 940 { … … 938 945 } 939 946 940 //The total number of operations is 496947 //The total number of operations is 182.166666667 941 948 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::ult(bitblock256_t arg1, bitblock256_t arg2) 942 949 { 943 return simd_and(simd256<256>::srai<(255)>(simd_or(simd_and(simd_not(arg1), arg2), simd_and(simd_not(simd_xor(arg1, arg2)), simd256<256>::sub(arg1, arg2)))), simd_not(simd256<256>::eq(arg1, arg2))); 944 } 945 946 //The total number of operations is 1 950 bitblock256_t tmpAns = simd256<(128)>::ult(arg1, arg2); 951 bitblock256_t mask = simd_and(tmpAns, simd256<256>::srli<(128)>(simd256<(128)>::eq(arg1, arg2))); 952 mask = simd_or(mask, simd256<256>::slli<(128)>(mask)); 953 return simd_or(simd256<256>::srai<(128)>(tmpAns), mask); 954 } 955 956 //The total number of operations is 1.0 947 957 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::lt(bitblock256_t arg1, bitblock256_t arg2) 948 958 { … … 950 960 } 951 961 952 //The total number of operations is 30962 //The total number of operations is 24.0 
953 963 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::lt(bitblock256_t arg1, bitblock256_t arg2) 954 964 { … … 960 970 } 961 971 962 //The total number of operations is 50972 //The total number of operations is 38.0 963 973 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::lt(bitblock256_t arg1, bitblock256_t arg2) 964 974 { … … 967 977 } 968 978 969 //The total number of operations is 1 9979 //The total number of operations is 13.0 970 980 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::lt(bitblock256_t arg1, bitblock256_t arg2) 971 981 { … … 973 983 } 974 984 975 //The total number of operations is 1 9985 //The total number of operations is 13.0 976 986 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::lt(bitblock256_t arg1, bitblock256_t arg2) 977 987 { … … 979 989 } 980 990 981 //The total number of operations is 1 9991 //The total number of operations is 13.0 982 992 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::lt(bitblock256_t arg1, bitblock256_t arg2) 983 993 { … … 985 995 } 986 996 987 //The total number of operations is 1 9997 //The total number of operations is 13.0 988 998 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::lt(bitblock256_t arg1, bitblock256_t arg2) 989 999 { … … 991 1001 } 992 1002 993 //The total number of operations is 1731003 //The total number of operations is 81.0 994 1004 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::lt(bitblock256_t arg1, bitblock256_t arg2) 995 1005 { … … 1001 1011 } 1002 1012 1003 //The total number of operations is 6791013 //The total number of operations is 263.166666667 1004 1014 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::lt(bitblock256_t arg1, bitblock256_t arg2) 1005 1015 { … … 1011 1021 } 1012 1022 1013 //The total number of operations is 71023 //The total number of operations is 5.0 1014 1024 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::srli(bitblock256_t arg1) 1015 1025 { … … 
1017 1027 } 1018 1028 1019 //The total number of operations is 71029 //The total number of operations is 5.0 1020 1030 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::srli(bitblock256_t arg1) 1021 1031 { … … 1023 1033 } 1024 1034 1025 //The total number of operations is 71035 //The total number of operations is 5.0 1026 1036 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::srli(bitblock256_t arg1) 1027 1037 { … … 1029 1039 } 1030 1040 1031 //The total number of operations is 61041 //The total number of operations is 4.0 1032 1042 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::srli(bitblock256_t arg1) 1033 1043 { … … 1035 1045 } 1036 1046 1037 //The total number of operations is 61047 //The total number of operations is 4.0 1038 1048 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::srli(bitblock256_t arg1) 1039 1049 { … … 1041 1051 } 1042 1052 1043 //The total number of operations is 61053 //The total number of operations is 4.0 1044 1054 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::srli(bitblock256_t arg1) 1045 1055 { … … 1047 1057 } 1048 1058 1049 //The total number of operations is 191059 //The total number of operations is 8.33333333333 1050 1060 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::srli(bitblock256_t arg1) 1051 1061 { … … 1053 1063 } 1054 1064 1055 //The total number of operations is 411065 //The total number of operations is 14.5 1056 1066 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::srli(bitblock256_t arg1) 1057 1067 { … … 1059 1069 } 1060 1070 1061 //The total number of operations is 1 1071 //The total number of operations is 1.0 1062 1072 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::ctz(bitblock256_t arg1) 1063 1073 { … … 1065 1075 } 1066 1076 1067 //The total number of operations is 341077 
//The total number of operations is 27.0 1068 1078 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::ctz(bitblock256_t arg1) 1069 1079 { … … 1071 1081 } 1072 1082 1073 //The total number of operations is 521083 //The total number of operations is 36.0 1074 1084 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::ctz(bitblock256_t arg1) 1075 1085 { … … 1077 1087 } 1078 1088 1079 //The total number of operations is 561089 //The total number of operations is 38.0 1080 1090 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::ctz(bitblock256_t arg1) 1081 1091 { … … 1083 1093 } 1084 1094 1085 //The total number of operations is 711095 //The total number of operations is 48.0 1086 1096 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::ctz(bitblock256_t arg1) 1087 1097 { … … 1089 1099 } 1090 1100 1091 //The total number of operations is 861101 //The total number of operations is 58.0 1092 1102 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::ctz(bitblock256_t arg1) 1093 1103 { … … 1095 1105 } 1096 1106 1097 //The total number of operations is 641107 //The total number of operations is 44.0 1098 1108 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::ctz(bitblock256_t arg1) 1099 1109 { … … 1101 1111 } 1102 1112 1103 //The total number of operations is 1 641113 //The total number of operations is 101.0 1104 1114 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::ctz(bitblock256_t arg1) 1105 1115 { … … 1107 1117 } 1108 1118 1109 //The total number of operations is 3431119 //The total number of operations is 192.166666667 1110 1120 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::ctz(bitblock256_t arg1) 1111 1121 { … … 1113 1123 } 1114 1124 1115 //The total number of operations is 1 1125 //The total number of operations is 1.0 1116 1126 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::ugt(bitblock256_t arg1, bitblock256_t arg2) 1117 1127 { … … 1119 1129 } 1120 1130 1121 //The total number of operations 
is 2 91131 //The total number of operations is 23.0 1122 1132 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::ugt(bitblock256_t arg1, bitblock256_t arg2) 1123 1133 { … … 1128 1138 } 1129 1139 1130 //The total number of operations is 2 61140 //The total number of operations is 20.0 1131 1141 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::ugt(bitblock256_t arg1, bitblock256_t arg2) 1132 1142 { … … 1134 1144 } 1135 1145 1136 //The total number of operations is 101146 //The total number of operations is 7.0 1137 1147 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::ugt(bitblock256_t arg1, bitblock256_t arg2) 1138 1148 { … … 1141 1151 } 1142 1152 1143 //The total number of operations is 101153 //The total number of operations is 7.0 1144 1154 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::ugt(bitblock256_t arg1, bitblock256_t arg2) 1145 1155 { … … 1148 1158 } 1149 1159 1150 //The total number of operations is 101160 //The total number of operations is 7.0 1151 1161 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::ugt(bitblock256_t arg1, bitblock256_t arg2) 1152 1162 { 1153 bitblock256_t high_bit = simd256<32>::constant<(2147483648UL )>();1163 bitblock256_t high_bit = simd256<32>::constant<(2147483648ULL)>(); 1154 1164 return simd256<32>::gt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)); 1155 1165 } 1156 1166 1157 //The total number of operations is 101167 //The total number of operations is 7.0 1158 1168 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::ugt(bitblock256_t arg1, bitblock256_t arg2) 1159 1169 { 1160 bitblock256_t high_bit = simd256<64>::constant<(9223372036854775808UL )>();1170 bitblock256_t high_bit = simd256<64>::constant<(9223372036854775808ULL)>(); 1161 1171 return simd256<64>::gt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)); 1162 1172 } 1163 1173 1164 //The total number of operations is 1431174 //The total number of operations is 60.0 1165 1175 template <> IDISA_ALWAYS_INLINE 
bitblock256_t simd256<128>::ugt(bitblock256_t arg1, bitblock256_t arg2) 1166 1176 { … … 1171 1181 } 1172 1182 1173 //The total number of operations is 4951183 //The total number of operations is 174.166666667 1174 1184 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::ugt(bitblock256_t arg1, bitblock256_t arg2) 1175 1185 { … … 1180 1190 } 1181 1191 1182 //The total number of operations is 91192 //The total number of operations is 7.0 1183 1193 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::xor_hl(bitblock256_t arg1) 1184 1194 { … … 1186 1196 } 1187 1197 1188 //The total number of operations is 91198 //The total number of operations is 7.0 1189 1199 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::xor_hl(bitblock256_t arg1) 1190 1200 { … … 1192 1202 } 1193 1203 1194 //The total number of operations is 91204 //The total number of operations is 7.0 1195 1205 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::xor_hl(bitblock256_t arg1) 1196 1206 { … … 1198 1208 } 1199 1209 1200 //The total number of operations is 81210 //The total number of operations is 6.0 1201 1211 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::xor_hl(bitblock256_t arg1) 1202 1212 { … … 1204 1214 } 1205 1215 1206 //The total number of operations is 81216 //The total number of operations is 6.0 1207 1217 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::xor_hl(bitblock256_t arg1) 1208 1218 { … … 1210 1220 } 1211 1221 1212 //The total number of operations is 81222 //The total number of operations is 6.0 1213 1223 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::xor_hl(bitblock256_t arg1) 1214 1224 { … … 1216 1226 } 1217 1227 1218 //The total number of operations is 211228 //The total number of operations is 10.3333333333 1219 1229 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::xor_hl(bitblock256_t arg1) 1220 1230 { … … 1222 1232 } 1223 1233 1224 //The total number of operations is 431234 //The total number of operations 
is 16.5 1225 1235 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::xor_hl(bitblock256_t arg1) 1226 1236 { … … 1234 1244 } 1235 1245 1236 //The total number of operations is 1 51246 //The total number of operations is 10.0 1237 1247 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::popcount(bitblock256_t arg1) 1238 1248 { … … 1240 1250 } 1241 1251 1242 //The total number of operations is 311252 //The total number of operations is 21.0 1243 1253 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::popcount(bitblock256_t arg1) 1244 1254 { … … 1246 1256 } 1247 1257 1248 //The total number of operations is 471258 //The total number of operations is 32.0 1249 1259 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::popcount(bitblock256_t arg1) 1250 1260 { … … 1252 1262 } 1253 1263 1254 //The total number of operations is 621264 //The total number of operations is 42.0 1255 1265 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::popcount(bitblock256_t arg1) 1256 1266 { … … 1258 1268 } 1259 1269 1260 //The total number of operations is 771270 //The total number of operations is 52.0 1261 1271 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::popcount(bitblock256_t arg1) 1262 1272 { … … 1264 1274 } 1265 1275 1266 //The total number of operations is 551276 //The total number of operations is 38.0 1267 1277 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::popcount(bitblock256_t arg1) 1268 1278 { … … 1271 1281 } 1272 1282 1273 //The total number of operations is 1191283 //The total number of operations is 73.6666666667 1274 1284 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::popcount(bitblock256_t arg1) 1275 1285 { … … 1277 1287 } 1278 1288 1279 //The total number of operations is 2051289 //The total number of operations is 115.5 1280 1290 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::popcount(bitblock256_t arg1) 1281 1291 { … … 1284 1294 } 1285 1295 1286 //The total number of operations is 1 
81296 //The total number of operations is 16.0 1287 1297 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::neg(bitblock256_t arg1) 1288 1298 { … … 1290 1300 } 1291 1301 1292 //The total number of operations is 201302 //The total number of operations is 14.0 1293 1303 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::neg(bitblock256_t arg1) 1294 1304 { … … 1296 1306 } 1297 1307 1298 //The total number of operations is 81308 //The total number of operations is 5.0 1299 1309 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::neg(bitblock256_t arg1) 1300 1310 { … … 1302 1312 } 1303 1313 1304 //The total number of operations is 81314 //The total number of operations is 5.0 1305 1315 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::neg(bitblock256_t arg1) 1306 1316 { … … 1308 1318 } 1309 1319 1310 //The total number of operations is 81320 //The total number of operations is 5.0 1311 1321 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::neg(bitblock256_t arg1) 1312 1322 { … … 1314 1324 } 1315 1325 1316 //The total number of operations is 81326 //The total number of operations is 5.0 1317 1327 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::neg(bitblock256_t arg1) 1318 1328 { … … 1320 1330 } 1321 1331 1322 //The total number of operations is 441332 //The total number of operations is 26.3333333333 1323 1333 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::neg(bitblock256_t arg1) 1324 1334 { … … 1326 1336 } 1327 1337 1328 //The total number of operations is 1371338 //The total number of operations is 75.6666666667 1329 1339 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::neg(bitblock256_t arg1) 1330 1340 { … … 1332 1342 } 1333 1343 1334 //The total number of operations is 71344 //The total number of operations is 5.0 1335 1345 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::slli(bitblock256_t arg1) 1336 1346 { … … 1338 1348 } 1339 1349 1340 //The total number of 
operations is 71350 //The total number of operations is 5.0 1341 1351 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::slli(bitblock256_t arg1) 1342 1352 { … … 1344 1354 } 1345 1355 1346 //The total number of operations is 71356 //The total number of operations is 5.0 1347 1357 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::slli(bitblock256_t arg1) 1348 1358 { … … 1350 1360 } 1351 1361 1352 //The total number of operations is 61362 //The total number of operations is 4.0 1353 1363 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::slli(bitblock256_t arg1) 1354 1364 { … … 1356 1366 } 1357 1367 1358 //The total number of operations is 61368 //The total number of operations is 4.0 1359 1369 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::slli(bitblock256_t arg1) 1360 1370 { … … 1362 1372 } 1363 1373 1364 //The total number of operations is 61374 //The total number of operations is 4.0 1365 1375 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::slli(bitblock256_t arg1) 1366 1376 { … … 1368 1378 } 1369 1379 1370 //The total number of operations is 191380 //The total number of operations is 8.33333333333 1371 1381 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::slli(bitblock256_t arg1) 1372 1382 { … … 1374 1384 } 1375 1385 1376 //The total number of operations is 401386 //The total number of operations is 14.0 1377 1387 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::slli(bitblock256_t arg1) 1378 1388 { … … 1380 1390 } 1381 1391 1382 //The total number of operations is 3 1392 //The total number of operations is 3.0 1383 1393 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::ifh(bitblock256_t arg1, bitblock256_t arg2, bitblock256_t arg3) 1384 1394 { … … 1386 1396 } 1387 1397 1388 //The total number of operations is 1 31398 //The total 
number of operations is 11.0 1389 1399 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::ifh(bitblock256_t arg1, bitblock256_t arg2, bitblock256_t arg3) 1390 1400 { … … 1392 1402 } 1393 1403 1394 //The total number of operations is 231404 //The total number of operations is 19.0 1395 1405 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::ifh(bitblock256_t arg1, bitblock256_t arg2, bitblock256_t arg3) 1396 1406 { … … 1398 1408 } 1399 1409 1400 //The total number of operations is 111410 //The total number of operations is 8.0 1401 1411 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::ifh(bitblock256_t arg1, bitblock256_t arg2, bitblock256_t arg3) 1402 1412 { … … 1404 1414 } 1405 1415 1406 //The total number of operations is 111416 //The total number of operations is 8.0 1407 1417 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::ifh(bitblock256_t arg1, bitblock256_t arg2, bitblock256_t arg3) 1408 1418 { … … 1410 1420 } 1411 1421 1412 //The total number of operations is 111422 //The total number of operations is 8.0 1413 1423 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::ifh(bitblock256_t arg1, bitblock256_t arg2, bitblock256_t arg3) 1414 1424 { … … 1416 1426 } 1417 1427 1418 //The total number of operations is 1 1428 //The total number of operations is 1.0 1419 1429 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::ifh(bitblock256_t arg1, bitblock256_t arg2, bitblock256_t arg3) 1420 1430 { … … 1422 1432 } 1423 1433 1424 //The total number of operations is 231434 //The total number of operations is 12.3333333333 1425 1435 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::ifh(bitblock256_t arg1, bitblock256_t arg2, bitblock256_t arg3) 1426 1436 { … … 1428 1438 } 1429 1439 1430 //The total number of operations is 671440 //The total number of operations is 29.8333333333 1431 1441 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::ifh(bitblock256_t arg1, bitblock256_t arg2, bitblock256_t arg3) 1432 
1442 { … … 1434 1444 } 1435 1445 1436 //The total number of operations is 1 1446 //The total number of operations is 1.0 1437 1447 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::sub(bitblock256_t arg1, bitblock256_t arg2) 1438 1448 { … … 1440 1450 } 1441 1451 1442 //The total number of operations is 1 81452 //The total number of operations is 16.0 1443 1453 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::sub(bitblock256_t arg1, bitblock256_t arg2) 1444 1454 { … … 1450 1460 } 1451 1461 1452 //The total number of operations is 201462 //The total number of operations is 14.0 1453 1463 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::sub(bitblock256_t arg1, bitblock256_t arg2) 1454 1464 { … … 1456 1466 } 1457 1467 1458 //The total number of operations is 81468 //The total number of operations is 5.0 1459 1469 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::sub(bitblock256_t arg1, bitblock256_t arg2) 1460 1470 { … … 1462 1472 } 1463 1473 1464 //The total number of operations is 81474 //The total number of operations is 5.0 1465 1475 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::sub(bitblock256_t arg1, bitblock256_t arg2) 1466 1476 { … … 1468 1478 } 1469 1479 1470 //The total number of operations is 81480 //The total number of operations is 5.0 1471 1481 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::sub(bitblock256_t arg1, bitblock256_t arg2) 1472 1482 { … … 1474 1484 } 1475 1485 1476 //The total number of operations is 81486 //The total number of operations is 5.0 1477 1487 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::sub(bitblock256_t arg1, bitblock256_t arg2) 1478 1488 { … … 1480 1490 } 1481 1491 1482 //The total number of operations is 441492 //The total number of operations is 26.3333333333 1483 1493 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::sub(bitblock256_t arg1, bitblock256_t arg2) 1484 1494 { 1485 bitblock256_t ans = simd256<(64)>::sub(arg1, arg2); 1486 
bitblock256_t borrowMask = simd_or(simd_andc(arg2, arg1), simd_and(simd_not(simd_xor(arg1, arg2)), ans)); 1487 bitblock256_t loMask = simd256<128>::lomask(); 1488 bitblock256_t borrow = simd256<128>::slli<1>(simd_and(borrowMask, loMask)); 1489 return simd256<1>::ifh(loMask, ans, simd256<(64)>::sub(ans, borrow)); 1490 } 1491 1492 //The total number of operations is 137 1495 bitblock256_t partial = simd256<(64)>::sub(arg1, arg2); 1496 bitblock256_t borrowMask = simd_or(simd_andc(arg2, arg1), simd_andc(partial, simd_xor(arg1, arg2))); 1497 bitblock256_t borrow = simd256<128>::slli<(64)>(simd256<(64)>::srli<(63)>(borrowMask)); 1498 return simd256<(64)>::sub(partial, borrow); 1499 } 1500 1501 //The total number of operations is 75.6666666667 1493 1502 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::sub(bitblock256_t arg1, bitblock256_t arg2) 1494 1503 { … … 1500 1509 } 1501 1510 1502 //The total number of operations is 1 51511 //The total number of operations is 10.0 1503 1512 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::add_hl(bitblock256_t arg1) 1504 1513 { … … 1506 1515 } 1507 1516 1508 //The total number of operations is 1 61517 //The total number of operations is 11.0 1509 1518 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::add_hl(bitblock256_t arg1) 1510 1519 { … … 1512 1521 } 1513 1522 1514 //The total number of operations is 1 61523 //The total number of operations is 11.0 1515 1524 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::add_hl(bitblock256_t arg1) 1516 1525 { … … 1518 1527 } 1519 1528 1520 //The total number of operations is 1 51529 //The total number of operations is 10.0 1521 1530 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::add_hl(bitblock256_t arg1) 1522 1531 { … … 1524 1533 } 1525 1534 1526 //The total number of operations is 1 51535 //The total number of operations is 10.0 1527 1536 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::add_hl(bitblock256_t arg1) 1528 1537 { … … 1530 
1539 } 1531 1540 1532 //The total number of operations is 1 51541 //The total number of operations is 10.0 1533 1542 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::add_hl(bitblock256_t arg1) 1534 1543 { … … 1536 1545 } 1537 1546 1538 //The total number of operations is 641547 //The total number of operations is 35.6666666667 1539 1548 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::add_hl(bitblock256_t arg1) 1540 1549 { … … 1542 1551 } 1543 1552 1544 //The total number of operations is 1791553 //The total number of operations is 91.1666666667 1545 1554 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::add_hl(bitblock256_t arg1) 1546 1555 { … … 1602 1611 } 1603 1612 1604 //The total number of operations is 1 1613 //The total number of operations is 1.0 1605 1614 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::min(bitblock256_t arg1, bitblock256_t arg2) 1606 1615 { … … 1608 1617 } 1609 1618 1610 //The total number of operations is 2 91619 //The total number of operations is 25.0 1611 1620 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::min(bitblock256_t arg1, bitblock256_t arg2) 1612 1621 { … … 1618 1627 } 1619 1628 1620 //The total number of operations is 231629 //The total number of operations is 17.0 1621 1630 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::min(bitblock256_t arg1, bitblock256_t arg2) 1622 1631 { … … 1625 1634 } 1626 1635 1627 //The total number of operations is 81636 //The total number of operations is 5.0 1628 1637 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::min(bitblock256_t arg1, bitblock256_t arg2) 1629 1638 { … … 1631 1640 } 1632 1641 1633 //The total number of operations is 81642 //The total number of operations is 5.0 1634 1643 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::min(bitblock256_t arg1, bitblock256_t arg2) 1635 1644 { … … 1637 1646 } 1638 1647 1639 //The total number of operations is 81648 //The total number of operations is 5.0 1640 1649 
template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::min(bitblock256_t arg1, bitblock256_t arg2) 1641 1650 { … … 1643 1652 } 1644 1653 1645 //The total number of operations is 111654 //The total number of operations is 8.0 1646 1655 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::min(bitblock256_t arg1, bitblock256_t arg2) 1647 1656 { … … 1649 1658 } 1650 1659 1651 //The total number of operations is 881660 //The total number of operations is 54.6666666667 1652 1661 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::min(bitblock256_t arg1, bitblock256_t arg2) 1653 1662 { … … 1659 1668 } 1660 1669 1661 //The total number of operations is 3521670 //The total number of operations is 186.666666667 1662 1671 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::min(bitblock256_t arg1, bitblock256_t arg2) 1663 1672 { … … 1717 1726 } 1718 1727 1719 //The total number of operations is 1 1728 //The total number of operations is 1.0 1720 1729 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::umin(bitblock256_t arg1, bitblock256_t arg2) 1721 1730 { … … 1723 1732 } 1724 1733 1725 //The total number of operations is 2 81734 //The total number of operations is 24.0 1726 1735 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::umin(bitblock256_t arg1, bitblock256_t arg2) 1727 1736 { … … 1732 1741 } 1733 1742 1734 //The total number of operations is 201743 //The total number of operations is 14.0 1735 1744 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::umin(bitblock256_t arg1, bitblock256_t arg2) 1736 1745 { … … 1738 1747 } 1739 1748 1740 //The total number of operations is 81749 //The total number of operations is 5.0 1741 1750 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::umin(bitblock256_t arg1, bitblock256_t arg2) 1742 1751 { … … 1744 1753 } 1745 1754 1746 //The total number of operations is 81755 //The total number of operations is 5.0 1747 1756 template <> IDISA_ALWAYS_INLINE bitblock256_t 
simd256<16>::umin(bitblock256_t arg1, bitblock256_t arg2) 1748 1757 { … … 1750 1759 } 1751 1760 1752 //The total number of operations is 81761 //The total number of operations is 5.0 1753 1762 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::umin(bitblock256_t arg1, bitblock256_t arg2) 1754 1763 { … … 1756 1765 } 1757 1766 1758 //The total number of operations is 1 41767 //The total number of operations is 11.0 1759 1768 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::umin(bitblock256_t arg1, bitblock256_t arg2) 1760 1769 { 1761 bitblock256_t high_bit = simd256<64>::constant<(9223372036854775808UL )>();1770 bitblock256_t high_bit = simd256<64>::constant<(9223372036854775808ULL)>(); 1762 1771 return simd_xor(simd256<64>::min(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit); 1763 1772 } 1764 1773 1765 //The total number of operations is 771774 //The total number of operations is 46.6666666667 1766 1775 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::umin(bitblock256_t arg1, bitblock256_t arg2) 1767 1776 { … … 1772 1781 } 1773 1782 1774 //The total number of operations is 2641783 //The total number of operations is 132.0 1775 1784 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::umin(bitblock256_t arg1, bitblock256_t arg2) 1776 1785 { … … 1781 1790 } 1782 1791 1783 //The total number of operations is 451792 //The total number of operations is 19.0 1784 1793 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::abs(bitblock256_t arg1) 1785 1794 { … … 1787 1796 } 1788 1797 1789 //The total number of operations is 511798 //The total number of operations is 39.0 1790 1799 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::abs(bitblock256_t arg1) 1791 1800 { … … 1794 1803 } 1795 1804 1796 //The total number of operations is 61805 //The total number of operations is 4.0 1797 1806 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::abs(bitblock256_t arg1) 1798 1807 { … … 1800 1809 } 1801 1810 1802 
//The total number of operations is 61811 //The total number of operations is 4.0 1803 1812 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::abs(bitblock256_t arg1) 1804 1813 { … … 1806 1815 } 1807 1816 1808 //The total number of operations is 61817 //The total number of operations is 4.0 1809 1818 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::abs(bitblock256_t arg1) 1810 1819 { … … 1812 1821 } 1813 1822 1814 //The total number of operations is 1 91823 //The total number of operations is 13.0 1815 1824 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::abs(bitblock256_t arg1) 1816 1825 { … … 1819 1828 } 1820 1829 1821 //The total number of operations is 1171830 //The total number of operations is 69.0 1822 1831 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::abs(bitblock256_t arg1) 1823 1832 { … … 1826 1835 } 1827 1836 1828 //The total number of operations is 3911837 //The total number of operations is 204.833333333 1829 1838 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::abs(bitblock256_t arg1) 1830 1839 { … … 1833 1842 } 1834 1843 1835 //The total number of operations is 2 1844 //The total number of operations is 2.0 1836 1845 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::eq(bitblock256_t arg1, bitblock256_t arg2) 1837 1846 { … … 1839 1848 } 1840 1849 1841 //The total number of operations is 1 81850 //The total number of operations is 14.0 1842 1851 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::eq(bitblock256_t arg1, bitblock256_t arg2) 1843 1852 { … … 1848 1857 } 1849 1858 1850 //The total number of operations is 231859 //The total number of operations is 17.0 1851 1860 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::eq(bitblock256_t arg1, bitblock256_t arg2) 1852 1861 { … … 1854 1863 } 1855 1864 1856 //The total number of operations is 81865 //The total number of operations is 5.0 1857 1866 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::eq(bitblock256_t arg1, 
bitblock256_t arg2) 1858 1867 { … … 1860 1869 } 1861 1870 1862 //The total number of operations is 81871 //The total number of operations is 5.0 1863 1872 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::eq(bitblock256_t arg1, bitblock256_t arg2) 1864 1873 { … … 1866 1875 } 1867 1876 1868 //The total number of operations is 81877 //The total number of operations is 5.0 1869 1878 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::eq(bitblock256_t arg1, bitblock256_t arg2) 1870 1879 { … … 1872 1881 } 1873 1882 1874 //The total number of operations is 81883 //The total number of operations is 5.0 1875 1884 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::eq(bitblock256_t arg1, bitblock256_t arg2) 1876 1885 { … … 1878 1887 } 1879 1888 1880 //The total number of operations is 481889 //The total number of operations is 23.6666666667 1881 1890 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::eq(bitblock256_t arg1, bitblock256_t arg2) 1882 1891 { … … 1887 1896 } 1888 1897 1889 //The total number of operations is 1311898 //The total number of operations is 54.1666666667 1890 1899 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::eq(bitblock256_t arg1, bitblock256_t arg2) 1891 1900 { … … 1896 1905 } 1897 1906 1898 //The total number of operations is 91907 //The total number of operations is 7.0 1899 1908 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::srai(bitblock256_t arg1) 1900 1909 { … … 1902 1911 } 1903 1912 1904 //The total number of operations is 291913 //The total number of operations is 17.5 1905 1914 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::srai(bitblock256_t arg1) 1906 1915 { 1907 bitblock256_t tmp = simd256<4>::srli<((sh >= 4) ? (3) : ((sh < 0) ? 0 : sh))>(arg1); 1908 return simd_or(tmp, simd256<4>::sub(simd256<4>::constant<0>(), simd_and(simd256<4>::constant<(1<<((4-((sh >= 4) ? (3) : ((sh < 0) ? 
0 : sh)))-1))>(), tmp))); 1909 } 1910 1911 //The total number of operations is 17 1916 return simd_or(simd_and(simd256<4>::himask(), simd256<(2)>::srai<((sh < (2)) ? sh : (2))>(arg1)), ((sh <= (2)) ? simd256<4>::srli<sh>(arg1) : simd256<(2)>::srai<(sh-(2))>(simd256<4>::srli<(2)>(arg1)))); 1917 } 1918 1919 //The total number of operations is 12.0 1912 1920 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::srai(bitblock256_t arg1) 1913 1921 { … … 1916 1924 } 1917 1925 1918 //The total number of operations is 61926 //The total number of operations is 4.0 1919 1927 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::srai(bitblock256_t arg1) 1920 1928 { … … 1922 1930 } 1923 1931 1924 //The total number of operations is 61932 //The total number of operations is 4.0 1925 1933 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::srai(bitblock256_t arg1) 1926 1934 { … … 1928 1936 } 1929 1937 1930 //The total number of operations is 221938 //The total number of operations is 12.0 1931 1939 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::srai(bitblock256_t arg1) 1932 1940 { 1933 bitblock256_t tmp = simd256<64>::srli<((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh))>(arg1); 1934 return simd_or(tmp, simd256<64>::sub(simd256<64>::constant<0>(), simd_and(simd256<64>::slli<((64-((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh)))-1)>(simd256<64>::constant<1>()), tmp))); 1935 } 1936 1937 //The total number of operations is 84 1941 return simd_or(simd_and(simd256<64>::himask(), simd256<(32)>::srai<((sh < (32)) ? sh : (32))>(arg1)), ((sh <= (32)) ? 
simd256<64>::srli<sh>(arg1) : simd256<(32)>::srai<(sh-(32))>(simd256<64>::srli<(32)>(arg1)))); 1942 } 1943 1944 //The total number of operations is 28.3333333333 1938 1945 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::srai(bitblock256_t arg1) 1939 1946 { 1940 bitblock256_t tmp = simd256<128>::srli<((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh))>(arg1); 1941 return simd_or(tmp, simd256<128>::sub(simd256<128>::constant<0>(), simd_and(simd256<128>::slli<((128-((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh)))-1)>(simd256<128>::constant<1>()), tmp))); 1942 } 1943 1944 //The total number of operations is 220 1947 return simd_or(simd_and(simd256<128>::himask(), simd256<(64)>::srai<((sh < (64)) ? sh : (64))>(arg1)), ((sh <= (64)) ? simd256<128>::srli<sh>(arg1) : simd256<(64)>::srai<(sh-(64))>(simd256<128>::srli<(64)>(arg1)))); 1948 } 1949 1950 //The total number of operations is 59.0 1945 1951 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::srai(bitblock256_t arg1) 1946 1952 { 1947 bitblock256_t tmp = simd256<256>::srli<((sh >= 256) ? (255) : ((sh < 0) ? 0 : sh))>(arg1); 1948 return simd_or(tmp, simd256<256>::sub(simd256<256>::constant<0>(), simd_and(simd256<256>::slli<((256-((sh >= 256) ? (255) : ((sh < 0) ? 0 : sh)))-1)>(simd256<256>::constant<1>()), tmp))); 1953 return simd_or(simd_and(simd256<256>::himask(), simd256<(128)>::srai<((sh < (128)) ? sh : (128))>(arg1)), ((sh <= (128)) ? 
simd256<256>::srli<sh>(arg1) : simd256<(128)>::srai<(sh-(128))>(simd256<256>::srli<(128)>(arg1)))); 1949 1954 } 1950 1955 … … 1997 2002 } 1998 2003 1999 //The total number of operations is 1 2004 //The total number of operations is 1.0 2000 2005 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::add(bitblock256_t arg1, bitblock256_t arg2) 2001 2006 { … … 2003 2008 } 2004 2009 2005 //The total number of operations is 1 82010 //The total number of operations is 16.0 2006 2011 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::add(bitblock256_t arg1, bitblock256_t arg2) 2007 2012 { … … 2013 2018 } 2014 2019 2015 //The total number of operations is 202020 //The total number of operations is 14.0 2016 2021 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::add(bitblock256_t arg1, bitblock256_t arg2) 2017 2022 { … … 2019 2024 } 2020 2025 2021 //The total number of operations is 82026 //The total number of operations is 5.0 2022 2027 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::add(bitblock256_t arg1, bitblock256_t arg2) 2023 2028 { … … 2025 2030 } 2026 2031 2027 //The total number of operations is 82032 //The total number of operations is 5.0 2028 2033 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::add(bitblock256_t arg1, bitblock256_t arg2) 2029 2034 { … … 2031 2036 } 2032 2037 2033 //The total number of operations is 82038 //The total number of operations is 5.0 2034 2039 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::add(bitblock256_t arg1, bitblock256_t arg2) 2035 2040 { … … 2037 2042 } 2038 2043 2039 //The total number of operations is 82044 //The total number of operations is 5.0 2040 2045 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::add(bitblock256_t arg1, bitblock256_t arg2) 2041 2046 { … … 2043 2048 } 2044 2049 2045 //The total number of operations is 442050 //The total number of operations is 26.3333333333 2046 2051 template <> IDISA_ALWAYS_INLINE bitblock256_t 
simd256<128>::add(bitblock256_t arg1, bitblock256_t arg2) 2047 2052 { 2048 bitblock256_t ans = simd256<(64)>::add(arg1, arg2); 2049 bitblock256_t carryMask = simd_or(simd_and(arg1, arg2), simd_and(simd_xor(arg1, arg2), simd_not(ans))); 2050 bitblock256_t loMask = simd256<128>::lomask(); 2051 bitblock256_t carry = simd256<128>::slli<1>(simd_and(carryMask, loMask)); 2052 return simd256<1>::ifh(loMask, ans, simd256<(64)>::add(ans, carry)); 2053 } 2054 2055 //The total number of operations is 137 2053 bitblock256_t partial = simd256<(64)>::add(arg1, arg2); 2054 bitblock256_t carryMask = simd_or(simd_and(arg1, arg2), simd_andc(simd_xor(arg1, arg2), partial)); 2055 bitblock256_t carry = simd256<128>::slli<(64)>(simd256<(64)>::srli<(63)>(carryMask)); 2056 return simd256<(64)>::add(partial, carry); 2057 } 2058 2059 //The total number of operations is 75.6666666667 2056 2060 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::add(bitblock256_t arg1, bitblock256_t arg2) 2057 2061 { … … 2063 2067 } 2064 2068 2065 //The total number of operations is 1 2069 //The total number of operations is 1.0 2066 2070 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::umax(bitblock256_t arg1, bitblock256_t arg2) 2067 2071 { … … 2069 2073 } 2070 2074 2071 //The total number of operations is 2 82075 //The total number of operations is 24.0 2072 2076 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::umax(bitblock256_t arg1, bitblock256_t arg2) 2073 2077 { … … 2078 2082 } 2079 2083 2080 //The total number of operations is 202084 //The total number of operations is 14.0 2081 2085 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::umax(bitblock256_t arg1, bitblock256_t arg2) 2082 2086 { … … 2084 2088 } 2085 2089 2086 //The total number of operations is 82090 //The total number of operations is 5.0 2087 2091 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::umax(bitblock256_t arg1, bitblock256_t arg2) 2088 2092 { … … 2090 2094 } 2091 2095 2092 //The total 
number of operations is 82096 //The total number of operations is 5.0 2093 2097 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::umax(bitblock256_t arg1, bitblock256_t arg2) 2094 2098 { … … 2096 2100 } 2097 2101 2098 //The total number of operations is 82102 //The total number of operations is 5.0 2099 2103 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::umax(bitblock256_t arg1, bitblock256_t arg2) 2100 2104 { … … 2102 2106 } 2103 2107 2104 //The total number of operations is 1 42108 //The total number of operations is 11.0 2105 2109 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::umax(bitblock256_t arg1, bitblock256_t arg2) 2106 2110 { 2107 bitblock256_t high_bit = simd256<64>::constant<(9223372036854775808UL )>();2111 bitblock256_t high_bit = simd256<64>::constant<(9223372036854775808ULL)>(); 2108 2112 return simd_xor(simd256<64>::max(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit); 2109 2113 } 2110 2114 2111 //The total number of operations is 772115 //The total number of operations is 46.6666666667 2112 2116 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::umax(bitblock256_t arg1, bitblock256_t arg2) 2113 2117 { … … 2118 2122 } 2119 2123 2120 //The total number of operations is 2642124 //The total number of operations is 132.0 2121 2125 template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::umax(bitblock256_t arg1, bitblock256_t arg2) 2122 2126 { … … 2127 2131 } 2128 2132 2129 //The total number of operations is 5612133 //The total number of operations is 233.0 2130 2134 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<2>::umin_hl(bitblock256_t arg1, bitblock256_t arg2) 2131 2135 { … … 2133 2137 } 2134 2138 2135 //The total number of operations is 4122139 //The total number of operations is 186.0 2136 2140 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<4>::umin_hl(bitblock256_t arg1, bitblock256_t arg2) 2137 2141 { … … 2139 2143 } 2140 2144 2141 //The total number of operations is 
2282145 //The total number of operations is 106.0 2142 2146 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<8>::umin_hl(bitblock256_t arg1, bitblock256_t arg2) 2143 2147 { … … 2145 2149 } 2146 2150 2147 //The total number of operations is 382151 //The total number of operations is 25.0 2148 2152 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<16>::umin_hl(bitblock256_t arg1, bitblock256_t arg2) 2149 2153 { … … 2151 2155 } 2152 2156 2153 //The total number of operations is 382157 //The total number of operations is 25.0 2154 2158 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<32>::umin_hl(bitblock256_t arg1, bitblock256_t arg2) 2155 2159 { … … 2157 2161 } 2158 2162 2159 //The total number of operations is 4502163 //The total number of operations is 179.0 2160 2164 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<64>::umin_hl(bitblock256_t arg1, bitblock256_t arg2) 2161 2165 { … … 2163 2167 } 2164 2168 2165 //The total number of operations is 4562169 //The total number of operations is 203.666666667 2166 2170 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<128>::umin_hl(bitblock256_t arg1, bitblock256_t arg2) 2167 2171 { … … 2169 2173 } 2170 2174 2171 //The total number of operations is 1642175 //The total number of operations is 81.1666666667 2172 2176 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<256>::umin_hl(bitblock256_t arg1, bitblock256_t arg2) 2173 2177 { … … 2175 2179 } 2176 2180 2177 //The total number of operations is 5612181 //The total number of operations is 233.0 2178 2182 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<2>::add_hl(bitblock256_t arg1, bitblock256_t arg2) 2179 2183 { … … 2181 2185 } 2182 2186 2183 //The total number of operations is 4022187 //The total number of operations is 178.0 2184 2188 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<4>::add_hl(bitblock256_t arg1, bitblock256_t arg2) 2185 2189 { … … 2187 2191 } 2188 2192 2189 //The total number of operations is 2282193 //The 
total number of operations is 106.0 2190 2194 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<8>::add_hl(bitblock256_t arg1, bitblock256_t arg2) 2191 2195 { … … 2193 2197 } 2194 2198 2195 //The total number of operations is 382199 //The total number of operations is 25.0 2196 2200 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<16>::add_hl(bitblock256_t arg1, bitblock256_t arg2) 2197 2201 { … … 2199 2203 } 2200 2204 2201 //The total number of operations is 82205 //The total number of operations is 5.0 2202 2206 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<32>::add_hl(bitblock256_t arg1, bitblock256_t arg2) 2203 2207 { … … 2205 2209 } 2206 2210 2207 //The total number of operations is 82211 //The total number of operations is 5.0 2208 2212 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<64>::add_hl(bitblock256_t arg1, bitblock256_t arg2) 2209 2213 { … … 2211 2215 } 2212 2216 2213 //The total number of operations is 4502217 //The total number of operations is 197.666666667 2214 2218 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<128>::add_hl(bitblock256_t arg1, bitblock256_t arg2) 2215 2219 { … … 2217 2221 } 2218 2222 2219 //The total number of operations is 1312223 //The total number of operations is 60.8333333333 2220 2224 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<256>::add_hl(bitblock256_t arg1, bitblock256_t arg2) 2221 2225 { … … 2223 2227 } 2224 2228 2225 //The total number of operations is 4142229 //The total number of operations is 226.0 2226 2230 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<2>::packss(bitblock256_t arg1, bitblock256_t arg2) 2227 2231 { … … 2231 2235 } 2232 2236 2233 //The total number of operations is 3182237 //The total number of operations is 183.0 2234 2238 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<4>::packss(bitblock256_t arg1, bitblock256_t arg2) 2235 2239 { … … 2239 2243 } 2240 2244 2241 //The total number of operations is 1502245 //The total number of operations 
is 80.0 2242 2246 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<8>::packss(bitblock256_t arg1, bitblock256_t arg2) 2243 2247 { … … 2247 2251 } 2248 2252 2249 //The total number of operations is 82253 //The total number of operations is 5.0 2250 2254 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<16>::packss(bitblock256_t arg1, bitblock256_t arg2) 2251 2255 { … … 2253 2257 } 2254 2258 2255 //The total number of operations is 82259 //The total number of operations is 5.0 2256 2260 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<32>::packss(bitblock256_t arg1, bitblock256_t arg2) 2257 2261 { … … 2259 2263 } 2260 2264 2261 //The total number of operations is 2662265 //The total number of operations is 120.0 2262 2266 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<64>::packss(bitblock256_t arg1, bitblock256_t arg2) 2263 2267 { … … 2267 2271 } 2268 2272 2269 //The total number of operations is 7632273 //The total number of operations is 329.333333333 2270 2274 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<128>::packss(bitblock256_t arg1, bitblock256_t arg2) 2271 2275 { … … 2275 2279 } 2276 2280 2277 //The total number of operations is 26812281 //The total number of operations is 1001.16666667 2278 2282 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<256>::packss(bitblock256_t arg1, bitblock256_t arg2) 2279 2283 { … … 2283 2287 } 2284 2288 2285 //The total number of operations is 42289 //The total number of operations is 3.0 2286 2290 template <> IDISA_ALWAYS_INLINE uint64_t hsimd256<8>::signmask(bitblock256_t arg1) 2287 2291 { … … 2289 2293 } 2290 2294 2291 //The total number of operations is 242295 //The total number of operations is 16.0 2292 2296 template <> IDISA_ALWAYS_INLINE uint64_t hsimd256<16>::signmask(bitblock256_t arg1) 2293 2297 { … … 2295 2299 } 2296 2300 2297 //The total number of operations is 442301 //The total number of operations is 29.0 2298 2302 template <> IDISA_ALWAYS_INLINE uint64_t 
hsimd256<32>::signmask(bitblock256_t arg1) 2299 2303 { … … 2301 2305 } 2302 2306 2303 //The total number of operations is 2712307 //The total number of operations is 120.0 2304 2308 template <> IDISA_ALWAYS_INLINE uint64_t hsimd256<64>::signmask(bitblock256_t arg1) 2305 2309 { … … 2307 2311 } 2308 2312 2309 //The total number of operations is 5862313 //The total number of operations is 264.666666667 2310 2314 template <> IDISA_ALWAYS_INLINE uint64_t hsimd256<128>::signmask(bitblock256_t arg1) 2311 2315 { … … 2313 2317 } 2314 2318 2315 //The total number of operations is 6302319 //The total number of operations is 282.166666667 2316 2320 template <> IDISA_ALWAYS_INLINE uint64_t hsimd256<256>::signmask(bitblock256_t arg1) 2317 2321 { … … 2319 2323 } 2320 2324 2321 //The total number of operations is 2742325 //The total number of operations is 112.0 2322 2326 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<2>::packl(bitblock256_t arg1, bitblock256_t arg2) 2323 2327 { … … 2325 2329 } 2326 2330 2327 //The total number of operations is 1862331 //The total number of operations is 77.0 2328 2332 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<4>::packl(bitblock256_t arg1, bitblock256_t arg2) 2329 2333 { … … 2331 2335 } 2332 2336 2333 //The total number of operations is 982337 //The total number of operations is 42.0 2334 2338 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<8>::packl(bitblock256_t arg1, bitblock256_t arg2) 2335 2339 { … … 2337 2341 } 2338 2342 2339 //The total number of operations is 102343 //The total number of operations is 7.0 2340 2344 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<16>::packl(bitblock256_t arg1, bitblock256_t arg2) 2341 2345 { … … 2343 2347 } 2344 2348 2345 //The total number of operations is 102349 //The total number of operations is 7.0 2346 2350 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<32>::packl(bitblock256_t arg1, bitblock256_t arg2) 2347 2351 { … … 2349 2353 } 2350 2354 2351 //The total 
number of operations is 2152355 //The total number of operations is 83.0 2352 2356 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<64>::packl(bitblock256_t arg1, bitblock256_t arg2) 2353 2357 { … … 2355 2359 } 2356 2360 2357 //The total number of operations is 1272361 //The total number of operations is 48.0 2358 2362 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<128>::packl(bitblock256_t arg1, bitblock256_t arg2) 2359 2363 { … … 2361 2365 } 2362 2366 2363 //The total number of operations is 432367 //The total number of operations is 17.0 2364 2368 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<256>::packl(bitblock256_t arg1, bitblock256_t arg2) 2365 2369 { … … 2367 2371 } 2368 2372 2369 //The total number of operations is 2862373 //The total number of operations is 120.0 2370 2374 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<2>::packh(bitblock256_t arg1, bitblock256_t arg2) 2371 2375 { … … 2373 2377 } 2374 2378 2375 //The total number of operations is 1982379 //The total number of operations is 85.0 2376 2380 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<4>::packh(bitblock256_t arg1, bitblock256_t arg2) 2377 2381 { … … 2379 2383 } 2380 2384 2381 //The total number of operations is 1102385 //The total number of operations is 50.0 2382 2386 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<8>::packh(bitblock256_t arg1, bitblock256_t arg2) 2383 2387 { … … 2385 2389 } 2386 2390 2387 //The total number of operations is 202391 //The total number of operations is 13.0 2388 2392 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<16>::packh(bitblock256_t arg1, bitblock256_t arg2) 2389 2393 { … … 2391 2395 } 2392 2396 2393 //The total number of operations is 202397 //The total number of operations is 13.0 2394 2398 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<32>::packh(bitblock256_t arg1, bitblock256_t arg2) 2395 2399 { … … 2397 2401 } 2398 2402 2399 //The total number of operations is 2272403 //The total number 
of operations is 91.0 2400 2404 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<64>::packh(bitblock256_t arg1, bitblock256_t arg2) 2401 2405 { … … 2403 2407 } 2404 2408 2405 //The total number of operations is 3152409 //The total number of operations is 144.666666667 2406 2410 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<128>::packh(bitblock256_t arg1, bitblock256_t arg2) 2407 2411 { … … 2409 2413 } 2410 2414 2411 //The total number of operations is 442415 //The total number of operations is 17.5 2412 2416 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<256>::packh(bitblock256_t arg1, bitblock256_t arg2) 2413 2417 { … … 2415 2419 } 2416 2420 2417 //The total number of operations is 5612421 //The total number of operations is 233.0 2418 2422 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<2>::min_hl(bitblock256_t arg1, bitblock256_t arg2) 2419 2423 { … … 2421 2425 } 2422 2426 2423 //The total number of operations is 4132427 //The total number of operations is 187.0 2424 2428 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<4>::min_hl(bitblock256_t arg1, bitblock256_t arg2) 2425 2429 { … … 2427 2431 } 2428 2432 2429 //The total number of operations is 2312433 //The total number of operations is 109.0 2430 2434 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<8>::min_hl(bitblock256_t arg1, bitblock256_t arg2) 2431 2435 { … … 2433 2437 } 2434 2438 2435 //The total number of operations is 382439 //The total number of operations is 25.0 2436 2440 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<16>::min_hl(bitblock256_t arg1, bitblock256_t arg2) 2437 2441 { … … 2439 2443 } 2440 2444 2441 //The total number of operations is 382445 //The total number of operations is 25.0 2442 2446 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<32>::min_hl(bitblock256_t arg1, bitblock256_t arg2) 2443 2447 { … … 2445 2449 } 2446 2450 2447 //The total number of operations is 4502451 //The total number of operations is 179.0 2448 2452 
template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<64>::min_hl(bitblock256_t arg1, bitblock256_t arg2) 2449 2453 { … … 2451 2455 } 2452 2456 2453 //The total number of operations is 4532457 //The total number of operations is 200.666666667 2454 2458 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<128>::min_hl(bitblock256_t arg1, bitblock256_t arg2) 2455 2459 { … … 2457 2461 } 2458 2462 2459 //The total number of operations is 1752463 //The total number of operations is 89.1666666667 2460 2464 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<256>::min_hl(bitblock256_t arg1, bitblock256_t arg2) 2461 2465 { … … 2463 2467 } 2464 2468 2465 //The total number of operations is 3442469 //The total number of operations is 170.0 2466 2470 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<2>::packus(bitblock256_t arg1, bitblock256_t arg2) 2467 2471 { … … 2473 2477 } 2474 2478 2475 //The total number of operations is 2862479 //The total number of operations is 157.0 2476 2480 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<4>::packus(bitblock256_t arg1, bitblock256_t arg2) 2477 2481 { … … 2483 2487 } 2484 2488 2485 //The total number of operations is 1442489 //The total number of operations is 76.0 2486 2490 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<8>::packus(bitblock256_t arg1, bitblock256_t arg2) 2487 2491 { … … 2493 2497 } 2494 2498 2495 //The total number of operations is 82499 //The total number of operations is 5.0 2496 2500 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<16>::packus(bitblock256_t arg1, bitblock256_t arg2) 2497 2501 { … … 2499 2503 } 2500 2504 2501 //The total number of operations is 82505 //The total number of operations is 5.0 2502 2506 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<32>::packus(bitblock256_t arg1, bitblock256_t arg2) 2503 2507 { … … 2505 2509 } 2506 2510 2507 //The total number of operations is 2412511 //The total number of operations is 103.0 2508 2512 template <> 
IDISA_ALWAYS_INLINE bitblock256_t hsimd256<64>::packus(bitblock256_t arg1, bitblock256_t arg2) 2509 2513 { … … 2515 2519 } 2516 2520 2517 //The total number of operations is 2772521 //The total number of operations is 128.0 2518 2522 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<128>::packus(bitblock256_t arg1, bitblock256_t arg2) 2519 2523 { … … 2525 2529 } 2526 2530 2527 //The total number of operations is 2622531 //The total number of operations is 112.833333333 2528 2532 template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<256>::packus(bitblock256_t arg1, bitblock256_t arg2) 2529 2533 { … … 2532 2536 } 2533 2537 2534 //The total number of operations is 642538 //The total number of operations is 49.0 2535 2539 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<1>::mergel(bitblock256_t arg1, bitblock256_t arg2) 2536 2540 { … … 2538 2542 } 2539 2543 2540 //The total number of operations is 442544 //The total number of operations is 33.0 2541 2545 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<2>::mergel(bitblock256_t arg1, bitblock256_t arg2) 2542 2546 { … … 2544 2548 } 2545 2549 2546 //The total number of operations is 242550 //The total number of operations is 17.0 2547 2551 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<4>::mergel(bitblock256_t arg1, bitblock256_t arg2) 2548 2552 { … … 2550 2554 } 2551 2555 2552 //The total number of operations is 42556 //The total number of operations is 1.0 2553 2557 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<8>::mergel(bitblock256_t arg1, bitblock256_t arg2) 2554 2558 { … … 2558 2562 } 2559 2563 2560 //The total number of operations is 42564 //The total number of operations is 1.0 2561 2565 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<16>::mergel(bitblock256_t arg1, bitblock256_t arg2) 2562 2566 { … … 2566 2570 } 2567 2571 2568 //The total number of operations is 42572 //The total number of operations is 1.0 2569 2573 template <> IDISA_ALWAYS_INLINE bitblock256_t 
esimd256<32>::mergel(bitblock256_t arg1, bitblock256_t arg2) 2570 2574 { … … 2574 2578 } 2575 2579 2576 //The total number of operations is 42580 //The total number of operations is 1.0 2577 2581 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<64>::mergel(bitblock256_t arg1, bitblock256_t arg2) 2578 2582 { … … 2582 2586 } 2583 2587 2584 //The total number of operations is 482588 //The total number of operations is 23.6666666667 2585 2589 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<128>::mergel(bitblock256_t arg1, bitblock256_t arg2) 2586 2590 { … … 2588 2592 } 2589 2593 2590 //The total number of operations is 642594 //The total number of operations is 51.0 2591 2595 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<1>::mergeh(bitblock256_t arg1, bitblock256_t arg2) 2592 2596 { … … 2594 2598 } 2595 2599 2596 //The total number of operations is 442600 //The total number of operations is 35.0 2597 2601 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<2>::mergeh(bitblock256_t arg1, bitblock256_t arg2) 2598 2602 { … … 2600 2604 } 2601 2605 2602 //The total number of operations is 242606 //The total number of operations is 19.0 2603 2607 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<4>::mergeh(bitblock256_t arg1, bitblock256_t arg2) 2604 2608 { … … 2606 2610 } 2607 2611 2608 //The total number of operations is 42612 //The total number of operations is 3.0 2609 2613 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<8>::mergeh(bitblock256_t arg1, bitblock256_t arg2) 2610 2614 { … … 2614 2618 } 2615 2619 2616 //The total number of operations is 42620 //The total number of operations is 3.0 2617 2621 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<16>::mergeh(bitblock256_t arg1, bitblock256_t arg2) 2618 2622 { … … 2622 2626 } 2623 2627 2624 //The total number of operations is 42628 //The total number of operations is 3.0 2625 2629 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<32>::mergeh(bitblock256_t arg1, 
bitblock256_t arg2) 2626 2630 { … … 2630 2634 } 2631 2635 2632 //The total number of operations is 42636 //The total number of operations is 3.0 2633 2637 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<64>::mergeh(bitblock256_t arg1, bitblock256_t arg2) 2634 2638 { … … 2638 2642 } 2639 2643 2640 //The total number of operations is 482644 //The total number of operations is 25.6666666667 2641 2645 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<128>::mergeh(bitblock256_t arg1, bitblock256_t arg2) 2642 2646 { … … 2644 2648 } 2645 2649 2646 //The total number of operations is 522650 //The total number of operations is 41.0 2647 2651 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<1>::zeroextendh(bitblock256_t arg1) 2648 2652 { … … 2650 2654 } 2651 2655 2652 //The total number of operations is 322656 //The total number of operations is 25.0 2653 2657 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<2>::zeroextendh(bitblock256_t arg1) 2654 2658 { … … 2656 2660 } 2657 2661 2658 //The total number of operations is 122662 //The total number of operations is 9.0 2659 2663 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<4>::zeroextendh(bitblock256_t arg1) 2660 2664 { … … 2662 2666 } 2663 2667 2664 //The total number of operations is 112668 //The total number of operations is 8.0 2665 2669 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<8>::zeroextendh(bitblock256_t arg1) 2666 2670 { … … 2668 2672 } 2669 2673 2670 //The total number of operations is 112674 //The total number of operations is 8.0 2671 2675 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<16>::zeroextendh(bitblock256_t arg1) 2672 2676 { … … 2674 2678 } 2675 2679 2676 //The total number of operations is 112680 //The total number of operations is 8.0 2677 2681 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<32>::zeroextendh(bitblock256_t arg1) 2678 2682 { … … 2680 2684 } 2681 2685 2682 //The total number of operations is 682686 //The total number of 
operations is 35.0 2683 2687 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<64>::zeroextendh(bitblock256_t arg1) 2684 2688 { … … 2686 2690 } 2687 2691 2688 //The total number of operations is 412692 //The total number of operations is 14.5 2689 2693 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<128>::zeroextendh(bitblock256_t arg1) 2690 2694 { … … 2692 2696 } 2693 2697 2694 //The total number of operations is 522698 //The total number of operations is 39.0 2695 2699 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<1>::zeroextendl(bitblock256_t arg1) 2696 2700 { … … 2698 2702 } 2699 2703 2700 //The total number of operations is 322704 //The total number of operations is 23.0 2701 2705 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<2>::zeroextendl(bitblock256_t arg1) 2702 2706 { … … 2704 2708 } 2705 2709 2706 //The total number of operations is 122710 //The total number of operations is 7.0 2707 2711 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<4>::zeroextendl(bitblock256_t arg1) 2708 2712 { … … 2710 2714 } 2711 2715 2712 //The total number of operations is 112716 //The total number of operations is 6.0 2713 2717 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<8>::zeroextendl(bitblock256_t arg1) 2714 2718 { … … 2716 2720 } 2717 2721 2718 //The total number of operations is 112722 //The total number of operations is 6.0 2719 2723 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<16>::zeroextendl(bitblock256_t arg1) 2720 2724 { … … 2722 2726 } 2723 2727 2724 //The total number of operations is 112728 //The total number of operations is 6.0 2725 2729 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<32>::zeroextendl(bitblock256_t arg1) 2726 2730 { … … 2728 2732 } 2729 2733 2730 //The total number of operations is 682734 //The total number of operations is 33.0 2731 2735 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<64>::zeroextendl(bitblock256_t arg1) 2732 2736 { … … 2734 2738 } 2735 2739 2736 //The 
total number of operations is 1 2740 //The total number of operations is 1.0 2737 2741 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<128>::zeroextendl(bitblock256_t arg1) 2738 2742 { … … 2740 2744 } 2741 2745 2742 //The total number of operations is 692746 //The total number of operations is 54.0 2743 2747 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<1>::signextendh(bitblock256_t arg1) 2744 2748 { … … 2746 2750 } 2747 2751 2748 //The total number of operations is 892752 //The total number of operations is 59.0 2749 2753 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<2>::signextendh(bitblock256_t arg1) 2750 2754 { … … 2752 2756 } 2753 2757 2754 //The total number of operations is 452758 //The total number of operations is 32.0 2755 2759 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<4>::signextendh(bitblock256_t arg1) 2756 2760 { … … 2758 2762 } 2759 2763 2760 //The total number of operations is 222764 //The total number of operations is 15.0 2761 2765 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<8>::signextendh(bitblock256_t arg1) 2762 2766 { … … 2764 2768 } 2765 2769 2766 //The total number of operations is 222770 //The total number of operations is 15.0 2767 2771 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<16>::signextendh(bitblock256_t arg1) 2768 2772 { … … 2770 2774 } 2771 2775 2772 //The total number of operations is 542776 //The total number of operations is 31.0 2773 2777 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<32>::signextendh(bitblock256_t arg1) 2774 2778 { … … 2776 2780 } 2777 2781 2778 //The total number of operations is 2352782 //The total number of operations is 90.6666666667 2779 2783 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<64>::signextendh(bitblock256_t arg1) 2780 2784 { … … 2782 2786 } 2783 2787 2784 //The total number of operations is 2202788 //The total number of operations is 59.0 2785 2789 template <> IDISA_ALWAYS_INLINE bitblock256_t 
esimd256<128>::signextendh(bitblock256_t arg1) 2786 2790 { … … 2788 2792 } 2789 2793 2790 //The total number of operations is 692794 //The total number of operations is 52.0 2791 2795 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<1>::signextendl(bitblock256_t arg1) 2792 2796 { … … 2794 2798 } 2795 2799 2796 //The total number of operations is 892800 //The total number of operations is 57.0 2797 2801 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<2>::signextendl(bitblock256_t arg1) 2798 2802 { … … 2800 2804 } 2801 2805 2802 //The total number of operations is 452806 //The total number of operations is 30.0 2803 2807 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<4>::signextendl(bitblock256_t arg1) 2804 2808 { … … 2806 2810 } 2807 2811 2808 //The total number of operations is 222812 //The total number of operations is 13.0 2809 2813 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<8>::signextendl(bitblock256_t arg1) 2810 2814 { … … 2812 2816 } 2813 2817 2814 //The total number of operations is 222818 //The total number of operations is 13.0 2815 2819 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<16>::signextendl(bitblock256_t arg1) 2816 2820 { … … 2818 2822 } 2819 2823 2820 //The total number of operations is 542824 //The total number of operations is 29.0 2821 2825 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<32>::signextendl(bitblock256_t arg1) 2822 2826 { … … 2824 2828 } 2825 2829 2826 //The total number of operations is 2352830 //The total number of operations is 88.6666666667 2827 2831 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<64>::signextendl(bitblock256_t arg1) 2828 2832 { … … 2830 2834 } 2831 2835 2832 //The total number of operations is 2602836 //The total number of operations is 73.0 2833 2837 template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<128>::signextendl(bitblock256_t arg1) 2834 2838 { … … 2836 2840 } 2837 2841 2838 //The total number of operations is 822842 //The total number of 
operations is 29.5 2839 2843 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::dsrli(bitblock256_t arg1, bitblock256_t arg2) 2840 2844 { … … 2842 2846 } 2843 2847 2844 //The total number of operations is 822848 //The total number of operations is 29.5 2845 2849 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::dsrli(bitblock256_t arg1, bitblock256_t arg2) 2846 2850 { … … 2848 2852 } 2849 2853 2850 //The total number of operations is 822854 //The total number of operations is 29.5 2851 2855 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::dsrli(bitblock256_t arg1, bitblock256_t arg2) 2852 2856 { … … 2854 2858 } 2855 2859 2856 //The total number of operations is 822860 //The total number of operations is 29.5 2857 2861 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::dsrli(bitblock256_t arg1, bitblock256_t arg2) 2858 2862 { … … 2860 2864 } 2861 2865 2862 //The total number of operations is 822866 //The total number of operations is 29.5 2863 2867 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::dsrli(bitblock256_t arg1, bitblock256_t arg2) 2864 2868 { … … 2866 2870 } 2867 2871 2868 //The total number of operations is 822872 //The total number of operations is 29.5 2869 2873 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::dsrli(bitblock256_t arg1, bitblock256_t arg2) 2870 2874 { … … 2872 2876 } 2873 2877 2874 //The total number of operations is 822878 //The total number of operations is 29.5 2875 2879 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::dsrli(bitblock256_t arg1, bitblock256_t arg2) 2876 2880 { … … 2878 2882 } 2879 2883 2880 //The total number of operations is 822884 //The total number of operations is 29.5 2881 2885 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<256>::dsrli(bitblock256_t arg1, 
bitblock256_t arg2) 2882 2886 { … … 2884 2888 } 2885 2889 2886 //The total number of operations is 1 2890 //The total number of operations is 1.0 2887 2891 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<1>::fill(uint64_t val1) 2888 2892 { … … 2890 2894 } 2891 2895 2892 //The total number of operations is 1 2896 //The total number of operations is 1.0 2893 2897 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::fill(uint64_t val1) 2894 2898 { … … 2896 2900 } 2897 2901 2898 //The total number of operations is 1 2902 //The total number of operations is 1.0 2899 2903 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::fill(uint64_t val1) 2900 2904 { … … 2902 2906 } 2903 2907 2904 //The total number of operations is 1 2908 //The total number of operations is 1.0 2905 2909 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::fill(uint64_t val1) 2906 2910 { … … 2908 2912 } 2909 2913 2910 //The total number of operations is 1 2914 //The total number of operations is 1.0 2911 2915 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::fill(uint64_t val1) 2912 2916 { … … 2914 2918 } 2915 2919 2916 //The total number of operations is 1 2920 //The total number of operations is 1.0 2917 2921 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::fill(uint64_t val1) 2918 2922 { … … 2920 2924 } 2921 2925 2922 //The total number of operations is 22926 //The total number of operations is 1.5 2923 2927 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd256<1>::extract(bitblock256_t arg1) 2924 2928 { … … 2926 2930 } 2927 2931 2928 //The total number of operations is 22932 //The total number of operations is 1.5 2929 2933 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd256<2>::extract(bitblock256_t arg1) 2930 2934 { … … 2932 2936 } 2933 2937 2934 //The total number of operations is 22938 //The total number of operations is 1.5 2935 2939 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t 
mvmd256<4>::extract(bitblock256_t arg1) 2936 2940 { … … 2938 2942 } 2939 2943 2940 //The total number of operations is 22944 //The total number of operations is 1.5 2941 2945 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd256<8>::extract(bitblock256_t arg1) 2942 2946 { … … 2944 2948 } 2945 2949 2946 //The total number of operations is 22950 //The total number of operations is 1.5 2947 2951 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd256<16>::extract(bitblock256_t arg1) 2948 2952 { … … 2950 2954 } 2951 2955 2952 //The total number of operations is 22956 //The total number of operations is 1.5 2953 2957 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd256<32>::extract(bitblock256_t arg1) 2954 2958 { 2955 return ((pos < 4) ? (((uint64_t)((4294967296UL )1))&_mm_extract_epi32(avx_select_lo128(arg1), (int32_t)(pos))) : (((uint64_t)((4294967296UL)1))&_mm_extract_epi32(avx_select_hi128(arg1), (int32_t)((pos4)))));2956 } 2957 2958 //The total number of operations is 42959 return ((pos < 4) ? 
(((uint64_t)((4294967296ULL)1))&_mm_extract_epi32(avx_select_lo128(arg1), (int32_t)(pos))) : (((uint64_t)((4294967296ULL)1))&_mm_extract_epi32(avx_select_hi128(arg1), (int32_t)((pos4))))); 2960 } 2961 2962 //The total number of operations is 3.0 2959 2963 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd256<64>::extract(bitblock256_t arg1) 2960 2964 { … … 2962 2966 } 2963 2967 2964 //The total number of operations is 302968 //The total number of operations is 23.5 2965 2969 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<1>::splat(bitblock256_t arg1) 2966 2970 { … … 2970 2974 } 2971 2975 2972 //The total number of operations is 212976 //The total number of operations is 16.5 2973 2977 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::splat(bitblock256_t arg1) 2974 2978 { … … 2978 2982 } 2979 2983 2980 //The total number of operations is 122984 //The total number of operations is 9.5 2981 2985 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::splat(bitblock256_t arg1) 2982 2986 { … … 2986 2990 } 2987 2991 2988 //The total number of operations is 32992 //The total number of operations is 2.5 2989 2993 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::splat(bitblock256_t arg1) 2990 2994 { … … 2992 2996 } 2993 2997 2994 //The total number of operations is 32998 //The total number of operations is 2.5 2995 2999 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::splat(bitblock256_t arg1) 2996 3000 { … … 2998 3002 } 2999 3003 3000 //The total number of operations is 33004 //The total number of operations is 2.5 3001 3005 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::splat(bitblock256_t arg1) 3002 3006 { … … 3004 3008 } 3005 3009 3006 //The total number of operations is 93010 //The total number of operations is 8.0 3007 3011 template <> template <uint64_t pos> 
IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::splat(bitblock256_t arg1) 3008 3012 { … … 3010 3014 } 3011 3015 3012 //The total number of operations is 213016 //The total number of operations is 19.0 3013 3017 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::splat(bitblock256_t arg1) 3014 3018 { … … 3016 3020 } 3017 3021 3018 //The total number of operations is 4 53022 //The total number of operations is 41.0 3019 3023 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<256>::splat(bitblock256_t arg1) 3020 3024 { … … 3022 3026 } 3023 3027 3024 //The total number of operations is 15 3028 //The total number of operations is 15.0 3025 3029 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<1>::fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16) 3026 3030 { … … 3028 3032 } 3029 3033 3030 //The total number of operations is 7 3034 //The total number of operations is 7.0 3031 3035 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16) 3032 3036 { … … 3034 3038 } 3035 3039 3036 //The total number of operations is 3 3040 //The total number of operations is 3.0 3037 3041 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16) 3038 3042 { … … 3040 3044 } 3041 3045 3042 //The total number of operations is 1 3046 
//The total number of operations is 1.0 3043 3047 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16) 3044 3048 { … … 3046 3050 } 3047 3051 3048 //The total number of operations is 5 3052 //The total number of operations is 5.0 3049 3053 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16) 3050 3054 { … … 3052 3056 } 3053 3057 3054 //The total number of operations is 5 3058 //The total number of operations is 5.0 3055 3059 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<1>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4) 3056 3060 { … … 3058 3062 } 3059 3063 3060 //The total number of operations is 5 3064 //The total number of operations is 5.0 3061 3065 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4) 3062 3066 { … … 3064 3068 } 3065 3069 3066 //The total number of operations is 5 3070 //The total number of operations is 5.0 3067 3071 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4) 3068 3072 { … … 3070 3074 } 3071 3075 3072 //The total number of operations is 5 3076 //The total number of operations is 5.0 3073 3077 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4) 3074 3078 { … … 3076 3080 } 3077 3081 3078 //The total number of operations is 3 3082 //The total number of operations is 3.0 3079 3083 template <> 
IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4) 3080 3084 { … … 3082 3086 } 3083 3087 3084 //The total number of operations is 1 3088 //The total number of operations is 1.0 3085 3089 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4) 3086 3090 { … … 3088 3092 } 3089 3093 3090 //The total number of operations is 413094 //The total number of operations is 14.5 3091 3095 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::srli(bitblock256_t arg1) 3092 3096 { … … 3094 3098 } 3095 3099 3096 //The total number of operations is 413100 //The total number of operations is 14.5 3097 3101 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::srli(bitblock256_t arg1) 3098 3102 { 3099 return mvmd256<(2)>::srli<(sh*2)>(arg1);3100 } 3101 3102 //The total number of operations is 413103 return simd256<256>::srli<(sh*4)>(arg1); 3104 } 3105 3106 //The total number of operations is 14.5 3103 3107 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::srli(bitblock256_t arg1) 3104 3108 { … … 3106 3110 } 3107 3111 3108 //The total number of operations is 413112 //The total number of operations is 14.5 3109 3113 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::srli(bitblock256_t arg1) 3110 3114 { 3111 return mvmd256<(8)>::srli<(sh*2)>(arg1);3112 } 3113 3114 //The total number of operations is 413115 return simd256<256>::srli<(sh*16)>(arg1); 3116 } 3117 3118 //The total number of operations is 14.5 3115 3119 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::srli(bitblock256_t arg1) 3116 3120 { … … 3118 3122 } 3119 3123 3120 //The total number of operations is 413124 //The total number of operations is 14.5 3121 3125 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t 
mvmd256<64>::srli(bitblock256_t arg1) 3122 3126 { … … 3124 3128 } 3125 3129 3126 //The total number of operations is 413130 //The total number of operations is 14.5 3127 3131 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::srli(bitblock256_t arg1) 3128 3132 { 3129 return mvmd256<(64)>::srli<(sh*2)>(arg1);3130 } 3131 3132 //The total number of operations is 413133 return simd256<256>::srli<(sh*128)>(arg1); 3134 } 3135 3136 //The total number of operations is 14.5 3133 3137 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<256>::srli(bitblock256_t arg1) 3134 3138 { … … 3136 3140 } 3137 3141 3138 //The total number of operations is 1 3142 //The total number of operations is 1.0 3139 3143 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<1>::fill2(uint64_t val1, uint64_t val2) 3140 3144 { … … 3142 3146 } 3143 3147 3144 //The total number of operations is 1 3148 //The total number of operations is 1.0 3145 3149 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::fill2(uint64_t val1, uint64_t val2) 3146 3150 { … … 3148 3152 } 3149 3153 3150 //The total number of operations is 1 3154 //The total number of operations is 1.0 3151 3155 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::fill2(uint64_t val1, uint64_t val2) 3152 3156 { … … 3154 3158 } 3155 3159 3156 //The total number of operations is 1 3160 //The total number of operations is 1.0 3157 3161 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::fill2(uint64_t val1, uint64_t val2) 3158 3162 { … … 3160 3164 } 3161 3165 3162 //The total number of operations is 1 3166 //The total number of operations is 1.0 3163 3167 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::fill2(uint64_t val1, uint64_t val2) 3164 3168 { … … 3166 3170 } 3167 3171 3168 //The total number of operations is 5 3172 //The total number of operations is 5.0 3169 3173 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::fill2(uint64_t val1, uint64_t 
val2) 3170 3174 { … … 3172 3176 } 3173 3177 3174 //The total number of operations is 823178 //The total number of operations is 29.5 3175 3179 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::dslli(bitblock256_t arg1, bitblock256_t arg2) 3176 3180 { … … 3178 3182 } 3179 3183 3180 //The total number of operations is 823184 //The total number of operations is 29.5 3181 3185 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::dslli(bitblock256_t arg1, bitblock256_t arg2) 3182 3186 { … … 3184 3188 } 3185 3189 3186 //The total number of operations is 823190 //The total number of operations is 29.5 3187 3191 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::dslli(bitblock256_t arg1, bitblock256_t arg2) 3188 3192 { … … 3190 3194 } 3191 3195 3192 //The total number of operations is 823196 //The total number of operations is 29.5 3193 3197 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::dslli(bitblock256_t arg1, bitblock256_t arg2) 3194 3198 { … … 3196 3200 } 3197 3201 3198 //The total number of operations is 823202 //The total number of operations is 29.5 3199 3203 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::dslli(bitblock256_t arg1, bitblock256_t arg2) 3200 3204 { … … 3202 3206 } 3203 3207 3204 //The total number of operations is 823208 //The total number of operations is 29.5 3205 3209 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::dslli(bitblock256_t arg1, bitblock256_t arg2) 3206 3210 { … … 3208 3212 } 3209 3213 3210 //The total number of operations is 823214 //The total number of operations is 29.5 3211 3215 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::dslli(bitblock256_t arg1, bitblock256_t arg2) 3212 3216 { … … 3214 3218 } 3215 3219 3216 //The total number of operations is 823220 //The total number of operations is 29.5 3217 3221 
template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<256>::dslli(bitblock256_t arg1, bitblock256_t arg2) 3218 3222 { … … 3220 3224 } 3221 3225 3222 //The total number of operations is 403226 //The total number of operations is 14.0 3223 3227 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::slli(bitblock256_t arg1) 3224 3228 { … … 3226 3230 } 3227 3231 3228 //The total number of operations is 403232 //The total number of operations is 14.0 3229 3233 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::slli(bitblock256_t arg1) 3230 3234 { … … 3232 3236 } 3233 3237 3234 //The total number of operations is 403238 //The total number of operations is 14.0 3235 3239 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::slli(bitblock256_t arg1) 3236 3240 { … … 3238 3242 } 3239 3243 3240 //The total number of operations is 403244 //The total number of operations is 14.0 3241 3245 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::slli(bitblock256_t arg1) 3242 3246 { … … 3244 3248 } 3245 3249 3246 //The total number of operations is 403250 //The total number of operations is 14.0 3247 3251 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::slli(bitblock256_t arg1) 3248 3252 { … … 3250 3254 } 3251 3255 3252 //The total number of operations is 403256 //The total number of operations is 14.0 3253 3257 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::slli(bitblock256_t arg1) 3254 3258 { … … 3256 3260 } 3257 3261 3258 //The total number of operations is 403262 //The total number of operations is 14.0 3259 3263 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::slli(bitblock256_t arg1) 3260 3264 { … … 3262 3266 } 3263 3267 3264 //The total number of operations is 403268 //The total number of operations is 14.0 3265 3269 template <> template 
<uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<256>::slli(bitblock256_t arg1) 3266 3270 { … … 3268 3272 } 3269 3273 3270 //The total number of operations is 13 3274 //The total number of operations is 13.0 3271 3275 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<1>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8) 3272 3276 { … … 3274 3278 } 3275 3279 3276 //The total number of operations is 13 3280 //The total number of operations is 13.0 3277 3281 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8) 3278 3282 { … … 3280 3284 } 3281 3285 3282 //The total number of operations is 7 3286 //The total number of operations is 7.0 3283 3287 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8) 3284 3288 { … … 3286 3290 } 3287 3291 3288 //The total number of operations is 3 3292 //The total number of operations is 3.0 3289 3293 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8) 3290 3294 { … … 3292 3296 } 3293 3297 3294 //The total number of operations is 1 3298 //The total number of operations is 1.0 3295 3299 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8) 3296 3300 { … … 3298 3302 } 3299 3303 3300 //The total number of operations is 5 3304 //The total number of operations is 5.0 3301 3305 template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, 
uint64_t val8) 3302 3306 { … … 3304 3308 } 3305 3309 3306 //The total number of operations is 1 3310 //The total number of operations is 1.0 3307 3311 IDISA_ALWAYS_INLINE bitblock256_t bitblock256::load_unaligned(float const* arg1) 3308 3312 { … … 3310 3314 } 3311 3315 3312 //The total number of operations is 413316 //The total number of operations is 14.5 3313 3317 template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t bitblock256::srli(bitblock256_t arg1) 3314 3318 { … … 3316 3320 } 3317 3321 3318 //The total number of operations is 1 3322 //The total number of operations is 1.0 3319 3323 IDISA_ALWAYS_INLINE void bitblock256::store_aligned(float* arg1, bitblock256_t arg2) 3320 3324 { … … 3322 3326 } 3323 3327 3324 //The total number of operations is 2093328 //The total number of operations is 118.5 3325 3329 IDISA_ALWAYS_INLINE uint64_t bitblock256::popcount(bitblock256_t arg1) 3326 3330 { … … 3328 3332 } 3329 3333 3330 //The total number of operations is 2 3334 //The total number of operations is 2.0 3331 3335 IDISA_ALWAYS_INLINE bool bitblock256::all(bitblock256_t arg1) 3332 3336 { … … 3334 3338 } 3335 3339 3336 //The total number of operations is 403340 //The total number of operations is 14.0 3337 3341 template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t bitblock256::slli(bitblock256_t arg1) 3338 3342 { … … 3340 3344 } 3341 3345 3342 //The total number of operations is 1 3346 //The total number of operations is 1.0 3343 3347 IDISA_ALWAYS_INLINE bool bitblock256::any(bitblock256_t arg1) 3344 3348 { … … 3346 3350 } 3347 3351 3348 //The total number of operations is 1 3352 //The total number of operations is 1.0 3349 3353 IDISA_ALWAYS_INLINE bitblock256_t bitblock256::load_aligned(float const* arg1) 3350 3354 { … … 3352 3356 } 3353 3357 3354 //The total number of operations is 1 3358 //The total number of operations is 1.0 3355 3359 IDISA_ALWAYS_INLINE void bitblock256::store_unaligned(float* arg1, bitblock256_t arg2) 3356 3360 { 
trunk/lib/idisa_cpp/idisa_neon.cpp
r1740 r1767 1 2 /* Copyright (c) 2011, Hua Huang and Robert D. Cameron. 3 Licensed under the Academic Free License 3.0. 4 This file is generated by the IDISA+ generator; 5 modifications should be made only by changing the 6 generator configuration and data files. */ 7 1 8 #ifndef IDISA_NEON_CPP 2 9 #define IDISA_NEON_CPP … … 468 475 #define neon_shift_left_64_bits(x) \ 469 476 vextq_u64(vdupq_n_u64(0), (bitblock128_t)(x), 1) 470 //The total number of operations is 2 477 //The total number of operations is 2.0 471 478 IDISA_ALWAYS_INLINE bitblock128_t simd_nor(bitblock128_t arg1, bitblock128_t arg2) 472 479 { … … 474 481 } 475 482 476 //The total number of operations is 1 483 //The total number of operations is 1.0 477 484 IDISA_ALWAYS_INLINE bitblock128_t simd_not(bitblock128_t arg1) 478 485 { … … 480 487 } 481 488 482 //The total number of operations is 1 489 //The total number of operations is 1.0 483 490 IDISA_ALWAYS_INLINE bitblock128_t simd_andc(bitblock128_t arg1, bitblock128_t arg2) 484 491 { … … 486 493 } 487 494 488 //The total number of operations is 1 495 //The total number of operations is 1.0 489 496 IDISA_ALWAYS_INLINE bitblock128_t simd_or(bitblock128_t arg1, bitblock128_t arg2) 490 497 { … … 492 499 } 493 500 494 //The total number of operations is 1 501 //The total number of operations is 1.0 495 502 IDISA_ALWAYS_INLINE bitblock128_t simd_and(bitblock128_t arg1, bitblock128_t arg2) 496 503 { … … 498 505 } 499 506 500 //The total number of operations is 1 507 //The total number of operations is 1.0 501 508 IDISA_ALWAYS_INLINE bitblock128_t simd_xor(bitblock128_t arg1, bitblock128_t arg2) 502 509 { … … 504 511 } 505 512 506 //The total number of operations is 1 513 //The total number of operations is 1.0 507 514 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::max(bitblock128_t arg1, bitblock128_t arg2) 508 515 { … … 510 517 } 511 518 512 //The total number of operations is 13 519 //The total number of operations is 13.0 513 520 template <> 
IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::max(bitblock128_t arg1, bitblock128_t arg2) 514 521 { … … 520 527 } 521 528 522 //The total number of operations is 7529 //The total number of operations is 6.0 523 530 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::max(bitblock128_t arg1, bitblock128_t arg2) 524 531 { … … 526 533 } 527 534 528 //The total number of operations is 1 535 //The total number of operations is 1.0 529 536 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::max(bitblock128_t arg1, bitblock128_t arg2) 530 537 { … … 532 539 } 533 540 534 //The total number of operations is 1 541 //The total number of operations is 1.0 535 542 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::max(bitblock128_t arg1, bitblock128_t arg2) 536 543 { … … 538 545 } 539 546 540 //The total number of operations is 1 547 //The total number of operations is 1.0 541 548 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::max(bitblock128_t arg1, bitblock128_t arg2) 542 549 { … … 544 551 } 545 552 546 //The total number of operations is 1 2553 //The total number of operations is 11.5 547 554 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::max(bitblock128_t arg1, bitblock128_t arg2) 548 555 { 549 bitblock128_t hiAns = simd128<(32)>::max(arg1, arg2); 550 bitblock128_t loAns = simd128<(32)>::umax(arg1, arg2); 551 bitblock128_t eqMask1 = simd128<64>::srli<(32)>(simd128<(32)>::eq(hiAns, arg1)); 552 bitblock128_t eqMask2 = simd128<64>::srli<(32)>(simd128<(32)>::eq(hiAns, arg2)); 553 return simd128<1>::ifh(simd128<64>::himask(), hiAns, simd128<1>::ifh(eqMask1, simd128<1>::ifh(eqMask2, loAns, arg1), arg2)); 554 } 555 556 //The total number of operations is 46 556 return simd128<1>::ifh(simd128<64>::gt(arg1, arg2), arg1, arg2); 557 } 558 559 //The total number of operations is 40.6666666667 557 560 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::max(bitblock128_t arg1, bitblock128_t arg2) 558 561 { 559 bitblock128_t hiAns = 
simd128<(64)>::max(arg1, arg2); 560 bitblock128_t loAns = simd128<(64)>::umax(arg1, arg2); 561 bitblock128_t eqMask1 = simd128<128>::srli<(64)>(simd128<(64)>::eq(hiAns, arg1)); 562 bitblock128_t eqMask2 = simd128<128>::srli<(64)>(simd128<(64)>::eq(hiAns, arg2)); 563 return simd128<1>::ifh(simd128<128>::himask(), hiAns, simd128<1>::ifh(eqMask1, simd128<1>::ifh(eqMask2, loAns, arg1), arg2)); 564 } 565 566 //The total number of operations is 1 562 return simd128<1>::ifh(simd128<128>::gt(arg1, arg2), arg1, arg2); 563 } 564 565 //The total number of operations is 1.0 567 566 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::mult(bitblock128_t arg1, bitblock128_t arg2) 568 567 { … … 570 569 } 571 570 572 //The total number of operations is 23571 //The total number of operations is 17.6666666667 573 572 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::mult(bitblock128_t arg1, bitblock128_t arg2) 574 573 { … … 578 577 } 579 578 580 //The total number of operations is 8579 //The total number of operations is 7.5 581 580 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::mult(bitblock128_t arg1, bitblock128_t arg2) 582 581 { … … 587 586 } 588 587 589 //The total number of operations is 1 588 //The total number of operations is 1.0 590 589 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::mult(bitblock128_t arg1, bitblock128_t arg2) 591 590 { … … 593 592 } 594 593 595 //The total number of operations is 1 594 //The total number of operations is 1.0 596 595 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::mult(bitblock128_t arg1, bitblock128_t arg2) 597 596 { … … 599 598 } 600 599 601 //The total number of operations is 1 600 //The total number of operations is 1.0 602 601 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::mult(bitblock128_t arg1, bitblock128_t arg2) 603 602 { … … 605 604 } 606 605 607 //The total number of operations is 1 606 //The total number of operations is 1.0 608 607 template <> IDISA_ALWAYS_INLINE 
bitblock128_t simd128<1>::gt(bitblock128_t arg1, bitblock128_t arg2) 609 608 { … … 611 610 } 612 611 613 //The total number of operations is 1 5612 //The total number of operations is 13.5 614 613 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::gt(bitblock128_t arg1, bitblock128_t arg2) 615 614 { 616 bitblock128_t hiAns = simd128<(1)>::gt(arg1, arg2); 617 bitblock128_t loAns = simd128<(1)>::ugt(arg1, arg2); 618 bitblock128_t mask = simd_and(loAns, simd128<2>::srli<(1)>(simd128<(1)>::eq(arg1, arg2))); 619 mask = simd_or(mask, simd128<2>::slli<(1)>(mask)); 620 return simd_or(simd128<2>::srai<(1)>(hiAns), mask); 621 } 622 623 //The total number of operations is 6 615 bitblock128_t tmp = simd_not(arg1); 616 bitblock128_t tmpAns = simd_or(simd_and(tmp, arg2), simd_and(simd128<128>::slli<1>(simd_and(arg1, simd_not(arg2))), simd_or(tmp, arg2))); 617 return simd128<1>::ifh(simd128<2>::himask(), tmpAns, simd128<128>::srli<1>(tmpAns)); 618 } 619 620 //The total number of operations is 5.0 624 621 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::gt(bitblock128_t arg1, bitblock128_t arg2) 625 622 { … … 627 624 } 628 625 629 //The total number of operations is 1 626 //The total number of operations is 1.0 630 627 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::gt(bitblock128_t arg1, bitblock128_t arg2) 631 628 { … … 633 630 } 634 631 635 //The total number of operations is 1 632 //The total number of operations is 1.0 636 633 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::gt(bitblock128_t arg1, bitblock128_t arg2) 637 634 { … … 639 636 } 640 637 641 //The total number of operations is 1 638 //The total number of operations is 1.0 642 639 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::gt(bitblock128_t arg1, bitblock128_t arg2) 643 640 { … … 645 642 } 646 643 647 //The total number of operations is 1 1644 //The total number of operations is 10.5 648 645 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::gt(bitblock128_t 
arg1, bitblock128_t arg2) 649 646 { … … 655 652 } 656 653 657 //The total number of operations is 48654 //The total number of operations is 39.6666666667 658 655 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::gt(bitblock128_t arg1, bitblock128_t arg2) 659 656 { … … 665 662 } 666 663 667 //The total number of operations is 1 664 //The total number of operations is 1.0 668 665 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::ult(bitblock128_t arg1, bitblock128_t arg2) 669 666 { … … 671 668 } 672 669 673 //The total number of operations is 1 4670 //The total number of operations is 12.5 674 671 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::ult(bitblock128_t arg1, bitblock128_t arg2) 675 672 { 676 bitblock128_t tmpAns = simd128<(1)>::ult(arg1, arg2); 677 bitblock128_t mask = simd_and(tmpAns, simd128<2>::srli<(1)>(simd128<(1)>::eq(arg1, arg2))); 678 mask = simd_or(mask, simd128<2>::slli<(1)>(mask)); 679 return simd_or(simd128<2>::srai<(1)>(tmpAns), mask); 680 } 681 682 //The total number of operations is 8 673 bitblock128_t tmp = simd_not(arg1); 674 bitblock128_t tmpAns = simd_or(simd_and(tmp, arg2), simd_and(simd128<128>::slli<1>(simd_and(tmp, arg2)), simd_or(tmp, arg2))); 675 return simd128<1>::ifh(simd128<2>::himask(), tmpAns, simd128<128>::srli<1>(tmpAns)); 676 } 677 678 //The total number of operations is 7.0 683 679 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::ult(bitblock128_t arg1, bitblock128_t arg2) 684 680 { … … 687 683 } 688 684 689 //The total number of operations is 3 685 //The total number of operations is 3.0 690 686 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::ult(bitblock128_t arg1, bitblock128_t arg2) 691 687 { … … 694 690 } 695 691 696 //The total number of operations is 3 692 //The total number of operations is 3.0 697 693 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::ult(bitblock128_t arg1, bitblock128_t arg2) 698 694 { … … 701 697 } 702 698 703 //The total number of 
operations is 3 699 //The total number of operations is 3.0 704 700 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::ult(bitblock128_t arg1, bitblock128_t arg2) 705 701 { … … 708 704 } 709 705 710 //The total number of operations is 10706 //The total number of operations is 9.5 711 707 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::ult(bitblock128_t arg1, bitblock128_t arg2) 712 708 { … … 717 713 } 718 714 719 //The total number of operations is 37715 //The total number of operations is 29.1666666667 720 716 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::ult(bitblock128_t arg1, bitblock128_t arg2) 721 717 { … … 726 722 } 727 723 728 //The total number of operations is 1 724 //The total number of operations is 1.0 729 725 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::lt(bitblock128_t arg1, bitblock128_t arg2) 730 726 { … … 732 728 } 733 729 734 //The total number of operations is 1 5730 //The total number of operations is 13.5 735 731 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::lt(bitblock128_t arg1, bitblock128_t arg2) 736 732 { 737 bitblock128_t hiAns = simd128<(1)>::lt(arg1, arg2); 738 bitblock128_t loAns = simd128<(1)>::ult(arg1, arg2); 739 bitblock128_t mask = simd_and(loAns, simd128<2>::srli<(1)>(simd128<(1)>::eq(arg1, arg2))); 740 mask = simd_or(mask, simd128<2>::slli<(1)>(mask)); 741 return simd_or(simd128<2>::srai<(1)>(hiAns), mask); 742 } 743 744 //The total number of operations is 6 733 bitblock128_t tmp = simd_not(arg2); 734 bitblock128_t tmpAns = simd_or(simd_and(arg1, tmp), simd_and(simd128<128>::slli<1>(simd_and(simd_not(arg1), arg2)), simd_or(arg1, tmp))); 735 return simd128<1>::ifh(simd128<2>::himask(), tmpAns, simd128<128>::srli<1>(tmpAns)); 736 } 737 738 //The total number of operations is 5.0 745 739 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::lt(bitblock128_t arg1, bitblock128_t arg2) 746 740 { … … 748 742 } 749 743 750 //The total number of operations is 1 744 //The 
total number of operations is 1.0 751 745 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::lt(bitblock128_t arg1, bitblock128_t arg2) 752 746 { … … 754 748 } 755 749 756 //The total number of operations is 1 750 //The total number of operations is 1.0 757 751 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::lt(bitblock128_t arg1, bitblock128_t arg2) 758 752 { … … 760 754 } 761 755 762 //The total number of operations is 1 756 //The total number of operations is 1.0 763 757 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::lt(bitblock128_t arg1, bitblock128_t arg2) 764 758 { … … 766 760 } 767 761 768 //The total number of operations is 1 1762 //The total number of operations is 10.5 769 763 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::lt(bitblock128_t arg1, bitblock128_t arg2) 770 764 { … … 776 770 } 777 771 778 //The total number of operations is 48772 //The total number of operations is 39.6666666667 779 773 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lt(bitblock128_t arg1, bitblock128_t arg2) 780 774 { … … 786 780 } 787 781 788 //The total number of operations is 2 782 //The total number of operations is 2.0 789 783 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::srli(bitblock128_t arg1) 790 784 { … … 792 786 } 793 787 794 //The total number of operations is 2 788 //The total number of operations is 2.0 795 789 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::srli(bitblock128_t arg1) 796 790 { … … 798 792 } 799 793 800 //The total number of operations is 1 794 //The total number of operations is 1.0 801 795 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::srli(bitblock128_t arg1) 802 796 { … … 804 798 } 805 799 806 //The total number of operations is 1 800 //The total number of operations is 1.0 807 801 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::srli(bitblock128_t arg1) 
808 802 { … … 810 804 } 811 805 812 //The total number of operations is 1 806 //The total number of operations is 1.0 813 807 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::srli(bitblock128_t arg1) 814 808 { … … 816 810 } 817 811 818 //The total number of operations is 1 812 //The total number of operations is 1.0 819 813 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srli(bitblock128_t arg1) 820 814 { … … 822 816 } 823 817 824 //The total number of operations is 5818 //The total number of operations is 3.16666666667 825 819 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srli(bitblock128_t arg1) 826 820 { … … 828 822 } 829 823 830 //The total number of operations is 1 824 //The total number of operations is 1.0 831 825 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::ctz(bitblock128_t arg1) 832 826 { … … 834 828 } 835 829 836 //The total number of operations is 14830 //The total number of operations is 9.5 837 831 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::ctz(bitblock128_t arg1) 838 832 { … … 841 835 } 842 836 843 //The total number of operations is 12 837 //The total number of operations is 12.0 844 838 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::ctz(bitblock128_t arg1) 845 839 { … … 847 841 } 848 842 849 //The total number of operations is 3 843 //The total number of operations is 3.0 850 844 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::ctz(bitblock128_t arg1) 851 845 { … … 853 847 } 854 848 855 //The total number of operations is 6 849 //The total number of operations is 6.0 856 850 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::ctz(bitblock128_t arg1) 857 851 { … … 859 853 } 860 854 861 //The total number of operations is 9 855 //The total number of operations is 9.0 862 856 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::ctz(bitblock128_t arg1) 863 857 { … … 865 859 } 866 860 867 
//The total number of operations is 12 861 //The total number of operations is 12.0 868 862 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::ctz(bitblock128_t arg1) 869 863 { … … 871 865 } 872 866 873 //The total number of operations is 30867 //The total number of operations is 25.5 874 868 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::ctz(bitblock128_t arg1) 875 869 { … … 877 871 } 878 872 879 //The total number of operations is 1 873 //The total number of operations is 1.0 880 874 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::ugt(bitblock128_t arg1, bitblock128_t arg2) 881 875 { … … 883 877 } 884 878 885 //The total number of operations is 1 4879 //The total number of operations is 12.5 886 880 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::ugt(bitblock128_t arg1, bitblock128_t arg2) 887 881 { 888 bitblock128_t tmpAns = simd128<(1)>::ugt(arg1, arg2); 889 bitblock128_t mask = simd_and(tmpAns, simd128<2>::srli<(1)>(simd128<(1)>::eq(arg1, arg2))); 890 mask = simd_or(mask, simd128<2>::slli<(1)>(mask)); 891 return simd_or(simd128<2>::srai<(1)>(tmpAns), mask); 892 } 893 894 //The total number of operations is 8 882 bitblock128_t tmp = simd_not(arg2); 883 bitblock128_t tmpAns = simd_or(simd_and(arg1, tmp), simd_and(simd128<128>::slli<1>(simd_and(arg1, tmp)), simd_or(arg1, tmp))); 884 return simd128<1>::ifh(simd128<2>::himask(), tmpAns, simd128<128>::srli<1>(tmpAns)); 885 } 886 887 //The total number of operations is 7.0 895 888 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::ugt(bitblock128_t arg1, bitblock128_t arg2) 896 889 { … … 899 892 } 900 893 901 //The total number of operations is 3 894 //The total number of operations is 3.0 902 895 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::ugt(bitblock128_t arg1, bitblock128_t arg2) 903 896 { … … 906 899 } 907 900 908 //The total number of operations is 3 901 //The total number of operations is 3.0 909 902 template <> IDISA_ALWAYS_INLINE bitblock128_t 
simd128<16>::ugt(bitblock128_t arg1, bitblock128_t arg2) 910 903 { … … 913 906 } 914 907 915 //The total number of operations is 3 908 //The total number of operations is 3.0 916 909 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::ugt(bitblock128_t arg1, bitblock128_t arg2) 917 910 { … … 920 913 } 921 914 922 //The total number of operations is 10915 //The total number of operations is 9.5 923 916 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::ugt(bitblock128_t arg1, bitblock128_t arg2) 924 917 { … … 929 922 } 930 923 931 //The total number of operations is 37924 //The total number of operations is 29.1666666667 932 925 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::ugt(bitblock128_t arg1, bitblock128_t arg2) 933 926 { … … 938 931 } 939 932 940 //The total number of operations is 4 933 //The total number of operations is 4.0 941 934 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::xor_hl(bitblock128_t arg1) 942 935 { … … 944 937 } 945 938 946 //The total number of operations is 4 939 //The total number of operations is 4.0 947 940 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::xor_hl(bitblock128_t arg1) 948 941 { … … 950 943 } 951 944 952 //The total number of operations is 3 945 //The total number of operations is 3.0 953 946 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::xor_hl(bitblock128_t arg1) 954 947 { … … 956 949 } 957 950 958 //The total number of operations is 3 951 //The total number of operations is 3.0 959 952 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::xor_hl(bitblock128_t arg1) 960 953 { … … 962 955 } 963 956 964 //The total number of operations is 3 957 //The total number of operations is 3.0 965 958 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::xor_hl(bitblock128_t arg1) 966 959 { … … 968 961 } 969 962 970 //The total number of operations is 3 963 //The total number of operations is 3.0 971 964 template <> IDISA_ALWAYS_INLINE bitblock128_t 
simd128<64>::xor_hl(bitblock128_t arg1) 972 965 { … … 974 967 } 975 968 976 //The total number of operations is 7969 //The total number of operations is 5.16666666667 977 970 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::xor_hl(bitblock128_t arg1) 978 971 { … … 986 979 } 987 980 988 //The total number of operations is 3 981 //The total number of operations is 3.0 989 982 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::popcount(bitblock128_t arg1) 990 983 { … … 992 985 } 993 986 994 //The total number of operations is 7 987 //The total number of operations is 7.0 995 988 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::popcount(bitblock128_t arg1) 996 989 { … … 998 991 } 999 992 1000 //The total number of operations is 1 993 //The total number of operations is 1.0 1001 994 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::popcount(bitblock128_t arg1) 1002 995 { … … 1004 997 } 1005 998 1006 //The total number of operations is 4 999 //The total number of operations is 4.0 1007 1000 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::popcount(bitblock128_t arg1) 1008 1001 { … … 1010 1003 } 1011 1004 1012 //The total number of operations is 7 1005 //The total number of operations is 7.0 1013 1006 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::popcount(bitblock128_t arg1) 1014 1007 { … … 1016 1009 } 1017 1010 1018 //The total number of operations is 10 1011 //The total number of operations is 10.0 1019 1012 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::popcount(bitblock128_t arg1) 1020 1013 { … … 1022 1015 } 1023 1016 1024 //The total number of operations is 1 71017 //The total number of operations is 15.1666666667 1025 1018 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::popcount(bitblock128_t arg1) 1026 1019 { … … 1029 1022 } 1030 1023 1031 //The total number of operations is 71024 //The total number of operations is 4.33333333333 1032 1025 template <> IDISA_ALWAYS_INLINE bitblock128_t 
simd128<2>::neg(bitblock128_t arg1) 1033 1026 { … … 1035 1028 } 1036 1029 1037 //The total number of operations is 4 1030 //The total number of operations is 4.0 1038 1031 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::neg(bitblock128_t arg1) 1039 1032 { … … 1041 1034 } 1042 1035 1043 //The total number of operations is 1 1036 //The total number of operations is 1.0 1044 1037 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::neg(bitblock128_t arg1) 1045 1038 { … … 1047 1040 } 1048 1041 1049 //The total number of operations is 1 1042 //The total number of operations is 1.0 1050 1043 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::neg(bitblock128_t arg1) 1051 1044 { … … 1053 1046 } 1054 1047 1055 //The total number of operations is 1 1048 //The total number of operations is 1.0 1056 1049 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::neg(bitblock128_t arg1) 1057 1050 { … … 1059 1052 } 1060 1053 1061 //The total number of operations is 1 1054 //The total number of operations is 1.0 1062 1055 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::neg(bitblock128_t arg1) 1063 1056 { … … 1065 1058 } 1066 1059 1067 //The total number of operations is 121060 //The total number of operations is 9.33333333333 1068 1061 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::neg(bitblock128_t arg1) 1069 1062 { … … 1071 1064 } 1072 1065 1073 //The total number of operations is 21066 //The total number of operations is 1.5 1074 1067 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::slli(bitblock128_t arg1) 1075 1068 { … … 1077 1070 } 1078 1071 1079 //The total number of operations is 21072 //The total number of operations is 1.5 1080 1073 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::slli(bitblock128_t arg1) 1081 1074 { … … 1083 1076 } 1084 1077 1085 //The total number of operations is 11078 //The total number of operations is 0.5 1086 1079 template <> template 
<uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::slli(bitblock128_t arg1) 1087 1080 { … … 1089 1082 } 1090 1083 1091 //The total number of operations is 11084 //The total number of operations is 0.5 1092 1085 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::slli(bitblock128_t arg1) 1093 1086 { … … 1095 1088 } 1096 1089 1097 //The total number of operations is 11090 //The total number of operations is 0.5 1098 1091 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::slli(bitblock128_t arg1) 1099 1092 { … … 1101 1094 } 1102 1095 1103 //The total number of operations is 11096 //The total number of operations is 0.5 1104 1097 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::slli(bitblock128_t arg1) 1105 1098 { … … 1107 1100 } 1108 1101 1109 //The total number of operations is 51102 //The total number of operations is 2.33333333333 1110 1103 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::slli(bitblock128_t arg1) 1111 1104 { … … 1113 1106 } 1114 1107 1115 //The total number of operations is 1 1108 //The total number of operations is 1.0 1116 1109 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::ifh(bitblock128_t arg1, bitblock128_t arg2, bitblock128_t arg3) 1117 1110 { … … 1119 1112 } 1120 1113 1121 //The total number of operations is 4 1114 //The total number of operations is 4.0 1122 1115 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::ifh(bitblock128_t arg1, bitblock128_t arg2, bitblock128_t arg3) 1123 1116 { … … 1125 1118 } 1126 1119 1127 //The total number of operations is 71120 //The total number of operations is 6.0 1128 1121 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::ifh(bitblock128_t arg1, bitblock128_t arg2, bitblock128_t arg3) 1129 1122 { … … 1131 1124 } 1132 1125 1133 //The total number of operations is 2 1126 //The total number of operations is 2.0 1134 1127 template <> 
IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::ifh(bitblock128_t arg1, bitblock128_t arg2, bitblock128_t arg3) 1135 1128 { … … 1137 1130 } 1138 1131 1139 //The total number of operations is 2 1132 //The total number of operations is 2.0 1140 1133 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::ifh(bitblock128_t arg1, bitblock128_t arg2, bitblock128_t arg3) 1141 1134 { … … 1143 1136 } 1144 1137 1145 //The total number of operations is 2 1138 //The total number of operations is 2.0 1146 1139 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::ifh(bitblock128_t arg1, bitblock128_t arg2, bitblock128_t arg3) 1147 1140 { … … 1149 1142 } 1150 1143 1151 //The total number of operations is 4 1144 //The total number of operations is 4.0 1152 1145 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::ifh(bitblock128_t arg1, bitblock128_t arg2, bitblock128_t arg3) 1153 1146 { … … 1155 1148 } 1156 1149 1157 //The total number of operations is 101150 //The total number of operations is 8.16666666667 1158 1151 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::ifh(bitblock128_t arg1, bitblock128_t arg2, bitblock128_t arg3) 1159 1152 { … … 1161 1154 } 1162 1155 1163 //The total number of operations is 1 1156 //The total number of operations is 1.0 1164 1157 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::sub(bitblock128_t arg1, bitblock128_t arg2) 1165 1158 { … … 1167 1160 } 1168 1161 1169 //The total number of operations is 101162 //The total number of operations is 7.33333333333 1170 1163 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::sub(bitblock128_t arg1, bitblock128_t arg2) 1171 1164 { 1172 return simd128<1>::ifh(simd128<(4)>::himask(), simd128<(4)>::sub(arg1, simd_and(simd128<(4)>::himask(), arg2)), simd128<(4)>::sub(arg1, arg2)); 1173 } 1174 1175 //The total number of operations is 4 1165 bitblock128_t tmp = simd_xor(arg1, arg2); 1166 return simd128<1>::ifh(simd128<2>::himask(), simd_xor(tmp, 
simd128<128>::slli<1>(simd_and(simd_not(arg1), arg2))), tmp); 1167 } 1168 1169 //The total number of operations is 4.0 1176 1170 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::sub(bitblock128_t arg1, bitblock128_t arg2) 1177 1171 { … … 1179 1173 } 1180 1174 1181 //The total number of operations is 1 1175 //The total number of operations is 1.0 1182 1176 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::sub(bitblock128_t arg1, bitblock128_t arg2) 1183 1177 { … … 1185 1179 } 1186 1180 1187 //The total number of operations is 1 1181 //The total number of operations is 1.0 1188 1182 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::sub(bitblock128_t arg1, bitblock128_t arg2) 1189 1183 { … … 1191 1185 } 1192 1186 1193 //The total number of operations is 1 1187 //The total number of operations is 1.0 1194 1188 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::sub(bitblock128_t arg1, bitblock128_t arg2) 1195 1189 { … … 1197 1191 } 1198 1192 1199 //The total number of operations is 1 1193 //The total number of operations is 1.0 1200 1194 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::sub(bitblock128_t arg1, bitblock128_t arg2) 1201 1195 { … … 1203 1197 } 1204 1198 1205 //The total number of operations is 121199 //The total number of operations is 9.33333333333 1206 1200 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::sub(bitblock128_t arg1, bitblock128_t arg2) 1207 1201 { … … 1212 1206 } 1213 1207 1214 //The total number of operations is 3 1208 //The total number of operations is 3.0 1215 1209 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::add_hl(bitblock128_t arg1) 1216 1210 { … … 1218 1212 } 1219 1213 1220 //The total number of operations is 4 1214 //The total number of operations is 4.0 1221 1215 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::add_hl(bitblock128_t arg1) 1222 1216 { … … 1224 1218 } 1225 1219 1226 //The total number of operations is 3 1220 //The total number of operations is 
3.0 1227 1221 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::add_hl(bitblock128_t arg1) 1228 1222 { … … 1230 1224 } 1231 1225 1232 //The total number of operations is 3 1226 //The total number of operations is 3.0 1233 1227 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::add_hl(bitblock128_t arg1) 1234 1228 { … … 1236 1230 } 1237 1231 1238 //The total number of operations is 3 1232 //The total number of operations is 3.0 1239 1233 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::add_hl(bitblock128_t arg1) 1240 1234 { … … 1242 1236 } 1243 1237 1244 //The total number of operations is 3 1238 //The total number of operations is 3.0 1245 1239 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::add_hl(bitblock128_t arg1) 1246 1240 { … … 1248 1242 } 1249 1243 1250 //The total number of operations is 1 81244 //The total number of operations is 13.5 1251 1245 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::add_hl(bitblock128_t arg1) 1252 1246 { … … 1302 1296 } 1303 1297 1304 //The total number of operations is 1 1298 //The total number of operations is 1.0 1305 1299 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::min(bitblock128_t arg1, bitblock128_t arg2) 1306 1300 { … … 1308 1302 } 1309 1303 1310 //The total number of operations is 13 1304 //The total number of operations is 13.0 1311 1305 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::min(bitblock128_t arg1, bitblock128_t arg2) 1312 1306 { … … 1318 1312 } 1319 1313 1320 //The total number of operations is 71314 //The total number of operations is 6.0 1321 1315 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::min(bitblock128_t arg1, bitblock128_t arg2) 1322 1316 { … … 1324 1318 } 1325 1319 1326 //The total number of operations is 1 1320 //The total number of operations is 1.0 1327 1321 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::min(bitblock128_t arg1, bitblock128_t arg2) 1328 1322 { … … 1330 1324 } 1331 1325 1332 //The total 
number of operations is 1 1326 //The total number of operations is 1.0 1333 1327 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::min(bitblock128_t arg1, bitblock128_t arg2) 1334 1328 { … … 1336 1330 } 1337 1331 1338 //The total number of operations is 1 1332 //The total number of operations is 1.0 1339 1333 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::min(bitblock128_t arg1, bitblock128_t arg2) 1340 1334 { … … 1342 1336 } 1343 1337 1344 //The total number of operations is 1 21338 //The total number of operations is 11.5 1345 1339 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::min(bitblock128_t arg1, bitblock128_t arg2) 1346 1340 { 1347 bitblock128_t hiAns = simd128<(32)>::min(arg1, arg2); 1348 bitblock128_t loAns = simd128<(32)>::umin(arg1, arg2); 1349 bitblock128_t eqMask1 = simd128<64>::srli<(32)>(simd128<(32)>::eq(hiAns, arg1)); 1350 bitblock128_t eqMask2 = simd128<64>::srli<(32)>(simd128<(32)>::eq(hiAns, arg2)); 1351 return simd128<1>::ifh(simd128<64>::himask(), hiAns, simd128<1>::ifh(eqMask1, simd128<1>::ifh(eqMask2, loAns, arg1), arg2)); 1352 } 1353 1354 //The total number of operations is 46 1341 return simd128<1>::ifh(simd128<64>::gt(arg1, arg2), arg2, arg1); 1342 } 1343 1344 //The total number of operations is 40.6666666667 1355 1345 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::min(bitblock128_t arg1, bitblock128_t arg2) 1356 1346 { 1357 bitblock128_t hiAns = simd128<(64)>::min(arg1, arg2); 1358 bitblock128_t loAns = simd128<(64)>::umin(arg1, arg2); 1359 bitblock128_t eqMask1 = simd128<128>::srli<(64)>(simd128<(64)>::eq(hiAns, arg1)); 1360 bitblock128_t eqMask2 = simd128<128>::srli<(64)>(simd128<(64)>::eq(hiAns, arg2)); 1361 return simd128<1>::ifh(simd128<128>::himask(), hiAns, simd128<1>::ifh(eqMask1, simd128<1>::ifh(eqMask2, loAns, arg1), arg2)); 1347 return simd128<1>::ifh(simd128<128>::gt(arg1, arg2), arg2, arg1); 1362 1348 } 1363 1349 … … 1404 1390 } 1405 1391 1406 //The total number of operations is 
1 1392 //The total number of operations is 1.0 1407 1393 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umin(bitblock128_t arg1, bitblock128_t arg2) 1408 1394 { … … 1410 1396 } 1411 1397 1412 //The total number of operations is 12 1398 //The total number of operations is 12.0 1413 1399 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::umin(bitblock128_t arg1, bitblock128_t arg2) 1414 1400 { … … 1419 1405 } 1420 1406 1421 //The total number of operations is 101407 //The total number of operations is 9.0 1422 1408 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::umin(bitblock128_t arg1, bitblock128_t arg2) 1423 1409 { … … 1426 1412 } 1427 1413 1428 //The total number of operations is 4 1414 //The total number of operations is 4.0 1429 1415 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::umin(bitblock128_t arg1, bitblock128_t arg2) 1430 1416 { … … 1433 1419 } 1434 1420 1435 //The total number of operations is 4 1421 //The total number of operations is 4.0 1436 1422 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::umin(bitblock128_t arg1, bitblock128_t arg2) 1437 1423 { … … 1440 1426 } 1441 1427 1442 //The total number of operations is 4 1428 //The total number of operations is 4.0 1443 1429 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::umin(bitblock128_t arg1, bitblock128_t arg2) 1444 1430 { … … 1447 1433 } 1448 1434 1449 //The total number of operations is 11 1435 //The total number of operations is 11.0 1450 1436 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::umin(bitblock128_t arg1, bitblock128_t arg2) 1451 1437 { … … 1456 1442 } 1457 1443 1458 //The total number of operations is 341444 //The total number of operations is 29.3333333333 1459 1445 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::umin(bitblock128_t arg1, bitblock128_t arg2) 1460 1446 { … … 1465 1451 } 1466 1452 1467 //The total number of operations is 81453 //The total number of operations is 5.33333333333 1468 1454 
template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::abs(bitblock128_t arg1) 1469 1455 { … … 1471 1457 } 1472 1458 1473 //The total number of operations is 1 11459 //The total number of operations is 10.0 1474 1460 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::abs(bitblock128_t arg1) 1475 1461 { … … 1478 1464 } 1479 1465 1480 //The total number of operations is 1 1466 //The total number of operations is 1.0 1481 1467 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::abs(bitblock128_t arg1) 1482 1468 { … … 1484 1470 } 1485 1471 1486 //The total number of operations is 1 1472 //The total number of operations is 1.0 1487 1473 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::abs(bitblock128_t arg1) 1488 1474 { … … 1490 1476 } 1491 1477 1492 //The total number of operations is 1 1478 //The total number of operations is 1.0 1493 1479 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::abs(bitblock128_t arg1) 1494 1480 { … … 1496 1482 } 1497 1483 1498 //The total number of operations is 91484 //The total number of operations is 8.5 1499 1485 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::abs(bitblock128_t arg1) 1500 1486 { … … 1503 1489 } 1504 1490 1505 //The total number of operations is 401491 //The total number of operations is 31.8333333333 1506 1492 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::abs(bitblock128_t arg1) 1507 1493 { … … 1510 1496 } 1511 1497 1512 //The total number of operations is 2 1498 //The total number of operations is 2.0 1513 1499 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::eq(bitblock128_t arg1, bitblock128_t arg2) 1514 1500 { … … 1516 1502 } 1517 1503 1518 //The total number of operations is 81504 //The total number of operations is 7.5 1519 1505 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::eq(bitblock128_t arg1, bitblock128_t arg2) 1520 1506 { … … 1525 1511 } 1526 1512 1527 //The total number of operations is 9 1513 //The total number of operations 
is 9.0 1528 1514 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::eq(bitblock128_t arg1, bitblock128_t arg2) 1529 1515 { … … 1531 1517 } 1532 1518 1533 //The total number of operations is 1 1519 //The total number of operations is 1.0 1534 1520 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::eq(bitblock128_t arg1, bitblock128_t arg2) 1535 1521 { … … 1537 1523 } 1538 1524 1539 //The total number of operations is 1 1525 //The total number of operations is 1.0 1540 1526 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::eq(bitblock128_t arg1, bitblock128_t arg2) 1541 1527 { … … 1543 1529 } 1544 1530 1545 //The total number of operations is 1 1531 //The total number of operations is 1.0 1546 1532 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::eq(bitblock128_t arg1, bitblock128_t arg2) 1547 1533 { … … 1549 1535 } 1550 1536 1551 //The total number of operations is 51537 //The total number of operations is 4.5 1552 1538 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::eq(bitblock128_t arg1, bitblock128_t arg2) 1553 1539 { … … 1558 1544 } 1559 1545 1560 //The total number of operations is 1 71546 //The total number of operations is 12.0 1561 1547 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::eq(bitblock128_t arg1, bitblock128_t arg2) 1562 1548 { … … 1567 1553 } 1568 1554 1569 //The total number of operations is 4 1555 //The total number of operations is 4.0 1570 1556 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::srai(bitblock128_t arg1) 1571 1557 { … … 1573 1559 } 1574 1560 1575 //The total number of operations is 8 1561 //The total number of operations is 8.0 1576 1562 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::srai(bitblock128_t arg1) 1577 1563 { … … 1580 1566 } 1581 1567 1582 //The total number of operations is 1 1568 //The total number of operations is 1.0 1583 1569 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t 
simd128<8>::srai(bitblock128_t arg1) 1584 1570 { … … 1586 1572 } 1587 1573 1588 //The total number of operations is 1 1574 //The total number of operations is 1.0 1589 1575 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::srai(bitblock128_t arg1) 1590 1576 { … … 1592 1578 } 1593 1579 1594 //The total number of operations is 1 1580 //The total number of operations is 1.0 1595 1581 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::srai(bitblock128_t arg1) 1596 1582 { … … 1598 1584 } 1599 1585 1600 //The total number of operations is 1 1586 //The total number of operations is 1.0 1601 1587 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srai(bitblock128_t arg1) 1602 1588 { … … 1604 1590 } 1605 1591 1606 //The total number of operations is 91592 //The total number of operations is 6.66666666667 1607 1593 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srai(bitblock128_t arg1) 1608 1594 { … … 1652 1638 } 1653 1639 1654 //The total number of operations is 1 1640 //The total number of operations is 1.0 1655 1641 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::add(bitblock128_t arg1, bitblock128_t arg2) 1656 1642 { … … 1658 1644 } 1659 1645 1660 //The total number of operations is 91646 //The total number of operations is 6.33333333333 1661 1647 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::add(bitblock128_t arg1, bitblock128_t arg2) 1662 1648 { … … 1665 1651 } 1666 1652 1667 //The total number of operations is 4 1653 //The total number of operations is 4.0 1668 1654 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::add(bitblock128_t arg1, bitblock128_t arg2) 1669 1655 { … … 1671 1657 } 1672 1658 1673 //The total number of operations is 1 1659 //The total number of operations is 1.0 1674 1660 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::add(bitblock128_t arg1, bitblock128_t arg2) 1675 1661 { … … 
1677 1663 } 1678 1664 1679 //The total number of operations is 1 1665 //The total number of operations is 1.0 1680 1666 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::add(bitblock128_t arg1, bitblock128_t arg2) 1681 1667 { … … 1683 1669 } 1684 1670 1685 //The total number of operations is 1 1671 //The total number of operations is 1.0 1686 1672 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::add(bitblock128_t arg1, bitblock128_t arg2) 1687 1673 { … … 1689 1675 } 1690 1676 1691 //The total number of operations is 1 1677 //The total number of operations is 1.0 1692 1678 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::add(bitblock128_t arg1, bitblock128_t arg2) 1693 1679 { … … 1695 1681 } 1696 1682 1697 //The total number of operations is 121683 //The total number of operations is 9.33333333333 1698 1684 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::add(bitblock128_t arg1, bitblock128_t arg2) 1699 1685 { … … 1704 1690 } 1705 1691 1706 //The total number of operations is 1 1692 //The total number of operations is 1.0 1707 1693 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::umax(bitblock128_t arg1, bitblock128_t arg2) 1708 1694 { … … 1710 1696 } 1711 1697 1712 //The total number of operations is 12 1698 //The total number of operations is 12.0 1713 1699 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::umax(bitblock128_t arg1, bitblock128_t arg2) 1714 1700 { … … 1719 1705 } 1720 1706 1721 //The total number of operations is 101707 //The total number of operations is 9.0 1722 1708 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::umax(bitblock128_t arg1, bitblock128_t arg2) 1723 1709 { … … 1726 1712 } 1727 1713 1728 //The total number of operations is 4 1714 //The total number of operations is 4.0 1729 1715 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::umax(bitblock128_t arg1, bitblock128_t arg2) 1730 1716 { … … 1733 1719 } 1734 1720 1735 //The total number of operations is 4 1721 
//The total number of operations is 4.0 1736 1722 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::umax(bitblock128_t arg1, bitblock128_t arg2) 1737 1723 { … … 1740 1726 } 1741 1727 1742 //The total number of operations is 4 1728 //The total number of operations is 4.0 1743 1729 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::umax(bitblock128_t arg1, bitblock128_t arg2) 1744 1730 { … … 1747 1733 } 1748 1734 1749 //The total number of operations is 11 1735 //The total number of operations is 11.0 1750 1736 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::umax(bitblock128_t arg1, bitblock128_t arg2) 1751 1737 { … … 1756 1742 } 1757 1743 1758 //The total number of operations is 341744 //The total number of operations is 29.3333333333 1759 1745 template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::umax(bitblock128_t arg1, bitblock128_t arg2) 1760 1746 { … … 1765 1751 } 1766 1752 1767 //The total number of operations is 771753 //The total number of operations is 55.0 1768 1754 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::umin_hl(bitblock128_t arg1, bitblock128_t arg2) 1769 1755 { … … 1771 1757 } 1772 1758 1773 //The total number of operations is 641759 //The total number of operations is 49.3333333333 1774 1760 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<4>::umin_hl(bitblock128_t arg1, bitblock128_t arg2) 1775 1761 { … … 1777 1763 } 1778 1764 1779 //The total number of operations is 381765 //The total number of operations is 29.6666666667 1780 1766 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<8>::umin_hl(bitblock128_t arg1, bitblock128_t arg2) 1781 1767 { … … 1783 1769 } 1784 1770 1785 //The total number of operations is 6 1771 //The total number of operations is 6.0 1786 1772 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<16>::umin_hl(bitblock128_t arg1, bitblock128_t arg2) 1787 1773 { … … 1789 1775 } 1790 1776 1791 //The total number of operations is 6 1777 //The total number of operations 
is 6.0 1792 1778 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<32>::umin_hl(bitblock128_t arg1, bitblock128_t arg2) 1793 1779 { … … 1795 1781 } 1796 1782 1797 //The total number of operations is 6 1783 //The total number of operations is 6.0 1798 1784 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<64>::umin_hl(bitblock128_t arg1, bitblock128_t arg2) 1799 1785 { … … 1801 1787 } 1802 1788 1803 //The total number of operations is 231789 //The total number of operations is 18.5 1804 1790 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<128>::umin_hl(bitblock128_t arg1, bitblock128_t arg2) 1805 1791 { … … 1807 1793 } 1808 1794 1809 //The total number of operations is 771795 //The total number of operations is 55.0 1810 1796 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::add_hl(bitblock128_t arg1, bitblock128_t arg2) 1811 1797 { … … 1813 1799 } 1814 1800 1815 //The total number of operations is 611801 //The total number of operations is 43.6666666667 1816 1802 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<4>::add_hl(bitblock128_t arg1, bitblock128_t arg2) 1817 1803 { … … 1819 1805 } 1820 1806 1821 //The total number of operations is 321807 //The total number of operations is 24.6666666667 1822 1808 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<8>::add_hl(bitblock128_t arg1, bitblock128_t arg2) 1823 1809 { … … 1825 1811 } 1826 1812 1827 //The total number of operations is 3 1813 //The total number of operations is 3.0 1828 1814 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<16>::add_hl(bitblock128_t arg1, bitblock128_t arg2) 1829 1815 { … … 1831 1817 } 1832 1818 1833 //The total number of operations is 3 1819 //The total number of operations is 3.0 1834 1820 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<32>::add_hl(bitblock128_t arg1, bitblock128_t arg2) 1835 1821 { … … 1837 1823 } 1838 1824 1839 //The total number of operations is 3 1825 //The total number of operations is 3.0 1840 1826 template <> 
IDISA_ALWAYS_INLINE bitblock128_t hsimd128<64>::add_hl(bitblock128_t arg1, bitblock128_t arg2) 1841 1827 { … … 1843 1829 } 1844 1830 1845 //The total number of operations is 131831 //The total number of operations is 8.5 1846 1832 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<128>::add_hl(bitblock128_t arg1, bitblock128_t arg2) 1847 1833 { … … 1849 1835 } 1850 1836 1851 //The total number of operations is 1041837 //The total number of operations is 87.0 1852 1838 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::packss(bitblock128_t arg1, bitblock128_t arg2) 1853 1839 { … … 1857 1843 } 1858 1844 1859 //The total number of operations is 561845 //The total number of operations is 44.6666666667 1860 1846 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<4>::packss(bitblock128_t arg1, bitblock128_t arg2) 1861 1847 { … … 1865 1851 } 1866 1852 1867 //The total number of operations is 231853 //The total number of operations is 19.3333333333 1868 1854 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<8>::packss(bitblock128_t arg1, bitblock128_t arg2) 1869 1855 { … … 1873 1859 } 1874 1860 1875 //The total number of operations is 11 1861 //The total number of operations is 11.0 1876 1862 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<16>::packss(bitblock128_t arg1, bitblock128_t arg2) 1877 1863 { … … 1881 1867 } 1882 1868 1883 //The total number of operations is 11 1869 //The total number of operations is 11.0 1884 1870 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<32>::packss(bitblock128_t arg1, bitblock128_t arg2) 1885 1871 { … … 1889 1875 } 1890 1876 1891 //The total number of operations is 511877 //The total number of operations is 49.0 1892 1878 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<64>::packss(bitblock128_t arg1, bitblock128_t arg2) 1893 1879 { … … 1897 1883 } 1898 1884 1899 //The total number of operations is 2081885 //The total number of operations is 170.166666667 1900 1886 template <> IDISA_ALWAYS_INLINE 
bitblock128_t hsimd128<128>::packss(bitblock128_t arg1, bitblock128_t arg2) 1901 1887 { … … 1905 1891 } 1906 1892 1907 //The total number of operations is 4 61893 //The total number of operations is 45.0 1908 1894 template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<4>::signmask(bitblock128_t arg1) 1909 1895 { … … 1913 1899 } 1914 1900 1915 //The total number of operations is 18 1901 //The total number of operations is 18.0 1916 1902 template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<8>::signmask(bitblock128_t arg1) 1917 1903 { … … 1921 1907 } 1922 1908 1923 //The total number of operations is 8 1909 //The total number of operations is 8.0 1924 1910 template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<16>::signmask(bitblock128_t arg1) 1925 1911 { … … 1927 1913 } 1928 1914 1929 //The total number of operations is 4 1915 //The total number of operations is 4.0 1930 1916 template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<32>::signmask(bitblock128_t arg1) 1931 1917 { … … 1933 1919 } 1934 1920 1935 //The total number of operations is 2 1921 //The total number of operations is 2.0 1936 1922 template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<64>::signmask(bitblock128_t arg1) 1937 1923 { … … 1939 1925 } 1940 1926 1941 //The total number of operations is 81927 //The total number of operations is 6.16666666667 1942 1928 template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<128>::signmask(bitblock128_t arg1) 1943 1929 { … … 1945 1931 } 1946 1932 1947 //The total number of operations is 371933 //The total number of operations is 26.0 1948 1934 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::packl(bitblock128_t arg1, bitblock128_t arg2) 1949 1935 { … … 1951 1937 } 1952 1938 1953 //The total number of operations is 251939 //The total number of operations is 17.6666666667 1954 1940 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<4>::packl(bitblock128_t arg1, bitblock128_t arg2) 1955 1941 { … … 1957 1943 } 1958 1944 1959 //The total number of operations is 131945 //The total 
number of operations is 9.33333333333 1960 1946 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<8>::packl(bitblock128_t arg1, bitblock128_t arg2) 1961 1947 { … … 1963 1949 } 1964 1950 1965 //The total number of operations is 1 1951 //The total number of operations is 1.0 1966 1952 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<16>::packl(bitblock128_t arg1, bitblock128_t arg2) 1967 1953 { … … 1969 1955 } 1970 1956 1971 //The total number of operations is 1 1957 //The total number of operations is 1.0 1972 1958 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<32>::packl(bitblock128_t arg1, bitblock128_t arg2) 1973 1959 { … … 1975 1961 } 1976 1962 1977 //The total number of operations is 1 1963 //The total number of operations is 1.0 1978 1964 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<64>::packl(bitblock128_t arg1, bitblock128_t arg2) 1979 1965 { … … 1981 1967 } 1982 1968 1983 //The total number of operations is 61969 //The total number of operations is 3.33333333333 1984 1970 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<128>::packl(bitblock128_t arg1, bitblock128_t arg2) 1985 1971 { … … 1987 1973 } 1988 1974 1989 //The total number of operations is 391975 //The total number of operations is 28.0 1990 1976 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::packh(bitblock128_t arg1, bitblock128_t arg2) 1991 1977 { … … 1993 1979 } 1994 1980 1995 //The total number of operations is 271981 //The total number of operations is 19.6666666667 1996 1982 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<4>::packh(bitblock128_t arg1, bitblock128_t arg2) 1997 1983 { … … 1999 1985 } 2000 1986 2001 //The total number of operations is 1 51987 //The total number of operations is 11.3333333333 2002 1988 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<8>::packh(bitblock128_t arg1, bitblock128_t arg2) 2003 1989 { … … 2005 1991 } 2006 1992 2007 //The total number of operations is 1 1993 //The total number of operations is 
1.0 2008 1994 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<16>::packh(bitblock128_t arg1, bitblock128_t arg2) 2009 1995 { … … 2011 1997 } 2012 1998 2013 //The total number of operations is 1 1999 //The total number of operations is 1.0 2014 2000 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<32>::packh(bitblock128_t arg1, bitblock128_t arg2) 2015 2001 { … … 2017 2003 } 2018 2004 2019 //The total number of operations is 1 2005 //The total number of operations is 1.0 2020 2006 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<64>::packh(bitblock128_t arg1, bitblock128_t arg2) 2021 2007 { … … 2023 2009 } 2024 2010 2025 //The total number of operations is 62011 //The total number of operations is 4.16666666667 2026 2012 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<128>::packh(bitblock128_t arg1, bitblock128_t arg2) 2027 2013 { … … 2029 2015 } 2030 2016 2031 //The total number of operations is 772017 //The total number of operations is 55.0 2032 2018 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::min_hl(bitblock128_t arg1, bitblock128_t arg2) 2033 2019 { … … 2035 2021 } 2036 2022 2037 //The total number of operations is 652023 //The total number of operations is 50.3333333333 2038 2024 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<4>::min_hl(bitblock128_t arg1, bitblock128_t arg2) 2039 2025 { … … 2041 2027 } 2042 2028 2043 //The total number of operations is 352029 //The total number of operations is 26.6666666667 2044 2030 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<8>::min_hl(bitblock128_t arg1, bitblock128_t arg2) 2045 2031 { … … 2047 2033 } 2048 2034 2049 //The total number of operations is 3 2035 //The total number of operations is 3.0 2050 2036 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<16>::min_hl(bitblock128_t arg1, bitblock128_t arg2) 2051 2037 { … … 2053 2039 } 2054 2040 2055 //The total number of operations is 3 2041 //The total number of operations is 3.0 2056 2042 template <> 
IDISA_ALWAYS_INLINE bitblock128_t hsimd128<32>::min_hl(bitblock128_t arg1, bitblock128_t arg2) 2057 2043 { … … 2059 2045 } 2060 2046 2061 //The total number of operations is 3 2047 //The total number of operations is 3.0 2062 2048 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<64>::min_hl(bitblock128_t arg1, bitblock128_t arg2) 2063 2049 { … … 2065 2051 } 2066 2052 2067 //The total number of operations is 242053 //The total number of operations is 19.0 2068 2054 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<128>::min_hl(bitblock128_t arg1, bitblock128_t arg2) 2069 2055 { … … 2071 2057 } 2072 2058 2073 //The total number of operations is 652059 //The total number of operations is 53.0 2074 2060 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::packus(bitblock128_t arg1, bitblock128_t arg2) 2075 2061 { … … 2081 2067 } 2082 2068 2083 //The total number of operations is 612069 //The total number of operations is 51.6666666667 2084 2070 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<4>::packus(bitblock128_t arg1, bitblock128_t arg2) 2085 2071 { … … 2091 2077 } 2092 2078 2093 //The total number of operations is 232079 //The total number of operations is 19.3333333333 2094 2080 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<8>::packus(bitblock128_t arg1, bitblock128_t arg2) 2095 2081 { … … 2101 2087 } 2102 2088 2103 //The total number of operations is 6 2089 //The total number of operations is 6.0 2104 2090 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<16>::packus(bitblock128_t arg1, bitblock128_t arg2) 2105 2091 { … … 2108 2094 } 2109 2095 2110 //The total number of operations is 6 2096 //The total number of operations is 6.0 2111 2097 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<32>::packus(bitblock128_t arg1, bitblock128_t arg2) 2112 2098 { … … 2115 2101 } 2116 2102 2117 //The total number of operations is 6 2103 //The total number of operations is 6.0 2118 2104 template <> IDISA_ALWAYS_INLINE bitblock128_t 
hsimd128<64>::packus(bitblock128_t arg1, bitblock128_t arg2) 2119 2105 { … … 2122 2108 } 2123 2109 2124 //The total number of operations is 2 82110 //The total number of operations is 23.0 2125 2111 template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<128>::packus(bitblock128_t arg1, bitblock128_t arg2) 2126 2112 { … … 2129 2115 } 2130 2116 2131 //The total number of operations is 1 72117 //The total number of operations is 15.5 2132 2118 template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<1>::mergel(bitblock128_t arg1, bitblock128_t arg2) 2133 2119 { … … 2135 2121 } 2136 2122 2137 //The total number of operations is 1 12123 //The total number of operations is 10.0 2138 2124 template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<2>::mergel(bitblock128_t arg1, bitblock128_t arg2) 2139 2125 { … … 2141 2127 } 2142 2128 2143 //The total number of operations is 52129 //The total number of operations is 4.5 2144 2130 template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<4>::mergel(bitblock128_t arg1, bitblock128_t arg2) 2145 2131 { … … 2147 2133 } 2148 2134 2149 //The total number of operations is 1 2135 //The total number of operations is 1.0 2150 2136 template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<8>::mergel(bitblock128_t arg1, bitblock128_t arg2) 2151 2137 { … … 2153 2139 } 2154 2140 2155 //The total number of operations is 1 2141 //The total number of operations is 1.0 2156 2142 template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<16>::mergel(bitblock128_t arg1, bitblock128_t arg2) 2157 2143 { … … 2159 2145 } 2160 2146 2161 //The total number of operations is 1 2147 //The total number of operations is 1.0 2162 2148 template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<32>::mergel(bitblock128_t arg1, bitblock128_t arg2) 2163 2149 { … … 2165 2151 } 2166 2152 2167 //The total number of operations is 3 2153 //The total number of operations is 3.0 2168 2154 template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<64>::mergel(bitblock128_t arg1, 
bitblock128_t arg2) 2169 2155 { … … 2171 2157 } 2172 2158 2173 //The total number of operations is 1 72159 //The total number of operations is 15.5 2174 2160 template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<1>::mergeh(bitblock128_t arg1, bitblock128_t arg2) 2175 2161 { … … 2177 2163 } 2178 2164 2179 //The total number of operations is 1 12165 //The total number of operations is 10.0 2180 2166 template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<2>::mergeh(bitblock128_t arg1, bitblock128_t arg2) 2181 2167 { … … 2183 2169 } 2184 2170 2185 //The total number of operations is 52171 //The total number of operations is 4.5 2186 2172 template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<4>::mergeh(bitblock128_t arg1, bitblock128_t arg2) 2187 2173 { … … 2189 2175 } 2190 2176 2191 //The total number of operations is 1 2177 //The total number of operations is 1.0 2192 2178 template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<8>::mergeh(bitblock128_t arg1, bitblock128_t arg2) 2193 2179 { … … 2195 2181 } 2196 2182 2197 //The total number of operations is 1 2183 //The total number of operations is 1.0 2198 2184 template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<16>::mergeh(bitblock128_t arg1, bitblock128_t arg2) 2199 2185 { … … 2201 2187 } 2202 2188 2203 //The total number of operations is 1 2189 //The total number of operations is 1.0 2204 2190 template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<32>::mergeh(bitblock128_t arg1, bitblock128_t arg2) 2205 2191 { … … 2207 2193 } 2208 2194 2209 //The total number of operations is 3 2195 //The total number of operations is 3.0 2210 2196 template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<64>::mergeh(bitblock128_t arg1, bitblock128_t arg2) 2211 2197 { … … 2213 2199 } 2214 2200 2215 //The total number of operations is 1 42201 //The total number of operations is 13.0 2216 2202 template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<1>::zeroextendh(bitblock128_t arg1) 2217 2203 { … … 2219 2205 } 2220 2206 2221 //The 
total number of operations is 82207 //The total number of operations is 7.5 2222 2208 template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<2>::zeroextendh(bitblock128_t arg1) 2223 2209 { … … 2225 2211 } 2226 2212 2227 //The total number of operations is 3 2213 //The total number of operations is 3.0 2228 2214 template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<4>::zeroextendh(bitblock128_t arg1) 2229 2215 { … … 2231 2217 } 2232 2218 2233 //The total number of operations is 3 2219 //The total number of operations is 3.0 2234 2220 template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<8>::zeroextendh(bitblock128_t arg1) 2235 2221 { … …