source: trunk/lib/idisa_cpp/idisa_avx.cpp @ 1556

Last change on this file since 1556 was 1556, checked in by cameron, 8 years ago

bitblock::popcount; clean libgen/idisa_lib out

File size: 173.6 KB
Line 
1#ifndef IDISA_AVX_CPP
2#define IDISA_AVX_CPP
3#include <stdint.h>
4#include "../config.hpp"
5
6#include "immintrin.h"
7
// 256-bit block type used by every operation in this file; an alias for the
// __m256 vector type declared by immintrin.h.
typedef __m256 bitblock256_t;
9template <uint32_t fw>
10class simd256
11{
12public:
13        static IDISA_ALWAYS_INLINE bitblock256_t max(bitblock256_t arg1, bitblock256_t arg2);
14        static IDISA_ALWAYS_INLINE bitblock256_t mult(bitblock256_t arg1, bitblock256_t arg2);
15        static IDISA_ALWAYS_INLINE bitblock256_t gt(bitblock256_t arg1, bitblock256_t arg2);
16        static IDISA_ALWAYS_INLINE bitblock256_t umult(bitblock256_t arg1, bitblock256_t arg2);
17        static IDISA_ALWAYS_INLINE bitblock256_t ult(bitblock256_t arg1, bitblock256_t arg2);
18        template <uint64_t sh> static IDISA_ALWAYS_INLINE bitblock256_t srli(bitblock256_t arg1);
19        static IDISA_ALWAYS_INLINE bitblock256_t ctz(bitblock256_t arg1);
20        static IDISA_ALWAYS_INLINE bitblock256_t eq(bitblock256_t arg1, bitblock256_t arg2);
21        static IDISA_ALWAYS_INLINE bitblock256_t popcount(bitblock256_t arg1);
22        static IDISA_ALWAYS_INLINE bitblock256_t neg(bitblock256_t arg1);
23        static IDISA_ALWAYS_INLINE bitblock256_t himask();
24        template <uint64_t sh> static IDISA_ALWAYS_INLINE bitblock256_t slli(bitblock256_t arg1);
25        static IDISA_ALWAYS_INLINE bitblock256_t ifh(bitblock256_t arg1, bitblock256_t arg2, bitblock256_t arg3);
26        static IDISA_ALWAYS_INLINE bitblock256_t sub(bitblock256_t arg1, bitblock256_t arg2);
27        static IDISA_ALWAYS_INLINE bitblock256_t add_hl(bitblock256_t arg1);
28        static IDISA_ALWAYS_INLINE bitblock256_t umin(bitblock256_t arg1, bitblock256_t arg2);
29        template <uint64_t val> static IDISA_ALWAYS_INLINE bitblock256_t constant();
30        static IDISA_ALWAYS_INLINE bitblock256_t min(bitblock256_t arg1, bitblock256_t arg2);
31        static IDISA_ALWAYS_INLINE bitblock256_t lomask();
32        static IDISA_ALWAYS_INLINE bitblock256_t umax(bitblock256_t arg1, bitblock256_t arg2);
33        static IDISA_ALWAYS_INLINE bitblock256_t abs(bitblock256_t arg1);
34        static IDISA_ALWAYS_INLINE bitblock256_t xor_hl(bitblock256_t arg1);
35        template <uint64_t sh> static IDISA_ALWAYS_INLINE bitblock256_t srai(bitblock256_t arg1);
36        static IDISA_ALWAYS_INLINE bitblock256_t lt(bitblock256_t arg1, bitblock256_t arg2);
37        static IDISA_ALWAYS_INLINE bitblock256_t add(bitblock256_t arg1, bitblock256_t arg2);
38        static IDISA_ALWAYS_INLINE bitblock256_t ugt(bitblock256_t arg1, bitblock256_t arg2);
39};
40
41template <uint32_t fw>
42class hsimd256
43{
44public:
45        static IDISA_ALWAYS_INLINE bitblock256_t umin_hl(bitblock256_t arg1, bitblock256_t arg2);
46        static IDISA_ALWAYS_INLINE bitblock256_t add_hl(bitblock256_t arg1, bitblock256_t arg2);
47        static IDISA_ALWAYS_INLINE bitblock256_t packss(bitblock256_t arg1, bitblock256_t arg2);
48        static IDISA_ALWAYS_INLINE bitblock256_t packh(bitblock256_t arg1, bitblock256_t arg2);
49        static IDISA_ALWAYS_INLINE uint64_t signmask(bitblock256_t arg1);
50        static IDISA_ALWAYS_INLINE bitblock256_t packl(bitblock256_t arg1, bitblock256_t arg2);
51        static IDISA_ALWAYS_INLINE bitblock256_t min_hl(bitblock256_t arg1, bitblock256_t arg2);
52        static IDISA_ALWAYS_INLINE bitblock256_t packus(bitblock256_t arg1, bitblock256_t arg2);
53};
54
55template <uint32_t fw>
56class esimd256
57{
58public:
59        static IDISA_ALWAYS_INLINE bitblock256_t mergel(bitblock256_t arg1, bitblock256_t arg2);
60        static IDISA_ALWAYS_INLINE bitblock256_t mergeh(bitblock256_t arg1, bitblock256_t arg2);
61        static IDISA_ALWAYS_INLINE bitblock256_t zeroextendh(bitblock256_t arg1);
62        static IDISA_ALWAYS_INLINE bitblock256_t zeroextendl(bitblock256_t arg1);
63        static IDISA_ALWAYS_INLINE bitblock256_t signextendh(bitblock256_t arg1);
64        static IDISA_ALWAYS_INLINE bitblock256_t signextendl(bitblock256_t arg1);
65};
66
67template <uint32_t fw>
68class mvmd256
69{
70public:
71        template <uint64_t sh> static IDISA_ALWAYS_INLINE bitblock256_t dsrli(bitblock256_t arg1, bitblock256_t arg2);
72        static IDISA_ALWAYS_INLINE bitblock256_t fill(uint64_t val1);
73        template <uint64_t pos> static IDISA_ALWAYS_INLINE uint64_t extract(bitblock256_t arg1);
74        template <uint64_t pos> static IDISA_ALWAYS_INLINE bitblock256_t splat(bitblock256_t arg1);
75        template <uint64_t sh> static IDISA_ALWAYS_INLINE bitblock256_t slli(bitblock256_t arg1);
76        static IDISA_ALWAYS_INLINE bitblock256_t fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4);
77        template <uint64_t sh> static IDISA_ALWAYS_INLINE bitblock256_t srli(bitblock256_t arg1);
78        static IDISA_ALWAYS_INLINE bitblock256_t fill2(uint64_t val1, uint64_t val2);
79        template <uint64_t sh> static IDISA_ALWAYS_INLINE bitblock256_t dslli(bitblock256_t arg1, bitblock256_t arg2);
80        static IDISA_ALWAYS_INLINE bitblock256_t fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8);
81        static IDISA_ALWAYS_INLINE bitblock256_t fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16);
82};
83
84class bitblock256
85{
86public:
87        static IDISA_ALWAYS_INLINE bool all(bitblock256_t arg1);
88        static IDISA_ALWAYS_INLINE bool any(bitblock256_t arg1);
89        static IDISA_ALWAYS_INLINE uint64_t popcount(bitblock256_t arg1);
90};
91
92//Declaration Part
93IDISA_ALWAYS_INLINE bitblock256_t simd_nor(bitblock256_t arg1, bitblock256_t arg2);
94IDISA_ALWAYS_INLINE bitblock256_t simd_not(bitblock256_t arg1);
95IDISA_ALWAYS_INLINE bitblock256_t simd_andc(bitblock256_t arg1, bitblock256_t arg2);
96IDISA_ALWAYS_INLINE bitblock256_t simd_or(bitblock256_t arg1, bitblock256_t arg2);
97IDISA_ALWAYS_INLINE bitblock256_t simd_and(bitblock256_t arg1, bitblock256_t arg2);
98IDISA_ALWAYS_INLINE bitblock256_t simd_xor(bitblock256_t arg1, bitblock256_t arg2);
99template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::max(bitblock256_t arg1, bitblock256_t arg2);
100template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::max(bitblock256_t arg1, bitblock256_t arg2);
101template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::max(bitblock256_t arg1, bitblock256_t arg2);
102template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::max(bitblock256_t arg1, bitblock256_t arg2);
103template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::max(bitblock256_t arg1, bitblock256_t arg2);
104template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::max(bitblock256_t arg1, bitblock256_t arg2);
105template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::max(bitblock256_t arg1, bitblock256_t arg2);
106template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::max(bitblock256_t arg1, bitblock256_t arg2);
107template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::max(bitblock256_t arg1, bitblock256_t arg2);
108template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::mult(bitblock256_t arg1, bitblock256_t arg2);
109template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::mult(bitblock256_t arg1, bitblock256_t arg2);
110template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::mult(bitblock256_t arg1, bitblock256_t arg2);
111template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::mult(bitblock256_t arg1, bitblock256_t arg2);
112template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::mult(bitblock256_t arg1, bitblock256_t arg2);
113template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::mult(bitblock256_t arg1, bitblock256_t arg2);
114template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::mult(bitblock256_t arg1, bitblock256_t arg2);
115template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::mult(bitblock256_t arg1, bitblock256_t arg2);
116template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::mult(bitblock256_t arg1, bitblock256_t arg2);
117template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::gt(bitblock256_t arg1, bitblock256_t arg2);
118template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::gt(bitblock256_t arg1, bitblock256_t arg2);
119template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::gt(bitblock256_t arg1, bitblock256_t arg2);
120template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::gt(bitblock256_t arg1, bitblock256_t arg2);
121template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::gt(bitblock256_t arg1, bitblock256_t arg2);
122template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::gt(bitblock256_t arg1, bitblock256_t arg2);
123template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::gt(bitblock256_t arg1, bitblock256_t arg2);
124template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::gt(bitblock256_t arg1, bitblock256_t arg2);
125template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::gt(bitblock256_t arg1, bitblock256_t arg2);
126template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::umult(bitblock256_t arg1, bitblock256_t arg2);
127template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::umult(bitblock256_t arg1, bitblock256_t arg2);
128template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::umult(bitblock256_t arg1, bitblock256_t arg2);
129template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::umult(bitblock256_t arg1, bitblock256_t arg2);
130template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::umult(bitblock256_t arg1, bitblock256_t arg2);
131template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::umult(bitblock256_t arg1, bitblock256_t arg2);
132template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::umult(bitblock256_t arg1, bitblock256_t arg2);
133template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::umult(bitblock256_t arg1, bitblock256_t arg2);
134template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::ult(bitblock256_t arg1, bitblock256_t arg2);
135template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::ult(bitblock256_t arg1, bitblock256_t arg2);
136template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::ult(bitblock256_t arg1, bitblock256_t arg2);
137template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::ult(bitblock256_t arg1, bitblock256_t arg2);
138template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::ult(bitblock256_t arg1, bitblock256_t arg2);
139template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::ult(bitblock256_t arg1, bitblock256_t arg2);
140template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::ult(bitblock256_t arg1, bitblock256_t arg2);
141template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::ult(bitblock256_t arg1, bitblock256_t arg2);
142template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::ult(bitblock256_t arg1, bitblock256_t arg2);
143template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::lt(bitblock256_t arg1, bitblock256_t arg2);
144template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::lt(bitblock256_t arg1, bitblock256_t arg2);
145template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::lt(bitblock256_t arg1, bitblock256_t arg2);
146template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::lt(bitblock256_t arg1, bitblock256_t arg2);
147template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::lt(bitblock256_t arg1, bitblock256_t arg2);
148template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::lt(bitblock256_t arg1, bitblock256_t arg2);
149template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::lt(bitblock256_t arg1, bitblock256_t arg2);
150template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::lt(bitblock256_t arg1, bitblock256_t arg2);
151template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::lt(bitblock256_t arg1, bitblock256_t arg2);
152template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::srli(bitblock256_t arg1);
153template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::srli(bitblock256_t arg1);
154template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::srli(bitblock256_t arg1);
155template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::srli(bitblock256_t arg1);
156template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::srli(bitblock256_t arg1);
157template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::srli(bitblock256_t arg1);
158template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::srli(bitblock256_t arg1);
159template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::srli(bitblock256_t arg1);
160template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::ctz(bitblock256_t arg1);
161template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::ctz(bitblock256_t arg1);
162template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::ctz(bitblock256_t arg1);
163template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::ctz(bitblock256_t arg1);
164template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::ctz(bitblock256_t arg1);
165template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::ctz(bitblock256_t arg1);
166template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::ctz(bitblock256_t arg1);
167template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::ctz(bitblock256_t arg1);
168template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::ctz(bitblock256_t arg1);
169template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::ugt(bitblock256_t arg1, bitblock256_t arg2);
170template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::ugt(bitblock256_t arg1, bitblock256_t arg2);
171template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::ugt(bitblock256_t arg1, bitblock256_t arg2);
172template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::ugt(bitblock256_t arg1, bitblock256_t arg2);
173template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::ugt(bitblock256_t arg1, bitblock256_t arg2);
174template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::ugt(bitblock256_t arg1, bitblock256_t arg2);
175template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::ugt(bitblock256_t arg1, bitblock256_t arg2);
176template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::ugt(bitblock256_t arg1, bitblock256_t arg2);
177template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::ugt(bitblock256_t arg1, bitblock256_t arg2);
178template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::xor_hl(bitblock256_t arg1);
179template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::xor_hl(bitblock256_t arg1);
180template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::xor_hl(bitblock256_t arg1);
181template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::xor_hl(bitblock256_t arg1);
182template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::xor_hl(bitblock256_t arg1);
183template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::xor_hl(bitblock256_t arg1);
184template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::xor_hl(bitblock256_t arg1);
185template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::xor_hl(bitblock256_t arg1);
186template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::popcount(bitblock256_t arg1);
187template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::popcount(bitblock256_t arg1);
188template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::popcount(bitblock256_t arg1);
189template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::popcount(bitblock256_t arg1);
190template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::popcount(bitblock256_t arg1);
191template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::popcount(bitblock256_t arg1);
192template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::popcount(bitblock256_t arg1);
193template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::popcount(bitblock256_t arg1);
194template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::popcount(bitblock256_t arg1);
195template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::neg(bitblock256_t arg1);
196template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::neg(bitblock256_t arg1);
197template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::neg(bitblock256_t arg1);
198template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::neg(bitblock256_t arg1);
199template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::neg(bitblock256_t arg1);
200template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::neg(bitblock256_t arg1);
201template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::neg(bitblock256_t arg1);
202template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::neg(bitblock256_t arg1);
203template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::neg(bitblock256_t arg1);
204template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::slli(bitblock256_t arg1);
205template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::slli(bitblock256_t arg1);
206template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::slli(bitblock256_t arg1);
207template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::slli(bitblock256_t arg1);
208template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::slli(bitblock256_t arg1);
209template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::slli(bitblock256_t arg1);
210template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::slli(bitblock256_t arg1);
211template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::slli(bitblock256_t arg1);
212template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::ifh(bitblock256_t arg1, bitblock256_t arg2, bitblock256_t arg3);
213template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::ifh(bitblock256_t arg1, bitblock256_t arg2, bitblock256_t arg3);
214template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::ifh(bitblock256_t arg1, bitblock256_t arg2, bitblock256_t arg3);
215template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::ifh(bitblock256_t arg1, bitblock256_t arg2, bitblock256_t arg3);
216template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::ifh(bitblock256_t arg1, bitblock256_t arg2, bitblock256_t arg3);
217template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::ifh(bitblock256_t arg1, bitblock256_t arg2, bitblock256_t arg3);
218template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::ifh(bitblock256_t arg1, bitblock256_t arg2, bitblock256_t arg3);
219template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::ifh(bitblock256_t arg1, bitblock256_t arg2, bitblock256_t arg3);
220template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::ifh(bitblock256_t arg1, bitblock256_t arg2, bitblock256_t arg3);
221template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::sub(bitblock256_t arg1, bitblock256_t arg2);
222template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::sub(bitblock256_t arg1, bitblock256_t arg2);
223template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::sub(bitblock256_t arg1, bitblock256_t arg2);
224template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::sub(bitblock256_t arg1, bitblock256_t arg2);
225template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::sub(bitblock256_t arg1, bitblock256_t arg2);
226template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::sub(bitblock256_t arg1, bitblock256_t arg2);
227template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::sub(bitblock256_t arg1, bitblock256_t arg2);
228template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::sub(bitblock256_t arg1, bitblock256_t arg2);
229template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::sub(bitblock256_t arg1, bitblock256_t arg2);
230template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::add_hl(bitblock256_t arg1);
231template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::add_hl(bitblock256_t arg1);
232template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::add_hl(bitblock256_t arg1);
233template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::add_hl(bitblock256_t arg1);
234template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::add_hl(bitblock256_t arg1);
235template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::add_hl(bitblock256_t arg1);
236template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::add_hl(bitblock256_t arg1);
237template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::add_hl(bitblock256_t arg1);
238template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::lomask();
239template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::lomask();
240template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::lomask();
241template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::lomask();
242template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::lomask();
243template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::lomask();
244template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::lomask();
245template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::lomask();
246template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::constant();
247template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::constant();
248template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::constant();
249template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::constant();
250template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::constant();
251template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::constant();
252template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::constant();
253template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::constant();
254template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::constant();
255template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::min(bitblock256_t arg1, bitblock256_t arg2);
256template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::min(bitblock256_t arg1, bitblock256_t arg2);
257template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::min(bitblock256_t arg1, bitblock256_t arg2);
258template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::min(bitblock256_t arg1, bitblock256_t arg2);
259template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::min(bitblock256_t arg1, bitblock256_t arg2);
260template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::min(bitblock256_t arg1, bitblock256_t arg2);
261template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::min(bitblock256_t arg1, bitblock256_t arg2);
262template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::min(bitblock256_t arg1, bitblock256_t arg2);
263template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::min(bitblock256_t arg1, bitblock256_t arg2);
264template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::umin(bitblock256_t arg1, bitblock256_t arg2);
265template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::umin(bitblock256_t arg1, bitblock256_t arg2);
266template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::umin(bitblock256_t arg1, bitblock256_t arg2);
267template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::umin(bitblock256_t arg1, bitblock256_t arg2);
268template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::umin(bitblock256_t arg1, bitblock256_t arg2);
269template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::umin(bitblock256_t arg1, bitblock256_t arg2);
270template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::umin(bitblock256_t arg1, bitblock256_t arg2);
271template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::umin(bitblock256_t arg1, bitblock256_t arg2);
272template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::umin(bitblock256_t arg1, bitblock256_t arg2);
273template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::umax(bitblock256_t arg1, bitblock256_t arg2);
274template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::umax(bitblock256_t arg1, bitblock256_t arg2);
275template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::umax(bitblock256_t arg1, bitblock256_t arg2);
276template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::umax(bitblock256_t arg1, bitblock256_t arg2);
277template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::umax(bitblock256_t arg1, bitblock256_t arg2);
278template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::umax(bitblock256_t arg1, bitblock256_t arg2);
279template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::umax(bitblock256_t arg1, bitblock256_t arg2);
280template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::umax(bitblock256_t arg1, bitblock256_t arg2);
281template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::umax(bitblock256_t arg1, bitblock256_t arg2);
282template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::eq(bitblock256_t arg1, bitblock256_t arg2);
283template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::eq(bitblock256_t arg1, bitblock256_t arg2);
284template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::eq(bitblock256_t arg1, bitblock256_t arg2);
285template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::eq(bitblock256_t arg1, bitblock256_t arg2);
286template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::eq(bitblock256_t arg1, bitblock256_t arg2);
287template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::eq(bitblock256_t arg1, bitblock256_t arg2);
288template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::eq(bitblock256_t arg1, bitblock256_t arg2);
289template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::eq(bitblock256_t arg1, bitblock256_t arg2);
290template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::eq(bitblock256_t arg1, bitblock256_t arg2);
291template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::srai(bitblock256_t arg1);
292template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::srai(bitblock256_t arg1);
293template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::srai(bitblock256_t arg1);
294template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::srai(bitblock256_t arg1);
295template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::srai(bitblock256_t arg1);
296template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::srai(bitblock256_t arg1);
297template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::srai(bitblock256_t arg1);
298template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::srai(bitblock256_t arg1);
299template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::himask();
300template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::himask();
301template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::himask();
302template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::himask();
303template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::himask();
304template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::himask();
305template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::himask();
306template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::himask();
307template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::add(bitblock256_t arg1, bitblock256_t arg2);
308template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::add(bitblock256_t arg1, bitblock256_t arg2);
309template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::add(bitblock256_t arg1, bitblock256_t arg2);
310template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::add(bitblock256_t arg1, bitblock256_t arg2);
311template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::add(bitblock256_t arg1, bitblock256_t arg2);
312template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::add(bitblock256_t arg1, bitblock256_t arg2);
313template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::add(bitblock256_t arg1, bitblock256_t arg2);
314template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::add(bitblock256_t arg1, bitblock256_t arg2);
315template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::add(bitblock256_t arg1, bitblock256_t arg2);
316template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::abs(bitblock256_t arg1);
317template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::abs(bitblock256_t arg1);
318template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::abs(bitblock256_t arg1);
319template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::abs(bitblock256_t arg1);
320template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::abs(bitblock256_t arg1);
321template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::abs(bitblock256_t arg1);
322template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::abs(bitblock256_t arg1);
323template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::abs(bitblock256_t arg1);
324template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::abs(bitblock256_t arg1);
325template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<2>::umin_hl(bitblock256_t arg1, bitblock256_t arg2);
326template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<4>::umin_hl(bitblock256_t arg1, bitblock256_t arg2);
327template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<8>::umin_hl(bitblock256_t arg1, bitblock256_t arg2);
328template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<16>::umin_hl(bitblock256_t arg1, bitblock256_t arg2);
329template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<32>::umin_hl(bitblock256_t arg1, bitblock256_t arg2);
330template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<64>::umin_hl(bitblock256_t arg1, bitblock256_t arg2);
331template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<128>::umin_hl(bitblock256_t arg1, bitblock256_t arg2);
332template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<256>::umin_hl(bitblock256_t arg1, bitblock256_t arg2);
333template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<2>::add_hl(bitblock256_t arg1, bitblock256_t arg2);
334template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<4>::add_hl(bitblock256_t arg1, bitblock256_t arg2);
335template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<8>::add_hl(bitblock256_t arg1, bitblock256_t arg2);
336template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<16>::add_hl(bitblock256_t arg1, bitblock256_t arg2);
337template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<32>::add_hl(bitblock256_t arg1, bitblock256_t arg2);
338template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<64>::add_hl(bitblock256_t arg1, bitblock256_t arg2);
339template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<128>::add_hl(bitblock256_t arg1, bitblock256_t arg2);
340template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<256>::add_hl(bitblock256_t arg1, bitblock256_t arg2);
341template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<2>::packss(bitblock256_t arg1, bitblock256_t arg2);
342template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<4>::packss(bitblock256_t arg1, bitblock256_t arg2);
343template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<8>::packss(bitblock256_t arg1, bitblock256_t arg2);
344template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<16>::packss(bitblock256_t arg1, bitblock256_t arg2);
345template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<32>::packss(bitblock256_t arg1, bitblock256_t arg2);
346template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<64>::packss(bitblock256_t arg1, bitblock256_t arg2);
347template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<128>::packss(bitblock256_t arg1, bitblock256_t arg2);
348template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<256>::packss(bitblock256_t arg1, bitblock256_t arg2);
349template <> IDISA_ALWAYS_INLINE uint64_t hsimd256<8>::signmask(bitblock256_t arg1);
350template <> IDISA_ALWAYS_INLINE uint64_t hsimd256<16>::signmask(bitblock256_t arg1);
351template <> IDISA_ALWAYS_INLINE uint64_t hsimd256<32>::signmask(bitblock256_t arg1);
352template <> IDISA_ALWAYS_INLINE uint64_t hsimd256<64>::signmask(bitblock256_t arg1);
353template <> IDISA_ALWAYS_INLINE uint64_t hsimd256<128>::signmask(bitblock256_t arg1);
354template <> IDISA_ALWAYS_INLINE uint64_t hsimd256<256>::signmask(bitblock256_t arg1);
355template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<2>::packl(bitblock256_t arg1, bitblock256_t arg2);
356template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<4>::packl(bitblock256_t arg1, bitblock256_t arg2);
357template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<8>::packl(bitblock256_t arg1, bitblock256_t arg2);
358template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<16>::packl(bitblock256_t arg1, bitblock256_t arg2);
359template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<32>::packl(bitblock256_t arg1, bitblock256_t arg2);
360template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<64>::packl(bitblock256_t arg1, bitblock256_t arg2);
361template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<128>::packl(bitblock256_t arg1, bitblock256_t arg2);
362template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<256>::packl(bitblock256_t arg1, bitblock256_t arg2);
363template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<2>::packh(bitblock256_t arg1, bitblock256_t arg2);
364template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<4>::packh(bitblock256_t arg1, bitblock256_t arg2);
365template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<8>::packh(bitblock256_t arg1, bitblock256_t arg2);
366template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<16>::packh(bitblock256_t arg1, bitblock256_t arg2);
367template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<32>::packh(bitblock256_t arg1, bitblock256_t arg2);
368template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<64>::packh(bitblock256_t arg1, bitblock256_t arg2);
369template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<128>::packh(bitblock256_t arg1, bitblock256_t arg2);
370template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<256>::packh(bitblock256_t arg1, bitblock256_t arg2);
371template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<2>::min_hl(bitblock256_t arg1, bitblock256_t arg2);
372template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<4>::min_hl(bitblock256_t arg1, bitblock256_t arg2);
373template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<8>::min_hl(bitblock256_t arg1, bitblock256_t arg2);
374template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<16>::min_hl(bitblock256_t arg1, bitblock256_t arg2);
375template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<32>::min_hl(bitblock256_t arg1, bitblock256_t arg2);
376template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<64>::min_hl(bitblock256_t arg1, bitblock256_t arg2);
377template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<128>::min_hl(bitblock256_t arg1, bitblock256_t arg2);
378template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<256>::min_hl(bitblock256_t arg1, bitblock256_t arg2);
379template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<2>::packus(bitblock256_t arg1, bitblock256_t arg2);
380template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<4>::packus(bitblock256_t arg1, bitblock256_t arg2);
381template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<8>::packus(bitblock256_t arg1, bitblock256_t arg2);
382template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<16>::packus(bitblock256_t arg1, bitblock256_t arg2);
383template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<32>::packus(bitblock256_t arg1, bitblock256_t arg2);
384template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<64>::packus(bitblock256_t arg1, bitblock256_t arg2);
385template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<128>::packus(bitblock256_t arg1, bitblock256_t arg2);
386template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<256>::packus(bitblock256_t arg1, bitblock256_t arg2);
387template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<1>::mergel(bitblock256_t arg1, bitblock256_t arg2);
388template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<2>::mergel(bitblock256_t arg1, bitblock256_t arg2);
389template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<4>::mergel(bitblock256_t arg1, bitblock256_t arg2);
390template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<8>::mergel(bitblock256_t arg1, bitblock256_t arg2);
391template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<16>::mergel(bitblock256_t arg1, bitblock256_t arg2);
392template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<32>::mergel(bitblock256_t arg1, bitblock256_t arg2);
393template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<64>::mergel(bitblock256_t arg1, bitblock256_t arg2);
394template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<128>::mergel(bitblock256_t arg1, bitblock256_t arg2);
395template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<1>::mergeh(bitblock256_t arg1, bitblock256_t arg2);
396template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<2>::mergeh(bitblock256_t arg1, bitblock256_t arg2);
397template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<4>::mergeh(bitblock256_t arg1, bitblock256_t arg2);
398template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<8>::mergeh(bitblock256_t arg1, bitblock256_t arg2);
399template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<16>::mergeh(bitblock256_t arg1, bitblock256_t arg2);
400template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<32>::mergeh(bitblock256_t arg1, bitblock256_t arg2);
401template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<64>::mergeh(bitblock256_t arg1, bitblock256_t arg2);
402template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<128>::mergeh(bitblock256_t arg1, bitblock256_t arg2);
403template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<1>::zeroextendh(bitblock256_t arg1);
404template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<2>::zeroextendh(bitblock256_t arg1);
405template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<4>::zeroextendh(bitblock256_t arg1);
406template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<8>::zeroextendh(bitblock256_t arg1);
407template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<16>::zeroextendh(bitblock256_t arg1);
408template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<32>::zeroextendh(bitblock256_t arg1);
409template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<64>::zeroextendh(bitblock256_t arg1);
410template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<128>::zeroextendh(bitblock256_t arg1);
411template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<1>::zeroextendl(bitblock256_t arg1);
412template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<2>::zeroextendl(bitblock256_t arg1);
413template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<4>::zeroextendl(bitblock256_t arg1);
414template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<8>::zeroextendl(bitblock256_t arg1);
415template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<16>::zeroextendl(bitblock256_t arg1);
416template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<32>::zeroextendl(bitblock256_t arg1);
417template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<64>::zeroextendl(bitblock256_t arg1);
418template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<128>::zeroextendl(bitblock256_t arg1);
419template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<1>::signextendh(bitblock256_t arg1);
420template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<2>::signextendh(bitblock256_t arg1);
421template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<4>::signextendh(bitblock256_t arg1);
422template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<8>::signextendh(bitblock256_t arg1);
423template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<16>::signextendh(bitblock256_t arg1);
424template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<32>::signextendh(bitblock256_t arg1);
425template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<64>::signextendh(bitblock256_t arg1);
426template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<128>::signextendh(bitblock256_t arg1);
427template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<1>::signextendl(bitblock256_t arg1);
428template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<2>::signextendl(bitblock256_t arg1);
429template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<4>::signextendl(bitblock256_t arg1);
430template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<8>::signextendl(bitblock256_t arg1);
431template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<16>::signextendl(bitblock256_t arg1);
432template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<32>::signextendl(bitblock256_t arg1);
433template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<64>::signextendl(bitblock256_t arg1);
434template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<128>::signextendl(bitblock256_t arg1);
435template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::dsrli(bitblock256_t arg1, bitblock256_t arg2);
436template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::dsrli(bitblock256_t arg1, bitblock256_t arg2);
437template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::dsrli(bitblock256_t arg1, bitblock256_t arg2);
438template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::dsrli(bitblock256_t arg1, bitblock256_t arg2);
439template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::dsrli(bitblock256_t arg1, bitblock256_t arg2);
440template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::dsrli(bitblock256_t arg1, bitblock256_t arg2);
441template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::dsrli(bitblock256_t arg1, bitblock256_t arg2);
442template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<256>::dsrli(bitblock256_t arg1, bitblock256_t arg2);
443template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<1>::fill(uint64_t val1);
444template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::fill(uint64_t val1);
445template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::fill(uint64_t val1);
446template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::fill(uint64_t val1);
447template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::fill(uint64_t val1);
448template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::fill(uint64_t val1);
449template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd256<1>::extract(bitblock256_t arg1);
450template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd256<2>::extract(bitblock256_t arg1);
451template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd256<4>::extract(bitblock256_t arg1);
452template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd256<8>::extract(bitblock256_t arg1);
453template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd256<16>::extract(bitblock256_t arg1);
454template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd256<32>::extract(bitblock256_t arg1);
455template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd256<64>::extract(bitblock256_t arg1);
456template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<1>::splat(bitblock256_t arg1);
457template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::splat(bitblock256_t arg1);
458template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::splat(bitblock256_t arg1);
459template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::splat(bitblock256_t arg1);
460template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::splat(bitblock256_t arg1);
461template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::splat(bitblock256_t arg1);
462template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::splat(bitblock256_t arg1);
463template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::splat(bitblock256_t arg1);
464template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<256>::splat(bitblock256_t arg1);
465template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<1>::fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16);
466template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16);
467template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16);
468template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16);
469template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16);
470template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::slli(bitblock256_t arg1);
471template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::slli(bitblock256_t arg1);
472template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::slli(bitblock256_t arg1);
473template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::slli(bitblock256_t arg1);
474template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::slli(bitblock256_t arg1);
475template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::slli(bitblock256_t arg1);
476template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::slli(bitblock256_t arg1);
477template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<256>::slli(bitblock256_t arg1);
478template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<1>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4);
479template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4);
480template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4);
481template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4);
482template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4);
483template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4);
484template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::srli(bitblock256_t arg1);
485template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::srli(bitblock256_t arg1);
486template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::srli(bitblock256_t arg1);
487template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::srli(bitblock256_t arg1);
488template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::srli(bitblock256_t arg1);
489template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::srli(bitblock256_t arg1);
490template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::srli(bitblock256_t arg1);
491template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<256>::srli(bitblock256_t arg1);
492template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<1>::fill2(uint64_t val1, uint64_t val2);
493template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::fill2(uint64_t val1, uint64_t val2);
494template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::fill2(uint64_t val1, uint64_t val2);
495template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::fill2(uint64_t val1, uint64_t val2);
496template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::fill2(uint64_t val1, uint64_t val2);
497template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::fill2(uint64_t val1, uint64_t val2);
498template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::dslli(bitblock256_t arg1, bitblock256_t arg2);
499template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::dslli(bitblock256_t arg1, bitblock256_t arg2);
500template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::dslli(bitblock256_t arg1, bitblock256_t arg2);
501template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::dslli(bitblock256_t arg1, bitblock256_t arg2);
502template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::dslli(bitblock256_t arg1, bitblock256_t arg2);
503template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::dslli(bitblock256_t arg1, bitblock256_t arg2);
504template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::dslli(bitblock256_t arg1, bitblock256_t arg2);
505template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<256>::dslli(bitblock256_t arg1, bitblock256_t arg2);
506template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<1>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8);
507template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8);
508template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8);
509template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8);
510template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8);
511template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8);
512
513//Implementation Part
514
// Helper macros that treat a 256-bit block as two 128-bit halves.
// Every macro parameter is wrapped in parentheses so that an arbitrary
// expression can be passed without operator-precedence surprises.
// Note that some macros expand their argument more than once.

// Result: high half = low half of x, low half zeroed (immediate 0 + 8).
#define avx_move_lo128_to_hi128(x) \
        _mm256_permute2f128_ps((x), (x), 0 + 8)

// Low 128 bits of x, reinterpreted as an integer vector.
#define avx_select_lo128(x) \
        ((__m128i) _mm256_castps256_ps128((x)))

// Result: low half = high half of x, high half zeroed (immediate 1 + 128).
#define avx_move_hi128_to_lo128(x) \
        _mm256_permute2f128_ps((x), (x), 1 + 128)

// High 128 bits of x, reinterpreted as an integer vector.
#define avx_select_hi128(x) \
        ((__m128i)(_mm256_extractf128_ps((x), 1)))

// Shift each 128-bit half of x right by y bytes, independently of the other half.
#define avx_byte_shift_right(x, y) \
        ((bitblock256_t)avx_general_combine256(_mm_srli_si128(avx_select_hi128(x), (y)), _mm_srli_si128(avx_select_lo128(x), (y))))

// Shift each 128-bit half of x left by y bytes, independently of the other half.
#define avx_byte_shift_left(x, y) \
        ((bitblock256_t)avx_general_combine256(_mm_slli_si128(avx_select_hi128(x), (y)), _mm_slli_si128(avx_select_lo128(x), (y))))

// Build a 256-bit block from two 128-bit values: x becomes the high half,
// y becomes the low half.
#define avx_general_combine256(x, y) \
   (_mm256_insertf128_ps(_mm256_castps128_ps256((__m128)(y)), (__m128)(x), 1))
535//The total number of operations is 2
536IDISA_ALWAYS_INLINE bitblock256_t simd_nor(bitblock256_t arg1, bitblock256_t arg2)
537{
538        return simd_not(simd_or(arg1, arg2));
539}
540
541//The total number of operations is 1
542IDISA_ALWAYS_INLINE bitblock256_t simd_not(bitblock256_t arg1)
543{
544        return simd_xor(arg1, simd256<32>::constant<-1>());
545}
546
547//The total number of operations is 1
548IDISA_ALWAYS_INLINE bitblock256_t simd_andc(bitblock256_t arg1, bitblock256_t arg2)
549{
550        return _mm256_andnot_ps(arg2, arg1);
551}
552
553//The total number of operations is 1
554IDISA_ALWAYS_INLINE bitblock256_t simd_or(bitblock256_t arg1, bitblock256_t arg2)
555{
556        return _mm256_or_ps(arg1, arg2);
557}
558
559//The total number of operations is 1
560IDISA_ALWAYS_INLINE bitblock256_t simd_and(bitblock256_t arg1, bitblock256_t arg2)
561{
562        return _mm256_and_ps(arg1, arg2);
563}
564
565//The total number of operations is 1
566IDISA_ALWAYS_INLINE bitblock256_t simd_xor(bitblock256_t arg1, bitblock256_t arg2)
567{
568        return _mm256_xor_ps(arg1, arg2);
569}
570
571//The total number of operations is 95
572template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::max(bitblock256_t arg1, bitblock256_t arg2)
573{
574        bitblock256_t high_bit = simd256<1>::constant<(1)>();
575        return simd_xor(simd256<1>::umax(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
576}
577
578//The total number of operations is 47
579template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::max(bitblock256_t arg1, bitblock256_t arg2)
580{
581        bitblock256_t high_bit = simd256<2>::constant<(2)>();
582        return simd_xor(simd256<2>::umax(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
583}
584
585//The total number of operations is 23
586template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::max(bitblock256_t arg1, bitblock256_t arg2)
587{
588        bitblock256_t high_bit = simd256<4>::constant<(8)>();
589        return simd_xor(simd256<4>::umax(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
590}
591
592//The total number of operations is 8
593template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::max(bitblock256_t arg1, bitblock256_t arg2)
594{
595        return avx_general_combine256(_mm_max_epi8(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_max_epi8(avx_select_lo128(arg1), avx_select_lo128(arg2)));
596}
597
598//The total number of operations is 8
599template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::max(bitblock256_t arg1, bitblock256_t arg2)
600{
601        return avx_general_combine256(_mm_max_epi16(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_max_epi16(avx_select_lo128(arg1), avx_select_lo128(arg2)));
602}
603
604//The total number of operations is 8
605template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::max(bitblock256_t arg1, bitblock256_t arg2)
606{
607        return avx_general_combine256(_mm_max_epi32(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_max_epi32(avx_select_lo128(arg1), avx_select_lo128(arg2)));
608}
609
610//The total number of operations is 11
611template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::max(bitblock256_t arg1, bitblock256_t arg2)
612{
613        return simd256<1>::ifh(simd256<64>::gt(arg1, arg2), arg1, arg2);
614}
615
616//The total number of operations is 88
617template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::max(bitblock256_t arg1, bitblock256_t arg2)
618{
619        bitblock256_t hiAns = simd256<(64)>::max(arg1, arg2);
620        bitblock256_t loAns = simd256<(64)>::umax(arg1, arg2);
621        bitblock256_t eqMask1 = simd256<128>::srli<(64)>(simd256<(64)>::eq(hiAns, arg1));
622        bitblock256_t eqMask2 = simd256<128>::srli<(64)>(simd256<(64)>::eq(hiAns, arg2));
623        return simd256<1>::ifh(simd256<128>::himask(), hiAns, simd256<1>::ifh(eqMask1, simd256<1>::ifh(eqMask2, loAns, arg1), arg2));
624}
625
626//The total number of operations is 352
627template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::max(bitblock256_t arg1, bitblock256_t arg2)
628{
629        bitblock256_t hiAns = simd256<(128)>::max(arg1, arg2);
630        bitblock256_t loAns = simd256<(128)>::umax(arg1, arg2);
631        bitblock256_t eqMask1 = simd256<256>::srli<(128)>(simd256<(128)>::eq(hiAns, arg1));
632        bitblock256_t eqMask2 = simd256<256>::srli<(128)>(simd256<(128)>::eq(hiAns, arg2));
633        return simd256<1>::ifh(simd256<256>::himask(), hiAns, simd256<1>::ifh(eqMask1, simd256<1>::ifh(eqMask2, loAns, arg1), arg2));
634}
635
636//The total number of operations is 216
637template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::mult(bitblock256_t arg1, bitblock256_t arg2)
638{
639        bitblock256_t loMask = simd256<(2)>::lomask();
640        bitblock256_t tmpAns1 = simd256<(2)>::mult(simd_and(loMask, arg1), simd_and(loMask, arg2));
641        bitblock256_t tmpAns2 = simd256<(2)>::mult(simd256<(2)>::srli<1>(arg1), simd256<(2)>::srli<1>(arg2));
642        return simd256<1>::ifh(loMask, tmpAns1, simd256<(2)>::slli<1>(tmpAns2));
643}
644
645//The total number of operations is 95
646template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::mult(bitblock256_t arg1, bitblock256_t arg2)
647{
648        bitblock256_t tmp1 = simd256<256>::slli<1>(arg1);
649        bitblock256_t tmp2 = simd256<256>::slli<1>(arg2);
650        return simd256<1>::ifh(simd256<2>::himask(), simd_or(simd_and(tmp1, simd_and(arg2, simd_or(simd_not(arg1), simd_not(tmp2)))), simd_and(arg1, simd_and(tmp2, simd_or(simd_not(tmp1), simd_not(arg2))))), simd_and(arg1, arg2));
651}
652
653//The total number of operations is 104
654template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::mult(bitblock256_t arg1, bitblock256_t arg2)
655{
656        bitblock256_t loMask = simd256<(8)>::lomask();
657        bitblock256_t tmpAns1 = simd256<(8)>::mult(simd_and(loMask, arg1), simd_and(loMask, arg2));
658        bitblock256_t tmpAns2 = simd256<(8)>::mult(simd256<(8)>::srli<4>(arg1), simd256<(8)>::srli<4>(arg2));
659        return simd256<1>::ifh(loMask, tmpAns1, simd256<(8)>::slli<4>(tmpAns2));
660}
661
662//The total number of operations is 39
663template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::mult(bitblock256_t arg1, bitblock256_t arg2)
664{
665        bitblock256_t loMask = simd256<(16)>::lomask();
666        bitblock256_t tmpAns1 = simd256<(16)>::mult(simd_and(loMask, arg1), simd_and(loMask, arg2));
667        bitblock256_t tmpAns2 = simd256<(16)>::mult(simd256<(16)>::srli<8>(arg1), simd256<(16)>::srli<8>(arg2));
668        return simd256<1>::ifh(loMask, tmpAns1, simd256<(16)>::slli<8>(tmpAns2));
669}
670
671//The total number of operations is 8
672template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::mult(bitblock256_t arg1, bitblock256_t arg2)
673{
674        return avx_general_combine256(_mm_mullo_epi16(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_mullo_epi16(avx_select_lo128(arg1), avx_select_lo128(arg2)));
675}
676
677//The total number of operations is 8
678template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::mult(bitblock256_t arg1, bitblock256_t arg2)
679{
680        return avx_general_combine256(_mm_mullo_epi32(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_mullo_epi32(avx_select_lo128(arg1), avx_select_lo128(arg2)));
681}
682
683//The total number of operations is 66
684template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::mult(bitblock256_t arg1, bitblock256_t arg2)
685{
686        bitblock256_t loMask = simd256<64>::lomask();
687        bitblock256_t arg1_low = simd_and(arg1, loMask);
688        bitblock256_t arg1_high = simd256<64>::srli<(32)>(arg1);
689        bitblock256_t arg2_low = simd_and(arg2, loMask);
690        bitblock256_t arg2_high = simd256<64>::srli<(32)>(arg2);
691        bitblock256_t tmpAns1 = simd256<(32)>::umult(arg1_low, arg2_low);
692        bitblock256_t tmpAns2 = simd256<64>::slli<(32)>(simd256<(32)>::umult(arg1_low, arg2_high));
693        bitblock256_t tmpAns3 = simd256<64>::slli<(32)>(simd256<(32)>::umult(arg1_high, arg2_low));
694        return simd256<64>::add(tmpAns1, simd256<64>::add(tmpAns2, tmpAns3));
695}
696
697//The total number of operations is 877
698template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::mult(bitblock256_t arg1, bitblock256_t arg2)
699{
700        bitblock256_t loMask = simd256<128>::lomask();
701        bitblock256_t arg1_low = simd_and(arg1, loMask);
702        bitblock256_t arg1_high = simd256<128>::srli<(64)>(arg1);
703        bitblock256_t arg2_low = simd_and(arg2, loMask);
704        bitblock256_t arg2_high = simd256<128>::srli<(64)>(arg2);
705        bitblock256_t tmpAns1 = simd256<(64)>::umult(arg1_low, arg2_low);
706        bitblock256_t tmpAns2 = simd256<128>::slli<(64)>(simd256<(64)>::umult(arg1_low, arg2_high));
707        bitblock256_t tmpAns3 = simd256<128>::slli<(64)>(simd256<(64)>::umult(arg1_high, arg2_low));
708        return simd256<128>::add(tmpAns1, simd256<128>::add(tmpAns2, tmpAns3));
709}
710
711//The total number of operations is 5001
712template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::mult(bitblock256_t arg1, bitblock256_t arg2)
713{
714        bitblock256_t loMask = simd256<256>::lomask();
715        bitblock256_t arg1_low = simd_and(arg1, loMask);
716        bitblock256_t arg1_high = simd256<256>::srli<(128)>(arg1);
717        bitblock256_t arg2_low = simd_and(arg2, loMask);
718        bitblock256_t arg2_high = simd256<256>::srli<(128)>(arg2);
719        bitblock256_t tmpAns1 = simd256<(128)>::umult(arg1_low, arg2_low);
720        bitblock256_t tmpAns2 = simd256<256>::slli<(128)>(simd256<(128)>::umult(arg1_low, arg2_high));
721        bitblock256_t tmpAns3 = simd256<256>::slli<(128)>(simd256<(128)>::umult(arg1_high, arg2_low));
722        return simd256<256>::add(tmpAns1, simd256<256>::add(tmpAns2, tmpAns3));
723}
724
725//The total number of operations is 131
726template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::gt(bitblock256_t arg1, bitblock256_t arg2)
727{
728        bitblock256_t high_bit = simd256<1>::constant<(1)>();
729        return simd256<1>::ugt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
730}
731
732//The total number of operations is 63
733template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::gt(bitblock256_t arg1, bitblock256_t arg2)
734{
735        bitblock256_t high_bit = simd256<2>::constant<(2)>();
736        return simd256<2>::ugt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
737}
738
739//The total number of operations is 29
740template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::gt(bitblock256_t arg1, bitblock256_t arg2)
741{
742        bitblock256_t high_bit = simd256<4>::constant<(8)>();
743        return simd256<4>::ugt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
744}
745
746//The total number of operations is 8
747template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::gt(bitblock256_t arg1, bitblock256_t arg2)
748{
749        return avx_general_combine256(_mm_cmpgt_epi8(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_cmpgt_epi8(avx_select_lo128(arg1), avx_select_lo128(arg2)));
750}
751
752//The total number of operations is 8
753template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::gt(bitblock256_t arg1, bitblock256_t arg2)
754{
755        return avx_general_combine256(_mm_cmpgt_epi16(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_cmpgt_epi16(avx_select_lo128(arg1), avx_select_lo128(arg2)));
756}
757
758//The total number of operations is 8
759template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::gt(bitblock256_t arg1, bitblock256_t arg2)
760{
761        return avx_general_combine256(_mm_cmpgt_epi32(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_cmpgt_epi32(avx_select_lo128(arg1), avx_select_lo128(arg2)));
762}
763
764//The total number of operations is 8
765template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::gt(bitblock256_t arg1, bitblock256_t arg2)
766{
767        return avx_general_combine256(_mm_cmpgt_epi64(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_cmpgt_epi64(avx_select_lo128(arg1), avx_select_lo128(arg2)));
768}
769
770//The total number of operations is 151
771template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::gt(bitblock256_t arg1, bitblock256_t arg2)
772{
773        bitblock256_t hiAns = simd256<(64)>::gt(arg1, arg2);
774        bitblock256_t loAns = simd256<(64)>::ugt(arg1, arg2);
775        bitblock256_t mask = simd_and(loAns, simd256<128>::srli<(64)>(simd256<(64)>::eq(arg1, arg2)));
776        mask = simd_or(mask, simd256<128>::slli<(64)>(mask));
777        return simd_or(simd256<128>::srai<(64)>(hiAns), mask);
778}
779
780//The total number of operations is 646
781template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::gt(bitblock256_t arg1, bitblock256_t arg2)
782{
783        bitblock256_t hiAns = simd256<(128)>::gt(arg1, arg2);
784        bitblock256_t loAns = simd256<(128)>::ugt(arg1, arg2);
785        bitblock256_t mask = simd_and(loAns, simd256<256>::srli<(128)>(simd256<(128)>::eq(arg1, arg2)));
786        mask = simd_or(mask, simd256<256>::slli<(128)>(mask));
787        return simd_or(simd256<256>::srai<(128)>(hiAns), mask);
788}
789
790//The total number of operations is 978
791template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::umult(bitblock256_t arg1, bitblock256_t arg2)
792{
793        bitblock256_t loMask = simd256<(2)>::lomask();
794        bitblock256_t tmpAns1 = simd256<(2)>::umult(simd_and(loMask, arg1), simd_and(loMask, arg2));
795        bitblock256_t tmpAns2 = simd256<(2)>::umult(simd_and(loMask, simd256<(4)>::srli<(2)>(arg1)), simd_and(loMask, simd256<(4)>::srli<(2)>(arg2)));
796        return simd_or(tmpAns1, simd256<(4)>::slli<(2)>(tmpAns2));
797}
798
799//The total number of operations is 476
800template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::umult(bitblock256_t arg1, bitblock256_t arg2)
801{
802        bitblock256_t loMask = simd256<(4)>::lomask();
803        bitblock256_t tmpAns1 = simd256<(4)>::umult(simd_and(loMask, arg1), simd_and(loMask, arg2));
804        bitblock256_t tmpAns2 = simd256<(4)>::umult(simd_and(loMask, simd256<(8)>::srli<(4)>(arg1)), simd_and(loMask, simd256<(8)>::srli<(4)>(arg2)));
805        return simd_or(tmpAns1, simd256<(8)>::slli<(4)>(tmpAns2));
806}
807
808//The total number of operations is 225
809template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::umult(bitblock256_t arg1, bitblock256_t arg2)
810{
811        bitblock256_t loMask = simd256<(8)>::lomask();
812        bitblock256_t tmpAns1 = simd256<(8)>::umult(simd_and(loMask, arg1), simd_and(loMask, arg2));
813        bitblock256_t tmpAns2 = simd256<(8)>::umult(simd_and(loMask, simd256<(16)>::srli<(8)>(arg1)), simd_and(loMask, simd256<(16)>::srli<(8)>(arg2)));
814        return simd_or(tmpAns1, simd256<(16)>::slli<(8)>(tmpAns2));
815}
816
817//The total number of operations is 101
818template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::umult(bitblock256_t arg1, bitblock256_t arg2)
819{
820        bitblock256_t loMask = simd256<(16)>::lomask();
821        bitblock256_t tmpAns1 = simd256<(16)>::umult(simd_and(loMask, arg1), simd_and(loMask, arg2));
822        bitblock256_t tmpAns2 = simd256<(16)>::umult(simd_and(loMask, simd256<(32)>::srli<(16)>(arg1)), simd_and(loMask, simd256<(32)>::srli<(16)>(arg2)));
823        return simd_or(tmpAns1, simd256<(32)>::slli<(16)>(tmpAns2));
824}
825
826//The total number of operations is 39
827template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::umult(bitblock256_t arg1, bitblock256_t arg2)
828{
829        bitblock256_t loMask = simd256<(32)>::lomask();
830        bitblock256_t tmpAns1 = simd256<(32)>::umult(simd_and(loMask, arg1), simd_and(loMask, arg2));
831        bitblock256_t tmpAns2 = simd256<(32)>::umult(simd_and(loMask, simd256<(64)>::srli<(32)>(arg1)), simd_and(loMask, simd256<(64)>::srli<(32)>(arg2)));
832        return simd_or(tmpAns1, simd256<(64)>::slli<(32)>(tmpAns2));
833}
834
835//The total number of operations is 8
836template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::umult(bitblock256_t arg1, bitblock256_t arg2)
837{
838        return avx_general_combine256(_mm_mul_epu32(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_mul_epu32(avx_select_lo128(arg1), avx_select_lo128(arg2)));
839}
840
841//The total number of operations is 237
842template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::umult(bitblock256_t arg1, bitblock256_t arg2)
843{
844        bitblock256_t loMask1 = simd256<(128)>::lomask();
845        bitblock256_t arg11 = simd_and(arg1, loMask1);
846        bitblock256_t arg22 = simd_and(arg2, loMask1);
847        bitblock256_t loMask2 = simd256<64>::lomask();
848        bitblock256_t arg1_low = simd_and(arg11, loMask2);
849        bitblock256_t arg1_high = simd256<64>::srli<(32)>(arg11);
850        bitblock256_t arg2_low = simd_and(arg22, loMask2);
851        bitblock256_t arg2_high = simd256<64>::srli<(32)>(arg22);
852        bitblock256_t tmpAns1 = simd256<(32)>::umult(arg1_low, arg2_low);
853        bitblock256_t tmpAns2 = simd256<(128)>::slli<(32)>(simd256<(32)>::umult(arg1_low, arg2_high));
854        bitblock256_t tmpAns3 = simd256<(128)>::slli<(32)>(simd256<(32)>::umult(arg1_high, arg2_low));
855        bitblock256_t tmpAns4 = simd256<(128)>::slli<64>(simd256<(32)>::umult(arg1_high, arg2_high));
856        return simd256<(128)>::add(tmpAns1, simd256<(128)>::add(tmpAns2, simd256<(128)>::add(tmpAns3, tmpAns4)));
857}
858
859//The total number of operations is 1521
860template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::umult(bitblock256_t arg1, bitblock256_t arg2)
861{
862        bitblock256_t loMask1 = simd256<(256)>::lomask();
863        bitblock256_t arg11 = simd_and(arg1, loMask1);
864        bitblock256_t arg22 = simd_and(arg2, loMask1);
865        bitblock256_t loMask2 = simd256<128>::lomask();
866        bitblock256_t arg1_low = simd_and(arg11, loMask2);
867        bitblock256_t arg1_high = simd256<128>::srli<(64)>(arg11);
868        bitblock256_t arg2_low = simd_and(arg22, loMask2);
869        bitblock256_t arg2_high = simd256<128>::srli<(64)>(arg22);
870        bitblock256_t tmpAns1 = simd256<(64)>::umult(arg1_low, arg2_low);
871        bitblock256_t tmpAns2 = simd256<(256)>::slli<(64)>(simd256<(64)>::umult(arg1_low, arg2_high));
872        bitblock256_t tmpAns3 = simd256<(256)>::slli<(64)>(simd256<(64)>::umult(arg1_high, arg2_low));
873        bitblock256_t tmpAns4 = simd256<(256)>::slli<128>(simd256<(64)>::umult(arg1_high, arg2_high));
874        return simd256<(256)>::add(tmpAns1, simd256<(256)>::add(tmpAns2, simd256<(256)>::add(tmpAns3, tmpAns4)));
875}
876
877//The total number of operations is 187
878template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::ult(bitblock256_t arg1, bitblock256_t arg2)
879{
880        return simd_or(simd_and(simd256<(2)>::himask(), simd256<(2)>::ult(simd_and(simd256<(2)>::himask(), arg1), simd_and(simd256<(2)>::himask(), arg2))), simd_and(simd256<(2)>::lomask(), simd256<(2)>::ult(simd_and(simd256<(2)>::lomask(), arg1), simd_and(simd256<(2)>::lomask(), arg2))));
881}
882
883//The total number of operations is 90
884template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::ult(bitblock256_t arg1, bitblock256_t arg2)
885{
886        bitblock256_t tmp = simd_not(arg1);
887        bitblock256_t tmpAns = simd_or(simd_and(tmp, arg2), simd_and(simd256<256>::slli<1>(simd_and(tmp, arg2)), simd_or(tmp, arg2)));
888        return simd256<1>::ifh(simd256<2>::himask(), tmpAns, simd256<256>::srli<1>(tmpAns));
889}
890
891//The total number of operations is 49
892template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::ult(bitblock256_t arg1, bitblock256_t arg2)
893{
894        return simd_or(simd_and(simd256<(8)>::himask(), simd256<(8)>::ult(simd_and(simd256<(8)>::himask(), arg1), simd_and(simd256<(8)>::himask(), arg2))), simd_and(simd256<(8)>::lomask(), simd256<(8)>::ult(simd_and(simd256<(8)>::lomask(), arg1), simd_and(simd256<(8)>::lomask(), arg2))));
895}
896
897//The total number of operations is 21
898template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::ult(bitblock256_t arg1, bitblock256_t arg2)
899{
900        bitblock256_t high_bit = simd256<8>::constant<(128)>();
901        return simd256<8>::lt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
902}
903
904//The total number of operations is 21
905template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::ult(bitblock256_t arg1, bitblock256_t arg2)
906{
907        bitblock256_t high_bit = simd256<16>::constant<(32768)>();
908        return simd256<16>::lt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
909}
910
911//The total number of operations is 21
912template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::ult(bitblock256_t arg1, bitblock256_t arg2)
913{
914        bitblock256_t high_bit = simd256<32>::constant<(2147483648UL)>();
915        return simd256<32>::lt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
916}
917
918//The total number of operations is 21
919template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::ult(bitblock256_t arg1, bitblock256_t arg2)
920{
921        bitblock256_t high_bit = simd256<64>::constant<(9223372036854775808UL)>();
922        return simd256<64>::lt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
923}
924
925//The total number of operations is 154
926template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::ult(bitblock256_t arg1, bitblock256_t arg2)
927{
928        bitblock256_t tmpAns = simd256<(64)>::ult(arg1, arg2);
929        bitblock256_t mask = simd_and(tmpAns, simd256<128>::srli<(64)>(simd256<(64)>::eq(arg1, arg2)));
930        mask = simd_or(mask, simd256<128>::slli<(64)>(mask));
931        return simd_or(simd256<128>::srai<(64)>(tmpAns), mask);
932}
933
934//The total number of operations is 496
935template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::ult(bitblock256_t arg1, bitblock256_t arg2)
936{
937        return simd_and(simd256<256>::srai<(255)>(simd_or(simd_and(simd_not(arg1), arg2), simd_and(simd_not(simd_xor(arg1, arg2)), simd256<256>::sub(arg1, arg2)))), simd_not(simd256<256>::eq(arg1, arg2)));
938}
939
940//The total number of operations is 189
941template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::lt(bitblock256_t arg1, bitblock256_t arg2)
942{
943        bitblock256_t high_bit = simd256<1>::constant<(1)>();
944        return simd256<1>::ult(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
945}
946
947//The total number of operations is 91
948template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::lt(bitblock256_t arg1, bitblock256_t arg2)
949{
950        bitblock256_t tmp = simd_not(arg2);
951        bitblock256_t tmpAns = simd_or(simd_and(arg1, tmp), simd_and(simd256<256>::slli<1>(simd_and(simd_not(arg1), arg2)), simd_or(arg1, tmp)));
952        return simd256<1>::ifh(simd256<2>::himask(), tmpAns, simd256<256>::srli<1>(tmpAns));
953}
954
955//The total number of operations is 51
956template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::lt(bitblock256_t arg1, bitblock256_t arg2)
957{
958        bitblock256_t high_bit = simd256<4>::constant<(8)>();
959        return simd256<4>::ult(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
960}
961
962//The total number of operations is 19
963template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::lt(bitblock256_t arg1, bitblock256_t arg2)
964{
965        return simd_and(simd_not(simd256<8>::gt(arg1, arg2)), simd_not(simd256<8>::eq(arg1, arg2)));
966}
967
968//The total number of operations is 19
969template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::lt(bitblock256_t arg1, bitblock256_t arg2)
970{
971        return simd_and(simd_not(simd256<16>::gt(arg1, arg2)), simd_not(simd256<16>::eq(arg1, arg2)));
972}
973
974//The total number of operations is 19
975template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::lt(bitblock256_t arg1, bitblock256_t arg2)
976{
977        return simd_and(simd_not(simd256<32>::gt(arg1, arg2)), simd_not(simd256<32>::eq(arg1, arg2)));
978}
979
980//The total number of operations is 19
981template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::lt(bitblock256_t arg1, bitblock256_t arg2)
982{
983        return simd_and(simd_not(simd256<64>::gt(arg1, arg2)), simd_not(simd256<64>::eq(arg1, arg2)));
984}
985
986//The total number of operations is 173
987template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::lt(bitblock256_t arg1, bitblock256_t arg2)
988{
989        bitblock256_t hiAns = simd256<(64)>::lt(arg1, arg2);
990        bitblock256_t loAns = simd256<(64)>::ult(arg1, arg2);
991        bitblock256_t mask = simd_and(loAns, simd256<128>::srli<(64)>(simd256<(64)>::eq(arg1, arg2)));
992        mask = simd_or(mask, simd256<128>::slli<(64)>(mask));
993        return simd_or(simd256<128>::srai<(64)>(hiAns), mask);
994}
995
996//The total number of operations is 679
997template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::lt(bitblock256_t arg1, bitblock256_t arg2)
998{
999        bitblock256_t hiAns = simd256<(128)>::lt(arg1, arg2);
1000        bitblock256_t loAns = simd256<(128)>::ult(arg1, arg2);
1001        bitblock256_t mask = simd_and(loAns, simd256<256>::srli<(128)>(simd256<(128)>::eq(arg1, arg2)));
1002        mask = simd_or(mask, simd256<256>::slli<(128)>(mask));
1003        return simd_or(simd256<256>::srai<(128)>(hiAns), mask);
1004}
1005
1006//The total number of operations is 7
1007template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::srli(bitblock256_t arg1)
1008{
1009        return simd_and(simd256<32>::srli<sh>(arg1), simd256<2>::constant<((3)>>sh)>());
1010}
1011
1012//The total number of operations is 7
1013template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::srli(bitblock256_t arg1)
1014{
1015        return simd_and(simd256<32>::srli<sh>(arg1), simd256<4>::constant<((15)>>sh)>());
1016}
1017
1018//The total number of operations is 7
1019template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::srli(bitblock256_t arg1)
1020{
1021        return simd_and(simd256<32>::srli<sh>(arg1), simd256<8>::constant<((255)>>sh)>());
1022}
1023
1024//The total number of operations is 6
1025template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::srli(bitblock256_t arg1)
1026{
1027        return avx_general_combine256(_mm_srli_epi16(avx_select_hi128(arg1), (int32_t)(sh)), _mm_srli_epi16(avx_select_lo128(arg1), (int32_t)(sh)));
1028}
1029
1030//The total number of operations is 6
1031template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::srli(bitblock256_t arg1)
1032{
1033        return avx_general_combine256(_mm_srli_epi32(avx_select_hi128(arg1), (int32_t)(sh)), _mm_srli_epi32(avx_select_lo128(arg1), (int32_t)(sh)));
1034}
1035
1036//The total number of operations is 6
1037template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::srli(bitblock256_t arg1)
1038{
1039        return avx_general_combine256(_mm_srli_epi64(avx_select_hi128(arg1), (int32_t)(sh)), _mm_srli_epi64(avx_select_lo128(arg1), (int32_t)(sh)));
1040}
1041
1042//The total number of operations is 19
1043template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::srli(bitblock256_t arg1)
1044{
1045        return (((sh%8) == 0) ? avx_byte_shift_right(arg1, (sh/8)) : ((sh >= 64) ? simd256<64>::srli<(sh-64)>(avx_byte_shift_right(arg1, 8)) : simd_or(simd256<64>::srli<sh>(arg1), avx_byte_shift_right(simd256<64>::slli<(64-sh)>(arg1), 8))));
1046}
1047
1048//The total number of operations is 41
1049template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::srli(bitblock256_t arg1)
1050{
1051        return ((sh < 128) ? simd_or(simd256<128>::srli<sh>(arg1), simd256<128>::slli<(128-sh)>(((bitblock256_t)_mm256_castsi128_si256(avx_select_hi128(arg1))))) : simd256<128>::srli<(sh-128)>(avx_move_hi128_to_lo128(arg1)));
1052}
1053
1054//The total number of operations is 1
1055template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::ctz(bitblock256_t arg1)
1056{
1057        return simd_not(arg1);
1058}
1059
1060//The total number of operations is 60
1061template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::ctz(bitblock256_t arg1)
1062{
1063        return simd256<2>::popcount(simd_andc(simd256<2>::sub(arg1, simd256<2>::constant<1>()), arg1));
1064}
1065
1066//The total number of operations is 52
1067template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::ctz(bitblock256_t arg1)
1068{
1069        return simd256<4>::popcount(simd_andc(simd256<4>::sub(arg1, simd256<4>::constant<1>()), arg1));
1070}
1071
1072//The total number of operations is 56
1073template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::ctz(bitblock256_t arg1)
1074{
1075        return simd256<8>::popcount(simd_andc(simd256<8>::sub(arg1, simd256<8>::constant<1>()), arg1));
1076}
1077
1078//The total number of operations is 71
1079template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::ctz(bitblock256_t arg1)
1080{
1081        return simd256<16>::popcount(simd_andc(simd256<16>::sub(arg1, simd256<16>::constant<1>()), arg1));
1082}
1083
1084//The total number of operations is 86
1085template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::ctz(bitblock256_t arg1)
1086{
1087        return simd256<32>::popcount(simd_andc(simd256<32>::sub(arg1, simd256<32>::constant<1>()), arg1));
1088}
1089
1090//The total number of operations is 64
1091template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::ctz(bitblock256_t arg1)
1092{
1093        return simd256<64>::popcount(simd_andc(simd256<64>::sub(arg1, simd256<64>::constant<1>()), arg1));
1094}
1095
1096//The total number of operations is 164
1097template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::ctz(bitblock256_t arg1)
1098{
1099        return simd256<128>::popcount(simd_andc(simd256<128>::sub(arg1, simd256<128>::constant<1>()), arg1));
1100}
1101
1102//The total number of operations is 343
1103template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::ctz(bitblock256_t arg1)
1104{
1105        return simd256<256>::popcount(simd_andc(simd256<256>::sub(arg1, simd256<256>::constant<1>()), arg1));
1106}
1107
1108//The total number of operations is 129
1109template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::ugt(bitblock256_t arg1, bitblock256_t arg2)
1110{
1111        return simd_or(simd_and(simd256<(2)>::himask(), simd256<(2)>::ugt(simd_and(simd256<(2)>::himask(), arg1), simd_and(simd256<(2)>::himask(), arg2))), simd_and(simd256<(2)>::lomask(), simd256<(2)>::ugt(simd_and(simd256<(2)>::lomask(), arg1), simd_and(simd256<(2)>::lomask(), arg2))));
1112}
1113
1114//The total number of operations is 61
1115template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::ugt(bitblock256_t arg1, bitblock256_t arg2)
1116{
1117        return simd_or(simd_and(simd256<(4)>::himask(), simd256<(4)>::ugt(simd_and(simd256<(4)>::himask(), arg1), simd_and(simd256<(4)>::himask(), arg2))), simd_and(simd256<(4)>::lomask(), simd256<(4)>::ugt(simd_and(simd256<(4)>::lomask(), arg1), simd_and(simd256<(4)>::lomask(), arg2))));
1118}
1119
1120//The total number of operations is 27
1121template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::ugt(bitblock256_t arg1, bitblock256_t arg2)
1122{
1123        return simd_or(simd_and(simd256<(8)>::himask(), simd256<(8)>::ugt(simd_and(simd256<(8)>::himask(), arg1), simd_and(simd256<(8)>::himask(), arg2))), simd_and(simd256<(8)>::lomask(), simd256<(8)>::ugt(simd_and(simd256<(8)>::lomask(), arg1), simd_and(simd256<(8)>::lomask(), arg2))));
1124}
1125
1126//The total number of operations is 10
1127template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::ugt(bitblock256_t arg1, bitblock256_t arg2)
1128{
1129        bitblock256_t high_bit = simd256<8>::constant<(128)>();
1130        return simd256<8>::gt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
1131}
1132
1133//The total number of operations is 10
1134template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::ugt(bitblock256_t arg1, bitblock256_t arg2)
1135{
1136        bitblock256_t high_bit = simd256<16>::constant<(32768)>();
1137        return simd256<16>::gt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
1138}
1139
1140//The total number of operations is 10
1141template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::ugt(bitblock256_t arg1, bitblock256_t arg2)
1142{
1143        bitblock256_t high_bit = simd256<32>::constant<(2147483648UL)>();
1144        return simd256<32>::gt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
1145}
1146
1147//The total number of operations is 10
1148template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::ugt(bitblock256_t arg1, bitblock256_t arg2)
1149{
1150        bitblock256_t high_bit = simd256<64>::constant<(9223372036854775808UL)>();
1151        return simd256<64>::gt(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit));
1152}
1153
1154//The total number of operations is 143
1155template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::ugt(bitblock256_t arg1, bitblock256_t arg2)
1156{
1157        bitblock256_t tmpAns = simd256<(64)>::ugt(arg1, arg2);
1158        bitblock256_t mask = simd_and(tmpAns, simd256<128>::srli<(64)>(simd256<(64)>::eq(arg1, arg2)));
1159        mask = simd_or(mask, simd256<128>::slli<(64)>(mask));
1160        return simd_or(simd256<128>::srai<(64)>(tmpAns), mask);
1161}
1162
1163//The total number of operations is 495
1164template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::ugt(bitblock256_t arg1, bitblock256_t arg2)
1165{
1166        bitblock256_t tmpAns = simd256<(128)>::ugt(arg1, arg2);
1167        bitblock256_t mask = simd_and(tmpAns, simd256<256>::srli<(128)>(simd256<(128)>::eq(arg1, arg2)));
1168        mask = simd_or(mask, simd256<256>::slli<(128)>(mask));
1169        return simd_or(simd256<256>::srai<(128)>(tmpAns), mask);
1170}
1171
1172//The total number of operations is 9
1173template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::xor_hl(bitblock256_t arg1)
1174{
1175        return simd_xor(simd256<2>::srli<(1)>(arg1), simd_and(arg1, simd256<2>::lomask()));
1176}
1177
1178//The total number of operations is 9
1179template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::xor_hl(bitblock256_t arg1)
1180{
1181        return simd_xor(simd256<4>::srli<(2)>(arg1), simd_and(arg1, simd256<4>::lomask()));
1182}
1183
1184//The total number of operations is 9
1185template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::xor_hl(bitblock256_t arg1)
1186{
1187        return simd_xor(simd256<8>::srli<(4)>(arg1), simd_and(arg1, simd256<8>::lomask()));
1188}
1189
1190//The total number of operations is 8
1191template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::xor_hl(bitblock256_t arg1)
1192{
1193        return simd_xor(simd256<16>::srli<(8)>(arg1), simd_and(arg1, simd256<16>::lomask()));
1194}
1195
1196//The total number of operations is 8
1197template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::xor_hl(bitblock256_t arg1)
1198{
1199        return simd_xor(simd256<32>::srli<(16)>(arg1), simd_and(arg1, simd256<32>::lomask()));
1200}
1201
1202//The total number of operations is 8
1203template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::xor_hl(bitblock256_t arg1)
1204{
1205        return simd_xor(simd256<64>::srli<(32)>(arg1), simd_and(arg1, simd256<64>::lomask()));
1206}
1207
1208//The total number of operations is 21
1209template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::xor_hl(bitblock256_t arg1)
1210{
1211        return simd_xor(simd256<128>::srli<(64)>(arg1), simd_and(arg1, simd256<128>::lomask()));
1212}
1213
1214//The total number of operations is 43
1215template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::xor_hl(bitblock256_t arg1)
1216{
1217        return simd_xor(simd256<256>::srli<(128)>(arg1), simd_and(arg1, simd256<256>::lomask()));
1218}
1219
1220//The total number of operations is 0
1221template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::popcount(bitblock256_t arg1)
1222{
1223        return arg1;
1224}
1225
1226//The total number of operations is 15
1227template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::popcount(bitblock256_t arg1)
1228{
1229        return simd256<2>::add_hl(simd256<(1)>::popcount(arg1));
1230}
1231
1232//The total number of operations is 31
1233template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::popcount(bitblock256_t arg1)
1234{
1235        return simd256<4>::add_hl(simd256<(2)>::popcount(arg1));
1236}
1237
1238//The total number of operations is 47
1239template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::popcount(bitblock256_t arg1)
1240{
1241        return simd256<8>::add_hl(simd256<(4)>::popcount(arg1));
1242}
1243
1244//The total number of operations is 62
1245template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::popcount(bitblock256_t arg1)
1246{
1247        return simd256<16>::add_hl(simd256<(8)>::popcount(arg1));
1248}
1249
1250//The total number of operations is 77
1251template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::popcount(bitblock256_t arg1)
1252{
1253        return simd256<32>::add_hl(simd256<(16)>::popcount(arg1));
1254}
1255
1256//The total number of operations is 55
1257template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::popcount(bitblock256_t arg1)
1258{
1259        bitblock256_t tmpAns = simd256<8>::popcount(arg1);
1260        return avx_general_combine256(_mm_sad_epu8(avx_select_hi128(tmpAns), _mm_set1_epi32((int32_t)(0))), _mm_sad_epu8(avx_select_lo128(tmpAns), _mm_set1_epi32((int32_t)(0))));
1261}
1262
1263//The total number of operations is 119
1264template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::popcount(bitblock256_t arg1)
1265{
1266        return simd256<128>::add_hl(simd256<(64)>::popcount(arg1));
1267}
1268
1269//The total number of operations is 205
1270template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::popcount(bitblock256_t arg1)
1271{
1272        bitblock256_t tmpAns = simd256<(128)>::popcount(arg1);
1273        return simd256<(128)>::add(simd_and(tmpAns, simd256<256>::lomask()), simd256<256>::srli<(128)>(tmpAns));
1274}
1275
1276//The total number of operations is 92
1277template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::neg(bitblock256_t arg1)
1278{
1279        return simd256<1>::sub(simd256<1>::constant<0>(), arg1);
1280}
1281
1282//The total number of operations is 44
1283template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::neg(bitblock256_t arg1)
1284{
1285        return simd256<2>::sub(simd256<2>::constant<0>(), arg1);
1286}
1287
1288//The total number of operations is 20
1289template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::neg(bitblock256_t arg1)
1290{
1291        return simd256<4>::sub(simd256<4>::constant<0>(), arg1);
1292}
1293
1294//The total number of operations is 8
1295template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::neg(bitblock256_t arg1)
1296{
1297        return simd256<8>::sub(simd256<8>::constant<0>(), arg1);
1298}
1299
1300//The total number of operations is 8
1301template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::neg(bitblock256_t arg1)
1302{
1303        return simd256<16>::sub(simd256<16>::constant<0>(), arg1);
1304}
1305
1306//The total number of operations is 8
1307template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::neg(bitblock256_t arg1)
1308{
1309        return simd256<32>::sub(simd256<32>::constant<0>(), arg1);
1310}
1311
1312//The total number of operations is 8
1313template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::neg(bitblock256_t arg1)
1314{
1315        return simd256<64>::sub(simd256<64>::constant<0>(), arg1);
1316}
1317
1318//The total number of operations is 44
1319template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::neg(bitblock256_t arg1)
1320{
1321        return simd256<128>::sub(simd256<128>::constant<0>(), arg1);
1322}
1323
1324//The total number of operations is 137
1325template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::neg(bitblock256_t arg1)
1326{
1327        return simd256<256>::sub(simd256<256>::constant<0>(), arg1);
1328}
1329
1330//The total number of operations is 7
1331template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::slli(bitblock256_t arg1)
1332{
1333        return simd_and(simd256<32>::slli<sh>(arg1), simd256<2>::constant<(((3)<<sh)&(3))>());
1334}
1335
1336//The total number of operations is 7
1337template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::slli(bitblock256_t arg1)
1338{
1339        return simd_and(simd256<32>::slli<sh>(arg1), simd256<4>::constant<(((15)<<sh)&(15))>());
1340}
1341
1342//The total number of operations is 7
1343template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::slli(bitblock256_t arg1)
1344{
1345        return simd_and(simd256<32>::slli<sh>(arg1), simd256<8>::constant<(((255)<<sh)&(255))>());
1346}
1347
1348//The total number of operations is 6
1349template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::slli(bitblock256_t arg1)
1350{
1351        return avx_general_combine256(_mm_slli_epi16(avx_select_hi128(arg1), (int32_t)(sh)), _mm_slli_epi16(avx_select_lo128(arg1), (int32_t)(sh)));
1352}
1353
1354//The total number of operations is 6
1355template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::slli(bitblock256_t arg1)
1356{
1357        return avx_general_combine256(_mm_slli_epi32(avx_select_hi128(arg1), (int32_t)(sh)), _mm_slli_epi32(avx_select_lo128(arg1), (int32_t)(sh)));
1358}
1359
1360//The total number of operations is 6
1361template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::slli(bitblock256_t arg1)
1362{
1363        return avx_general_combine256(_mm_slli_epi64(avx_select_hi128(arg1), (int32_t)(sh)), _mm_slli_epi64(avx_select_lo128(arg1), (int32_t)(sh)));
1364}
1365
1366//The total number of operations is 19
1367template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::slli(bitblock256_t arg1)
1368{
1369        return (((sh%8) == 0) ? avx_byte_shift_left(arg1, (sh/8)) : ((sh >= 64) ? simd256<64>::slli<(sh-64)>(avx_byte_shift_left(arg1, 8)) : simd_or(simd256<64>::slli<sh>(arg1), avx_byte_shift_left(simd256<64>::srli<(64-sh)>(arg1), 8))));
1370}
1371
1372//The total number of operations is 40
1373template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::slli(bitblock256_t arg1)
1374{
1375        return ((sh < 128) ? simd_or(simd256<128>::slli<sh>(arg1), avx_move_lo128_to_hi128(simd256<128>::srli<(128-sh)>(arg1))) : simd256<128>::slli<(sh-128)>(avx_move_lo128_to_hi128(arg1)));
1376}
1377
1378//The total number of operations is 3
1379template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::ifh(bitblock256_t arg1, bitblock256_t arg2, bitblock256_t arg3)
1380{
1381        return simd_or(simd_and(arg2, arg1), simd_andc(arg3, arg1));
1382}
1383
1384//The total number of operations is 13
1385template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::ifh(bitblock256_t arg1, bitblock256_t arg2, bitblock256_t arg3)
1386{
1387        return simd256<(1)>::ifh(simd256<1>::ifh(simd256<2>::himask(), arg1, simd256<2>::srli<(1)>(arg1)), arg2, arg3);
1388}
1389
1390//The total number of operations is 23
1391template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::ifh(bitblock256_t arg1, bitblock256_t arg2, bitblock256_t arg3)
1392{
1393        return simd256<(2)>::ifh(simd256<1>::ifh(simd256<4>::himask(), arg1, simd256<4>::srli<(2)>(arg1)), arg2, arg3);
1394}
1395
1396//The total number of operations is 11
1397template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::ifh(bitblock256_t arg1, bitblock256_t arg2, bitblock256_t arg3)
1398{
1399        return simd256<1>::ifh(simd256<8>::gt(simd256<8>::constant<0>(), arg1), arg2, arg3);
1400}
1401
1402//The total number of operations is 11
1403template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::ifh(bitblock256_t arg1, bitblock256_t arg2, bitblock256_t arg3)
1404{
1405        return simd256<1>::ifh(simd256<16>::gt(simd256<16>::constant<0>(), arg1), arg2, arg3);
1406}
1407
1408//The total number of operations is 11
1409template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::ifh(bitblock256_t arg1, bitblock256_t arg2, bitblock256_t arg3)
1410{
1411        return simd256<1>::ifh(simd256<32>::gt(simd256<32>::constant<0>(), arg1), arg2, arg3);
1412}
1413
1414//The total number of operations is 1
1415template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::ifh(bitblock256_t arg1, bitblock256_t arg2, bitblock256_t arg3)
1416{
1417        return (bitblock256_t)_mm256_blendv_pd((__m256d)(arg3), (__m256d)(arg2), (__m256d)(arg1));
1418}
1419
1420//The total number of operations is 23
1421template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::ifh(bitblock256_t arg1, bitblock256_t arg2, bitblock256_t arg3)
1422{
1423        return simd256<(64)>::ifh(simd256<1>::ifh(simd256<128>::himask(), arg1, simd256<128>::srli<(64)>(arg1)), arg2, arg3);
1424}
1425
1426//The total number of operations is 67
1427template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::ifh(bitblock256_t arg1, bitblock256_t arg2, bitblock256_t arg3)
1428{
1429        return simd256<(128)>::ifh(simd256<1>::ifh(simd256<256>::himask(), arg1, simd256<256>::srli<(128)>(arg1)), arg2, arg3);
1430}
1431
1432//The total number of operations is 92
1433template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::sub(bitblock256_t arg1, bitblock256_t arg2)
1434{
1435        return simd256<1>::ifh(simd256<(2)>::himask(), simd256<(2)>::sub(arg1, simd_and(simd256<(2)>::himask(), arg2)), simd256<(2)>::sub(arg1, arg2));
1436}
1437
1438//The total number of operations is 44
1439template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::sub(bitblock256_t arg1, bitblock256_t arg2)
1440{
1441        return simd256<1>::ifh(simd256<(4)>::himask(), simd256<(4)>::sub(arg1, simd_and(simd256<(4)>::himask(), arg2)), simd256<(4)>::sub(arg1, arg2));
1442}
1443
1444//The total number of operations is 20
1445template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::sub(bitblock256_t arg1, bitblock256_t arg2)
1446{
1447        return simd256<1>::ifh(simd256<(8)>::himask(), simd256<(8)>::sub(arg1, simd_and(simd256<(8)>::himask(), arg2)), simd256<(8)>::sub(arg1, arg2));
1448}
1449
1450//The total number of operations is 8
1451template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::sub(bitblock256_t arg1, bitblock256_t arg2)
1452{
1453        return avx_general_combine256(_mm_sub_epi8(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_sub_epi8(avx_select_lo128(arg1), avx_select_lo128(arg2)));
1454}
1455
1456//The total number of operations is 8
1457template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::sub(bitblock256_t arg1, bitblock256_t arg2)
1458{
1459        return avx_general_combine256(_mm_sub_epi16(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_sub_epi16(avx_select_lo128(arg1), avx_select_lo128(arg2)));
1460}
1461
1462//The total number of operations is 8
1463template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::sub(bitblock256_t arg1, bitblock256_t arg2)
1464{
1465        return avx_general_combine256(_mm_sub_epi32(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_sub_epi32(avx_select_lo128(arg1), avx_select_lo128(arg2)));
1466}
1467
1468//The total number of operations is 8
1469template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::sub(bitblock256_t arg1, bitblock256_t arg2)
1470{
1471        return avx_general_combine256(_mm_sub_epi64(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_sub_epi64(avx_select_lo128(arg1), avx_select_lo128(arg2)));
1472}
1473
1474//The total number of operations is 44
1475template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::sub(bitblock256_t arg1, bitblock256_t arg2)
1476{
1477        bitblock256_t ans = simd256<(64)>::sub(arg1, arg2);
1478        bitblock256_t borrowMask = simd_or(simd_andc(arg2, arg1), simd_and(simd_not(simd_xor(arg1, arg2)), ans));
1479        bitblock256_t loMask = simd256<128>::lomask();
1480        bitblock256_t borrow = simd256<128>::slli<1>(simd_and(borrowMask, loMask));
1481        return simd256<1>::ifh(loMask, ans, simd256<(64)>::sub(ans, borrow));
1482}
1483
1484//The total number of operations is 137
1485template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::sub(bitblock256_t arg1, bitblock256_t arg2)
1486{
1487        bitblock256_t ans = simd256<(128)>::sub(arg1, arg2);
1488        bitblock256_t borrowMask = simd_or(simd_andc(arg2, arg1), simd_and(simd_not(simd_xor(arg1, arg2)), ans));
1489        bitblock256_t loMask = simd256<256>::lomask();
1490        bitblock256_t borrow = simd256<256>::slli<1>(simd_and(borrowMask, loMask));
1491        return simd256<1>::ifh(loMask, ans, simd256<(128)>::sub(ans, borrow));
1492}
1493
1494//The total number of operations is 15
1495template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::add_hl(bitblock256_t arg1)
1496{
1497        return simd256<16>::sub(arg1, simd_and(simd256<2>::lomask(), simd256<16>::srli<1>(arg1)));
1498}
1499
1500//The total number of operations is 16
1501template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::add_hl(bitblock256_t arg1)
1502{
1503        return simd256<(8)>::add(simd256<4>::srli<(2)>(arg1), simd_and(arg1, simd256<4>::lomask()));
1504}
1505
1506//The total number of operations is 16
1507template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::add_hl(bitblock256_t arg1)
1508{
1509        return simd256<(16)>::add(simd256<8>::srli<(4)>(arg1), simd_and(arg1, simd256<8>::lomask()));
1510}
1511
1512//The total number of operations is 15
1513template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::add_hl(bitblock256_t arg1)
1514{
1515        return simd256<(32)>::add(simd256<16>::srli<(8)>(arg1), simd_and(arg1, simd256<16>::lomask()));
1516}
1517
1518//The total number of operations is 15
1519template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::add_hl(bitblock256_t arg1)
1520{
1521        return simd256<(64)>::add(simd256<32>::srli<(16)>(arg1), simd_and(arg1, simd256<32>::lomask()));
1522}
1523
1524//The total number of operations is 15
1525template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::add_hl(bitblock256_t arg1)
1526{
1527        return simd256<64>::add(simd256<64>::srli<(32)>(arg1), simd_and(arg1, simd256<64>::lomask()));
1528}
1529
1530//The total number of operations is 64
1531template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::add_hl(bitblock256_t arg1)
1532{
1533        return simd256<128>::add(simd256<128>::srli<(64)>(arg1), simd_and(arg1, simd256<128>::lomask()));
1534}
1535
1536//The total number of operations is 179
1537template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::add_hl(bitblock256_t arg1)
1538{
1539        return simd256<256>::add(simd256<256>::srli<(128)>(arg1), simd_and(arg1, simd256<256>::lomask()));
1540}
1541
1542//The total number of operations is 0
1543template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::lomask()
1544{
1545        return simd256<2>::constant<(1)>();
1546}
1547
1548//The total number of operations is 0
1549template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::lomask()
1550{
1551        return simd256<4>::constant<(3)>();
1552}
1553
1554//The total number of operations is 0
1555template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::lomask()
1556{
1557        return simd256<8>::constant<(15)>();
1558}
1559
1560//The total number of operations is 0
1561template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::lomask()
1562{
1563        return simd256<16>::constant<(255)>();
1564}
1565
1566//The total number of operations is 0
1567template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::lomask()
1568{
1569        return simd256<32>::constant<(65535)>();
1570}
1571
1572//The total number of operations is 0
1573template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::lomask()
1574{
1575        return ((bitblock256_t)_mm256_set_epi32((int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1)));
1576}
1577
1578//The total number of operations is 0
1579template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::lomask()
1580{
1581        return ((bitblock256_t)_mm256_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1)));
1582}
1583
1584//The total number of operations is 0
1585template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::lomask()
1586{
1587        return ((bitblock256_t)_mm256_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1), (int32_t)(-1), (int32_t)(-1)));
1588}
1589
1590//The total number of operations is 0
1591template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::constant()
1592{
1593        return simd256<32>::constant<(-1*val)>();
1594}
1595
1596//The total number of operations is 0
1597template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::constant()
1598{
1599        return ((val < 0) ? simd256<(4)>::constant<((val<<2)|(val^(-4)))>() : simd256<(4)>::constant<((val<<2)|val)>());
1600}
1601
1602//The total number of operations is 0
1603template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::constant()
1604{
1605        return ((val < 0) ? simd256<(8)>::constant<((val<<4)|(val^(-16)))>() : simd256<(8)>::constant<((val<<4)|val)>());
1606}
1607
1608//The total number of operations is 0
1609template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::constant()
1610{
1611        return (bitblock256_t)_mm256_set1_epi8((int32_t)(val));
1612}
1613
1614//The total number of operations is 0
1615template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::constant()
1616{
1617        return (bitblock256_t)_mm256_set1_epi16((int32_t)(val));
1618}
1619
1620//The total number of operations is 0
1621template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::constant()
1622{
1623        return (bitblock256_t)_mm256_set1_epi32((int32_t)(val));
1624}
1625
1626//The total number of operations is 0
1627template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::constant()
1628{
1629        return ((bitblock256_t)_mm256_set_epi32((int32_t)((val>>32)), (int32_t)(val), (int32_t)((val>>32)), (int32_t)(val), (int32_t)((val>>32)), (int32_t)(val), (int32_t)((val>>32)), (int32_t)(val)));
1630}
1631
1632//The total number of operations is 0
1633template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::constant()
1634{
1635        return ((bitblock256_t)_mm256_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)((val>>32)), (int32_t)(val), (int32_t)(0), (int32_t)(0), (int32_t)((val>>32)), (int32_t)(val)));
1636}
1637
1638//The total number of operations is 0
1639template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::constant()
1640{
1641        return ((bitblock256_t)_mm256_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)((val>>32)), (int32_t)(val)));
1642}
1643
1644//The total number of operations is 95
1645template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::min(bitblock256_t arg1, bitblock256_t arg2)
1646{
1647        bitblock256_t high_bit = simd256<1>::constant<(1)>();
1648        return simd_xor(simd256<1>::umin(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
1649}
1650
1651//The total number of operations is 47
1652template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::min(bitblock256_t arg1, bitblock256_t arg2)
1653{
1654        bitblock256_t high_bit = simd256<2>::constant<(2)>();
1655        return simd_xor(simd256<2>::umin(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
1656}
1657
1658//The total number of operations is 23
1659template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::min(bitblock256_t arg1, bitblock256_t arg2)
1660{
1661        bitblock256_t high_bit = simd256<4>::constant<(8)>();
1662        return simd_xor(simd256<4>::umin(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
1663}
1664
1665//The total number of operations is 8
1666template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::min(bitblock256_t arg1, bitblock256_t arg2)
1667{
1668        return avx_general_combine256(_mm_min_epi8(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_min_epi8(avx_select_lo128(arg1), avx_select_lo128(arg2)));
1669}
1670
1671//The total number of operations is 8
1672template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::min(bitblock256_t arg1, bitblock256_t arg2)
1673{
1674        return avx_general_combine256(_mm_min_epi16(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_min_epi16(avx_select_lo128(arg1), avx_select_lo128(arg2)));
1675}
1676
1677//The total number of operations is 8
1678template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::min(bitblock256_t arg1, bitblock256_t arg2)
1679{
1680        return avx_general_combine256(_mm_min_epi32(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_min_epi32(avx_select_lo128(arg1), avx_select_lo128(arg2)));
1681}
1682
1683//The total number of operations is 11
1684template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::min(bitblock256_t arg1, bitblock256_t arg2)
1685{
1686        return simd256<1>::ifh(simd256<64>::gt(arg1, arg2), arg2, arg1);
1687}
1688
1689//The total number of operations is 88
1690template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::min(bitblock256_t arg1, bitblock256_t arg2)
1691{
1692        bitblock256_t hiAns = simd256<(64)>::min(arg1, arg2);
1693        bitblock256_t loAns = simd256<(64)>::umin(arg1, arg2);
1694        bitblock256_t eqMask1 = simd256<128>::srli<(64)>(simd256<(64)>::eq(hiAns, arg1));
1695        bitblock256_t eqMask2 = simd256<128>::srli<(64)>(simd256<(64)>::eq(hiAns, arg2));
1696        return simd256<1>::ifh(simd256<128>::himask(), hiAns, simd256<1>::ifh(eqMask1, simd256<1>::ifh(eqMask2, loAns, arg1), arg2));
1697}
1698
1699//The total number of operations is 352
1700template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::min(bitblock256_t arg1, bitblock256_t arg2)
1701{
1702        bitblock256_t hiAns = simd256<(128)>::min(arg1, arg2);
1703        bitblock256_t loAns = simd256<(128)>::umin(arg1, arg2);
1704        bitblock256_t eqMask1 = simd256<256>::srli<(128)>(simd256<(128)>::eq(hiAns, arg1));
1705        bitblock256_t eqMask2 = simd256<256>::srli<(128)>(simd256<(128)>::eq(hiAns, arg2));
1706        return simd256<1>::ifh(simd256<256>::himask(), hiAns, simd256<1>::ifh(eqMask1, simd256<1>::ifh(eqMask2, loAns, arg1), arg2));
1707}
1708
1709//The total number of operations is 92
1710template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::umin(bitblock256_t arg1, bitblock256_t arg2)
1711{
1712        return simd_or(simd_and(simd256<(2)>::himask(), simd256<(2)>::umin(arg1, arg2)), simd256<(2)>::umin(simd_and(simd256<(2)>::lomask(), arg1), simd_and(simd256<(2)>::lomask(), arg2)));
1713}
1714
1715//The total number of operations is 44
1716template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::umin(bitblock256_t arg1, bitblock256_t arg2)
1717{
1718        return simd_or(simd_and(simd256<(4)>::himask(), simd256<(4)>::umin(arg1, arg2)), simd256<(4)>::umin(simd_and(simd256<(4)>::lomask(), arg1), simd_and(simd256<(4)>::lomask(), arg2)));
1719}
1720
1721//The total number of operations is 20
1722template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::umin(bitblock256_t arg1, bitblock256_t arg2)
1723{
1724        return simd_or(simd_and(simd256<(8)>::himask(), simd256<(8)>::umin(arg1, arg2)), simd256<(8)>::umin(simd_and(simd256<(8)>::lomask(), arg1), simd_and(simd256<(8)>::lomask(), arg2)));
1725}
1726
1727//The total number of operations is 8
1728template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::umin(bitblock256_t arg1, bitblock256_t arg2)
1729{
1730        return avx_general_combine256(_mm_min_epu8(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_min_epu8(avx_select_lo128(arg1), avx_select_lo128(arg2)));
1731}
1732
1733//The total number of operations is 8
1734template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::umin(bitblock256_t arg1, bitblock256_t arg2)
1735{
1736        return avx_general_combine256(_mm_min_epu16(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_min_epu16(avx_select_lo128(arg1), avx_select_lo128(arg2)));
1737}
1738
1739//The total number of operations is 8
1740template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::umin(bitblock256_t arg1, bitblock256_t arg2)
1741{
1742        return avx_general_combine256(_mm_min_epu32(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_min_epu32(avx_select_lo128(arg1), avx_select_lo128(arg2)));
1743}
1744
1745//The total number of operations is 14
1746template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::umin(bitblock256_t arg1, bitblock256_t arg2)
1747{
1748        bitblock256_t high_bit = simd256<64>::constant<(9223372036854775808UL)>();
1749        return simd_xor(simd256<64>::min(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
1750}
1751
1752//The total number of operations is 77
1753template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::umin(bitblock256_t arg1, bitblock256_t arg2)
1754{
1755        bitblock256_t tmpAns = simd256<(64)>::umin(arg1, arg2);
1756        bitblock256_t eqMask1 = simd256<128>::srli<(64)>(simd256<(64)>::eq(tmpAns, arg1));
1757        bitblock256_t eqMask2 = simd256<128>::srli<(64)>(simd256<(64)>::eq(tmpAns, arg2));
1758        return simd256<1>::ifh(simd256<128>::himask(), tmpAns, simd256<1>::ifh(eqMask1, simd256<1>::ifh(eqMask2, tmpAns, arg1), arg2));
1759}
1760
1761//The total number of operations is 264
1762template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::umin(bitblock256_t arg1, bitblock256_t arg2)
1763{
1764        bitblock256_t tmpAns = simd256<(128)>::umin(arg1, arg2);
1765        bitblock256_t eqMask1 = simd256<256>::srli<(128)>(simd256<(128)>::eq(tmpAns, arg1));
1766        bitblock256_t eqMask2 = simd256<256>::srli<(128)>(simd256<(128)>::eq(tmpAns, arg2));
1767        return simd256<1>::ifh(simd256<256>::himask(), tmpAns, simd256<1>::ifh(eqMask1, simd256<1>::ifh(eqMask2, tmpAns, arg1), arg2));
1768}
1769
1770//The total number of operations is 92
1771template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::umax(bitblock256_t arg1, bitblock256_t arg2)
1772{
1773        return simd_or(simd_and(simd256<(2)>::himask(), simd256<(2)>::umax(arg1, arg2)), simd256<(2)>::umax(simd_and(simd256<(2)>::lomask(), arg1), simd_and(simd256<(2)>::lomask(), arg2)));
1774}
1775
1776//The total number of operations is 44
1777template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::umax(bitblock256_t arg1, bitblock256_t arg2)
1778{
1779        return simd_or(simd_and(simd256<(4)>::himask(), simd256<(4)>::umax(arg1, arg2)), simd256<(4)>::umax(simd_and(simd256<(4)>::lomask(), arg1), simd_and(simd256<(4)>::lomask(), arg2)));
1780}
1781
1782//The total number of operations is 20
1783template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::umax(bitblock256_t arg1, bitblock256_t arg2)
1784{
1785        return simd_or(simd_and(simd256<(8)>::himask(), simd256<(8)>::umax(arg1, arg2)), simd256<(8)>::umax(simd_and(simd256<(8)>::lomask(), arg1), simd_and(simd256<(8)>::lomask(), arg2)));
1786}
1787
1788//The total number of operations is 8
1789template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::umax(bitblock256_t arg1, bitblock256_t arg2)
1790{
1791        return avx_general_combine256(_mm_max_epu8(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_max_epu8(avx_select_lo128(arg1), avx_select_lo128(arg2)));
1792}
1793
1794//The total number of operations is 8
1795template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::umax(bitblock256_t arg1, bitblock256_t arg2)
1796{
1797        return avx_general_combine256(_mm_max_epu16(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_max_epu16(avx_select_lo128(arg1), avx_select_lo128(arg2)));
1798}
1799
1800//The total number of operations is 8
1801template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::umax(bitblock256_t arg1, bitblock256_t arg2)
1802{
1803        return avx_general_combine256(_mm_max_epu32(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_max_epu32(avx_select_lo128(arg1), avx_select_lo128(arg2)));
1804}
1805
1806//The total number of operations is 14
1807template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::umax(bitblock256_t arg1, bitblock256_t arg2)
1808{
1809        bitblock256_t high_bit = simd256<64>::constant<(9223372036854775808UL)>();
1810        return simd_xor(simd256<64>::max(simd_xor(arg1, high_bit), simd_xor(arg2, high_bit)), high_bit);
1811}
1812
1813//The total number of operations is 77
1814template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::umax(bitblock256_t arg1, bitblock256_t arg2)
1815{
1816        bitblock256_t tmpAns = simd256<(64)>::umax(arg1, arg2);
1817        bitblock256_t eqMask1 = simd256<128>::srli<(64)>(simd256<(64)>::eq(tmpAns, arg1));
1818        bitblock256_t eqMask2 = simd256<128>::srli<(64)>(simd256<(64)>::eq(tmpAns, arg2));
1819        return simd256<1>::ifh(simd256<128>::himask(), tmpAns, simd256<1>::ifh(eqMask1, simd256<1>::ifh(eqMask2, tmpAns, arg1), arg2));
1820}
1821
1822//The total number of operations is 264
1823template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::umax(bitblock256_t arg1, bitblock256_t arg2)
1824{
1825        bitblock256_t tmpAns = simd256<(128)>::umax(arg1, arg2);
1826        bitblock256_t eqMask1 = simd256<256>::srli<(128)>(simd256<(128)>::eq(tmpAns, arg1));
1827        bitblock256_t eqMask2 = simd256<256>::srli<(128)>(simd256<(128)>::eq(tmpAns, arg2));
1828        return simd256<1>::ifh(simd256<256>::himask(), tmpAns, simd256<1>::ifh(eqMask1, simd256<1>::ifh(eqMask2, tmpAns, arg1), arg2));
1829}
1830
1831//The total number of operations is 113
1832template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::eq(bitblock256_t arg1, bitblock256_t arg2)
1833{
1834        return simd_or(simd_and(simd256<(2)>::himask(), simd256<(2)>::eq(simd_and(simd256<(2)>::himask(), arg1), simd_and(simd256<(2)>::himask(), arg2))), simd_and(simd256<(2)>::lomask(), simd256<(2)>::eq(simd_and(simd256<(2)>::lomask(), arg1), simd_and(simd256<(2)>::lomask(), arg2))));
1835}
1836
1837//The total number of operations is 53
1838template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::eq(bitblock256_t arg1, bitblock256_t arg2)
1839{
1840        return simd_or(simd_and(simd256<(4)>::himask(), simd256<(4)>::eq(simd_and(simd256<(4)>::himask(), arg1), simd_and(simd256<(4)>::himask(), arg2))), simd_and(simd256<(4)>::lomask(), simd256<(4)>::eq(simd_and(simd256<(4)>::lomask(), arg1), simd_and(simd256<(4)>::lomask(), arg2))));
1841}
1842
1843//The total number of operations is 23
1844template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::eq(bitblock256_t arg1, bitblock256_t arg2)
1845{
1846        return simd_or(simd_and(simd256<(8)>::himask(), simd256<(8)>::eq(simd_and(simd256<(8)>::himask(), arg1), simd_and(simd256<(8)>::himask(), arg2))), simd_and(simd256<(8)>::lomask(), simd256<(8)>::eq(simd_and(simd256<(8)>::lomask(), arg1), simd_and(simd256<(8)>::lomask(), arg2))));
1847}
1848
1849//The total number of operations is 8
1850template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::eq(bitblock256_t arg1, bitblock256_t arg2)
1851{
1852        return avx_general_combine256(_mm_cmpeq_epi8(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_cmpeq_epi8(avx_select_lo128(arg1), avx_select_lo128(arg2)));
1853}
1854
1855//The total number of operations is 8
1856template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::eq(bitblock256_t arg1, bitblock256_t arg2)
1857{
1858        return avx_general_combine256(_mm_cmpeq_epi16(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_cmpeq_epi16(avx_select_lo128(arg1), avx_select_lo128(arg2)));
1859}
1860
1861//The total number of operations is 8
1862template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::eq(bitblock256_t arg1, bitblock256_t arg2)
1863{
1864        return avx_general_combine256(_mm_cmpeq_epi32(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_cmpeq_epi32(avx_select_lo128(arg1), avx_select_lo128(arg2)));
1865}
1866
1867//The total number of operations is 8
1868template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::eq(bitblock256_t arg1, bitblock256_t arg2)
1869{
1870        return avx_general_combine256(_mm_cmpeq_epi64(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_cmpeq_epi64(avx_select_lo128(arg1), avx_select_lo128(arg2)));
1871}
1872
1873//The total number of operations is 48
1874template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::eq(bitblock256_t arg1, bitblock256_t arg2)
1875{
1876        bitblock256_t tmpAns = simd256<(64)>::eq(arg1, arg2);
1877        bitblock256_t loMask = simd_and(tmpAns, simd256<128>::srli<(64)>(tmpAns));
1878        bitblock256_t hiMask = simd256<128>::slli<(64)>(loMask);
1879        return simd_or(loMask, hiMask);
1880}
1881
1882//The total number of operations is 131
1883template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::eq(bitblock256_t arg1, bitblock256_t arg2)
1884{
1885        bitblock256_t tmpAns = simd256<(128)>::eq(arg1, arg2);
1886        bitblock256_t loMask = simd_and(tmpAns, simd256<256>::srli<(128)>(tmpAns));
1887        bitblock256_t hiMask = simd256<256>::slli<(128)>(loMask);
1888        return simd_or(loMask, hiMask);
1889}
1890
1891//The total number of operations is 9
1892template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::srai(bitblock256_t arg1)
1893{
1894        return ((sh == 0) ? arg1 : simd_or(simd_and(simd256<2>::himask(), arg1), simd256<2>::srli<1>(arg1)));
1895}
1896
1897//The total number of operations is 29
1898template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::srai(bitblock256_t arg1)
1899{
1900        bitblock256_t tmp = simd256<4>::srli<((sh >= 4) ? (3) : ((sh < 0) ? 0 : sh))>(arg1);
1901        return simd_or(tmp, simd256<4>::sub(simd256<4>::constant<0>(), simd_and(simd256<4>::constant<(1<<((4-((sh >= 4) ? (3) : ((sh < 0) ? 0 : sh)))-1))>(), tmp)));
1902}
1903
1904//The total number of operations is 17
1905template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::srai(bitblock256_t arg1)
1906{
1907        bitblock256_t tmp = simd256<8>::srli<((sh >= 8) ? (7) : ((sh < 0) ? 0 : sh))>(arg1);
1908        return simd_or(tmp, simd256<8>::sub(simd256<8>::constant<0>(), simd_and(simd256<8>::constant<(1<<((8-((sh >= 8) ? (7) : ((sh < 0) ? 0 : sh)))-1))>(), tmp)));
1909}
1910
1911//The total number of operations is 6
1912template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::srai(bitblock256_t arg1)
1913{
1914        return avx_general_combine256(_mm_srai_epi16(avx_select_hi128(arg1), (int32_t)(sh)), _mm_srai_epi16(avx_select_lo128(arg1), (int32_t)(sh)));
1915}
1916
1917//The total number of operations is 6
1918template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::srai(bitblock256_t arg1)
1919{
1920        return avx_general_combine256(_mm_srai_epi32(avx_select_hi128(arg1), (int32_t)(sh)), _mm_srai_epi32(avx_select_lo128(arg1), (int32_t)(sh)));
1921}
1922
1923//The total number of operations is 22
1924template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::srai(bitblock256_t arg1)
1925{
1926        bitblock256_t tmp = simd256<64>::srli<((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh))>(arg1);
1927        return simd_or(tmp, simd256<64>::sub(simd256<64>::constant<0>(), simd_and(simd256<64>::slli<((64-((sh >= 64) ? (63) : ((sh < 0) ? 0 : sh)))-1)>(simd256<64>::constant<1>()), tmp)));
1928}
1929
1930//The total number of operations is 84
1931template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::srai(bitblock256_t arg1)
1932{
1933        bitblock256_t tmp = simd256<128>::srli<((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh))>(arg1);
1934        return simd_or(tmp, simd256<128>::sub(simd256<128>::constant<0>(), simd_and(simd256<128>::slli<((128-((sh >= 128) ? (127) : ((sh < 0) ? 0 : sh)))-1)>(simd256<128>::constant<1>()), tmp)));
1935}
1936
1937//The total number of operations is 220
1938template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::srai(bitblock256_t arg1)
1939{
1940        bitblock256_t tmp = simd256<256>::srli<((sh >= 256) ? (255) : ((sh < 0) ? 0 : sh))>(arg1);
1941        return simd_or(tmp, simd256<256>::sub(simd256<256>::constant<0>(), simd_and(simd256<256>::slli<((256-((sh >= 256) ? (255) : ((sh < 0) ? 0 : sh)))-1)>(simd256<256>::constant<1>()), tmp)));
1942}
1943
1944//The total number of operations is 0
1945template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::himask()
1946{
1947        return simd256<2>::constant<(2)>();
1948}
1949
1950//The total number of operations is 0
1951template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::himask()
1952{
1953        return simd256<4>::constant<(12)>();
1954}
1955
1956//The total number of operations is 0
1957template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::himask()
1958{
1959        return simd256<8>::constant<(240)>();
1960}
1961
1962//The total number of operations is 0
1963template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::himask()
1964{
1965        return simd256<16>::constant<(65280)>();
1966}
1967
1968//The total number of operations is 0
1969template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::himask()
1970{
1971        return simd256<32>::constant<-65536>();
1972}
1973
1974//The total number of operations is 0
1975template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::himask()
1976{
1977        return ((bitblock256_t)_mm256_set_epi32((int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0), (int32_t)(-1), (int32_t)(0)));
1978}
1979
1980//The total number of operations is 0
1981template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::himask()
1982{
1983        return ((bitblock256_t)_mm256_set_epi32((int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0), (int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0)));
1984}
1985
1986//The total number of operations is 0
1987template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::himask()
1988{
1989        return ((bitblock256_t)_mm256_set_epi32((int32_t)(-1), (int32_t)(-1), (int32_t)(-1), (int32_t)(-1), (int32_t)(0), (int32_t)(0), (int32_t)(0), (int32_t)(0)));
1990}
1991
1992//The total number of operations is 92
1993template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::add(bitblock256_t arg1, bitblock256_t arg2)
1994{
1995        return simd256<1>::ifh(simd256<(2)>::himask(), simd256<(2)>::add(arg1, simd_and(simd256<(2)>::himask(), arg2)), simd256<(2)>::add(arg1, arg2));
1996}
1997
1998//The total number of operations is 44
1999template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::add(bitblock256_t arg1, bitblock256_t arg2)
2000{
2001        return simd256<1>::ifh(simd256<(4)>::himask(), simd256<(4)>::add(arg1, simd_and(simd256<(4)>::himask(), arg2)), simd256<(4)>::add(arg1, arg2));
2002}
2003
2004//The total number of operations is 20
2005template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::add(bitblock256_t arg1, bitblock256_t arg2)
2006{
2007        return simd256<1>::ifh(simd256<(8)>::himask(), simd256<(8)>::add(arg1, simd_and(simd256<(8)>::himask(), arg2)), simd256<(8)>::add(arg1, arg2));
2008}
2009
2010//The total number of operations is 8
2011template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::add(bitblock256_t arg1, bitblock256_t arg2)
2012{
2013        return avx_general_combine256(_mm_add_epi8(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_add_epi8(avx_select_lo128(arg1), avx_select_lo128(arg2)));
2014}
2015
2016//The total number of operations is 8
2017template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::add(bitblock256_t arg1, bitblock256_t arg2)
2018{
2019        return avx_general_combine256(_mm_add_epi16(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_add_epi16(avx_select_lo128(arg1), avx_select_lo128(arg2)));
2020}
2021
2022//The total number of operations is 8
2023template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::add(bitblock256_t arg1, bitblock256_t arg2)
2024{
2025        return avx_general_combine256(_mm_add_epi32(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_add_epi32(avx_select_lo128(arg1), avx_select_lo128(arg2)));
2026}
2027
2028//The total number of operations is 8
2029template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::add(bitblock256_t arg1, bitblock256_t arg2)
2030{
2031        return avx_general_combine256(_mm_add_epi64(avx_select_hi128(arg1), avx_select_hi128(arg2)), _mm_add_epi64(avx_select_lo128(arg1), avx_select_lo128(arg2)));
2032}
2033
2034//The total number of operations is 44
2035template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::add(bitblock256_t arg1, bitblock256_t arg2)
2036{
2037        bitblock256_t ans = simd256<(64)>::add(arg1, arg2);
2038        bitblock256_t carryMask = simd_or(simd_and(arg1, arg2), simd_and(simd_xor(arg1, arg2), simd_not(ans)));
2039        bitblock256_t loMask = simd256<128>::lomask();
2040        bitblock256_t carry = simd256<128>::slli<1>(simd_and(carryMask, loMask));
2041        return simd256<1>::ifh(loMask, ans, simd256<(64)>::add(ans, carry));
2042}
2043
2044//The total number of operations is 137
2045template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::add(bitblock256_t arg1, bitblock256_t arg2)
2046{
2047        bitblock256_t ans = simd256<(128)>::add(arg1, arg2);
2048        bitblock256_t carryMask = simd_or(simd_and(arg1, arg2), simd_and(simd_xor(arg1, arg2), simd_not(ans)));
2049        bitblock256_t loMask = simd256<256>::lomask();
2050        bitblock256_t carry = simd256<256>::slli<1>(simd_and(carryMask, loMask));
2051        return simd256<1>::ifh(loMask, ans, simd256<(128)>::add(ans, carry));
2052}
2053
2054//The total number of operations is 226
2055template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<1>::abs(bitblock256_t arg1)
2056{
2057        bitblock256_t gtMask = simd256<1>::gt(arg1, simd256<1>::constant<0>());
2058        return simd256<1>::ifh(gtMask, arg1, simd256<1>::sub(gtMask, arg1));
2059}
2060
2061//The total number of operations is 45
2062template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<2>::abs(bitblock256_t arg1)
2063{
2064        return simd256<1>::ifh(simd256<2>::himask(), simd_and(arg1, simd256<256>::slli<1>(simd_not(arg1))), arg1);
2065}
2066
2067//The total number of operations is 52
2068template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<4>::abs(bitblock256_t arg1)
2069{
2070        bitblock256_t gtMask = simd256<4>::gt(arg1, simd256<4>::constant<0>());
2071        return simd256<1>::ifh(gtMask, arg1, simd256<4>::sub(gtMask, arg1));
2072}
2073
2074//The total number of operations is 6
2075template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<8>::abs(bitblock256_t arg1)
2076{
2077        return avx_general_combine256(_mm_abs_epi8(avx_select_hi128(arg1)), _mm_abs_epi8(avx_select_lo128(arg1)));
2078}
2079
2080//The total number of operations is 6
2081template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<16>::abs(bitblock256_t arg1)
2082{
2083        return avx_general_combine256(_mm_abs_epi16(avx_select_hi128(arg1)), _mm_abs_epi16(avx_select_lo128(arg1)));
2084}
2085
2086//The total number of operations is 6
2087template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<32>::abs(bitblock256_t arg1)
2088{
2089        return avx_general_combine256(_mm_abs_epi32(avx_select_hi128(arg1)), _mm_abs_epi32(avx_select_lo128(arg1)));
2090}
2091
2092//The total number of operations is 19
2093template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<64>::abs(bitblock256_t arg1)
2094{
2095        bitblock256_t gtMask = simd256<64>::gt(arg1, simd256<64>::constant<0>());
2096        return simd256<1>::ifh(gtMask, arg1, simd256<64>::sub(gtMask, arg1));
2097}
2098
2099//The total number of operations is 117
2100template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<128>::abs(bitblock256_t arg1)
2101{
2102        bitblock256_t eqMask = simd256<128>::eq(simd256<1>::ifh(simd256<128>::himask(), simd256<(64)>::abs(arg1), arg1), arg1);
2103        return simd256<1>::ifh(eqMask, arg1, simd256<128>::sub(eqMask, arg1));
2104}
2105
2106//The total number of operations is 391
2107template <> IDISA_ALWAYS_INLINE bitblock256_t simd256<256>::abs(bitblock256_t arg1)
2108{
2109        bitblock256_t eqMask = simd256<256>::eq(simd256<1>::ifh(simd256<256>::himask(), simd256<(128)>::abs(arg1), arg1), arg1);
2110        return simd256<1>::ifh(eqMask, arg1, simd256<256>::sub(eqMask, arg1));
2111}
2112
2113//The total number of operations is 652
2114template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<2>::umin_hl(bitblock256_t arg1, bitblock256_t arg2)
2115{
2116        return simd256<(1)>::umin(hsimd256<2>::packh(arg1, arg2), hsimd256<2>::packl(arg1, arg2));
2117}
2118
2119//The total number of operations is 428
2120template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<4>::umin_hl(bitblock256_t arg1, bitblock256_t arg2)
2121{
2122        return simd256<(2)>::umin(hsimd256<4>::packh(arg1, arg2), hsimd256<4>::packl(arg1, arg2));
2123}
2124
2125//The total number of operations is 228
2126template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<8>::umin_hl(bitblock256_t arg1, bitblock256_t arg2)
2127{
2128        return simd256<(4)>::umin(hsimd256<8>::packh(arg1, arg2), hsimd256<8>::packl(arg1, arg2));
2129}
2130
2131//The total number of operations is 38
2132template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<16>::umin_hl(bitblock256_t arg1, bitblock256_t arg2)
2133{
2134        return simd256<(8)>::umin(hsimd256<16>::packh(arg1, arg2), hsimd256<16>::packl(arg1, arg2));
2135}
2136
2137//The total number of operations is 38
2138template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<32>::umin_hl(bitblock256_t arg1, bitblock256_t arg2)
2139{
2140        return simd256<(16)>::umin(hsimd256<32>::packh(arg1, arg2), hsimd256<32>::packl(arg1, arg2));
2141}
2142
2143//The total number of operations is 450
2144template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<64>::umin_hl(bitblock256_t arg1, bitblock256_t arg2)
2145{
2146        return simd256<(32)>::umin(hsimd256<64>::packh(arg1, arg2), hsimd256<64>::packl(arg1, arg2));
2147}
2148
2149//The total number of operations is 456
2150template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<128>::umin_hl(bitblock256_t arg1, bitblock256_t arg2)
2151{
2152        return simd256<(64)>::umin(hsimd256<128>::packh(arg1, arg2), hsimd256<128>::packl(arg1, arg2));
2153}
2154
2155//The total number of operations is 164
2156template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<256>::umin_hl(bitblock256_t arg1, bitblock256_t arg2)
2157{
2158        return simd256<(128)>::umin(hsimd256<256>::packh(arg1, arg2), hsimd256<256>::packl(arg1, arg2));
2159}
2160
2161//The total number of operations is 652
2162template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<2>::add_hl(bitblock256_t arg1, bitblock256_t arg2)
2163{
2164        return simd256<(1)>::add(hsimd256<2>::packh(arg1, arg2), hsimd256<2>::packl(arg1, arg2));
2165}
2166
2167//The total number of operations is 428
2168template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<4>::add_hl(bitblock256_t arg1, bitblock256_t arg2)
2169{
2170        return simd256<(2)>::add(hsimd256<4>::packh(arg1, arg2), hsimd256<4>::packl(arg1, arg2));
2171}
2172
2173//The total number of operations is 228
2174template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<8>::add_hl(bitblock256_t arg1, bitblock256_t arg2)
2175{
2176        return simd256<(4)>::add(hsimd256<8>::packh(arg1, arg2), hsimd256<8>::packl(arg1, arg2));
2177}
2178
2179//The total number of operations is 38
2180template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<16>::add_hl(bitblock256_t arg1, bitblock256_t arg2)
2181{
2182        return simd256<(8)>::add(hsimd256<16>::packh(arg1, arg2), hsimd256<16>::packl(arg1, arg2));
2183}
2184
2185//The total number of operations is 8
2186template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<32>::add_hl(bitblock256_t arg1, bitblock256_t arg2)
2187{
2188        return avx_general_combine256(_mm_hadd_epi16(avx_select_lo128(arg1), avx_select_hi128(arg1)),_mm_hadd_epi16(avx_select_lo128(arg2), avx_select_hi128(arg2)));
2189}
2190
2191//The total number of operations is 8
2192template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<64>::add_hl(bitblock256_t arg1, bitblock256_t arg2)
2193{
2194        return avx_general_combine256(_mm_hadd_epi32(avx_select_lo128(arg1), avx_select_hi128(arg1)),_mm_hadd_epi32(avx_select_lo128(arg2), avx_select_hi128(arg2)));
2195}
2196
2197//The total number of operations is 450
2198template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<128>::add_hl(bitblock256_t arg1, bitblock256_t arg2)
2199{
2200        return simd256<(64)>::add(hsimd256<128>::packh(arg1, arg2), hsimd256<128>::packl(arg1, arg2));
2201}
2202
2203//The total number of operations is 131
2204template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<256>::add_hl(bitblock256_t arg1, bitblock256_t arg2)
2205{
2206        return simd256<(128)>::add(hsimd256<256>::packh(arg1, arg2), hsimd256<256>::packl(arg1, arg2));
2207}
2208
2209//The total number of operations is 546
2210template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<2>::packss(bitblock256_t arg1, bitblock256_t arg2)
2211{
2212        bitblock256_t hiBound = simd256<2>::srli<1>(simd256<2>::lomask());
2213        bitblock256_t loBound = simd_not(hiBound);
2214        return hsimd256<2>::packl(simd256<1>::ifh(simd256<2>::gt(arg1, hiBound), hiBound, simd256<1>::ifh(simd256<2>::gt(arg1, loBound), arg1, loBound)), simd256<1>::ifh(simd256<2>::gt(arg2, hiBound), hiBound, simd256<1>::ifh(simd256<2>::gt(arg2, loBound), arg2, loBound)));
2215}
2216
2217//The total number of operations is 322
2218template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<4>::packss(bitblock256_t arg1, bitblock256_t arg2)
2219{
2220        bitblock256_t hiBound = simd256<4>::srli<1>(simd256<4>::lomask());
2221        bitblock256_t loBound = simd_not(hiBound);
2222        return hsimd256<4>::packl(simd256<1>::ifh(simd256<4>::gt(arg1, hiBound), hiBound, simd256<1>::ifh(simd256<4>::gt(arg1, loBound), arg1, loBound)), simd256<1>::ifh(simd256<4>::gt(arg2, hiBound), hiBound, simd256<1>::ifh(simd256<4>::gt(arg2, loBound), arg2, loBound)));
2223}
2224
2225//The total number of operations is 150
2226template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<8>::packss(bitblock256_t arg1, bitblock256_t arg2)
2227{
2228        bitblock256_t hiBound = simd256<8>::srli<1>(simd256<8>::lomask());
2229        bitblock256_t loBound = simd_not(hiBound);
2230        return hsimd256<8>::packl(simd256<1>::ifh(simd256<8>::gt(arg1, hiBound), hiBound, simd256<1>::ifh(simd256<8>::gt(arg1, loBound), arg1, loBound)), simd256<1>::ifh(simd256<8>::gt(arg2, hiBound), hiBound, simd256<1>::ifh(simd256<8>::gt(arg2, loBound), arg2, loBound)));
2231}
2232
2233//The total number of operations is 8
2234template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<16>::packss(bitblock256_t arg1, bitblock256_t arg2)
2235{
2236        return avx_general_combine256(_mm_packs_epi16(avx_select_lo128(arg1), avx_select_hi128(arg1)),_mm_packs_epi16(avx_select_lo128(arg2), avx_select_hi128(arg2)));
2237}
2238
2239//The total number of operations is 8
2240template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<32>::packss(bitblock256_t arg1, bitblock256_t arg2)
2241{
2242        return avx_general_combine256(_mm_packs_epi32(avx_select_lo128(arg1), avx_select_hi128(arg1)),_mm_packs_epi32(avx_select_lo128(arg2), avx_select_hi128(arg2)));
2243}
2244
2245//The total number of operations is 266
2246template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<64>::packss(bitblock256_t arg1, bitblock256_t arg2)
2247{
2248        bitblock256_t hiBound = simd256<64>::srli<1>(simd256<64>::lomask());
2249        bitblock256_t loBound = simd_not(hiBound);
2250        return hsimd256<64>::packl(simd256<1>::ifh(simd256<64>::gt(arg1, hiBound), hiBound, simd256<1>::ifh(simd256<64>::gt(arg1, loBound), arg1, loBound)), simd256<1>::ifh(simd256<64>::gt(arg2, hiBound), hiBound, simd256<1>::ifh(simd256<64>::gt(arg2, loBound), arg2, loBound)));
2251}
2252
2253//The total number of operations is 763
2254template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<128>::packss(bitblock256_t arg1, bitblock256_t arg2)
2255{
2256        bitblock256_t hiBound = simd256<128>::srli<1>(simd256<128>::lomask());
2257        bitblock256_t loBound = simd_not(hiBound);
2258        return hsimd256<128>::packl(simd256<1>::ifh(simd256<128>::gt(arg1, hiBound), hiBound, simd256<1>::ifh(simd256<128>::gt(arg1, loBound), arg1, loBound)), simd256<1>::ifh(simd256<128>::gt(arg2, hiBound), hiBound, simd256<1>::ifh(simd256<128>::gt(arg2, loBound), arg2, loBound)));
2259}
2260
2261//The total number of operations is 2681
2262template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<256>::packss(bitblock256_t arg1, bitblock256_t arg2)
2263{
2264        bitblock256_t hiBound = simd256<256>::srli<1>(simd256<256>::lomask());
2265        bitblock256_t loBound = simd_not(hiBound);
2266        return hsimd256<256>::packl(simd256<1>::ifh(simd256<256>::gt(arg1, hiBound), hiBound, simd256<1>::ifh(simd256<256>::gt(arg1, loBound), arg1, loBound)), simd256<1>::ifh(simd256<256>::gt(arg2, hiBound), hiBound, simd256<1>::ifh(simd256<256>::gt(arg2, loBound), arg2, loBound)));
2267}
2268
2269//The total number of operations is 4
2270template <> IDISA_ALWAYS_INLINE uint64_t hsimd256<8>::signmask(bitblock256_t arg1)
2271{
2272        return ((((uint64_t)_mm_movemask_epi8(((__m128i)avx_select_hi128(arg1))))<<16)|((uint64_t)_mm_movemask_epi8(((__m128i)avx_select_lo128(arg1)))));
2273}
2274
2275//The total number of operations is 24
2276template <> IDISA_ALWAYS_INLINE uint64_t hsimd256<16>::signmask(bitblock256_t arg1)
2277{
2278        return hsimd256<(8)>::signmask(hsimd256<16>::packh(simd256<16>::constant<0>(), arg1));
2279}
2280
2281//The total number of operations is 44
2282template <> IDISA_ALWAYS_INLINE uint64_t hsimd256<32>::signmask(bitblock256_t arg1)
2283{
2284        return hsimd256<(16)>::signmask(hsimd256<32>::packh(simd256<32>::constant<0>(), arg1));
2285}
2286
2287//The total number of operations is 271
2288template <> IDISA_ALWAYS_INLINE uint64_t hsimd256<64>::signmask(bitblock256_t arg1)
2289{
2290        return hsimd256<(32)>::signmask(hsimd256<64>::packh(simd256<64>::constant<0>(), arg1));
2291}
2292
2293//The total number of operations is 586
2294template <> IDISA_ALWAYS_INLINE uint64_t hsimd256<128>::signmask(bitblock256_t arg1)
2295{
2296        return hsimd256<(64)>::signmask(hsimd256<128>::packh(simd256<128>::constant<0>(), arg1));
2297}
2298
2299//The total number of operations is 630
2300template <> IDISA_ALWAYS_INLINE uint64_t hsimd256<256>::signmask(bitblock256_t arg1)
2301{
2302        return hsimd256<(128)>::signmask(hsimd256<256>::packh(simd256<256>::constant<0>(), arg1));
2303}
2304
2305//The total number of operations is 274
2306template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<2>::packl(bitblock256_t arg1, bitblock256_t arg2)
2307{
2308        return hsimd256<(4)>::packl(simd256<1>::ifh(simd256<2>::himask(), simd256<256>::srli<(1)>(arg1), arg1), simd256<1>::ifh(simd256<2>::himask(), simd256<256>::srli<(1)>(arg2), arg2));
2309}
2310
2311//The total number of operations is 186
2312template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<4>::packl(bitblock256_t arg1, bitblock256_t arg2)
2313{
2314        return hsimd256<(8)>::packl(simd256<1>::ifh(simd256<4>::himask(), simd256<256>::srli<(2)>(arg1), arg1), simd256<1>::ifh(simd256<4>::himask(), simd256<256>::srli<(2)>(arg2), arg2));
2315}
2316
2317//The total number of operations is 98
2318template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<8>::packl(bitblock256_t arg1, bitblock256_t arg2)
2319{
2320        return hsimd256<(16)>::packl(simd256<1>::ifh(simd256<8>::himask(), simd256<256>::srli<(4)>(arg1), arg1), simd256<1>::ifh(simd256<8>::himask(), simd256<256>::srli<(4)>(arg2), arg2));
2321}
2322
2323//The total number of operations is 10
2324template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<16>::packl(bitblock256_t arg1, bitblock256_t arg2)
2325{
2326        return hsimd256<16>::packus(simd_and(arg1, simd256<16>::lomask()), simd_and(arg2, simd256<16>::lomask()));
2327}
2328
2329//The total number of operations is 10
2330template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<32>::packl(bitblock256_t arg1, bitblock256_t arg2)
2331{
2332        return hsimd256<32>::packus(simd_and(arg1, simd256<32>::lomask()), simd_and(arg2, simd256<32>::lomask()));
2333}
2334
2335//The total number of operations is 215
2336template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<64>::packl(bitblock256_t arg1, bitblock256_t arg2)
2337{
2338        return hsimd256<(128)>::packl(simd256<1>::ifh(simd256<64>::himask(), simd256<256>::srli<(32)>(arg1), arg1), simd256<1>::ifh(simd256<64>::himask(), simd256<256>::srli<(32)>(arg2), arg2));
2339}
2340
2341//The total number of operations is 127
2342template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<128>::packl(bitblock256_t arg1, bitblock256_t arg2)
2343{
2344        return hsimd256<(256)>::packl(simd256<64>::ifh(simd256<128>::himask(), simd256<256>::srli<(64)>(arg1), arg1), simd256<64>::ifh(simd256<128>::himask(), simd256<256>::srli<(64)>(arg2), arg2));
2345}
2346
2347//The total number of operations is 43
2348template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<256>::packl(bitblock256_t arg1, bitblock256_t arg2)
2349{
2350        return simd256<1>::ifh(simd256<256>::himask(), simd256<256>::slli<(128)>(arg1), arg2);
2351}
2352
2353//The total number of operations is 286
2354template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<2>::packh(bitblock256_t arg1, bitblock256_t arg2)
2355{
2356        return hsimd256<2>::packl(simd256<64>::srli<(1)>(arg1), simd256<64>::srli<(1)>(arg2));
2357}
2358
2359//The total number of operations is 198
2360template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<4>::packh(bitblock256_t arg1, bitblock256_t arg2)
2361{
2362        return hsimd256<4>::packl(simd256<64>::srli<(2)>(arg1), simd256<64>::srli<(2)>(arg2));
2363}
2364
2365//The total number of operations is 110
2366template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<8>::packh(bitblock256_t arg1, bitblock256_t arg2)
2367{
2368        return hsimd256<8>::packl(simd256<64>::srli<(4)>(arg1), simd256<64>::srli<(4)>(arg2));
2369}
2370
2371//The total number of operations is 20
2372template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<16>::packh(bitblock256_t arg1, bitblock256_t arg2)
2373{
2374        return hsimd256<16>::packus(simd256<16>::srli<(8)>(arg1), simd256<16>::srli<(8)>(arg2));
2375}
2376
2377//The total number of operations is 20
2378template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<32>::packh(bitblock256_t arg1, bitblock256_t arg2)
2379{
2380        return hsimd256<32>::packus(simd256<32>::srli<(16)>(arg1), simd256<32>::srli<(16)>(arg2));
2381}
2382
2383//The total number of operations is 227
2384template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<64>::packh(bitblock256_t arg1, bitblock256_t arg2)
2385{
2386        return hsimd256<64>::packl(simd256<64>::srli<(32)>(arg1), simd256<64>::srli<(32)>(arg2));
2387}
2388
2389//The total number of operations is 315
2390template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<128>::packh(bitblock256_t arg1, bitblock256_t arg2)
2391{
2392        return hsimd256<128>::packus(simd256<128>::srli<(64)>(arg1), simd256<128>::srli<(64)>(arg2));
2393}
2394
2395//The total number of operations is 44
2396template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<256>::packh(bitblock256_t arg1, bitblock256_t arg2)
2397{
2398        return simd256<1>::ifh(simd256<256>::himask(), arg1, simd256<256>::srli<(128)>(arg2));
2399}
2400
2401//The total number of operations is 655
2402template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<2>::min_hl(bitblock256_t arg1, bitblock256_t arg2)
2403{
2404        return simd256<(1)>::min(hsimd256<2>::packh(arg1, arg2), hsimd256<2>::packl(arg1, arg2));
2405}
2406
2407//The total number of operations is 431
2408template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<4>::min_hl(bitblock256_t arg1, bitblock256_t arg2)
2409{
2410        return simd256<(2)>::min(hsimd256<4>::packh(arg1, arg2), hsimd256<4>::packl(arg1, arg2));
2411}
2412
2413//The total number of operations is 231
2414template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<8>::min_hl(bitblock256_t arg1, bitblock256_t arg2)
2415{
2416        return simd256<(4)>::min(hsimd256<8>::packh(arg1, arg2), hsimd256<8>::packl(arg1, arg2));
2417}
2418
2419//The total number of operations is 38
2420template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<16>::min_hl(bitblock256_t arg1, bitblock256_t arg2)
2421{
2422        return simd256<(8)>::min(hsimd256<16>::packh(arg1, arg2), hsimd256<16>::packl(arg1, arg2));
2423}
2424
2425//The total number of operations is 38
2426template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<32>::min_hl(bitblock256_t arg1, bitblock256_t arg2)
2427{
2428        return simd256<(16)>::min(hsimd256<32>::packh(arg1, arg2), hsimd256<32>::packl(arg1, arg2));
2429}
2430
2431//The total number of operations is 450
2432template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<64>::min_hl(bitblock256_t arg1, bitblock256_t arg2)
2433{
2434        return simd256<(32)>::min(hsimd256<64>::packh(arg1, arg2), hsimd256<64>::packl(arg1, arg2));
2435}
2436
2437//The total number of operations is 453
2438template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<128>::min_hl(bitblock256_t arg1, bitblock256_t arg2)
2439{
2440        return simd256<(64)>::min(hsimd256<128>::packh(arg1, arg2), hsimd256<128>::packl(arg1, arg2));
2441}
2442
2443//The total number of operations is 175
2444template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<256>::min_hl(bitblock256_t arg1, bitblock256_t arg2)
2445{
2446        return simd256<(128)>::min(hsimd256<256>::packh(arg1, arg2), hsimd256<256>::packl(arg1, arg2));
2447}
2448
2449//The total number of operations is 414
2450template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<2>::packus(bitblock256_t arg1, bitblock256_t arg2)
2451{
2452        bitblock256_t arg11 = simd256<2>::ifh(arg1, simd256<2>::constant<0>(), arg1);
2453        bitblock256_t arg12 = simd_and(simd256<2>::lomask(), arg11);
2454        bitblock256_t arg21 = simd256<2>::ifh(arg2, simd256<2>::constant<0>(), arg2);
2455        bitblock256_t arg22 = simd_and(simd256<2>::lomask(), arg21);
2456        return hsimd256<2>::packl(simd256<1>::ifh(simd256<2>::eq(arg12, arg11), arg12, simd256<2>::lomask()), simd256<1>::ifh(simd256<2>::eq(arg22, arg21), arg22, simd256<2>::lomask()));
2457}
2458
2459//The total number of operations is 286
2460template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<4>::packus(bitblock256_t arg1, bitblock256_t arg2)
2461{
2462        bitblock256_t arg11 = simd256<4>::ifh(arg1, simd256<4>::constant<0>(), arg1);
2463        bitblock256_t arg12 = simd_and(simd256<4>::lomask(), arg11);
2464        bitblock256_t arg21 = simd256<4>::ifh(arg2, simd256<4>::constant<0>(), arg2);
2465        bitblock256_t arg22 = simd_and(simd256<4>::lomask(), arg21);
2466        return hsimd256<4>::packl(simd256<1>::ifh(simd256<4>::eq(arg12, arg11), arg12, simd256<4>::lomask()), simd256<1>::ifh(simd256<4>::eq(arg22, arg21), arg22, simd256<4>::lomask()));
2467}
2468
2469//The total number of operations is 144
2470template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<8>::packus(bitblock256_t arg1, bitblock256_t arg2)
2471{
2472        bitblock256_t arg11 = simd256<8>::ifh(arg1, simd256<8>::constant<0>(), arg1);
2473        bitblock256_t arg12 = simd_and(simd256<8>::lomask(), arg11);
2474        bitblock256_t arg21 = simd256<8>::ifh(arg2, simd256<8>::constant<0>(), arg2);
2475        bitblock256_t arg22 = simd_and(simd256<8>::lomask(), arg21);
2476        return hsimd256<8>::packl(simd256<1>::ifh(simd256<8>::eq(arg12, arg11), arg12, simd256<8>::lomask()), simd256<1>::ifh(simd256<8>::eq(arg22, arg21), arg22, simd256<8>::lomask()));
2477}
2478
2479//The total number of operations is 8
2480template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<16>::packus(bitblock256_t arg1, bitblock256_t arg2)
2481{
2482        return avx_general_combine256(_mm_packus_epi16(avx_select_lo128(arg1), avx_select_hi128(arg1)),_mm_packus_epi16(avx_select_lo128(arg2), avx_select_hi128(arg2)));
2483}
2484
2485//The total number of operations is 8
2486template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<32>::packus(bitblock256_t arg1, bitblock256_t arg2)
2487{
2488        return avx_general_combine256(_mm_packus_epi32(avx_select_lo128(arg1), avx_select_hi128(arg1)),_mm_packus_epi32(avx_select_lo128(arg2), avx_select_hi128(arg2)));
2489}
2490
2491//The total number of operations is 241
2492template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<64>::packus(bitblock256_t arg1, bitblock256_t arg2)
2493{
2494        bitblock256_t arg11 = simd256<64>::ifh(arg1, simd256<64>::constant<0>(), arg1);
2495        bitblock256_t arg12 = simd_and(simd256<64>::lomask(), arg11);
2496        bitblock256_t arg21 = simd256<64>::ifh(arg2, simd256<64>::constant<0>(), arg2);
2497        bitblock256_t arg22 = simd_and(simd256<64>::lomask(), arg21);
2498        return hsimd256<64>::packl(simd256<1>::ifh(simd256<64>::eq(arg12, arg11), arg12, simd256<64>::lomask()), simd256<1>::ifh(simd256<64>::eq(arg22, arg21), arg22, simd256<64>::lomask()));
2499}
2500
2501//The total number of operations is 277
2502template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<128>::packus(bitblock256_t arg1, bitblock256_t arg2)
2503{
2504        bitblock256_t arg11 = simd256<128>::ifh(arg1, simd256<128>::constant<0>(), arg1);
2505        bitblock256_t arg12 = simd_and(simd256<128>::lomask(), arg11);
2506        bitblock256_t arg21 = simd256<128>::ifh(arg2, simd256<128>::constant<0>(), arg2);
2507        bitblock256_t arg22 = simd_and(simd256<128>::lomask(), arg21);
2508        return hsimd256<128>::packl(simd256<1>::ifh(simd256<128>::eq(arg12, arg11), arg12, simd256<128>::lomask()), simd256<1>::ifh(simd256<128>::eq(arg22, arg21), arg22, simd256<128>::lomask()));
2509}
2510
2511//The total number of operations is 262
2512template <> IDISA_ALWAYS_INLINE bitblock256_t hsimd256<256>::packus(bitblock256_t arg1, bitblock256_t arg2)
2513{
2514        bitblock256_t hiPart = hsimd256<256>::packh(arg1, arg2);
2515        return simd256<(128)>::ifh(hiPart, simd256<(128)>::constant<0>(), simd_or(simd256<(128)>::gt(hiPart, simd256<(128)>::constant<0>()), hsimd256<256>::packl(arg1, arg2)));
2516}
2517
2518//The total number of operations is 64
2519template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<1>::mergel(bitblock256_t arg1, bitblock256_t arg2)
2520{
2521        return esimd256<(2)>::mergel(simd256<1>::ifh(simd256<(2)>::himask(), arg1, simd256<(2)>::srli<1>(arg2)), simd256<1>::ifh(simd256<(2)>::himask(), simd256<(2)>::slli<1>(arg1), arg2));
2522}
2523
2524//The total number of operations is 44
2525template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<2>::mergel(bitblock256_t arg1, bitblock256_t arg2)
2526{
2527        return esimd256<(4)>::mergel(simd256<1>::ifh(simd256<(4)>::himask(), arg1, simd256<(4)>::srli<2>(arg2)), simd256<1>::ifh(simd256<(4)>::himask(), simd256<(4)>::slli<2>(arg1), arg2));
2528}
2529
2530//The total number of operations is 24
2531template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<4>::mergel(bitblock256_t arg1, bitblock256_t arg2)
2532{
2533        return esimd256<(8)>::mergel(simd256<1>::ifh(simd256<(8)>::himask(), arg1, simd256<(8)>::srli<4>(arg2)), simd256<1>::ifh(simd256<(8)>::himask(), simd256<(8)>::slli<4>(arg1), arg2));
2534}
2535
2536//The total number of operations is 4
2537template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<8>::mergel(bitblock256_t arg1, bitblock256_t arg2)
2538{
2539        __m128i loPart2 = avx_select_lo128(arg2);
2540        __m128i loPart1 = avx_select_lo128(arg1);
2541        return avx_general_combine256(_mm_unpackhi_epi8(loPart2, loPart1), _mm_unpacklo_epi8(loPart2, loPart1));
2542}
2543
2544//The total number of operations is 4
2545template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<16>::mergel(bitblock256_t arg1, bitblock256_t arg2)
2546{
2547        __m128i loPart2 = avx_select_lo128(arg2);
2548        __m128i loPart1 = avx_select_lo128(arg1);
2549        return avx_general_combine256(_mm_unpackhi_epi16(loPart2, loPart1), _mm_unpacklo_epi16(loPart2, loPart1));
2550}
2551
2552//The total number of operations is 4
2553template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<32>::mergel(bitblock256_t arg1, bitblock256_t arg2)
2554{
2555        __m128i loPart2 = avx_select_lo128(arg2);
2556        __m128i loPart1 = avx_select_lo128(arg1);
2557        return avx_general_combine256(_mm_unpackhi_epi32(loPart2, loPart1), _mm_unpacklo_epi32(loPart2, loPart1));
2558}
2559
2560//The total number of operations is 4
2561template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<64>::mergel(bitblock256_t arg1, bitblock256_t arg2)
2562{
2563        __m128i loPart2 = avx_select_lo128(arg2);
2564        __m128i loPart1 = avx_select_lo128(arg1);
2565        return avx_general_combine256(_mm_unpackhi_epi64(loPart2, loPart1), _mm_unpacklo_epi64(loPart2, loPart1));
2566}
2567
2568//The total number of operations is 48
2569template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<128>::mergel(bitblock256_t arg1, bitblock256_t arg2)
2570{
2571        return esimd256<(64)>::mergel(simd256<1>::ifh(simd256<128>::himask(), arg1, simd256<128>::srli<(64)>(arg2)), simd256<1>::ifh(simd256<128>::himask(), simd256<128>::slli<(64)>(arg1), arg2));
2572}
2573
2574//The total number of operations is 64
2575template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<1>::mergeh(bitblock256_t arg1, bitblock256_t arg2)
2576{
2577        return esimd256<(2)>::mergeh(simd256<1>::ifh(simd256<(2)>::himask(), arg1, simd256<(2)>::srli<1>(arg2)), simd256<1>::ifh(simd256<(2)>::himask(), simd256<(2)>::slli<1>(arg1), arg2));
2578}
2579
2580//The total number of operations is 44
2581template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<2>::mergeh(bitblock256_t arg1, bitblock256_t arg2)
2582{
2583        return esimd256<(4)>::mergeh(simd256<1>::ifh(simd256<(4)>::himask(), arg1, simd256<(4)>::srli<2>(arg2)), simd256<1>::ifh(simd256<(4)>::himask(), simd256<(4)>::slli<2>(arg1), arg2));
2584}
2585
2586//The total number of operations is 24
2587template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<4>::mergeh(bitblock256_t arg1, bitblock256_t arg2)
2588{
2589        return esimd256<(8)>::mergeh(simd256<1>::ifh(simd256<(8)>::himask(), arg1, simd256<(8)>::srli<4>(arg2)), simd256<1>::ifh(simd256<(8)>::himask(), simd256<(8)>::slli<4>(arg1), arg2));
2590}
2591
2592//The total number of operations is 4
2593template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<8>::mergeh(bitblock256_t arg1, bitblock256_t arg2)
2594{
2595        __m128i hiPart2 = avx_select_hi128(arg2);
2596        __m128i hiPart1 = avx_select_hi128(arg1);
2597        return avx_general_combine256(_mm_unpackhi_epi8(hiPart2, hiPart1), _mm_unpacklo_epi8(hiPart2, hiPart1));
2598}
2599
2600//The total number of operations is 4
2601template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<16>::mergeh(bitblock256_t arg1, bitblock256_t arg2)
2602{
2603        __m128i hiPart2 = avx_select_hi128(arg2);
2604        __m128i hiPart1 = avx_select_hi128(arg1);
2605        return avx_general_combine256(_mm_unpackhi_epi16(hiPart2, hiPart1), _mm_unpacklo_epi16(hiPart2, hiPart1));
2606}
2607
2608//The total number of operations is 4
2609template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<32>::mergeh(bitblock256_t arg1, bitblock256_t arg2)
2610{
2611        __m128i hiPart2 = avx_select_hi128(arg2);
2612        __m128i hiPart1 = avx_select_hi128(arg1);
2613        return avx_general_combine256(_mm_unpackhi_epi32(hiPart2, hiPart1), _mm_unpacklo_epi32(hiPart2, hiPart1));
2614}
2615
2616//The total number of operations is 4
2617template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<64>::mergeh(bitblock256_t arg1, bitblock256_t arg2)
2618{
2619        __m128i hiPart2 = avx_select_hi128(arg2);
2620        __m128i hiPart1 = avx_select_hi128(arg1);
2621        return avx_general_combine256(_mm_unpackhi_epi64(hiPart2, hiPart1), _mm_unpacklo_epi64(hiPart2, hiPart1));
2622}
2623
2624//The total number of operations is 48
2625template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<128>::mergeh(bitblock256_t arg1, bitblock256_t arg2)
2626{
2627        return esimd256<(64)>::mergeh(simd256<1>::ifh(simd256<128>::himask(), arg1, simd256<128>::srli<(64)>(arg2)), simd256<1>::ifh(simd256<128>::himask(), simd256<128>::slli<(64)>(arg1), arg2));
2628}
2629
2630//The total number of operations is 52
2631template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<1>::zeroextendh(bitblock256_t arg1)
2632{
2633        return esimd256<(2)>::mergeh(simd256<(2)>::srli<1>(arg1), simd_and(simd256<(2)>::lomask(), arg1));
2634}
2635
2636//The total number of operations is 32
2637template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<2>::zeroextendh(bitblock256_t arg1)
2638{
2639        return esimd256<(4)>::mergeh(simd256<(4)>::srli<2>(arg1), simd_and(simd256<(4)>::lomask(), arg1));
2640}
2641
2642//The total number of operations is 12
2643template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<4>::zeroextendh(bitblock256_t arg1)
2644{
2645        return esimd256<(8)>::mergeh(simd256<(8)>::srli<4>(arg1), simd_and(simd256<(8)>::lomask(), arg1));
2646}
2647
2648//The total number of operations is 11
2649template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<8>::zeroextendh(bitblock256_t arg1)
2650{
2651        return esimd256<(16)>::mergeh(simd256<(16)>::srli<8>(arg1), simd_and(simd256<(16)>::lomask(), arg1));
2652}
2653
2654//The total number of operations is 11
2655template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<16>::zeroextendh(bitblock256_t arg1)
2656{
2657        return esimd256<(32)>::mergeh(simd256<(32)>::srli<16>(arg1), simd_and(simd256<(32)>::lomask(), arg1));
2658}
2659
2660//The total number of operations is 11
2661template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<32>::zeroextendh(bitblock256_t arg1)
2662{
2663        return esimd256<(64)>::mergeh(simd256<(64)>::srli<32>(arg1), simd_and(simd256<(64)>::lomask(), arg1));
2664}
2665
2666//The total number of operations is 68
2667template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<64>::zeroextendh(bitblock256_t arg1)
2668{
2669        return esimd256<(128)>::mergeh(simd256<(128)>::srli<64>(arg1), simd_and(simd256<(128)>::lomask(), arg1));
2670}
2671
2672//The total number of operations is 41
2673template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<128>::zeroextendh(bitblock256_t arg1)
2674{
2675        return simd256<(256)>::srli<128>(arg1);
2676}
2677
2678//The total number of operations is 52
2679template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<1>::zeroextendl(bitblock256_t arg1)
2680{
2681        return esimd256<(2)>::mergel(simd256<(2)>::srli<1>(arg1), simd_and(simd256<(2)>::lomask(), arg1));
2682}
2683
2684//The total number of operations is 32
2685template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<2>::zeroextendl(bitblock256_t arg1)
2686{
2687        return esimd256<(4)>::mergel(simd256<(4)>::srli<2>(arg1), simd_and(simd256<(4)>::lomask(), arg1));
2688}
2689
2690//The total number of operations is 12
2691template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<4>::zeroextendl(bitblock256_t arg1)
2692{
2693        return esimd256<(8)>::mergel(simd256<(8)>::srli<4>(arg1), simd_and(simd256<(8)>::lomask(), arg1));
2694}
2695
2696//The total number of operations is 11
2697template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<8>::zeroextendl(bitblock256_t arg1)
2698{
2699        return esimd256<(16)>::mergel(simd256<(16)>::srli<8>(arg1), simd_and(simd256<(16)>::lomask(), arg1));
2700}
2701
2702//The total number of operations is 11
2703template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<16>::zeroextendl(bitblock256_t arg1)
2704{
2705        return esimd256<(32)>::mergel(simd256<(32)>::srli<16>(arg1), simd_and(simd256<(32)>::lomask(), arg1));
2706}
2707
2708//The total number of operations is 11
2709template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<32>::zeroextendl(bitblock256_t arg1)
2710{
2711        return esimd256<(64)>::mergel(simd256<(64)>::srli<32>(arg1), simd_and(simd256<(64)>::lomask(), arg1));
2712}
2713
2714//The total number of operations is 68
2715template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<64>::zeroextendl(bitblock256_t arg1)
2716{
2717        return esimd256<(128)>::mergel(simd256<(128)>::srli<64>(arg1), simd_and(simd256<(128)>::lomask(), arg1));
2718}
2719
2720//The total number of operations is 1
2721template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<128>::zeroextendl(bitblock256_t arg1)
2722{
2723        return simd_and(simd256<(256)>::lomask(), arg1);
2724}
2725
2726//The total number of operations is 69
2727template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<1>::signextendh(bitblock256_t arg1)
2728{
2729        return esimd256<(2)>::mergeh(simd256<(2)>::srai<1>(arg1), simd256<(2)>::srai<1>(simd256<(2)>::slli<1>(arg1)));
2730}
2731
2732//The total number of operations is 89
2733template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<2>::signextendh(bitblock256_t arg1)
2734{
2735        return esimd256<(4)>::mergeh(simd256<(4)>::srai<2>(arg1), simd256<(4)>::srai<2>(simd256<(4)>::slli<2>(arg1)));
2736}
2737
2738//The total number of operations is 45
2739template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<4>::signextendh(bitblock256_t arg1)
2740{
2741        return esimd256<(8)>::mergeh(simd256<(8)>::srai<4>(arg1), simd256<(8)>::srai<4>(simd256<(8)>::slli<4>(arg1)));
2742}
2743
2744//The total number of operations is 22
2745template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<8>::signextendh(bitblock256_t arg1)
2746{
2747        return esimd256<(16)>::mergeh(simd256<(16)>::srai<8>(arg1), simd256<(16)>::srai<8>(simd256<(16)>::slli<8>(arg1)));
2748}
2749
2750//The total number of operations is 22
2751template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<16>::signextendh(bitblock256_t arg1)
2752{
2753        return esimd256<(32)>::mergeh(simd256<(32)>::srai<16>(arg1), simd256<(32)>::srai<16>(simd256<(32)>::slli<16>(arg1)));
2754}
2755
2756//The total number of operations is 54
2757template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<32>::signextendh(bitblock256_t arg1)
2758{
2759        return esimd256<(64)>::mergeh(simd256<(64)>::srai<32>(arg1), simd256<(64)>::srai<32>(simd256<(64)>::slli<32>(arg1)));
2760}
2761
2762//The total number of operations is 235
2763template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<64>::signextendh(bitblock256_t arg1)
2764{
2765        return esimd256<(128)>::mergeh(simd256<(128)>::srai<64>(arg1), simd256<(128)>::srai<64>(simd256<(128)>::slli<64>(arg1)));
2766}
2767
2768//The total number of operations is 220
2769template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<128>::signextendh(bitblock256_t arg1)
2770{
2771        return simd256<(256)>::srai<128>(arg1);
2772}
2773
2774//The total number of operations is 69
2775template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<1>::signextendl(bitblock256_t arg1)
2776{
2777        return esimd256<(2)>::mergel(simd256<(2)>::srai<1>(arg1), simd256<(2)>::srai<1>(simd256<(2)>::slli<1>(arg1)));
2778}
2779
2780//The total number of operations is 89
2781template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<2>::signextendl(bitblock256_t arg1)
2782{
2783        return esimd256<(4)>::mergel(simd256<(4)>::srai<2>(arg1), simd256<(4)>::srai<2>(simd256<(4)>::slli<2>(arg1)));
2784}
2785
2786//The total number of operations is 45
2787template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<4>::signextendl(bitblock256_t arg1)
2788{
2789        return esimd256<(8)>::mergel(simd256<(8)>::srai<4>(arg1), simd256<(8)>::srai<4>(simd256<(8)>::slli<4>(arg1)));
2790}
2791
2792//The total number of operations is 22
2793template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<8>::signextendl(bitblock256_t arg1)
2794{
2795        return esimd256<(16)>::mergel(simd256<(16)>::srai<8>(arg1), simd256<(16)>::srai<8>(simd256<(16)>::slli<8>(arg1)));
2796}
2797
2798//The total number of operations is 22
2799template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<16>::signextendl(bitblock256_t arg1)
2800{
2801        return esimd256<(32)>::mergel(simd256<(32)>::srai<16>(arg1), simd256<(32)>::srai<16>(simd256<(32)>::slli<16>(arg1)));
2802}
2803
2804//The total number of operations is 54
2805template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<32>::signextendl(bitblock256_t arg1)
2806{
2807        return esimd256<(64)>::mergel(simd256<(64)>::srai<32>(arg1), simd256<(64)>::srai<32>(simd256<(64)>::slli<32>(arg1)));
2808}
2809
2810//The total number of operations is 235
2811template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<64>::signextendl(bitblock256_t arg1)
2812{
2813        return esimd256<(128)>::mergel(simd256<(128)>::srai<64>(arg1), simd256<(128)>::srai<64>(simd256<(128)>::slli<64>(arg1)));
2814}
2815
2816//The total number of operations is 260
2817template <> IDISA_ALWAYS_INLINE bitblock256_t esimd256<128>::signextendl(bitblock256_t arg1)
2818{
2819        return simd256<(256)>::srai<128>(simd256<(256)>::slli<128>(arg1));
2820}
2821
2822//The total number of operations is 82
2823template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::dsrli(bitblock256_t arg1, bitblock256_t arg2)
2824{
2825        return simd_or(mvmd256<2>::srli<sh>(arg1), mvmd256<2>::slli<((128)-sh)>(arg2));
2826}
2827
2828//The total number of operations is 82
2829template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::dsrli(bitblock256_t arg1, bitblock256_t arg2)
2830{
2831        return simd_or(mvmd256<4>::srli<sh>(arg1), mvmd256<4>::slli<((64)-sh)>(arg2));
2832}
2833
2834//The total number of operations is 82
2835template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::dsrli(bitblock256_t arg1, bitblock256_t arg2)
2836{
2837        return simd_or(mvmd256<8>::srli<sh>(arg1), mvmd256<8>::slli<((32)-sh)>(arg2));
2838}
2839
2840//The total number of operations is 82
2841template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::dsrli(bitblock256_t arg1, bitblock256_t arg2)
2842{
2843        return simd_or(mvmd256<16>::srli<sh>(arg1), mvmd256<16>::slli<((16)-sh)>(arg2));
2844}
2845
2846//The total number of operations is 82
2847template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::dsrli(bitblock256_t arg1, bitblock256_t arg2)
2848{
2849        return simd_or(mvmd256<32>::srli<sh>(arg1), mvmd256<32>::slli<((8)-sh)>(arg2));
2850}
2851
2852//The total number of operations is 82
2853template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::dsrli(bitblock256_t arg1, bitblock256_t arg2)
2854{
2855        return simd_or(mvmd256<64>::srli<sh>(arg1), mvmd256<64>::slli<((4)-sh)>(arg2));
2856}
2857
2858//The total number of operations is 82
2859template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::dsrli(bitblock256_t arg1, bitblock256_t arg2)
2860{
2861        return simd_or(mvmd256<128>::srli<sh>(arg1), mvmd256<128>::slli<((2)-sh)>(arg2));
2862}
2863
2864//The total number of operations is 82
2865template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<256>::dsrli(bitblock256_t arg1, bitblock256_t arg2)
2866{
2867        return simd_or(mvmd256<256>::srli<sh>(arg1), mvmd256<256>::slli<((1)-sh)>(arg2));
2868}
2869
2870//The total number of operations is 1
2871template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<1>::fill(uint64_t val1)
2872{
2873        return mvmd256<32>::fill((-1*val1));
2874}
2875
2876//The total number of operations is 1
2877template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::fill(uint64_t val1)
2878{
2879        return mvmd256<(4)>::fill(((val1<<2)|val1));
2880}
2881
2882//The total number of operations is 1
2883template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::fill(uint64_t val1)
2884{
2885        return mvmd256<(8)>::fill(((val1<<4)|val1));
2886}
2887
2888//The total number of operations is 1
2889template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::fill(uint64_t val1)
2890{
2891        return (bitblock256_t)_mm256_set1_epi8((int32_t)(val1));
2892}
2893
2894//The total number of operations is 1
2895template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::fill(uint64_t val1)
2896{
2897        return (bitblock256_t)_mm256_set1_epi16((int32_t)(val1));
2898}
2899
2900//The total number of operations is 1
2901template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::fill(uint64_t val1)
2902{
2903        return (bitblock256_t)_mm256_set1_epi32((int32_t)(val1));
2904}
2905
2906//The total number of operations is 2
2907template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd256<1>::extract(bitblock256_t arg1)
2908{
2909        return (((pos%2) == 0) ? (mvmd256<(2)>::extract<(pos/2)>(arg1)&(1)) : (mvmd256<(2)>::extract<(pos/2)>(arg1)>>1));
2910}
2911
2912//The total number of operations is 2
2913template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd256<2>::extract(bitblock256_t arg1)
2914{
2915        return (((pos%2) == 0) ? (mvmd256<(4)>::extract<(pos/2)>(arg1)&(3)) : (mvmd256<(4)>::extract<(pos/2)>(arg1)>>2));
2916}
2917
2918//The total number of operations is 2
2919template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd256<4>::extract(bitblock256_t arg1)
2920{
2921        return (((pos%2) == 0) ? (mvmd256<(8)>::extract<(pos/2)>(arg1)&(15)) : (mvmd256<(8)>::extract<(pos/2)>(arg1)>>4));
2922}
2923
2924//The total number of operations is 2
2925template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd256<8>::extract(bitblock256_t arg1)
2926{
2927        return (((pos%2) == 0) ? (mvmd256<(16)>::extract<(pos/2)>(arg1)&(255)) : (mvmd256<(16)>::extract<(pos/2)>(arg1)>>8));
2928}
2929
2930//The total number of operations is 2
2931template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd256<16>::extract(bitblock256_t arg1)
2932{
2933        return ((pos < 8) ? (65535&_mm_extract_epi16(avx_select_lo128(arg1), (int32_t)(pos))) : (65535&_mm_extract_epi16(avx_select_hi128(arg1), (int32_t)((pos-8)))));
2934}
2935
2936//The total number of operations is 2
2937template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd256<32>::extract(bitblock256_t arg1)
2938{
2939        return ((pos < 4) ? (((uint64_t)((4294967296UL)-1))&_mm_extract_epi32(avx_select_lo128(arg1), (int32_t)(pos))) : (((uint64_t)((4294967296UL)-1))&_mm_extract_epi32(avx_select_hi128(arg1), (int32_t)((pos-4)))));
2940}
2941
2942//The total number of operations is 4
2943template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd256<64>::extract(bitblock256_t arg1)
2944{
2945        return ((((uint64_t)mvmd256<(32)>::extract<((2*pos)+1)>(arg1))<<(32))|mvmd256<(32)>::extract<(2*pos)>(arg1));
2946}
2947
2948//The total number of operations is 30
2949template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<1>::splat(bitblock256_t arg1)
2950{
2951        bitblock256_t tmpArg = (((pos%2) == 0) ? simd256<(2)>::slli<1>(arg1) : simd256<(2)>::srli<1>(arg1));
2952        bitblock256_t arg11 = (((pos%2) == 0) ? simd_and(simd256<(2)>::lomask(), arg1) : simd_and(simd256<(2)>::himask(), arg1));
2953        return mvmd256<(2)>::splat<(pos/2)>(simd_or(tmpArg, arg11));
2954}
2955
2956//The total number of operations is 21
2957template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::splat(bitblock256_t arg1)
2958{
2959        bitblock256_t tmpArg = (((pos%2) == 0) ? simd256<(4)>::slli<2>(arg1) : simd256<(4)>::srli<2>(arg1));
2960        bitblock256_t arg11 = (((pos%2) == 0) ? simd_and(simd256<(4)>::lomask(), arg1) : simd_and(simd256<(4)>::himask(), arg1));
2961        return mvmd256<(4)>::splat<(pos/2)>(simd_or(tmpArg, arg11));
2962}
2963
2964//The total number of operations is 12
2965template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::splat(bitblock256_t arg1)
2966{
2967        bitblock256_t tmpArg = (((pos%2) == 0) ? simd256<(8)>::slli<4>(arg1) : simd256<(8)>::srli<4>(arg1));
2968        bitblock256_t arg11 = (((pos%2) == 0) ? simd_and(simd256<(8)>::lomask(), arg1) : simd_and(simd256<(8)>::himask(), arg1));
2969        return mvmd256<(8)>::splat<(pos/2)>(simd_or(tmpArg, arg11));
2970}
2971
2972//The total number of operations is 3
2973template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::splat(bitblock256_t arg1)
2974{
2975        return ((pos < 16) ? mvmd256<8>::fill(_mm_extract_epi8(avx_select_lo128(arg1), (int32_t)(pos))) : mvmd256<8>::fill(_mm_extract_epi8(avx_select_hi128(arg1), (int32_t)((pos-16)))));
2976}
2977
2978//The total number of operations is 3
2979template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::splat(bitblock256_t arg1)
2980{
2981        return ((pos < 8) ? mvmd256<16>::fill(_mm_extract_epi16(avx_select_lo128(arg1), (int32_t)(pos))) : mvmd256<16>::fill(_mm_extract_epi16(avx_select_hi128(arg1), (int32_t)((pos-8)))));
2982}
2983
2984//The total number of operations is 3
2985template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::splat(bitblock256_t arg1)
2986{
2987        return ((pos < 4) ? mvmd256<32>::fill(_mm_extract_epi32(avx_select_lo128(arg1), (int32_t)(pos))) : mvmd256<32>::fill(_mm_extract_epi32(avx_select_hi128(arg1), (int32_t)((pos-4)))));
2988}
2989
2990//The total number of operations is 9
2991template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::splat(bitblock256_t arg1)
2992{
2993        return simd256<1>::ifh(simd256<64>::himask(), mvmd256<(32)>::splat<((2*pos)+1)>(arg1), mvmd256<(32)>::splat<(2*pos)>(arg1));
2994}
2995
2996//The total number of operations is 21
2997template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::splat(bitblock256_t arg1)
2998{
2999        return simd256<1>::ifh(simd256<128>::himask(), mvmd256<(64)>::splat<((2*pos)+1)>(arg1), mvmd256<(64)>::splat<(2*pos)>(arg1));
3000}
3001
3002//The total number of operations is 45
3003template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<256>::splat(bitblock256_t arg1)
3004{
3005        return simd256<1>::ifh(simd256<256>::himask(), mvmd256<(128)>::splat<((2*pos)+1)>(arg1), mvmd256<(128)>::splat<(2*pos)>(arg1));
3006}
3007
3008//The total number of operations is 15
3009template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<1>::fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16)
3010{
3011        return simd_or(mvmd256<(2)>::fill16((val1<<1), (val3<<1), (val5<<1), (val7<<1), (val9<<1), (val11<<1), (val13<<1), (val15<<1), (val1<<1), (val3<<1), (val5<<1), (val7<<1), (val9<<1), (val11<<1), (val13<<1), (val15<<1)), mvmd256<(2)>::fill16((val2&(1)), (val4&(1)), (val6&(1)), (val8&(1)), (val10&(1)), (val12&(1)), (val14&(1)), (val16&(1)), (val2&(1)), (val4&(1)), (val6&(1)), (val8&(1)), (val10&(1)), (val12&(1)), (val14&(1)), (val16&(1))));
3012}
3013
3014//The total number of operations is 7
3015template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16)
3016{
3017        return simd_or(mvmd256<(4)>::fill16((val1<<2), (val3<<2), (val5<<2), (val7<<2), (val9<<2), (val11<<2), (val13<<2), (val15<<2), (val1<<2), (val3<<2), (val5<<2), (val7<<2), (val9<<2), (val11<<2), (val13<<2), (val15<<2)), mvmd256<(4)>::fill16((val2&(3)), (val4&(3)), (val6&(3)), (val8&(3)), (val10&(3)), (val12&(3)), (val14&(3)), (val16&(3)), (val2&(3)), (val4&(3)), (val6&(3)), (val8&(3)), (val10&(3)), (val12&(3)), (val14&(3)), (val16&(3))));
3018}
3019
3020//The total number of operations is 3
3021template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16)
3022{
3023        return simd_or(mvmd256<(8)>::fill16((val1<<4), (val3<<4), (val5<<4), (val7<<4), (val9<<4), (val11<<4), (val13<<4), (val15<<4), (val1<<4), (val3<<4), (val5<<4), (val7<<4), (val9<<4), (val11<<4), (val13<<4), (val15<<4)), mvmd256<(8)>::fill16((val2&(15)), (val4&(15)), (val6&(15)), (val8&(15)), (val10&(15)), (val12&(15)), (val14&(15)), (val16&(15)), (val2&(15)), (val4&(15)), (val6&(15)), (val8&(15)), (val10&(15)), (val12&(15)), (val14&(15)), (val16&(15))));
3024}
3025
3026//The total number of operations is 1
3027template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16)
3028{
3029        return (bitblock256_t)_mm256_set_epi8((int32_t)(val1), (int32_t)(val2), (int32_t)(val3), (int32_t)(val4), (int32_t)(val5), (int32_t)(val6), (int32_t)(val7), (int32_t)(val8), (int32_t)(val9), (int32_t)(val10), (int32_t)(val11), (int32_t)(val12), (int32_t)(val13), (int32_t)(val14), (int32_t)(val15), (int32_t)(val16), (int32_t)(val1), (int32_t)(val2), (int32_t)(val3), (int32_t)(val4), (int32_t)(val5), (int32_t)(val6), (int32_t)(val7), (int32_t)(val8), (int32_t)(val9), (int32_t)(val10), (int32_t)(val11), (int32_t)(val12), (int32_t)(val13), (int32_t)(val14), (int32_t)(val15), (int32_t)(val16));
3030}
3031
3032//The total number of operations is 5
3033template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16)
3034{
3035        return simd256<1>::ifh(simd256<(256)>::himask(), mvmd256<16>::fill8(val1, val2, val3, val4, val5, val6, val7, val8), mvmd256<16>::fill8(val9, val10, val11, val12, val13, val14, val15, val16));
3036}
3037
3038//The total number of operations is 40
3039template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::slli(bitblock256_t arg1)
3040{
3041        return simd256<256>::slli<(sh*2)>(arg1);
3042}
3043
3044//The total number of operations is 40
3045template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::slli(bitblock256_t arg1)
3046{
3047        return simd256<256>::slli<(sh*4)>(arg1);
3048}
3049
3050//The total number of operations is 40
3051template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::slli(bitblock256_t arg1)
3052{
3053        return simd256<256>::slli<(sh*8)>(arg1);
3054}
3055
3056//The total number of operations is 40
3057template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::slli(bitblock256_t arg1)
3058{
3059        return simd256<256>::slli<(sh*16)>(arg1);
3060}
3061
3062//The total number of operations is 40
3063template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::slli(bitblock256_t arg1)
3064{
3065        return simd256<256>::slli<(sh*32)>(arg1);
3066}
3067
3068//The total number of operations is 40
3069template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::slli(bitblock256_t arg1)
3070{
3071        return simd256<256>::slli<(sh*64)>(arg1);
3072}
3073
3074//The total number of operations is 40
3075template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::slli(bitblock256_t arg1)
3076{
3077        return simd256<256>::slli<(sh*128)>(arg1);
3078}
3079
3080//The total number of operations is 40
3081template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<256>::slli(bitblock256_t arg1)
3082{
3083        return simd256<256>::slli<(sh*256)>(arg1);
3084}
3085
3086//The total number of operations is 5
3087template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<1>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4)
3088{
3089        return simd256<1>::ifh(simd256<(4)>::himask(), mvmd256<1>::fill2(val1, val2), mvmd256<1>::fill2(val3, val4));
3090}
3091
3092//The total number of operations is 5
3093template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4)
3094{
3095        return simd256<1>::ifh(simd256<(8)>::himask(), mvmd256<2>::fill2(val1, val2), mvmd256<2>::fill2(val3, val4));
3096}
3097
3098//The total number of operations is 5
3099template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4)
3100{
3101        return simd256<1>::ifh(simd256<(16)>::himask(), mvmd256<4>::fill2(val1, val2), mvmd256<4>::fill2(val3, val4));
3102}
3103
3104//The total number of operations is 5
3105template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4)
3106{
3107        return simd256<1>::ifh(simd256<(32)>::himask(), mvmd256<8>::fill2(val1, val2), mvmd256<8>::fill2(val3, val4));
3108}
3109
3110//The total number of operations is 3
3111template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4)
3112{
3113        return simd_or(mvmd256<(32)>::fill4((val1<<16), (val3<<16), (val1<<16), (val3<<16)), mvmd256<(32)>::fill4((val2&(65535)), (val4&(65535)), (val2&(65535)), (val4&(65535))));
3114}
3115
3116//The total number of operations is 1
3117template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4)
3118{
3119        return (bitblock256_t)_mm256_set_epi32((int32_t)(val1), (int32_t)(val2), (int32_t)(val3), (int32_t)(val4), (int32_t)(val1), (int32_t)(val2), (int32_t)(val3), (int32_t)(val4));
3120}
3121
3122//The total number of operations is 41
3123template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::srli(bitblock256_t arg1)
3124{
3125        return simd256<256>::srli<(sh*2)>(arg1);
3126}
3127
3128//The total number of operations is 41
3129template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::srli(bitblock256_t arg1)
3130{
3131        return simd256<256>::srli<(sh*4)>(arg1);
3132}
3133
3134//The total number of operations is 41
3135template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::srli(bitblock256_t arg1)
3136{
3137        return simd256<256>::srli<(sh*8)>(arg1);
3138}
3139
3140//The total number of operations is 41
3141template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::srli(bitblock256_t arg1)
3142{
3143        return simd256<256>::srli<(sh*16)>(arg1);
3144}
3145
3146//The total number of operations is 41
3147template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::srli(bitblock256_t arg1)
3148{
3149        return simd256<256>::srli<(sh*32)>(arg1);
3150}
3151
3152//The total number of operations is 41
3153template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::srli(bitblock256_t arg1)
3154{
3155        return simd256<256>::srli<(sh*64)>(arg1);
3156}
3157
3158//The total number of operations is 41
3159template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::srli(bitblock256_t arg1)
3160{
3161        return simd256<256>::srli<(sh*128)>(arg1);
3162}
3163
3164//The total number of operations is 41
3165template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<256>::srli(bitblock256_t arg1)
3166{
3167        return simd256<256>::srli<(sh*256)>(arg1);
3168}
3169
3170//The total number of operations is 1
3171template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<1>::fill2(uint64_t val1, uint64_t val2)
3172{
3173        return mvmd256<(2)>::fill(((val1<<1)|(val2&(1))));
3174}
3175
3176//The total number of operations is 1
3177template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::fill2(uint64_t val1, uint64_t val2)
3178{
3179        return mvmd256<(4)>::fill(((val1<<2)|(val2&(3))));
3180}
3181
3182//The total number of operations is 1
3183template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::fill2(uint64_t val1, uint64_t val2)
3184{
3185        return mvmd256<(8)>::fill(((val1<<4)|(val2&(15))));
3186}
3187
3188//The total number of operations is 1
3189template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::fill2(uint64_t val1, uint64_t val2)
3190{
3191        return mvmd256<(16)>::fill(((val1<<8)|(val2&(255))));
3192}
3193
3194//The total number of operations is 1
3195template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::fill2(uint64_t val1, uint64_t val2)
3196{
3197        return mvmd256<(32)>::fill(((val1<<16)|(val2&(65535))));
3198}
3199
3200//The total number of operations is 5
3201template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::fill2(uint64_t val1, uint64_t val2)
3202{
3203        return simd256<1>::ifh(simd256<(64)>::himask(), mvmd256<32>::fill(val1), mvmd256<32>::fill(val2));
3204}
3205
3206//The total number of operations is 82
3207template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::dslli(bitblock256_t arg1, bitblock256_t arg2)
3208{
3209        return simd_or(mvmd256<2>::slli<sh>(arg1), mvmd256<2>::srli<((128)-sh)>(arg2));
3210}
3211
3212//The total number of operations is 82
3213template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::dslli(bitblock256_t arg1, bitblock256_t arg2)
3214{
3215        return simd_or(mvmd256<4>::slli<sh>(arg1), mvmd256<4>::srli<((64)-sh)>(arg2));
3216}
3217
3218//The total number of operations is 82
3219template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::dslli(bitblock256_t arg1, bitblock256_t arg2)
3220{
3221        return simd_or(mvmd256<8>::slli<sh>(arg1), mvmd256<8>::srli<((32)-sh)>(arg2));
3222}
3223
3224//The total number of operations is 82
3225template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::dslli(bitblock256_t arg1, bitblock256_t arg2)
3226{
3227        return simd_or(mvmd256<16>::slli<sh>(arg1), mvmd256<16>::srli<((16)-sh)>(arg2));
3228}
3229
3230//The total number of operations is 82
3231template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::dslli(bitblock256_t arg1, bitblock256_t arg2)
3232{
3233        return simd_or(mvmd256<32>::slli<sh>(arg1), mvmd256<32>::srli<((8)-sh)>(arg2));
3234}
3235
3236//The total number of operations is 82
3237template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<64>::dslli(bitblock256_t arg1, bitblock256_t arg2)
3238{
3239        return simd_or(mvmd256<64>::slli<sh>(arg1), mvmd256<64>::srli<((4)-sh)>(arg2));
3240}
3241
3242//The total number of operations is 82
3243template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<128>::dslli(bitblock256_t arg1, bitblock256_t arg2)
3244{
3245        return simd_or(mvmd256<128>::slli<sh>(arg1), mvmd256<128>::srli<((2)-sh)>(arg2));
3246}
3247
3248//The total number of operations is 82
3249template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<256>::dslli(bitblock256_t arg1, bitblock256_t arg2)
3250{
3251        return simd_or(mvmd256<256>::slli<sh>(arg1), mvmd256<256>::srli<((1)-sh)>(arg2));
3252}
3253
3254//The total number of operations is 13
3255template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<1>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8)
3256{
3257        return simd256<1>::ifh(simd256<(8)>::himask(), mvmd256<1>::fill4(val1, val2, val3, val4), mvmd256<1>::fill4(val5, val6, val7, val8));
3258}
3259
3260//The total number of operations is 13
3261template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<2>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8)
3262{
3263        return simd256<1>::ifh(simd256<(16)>::himask(), mvmd256<2>::fill4(val1, val2, val3, val4), mvmd256<2>::fill4(val5, val6, val7, val8));
3264}
3265
3266//The total number of operations is 7
3267template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<4>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8)
3268{
3269        return simd_or(mvmd256<(8)>::fill8((val1<<4), (val3<<4), (val5<<4), (val7<<4), (val1<<4), (val3<<4), (val5<<4), (val7<<4)), mvmd256<(8)>::fill8((val2&(15)), (val4&(15)), (val6&(15)), (val8&(15)), (val2&(15)), (val4&(15)), (val6&(15)), (val8&(15))));
3270}
3271
3272//The total number of operations is 3
3273template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<8>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8)
3274{
3275        return simd_or(mvmd256<(16)>::fill8((val1<<8), (val3<<8), (val5<<8), (val7<<8), (val1<<8), (val3<<8), (val5<<8), (val7<<8)), mvmd256<(16)>::fill8((val2&(255)), (val4&(255)), (val6&(255)), (val8&(255)), (val2&(255)), (val4&(255)), (val6&(255)), (val8&(255))));
3276}
3277
3278//The total number of operations is 1
3279template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<16>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8)
3280{
3281        return (bitblock256_t)_mm256_set_epi16((int32_t)(val1), (int32_t)(val2), (int32_t)(val3), (int32_t)(val4), (int32_t)(val5), (int32_t)(val6), (int32_t)(val7), (int32_t)(val8), (int32_t)(val1), (int32_t)(val2), (int32_t)(val3), (int32_t)(val4), (int32_t)(val5), (int32_t)(val6), (int32_t)(val7), (int32_t)(val8));
3282}
3283
3284//The total number of operations is 5
3285template <> IDISA_ALWAYS_INLINE bitblock256_t mvmd256<32>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8)
3286{
3287        return simd256<1>::ifh(simd256<(256)>::himask(), mvmd256<32>::fill4(val1, val2, val3, val4), mvmd256<32>::fill4(val5, val6, val7, val8));
3288}
3289
3290//The total number of operations is 209
3291IDISA_ALWAYS_INLINE uint64_t bitblock256::popcount(bitblock256_t arg1)
3292{
3293        return mvmd256<64>::extract<0>(simd256<256>::popcount(arg1));
3294}
3295
3296//The total number of operations is 2
3297IDISA_ALWAYS_INLINE bool bitblock256::all(bitblock256_t arg1)
3298{
3299        return _mm256_testz_si256(((__m256i)simd_not(arg1)), ((__m256i)simd256<8>::constant<-1>())) == 1;
3300}
3301
3302//The total number of operations is 1
3303IDISA_ALWAYS_INLINE bool bitblock256::any(bitblock256_t arg1)
3304{
3305        return _mm256_testz_si256(((__m256i)arg1), ((__m256i)arg1)) == 0;
3306}
3307
3308#endif
Note: See TracBrowser for help on using the repository browser.