Changeset 2324


Ignore:
Timestamp:
Aug 24, 2012, 12:29:55 PM (7 years ago)
Author:
cameron
Message:

Library with field-width-based integer types

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/idisa_cpp/idisa_sse2.cpp

    r2275 r2324  
    1414
    1515typedef __m128i bitblock128_t;
     16
     // FieldType<fw>::T names the integer type associated with field width `fw`.
     // This primary template is the fallback used for any `fw` that has no
     // explicit specialization.
     17template <uint32_t fw> struct FieldType {
     18   typedef int T;  // fallback: T is int when no specialization matches fw
     19};
     20
     // Explicit FieldType specializations for the power-of-two field widths.
     // Widths 1, 2, 4 and 8 all use uint8_t; 16, 32 and 64 use the unsigned
     // type of the same size.
     21template <> struct FieldType<1> {typedef uint8_t T;};
     22template <> struct FieldType<2> {typedef uint8_t T;};
     23template <> struct FieldType<4> {typedef uint8_t T;};
     24template <> struct FieldType<8> {typedef uint8_t T;};
     25template <> struct FieldType<16> {typedef uint16_t T;};
     26template <> struct FieldType<32> {typedef uint32_t T;};
     27template <> struct FieldType<64> {typedef uint64_t T;};
     // NOTE(review): width 128 maps to uint64_t, which is only 64 bits wide and
     // so cannot represent a full 128-bit field value - confirm this is intended.
     28template <> struct FieldType<128> {typedef uint64_t T;};
     29
     30
     31
    1632template <uint32_t fw>
    1733class simd128
     
    2339        static IDISA_ALWAYS_INLINE bitblock128_t umult(bitblock128_t arg1, bitblock128_t arg2);
    2440        static IDISA_ALWAYS_INLINE bitblock128_t ult(bitblock128_t arg1, bitblock128_t arg2);
    25         template <uint64_t sh> static IDISA_ALWAYS_INLINE bitblock128_t srli(bitblock128_t arg1);
     41        template <uint8_t sh> static IDISA_ALWAYS_INLINE bitblock128_t srli(bitblock128_t arg1);
    2642        static IDISA_ALWAYS_INLINE bitblock128_t ctz(bitblock128_t arg1);
    2743        static IDISA_ALWAYS_INLINE bitblock128_t sll(bitblock128_t arg1, bitblock128_t shift_mask);
     
    3046        static IDISA_ALWAYS_INLINE bitblock128_t neg(bitblock128_t arg1);
    3147        static IDISA_ALWAYS_INLINE bitblock128_t himask();
    32         template <uint64_t sh> static IDISA_ALWAYS_INLINE bitblock128_t slli(bitblock128_t arg1);
     48        template <uint8_t sh> static IDISA_ALWAYS_INLINE bitblock128_t slli(bitblock128_t arg1);
    3349        static IDISA_ALWAYS_INLINE bitblock128_t ifh(bitblock128_t arg1, bitblock128_t arg2, bitblock128_t arg3);
    3450        static IDISA_ALWAYS_INLINE bitblock128_t sub(bitblock128_t arg1, bitblock128_t arg2);
     
    3753        static IDISA_ALWAYS_INLINE bitblock128_t lomask();
    3854        static IDISA_ALWAYS_INLINE bitblock128_t umin(bitblock128_t arg1, bitblock128_t arg2);
    39         template <uint64_t val> static IDISA_ALWAYS_INLINE bitblock128_t constant();
     55        template <typename FieldType<fw>::T val> static IDISA_ALWAYS_INLINE bitblock128_t constant();
    4056        static IDISA_ALWAYS_INLINE bitblock128_t min(bitblock128_t arg1, bitblock128_t arg2);
    4157        static IDISA_ALWAYS_INLINE bitblock128_t umax(bitblock128_t arg1, bitblock128_t arg2);
    4258        static IDISA_ALWAYS_INLINE bitblock128_t abs(bitblock128_t arg1);
    4359        static IDISA_ALWAYS_INLINE bitblock128_t xor_hl(bitblock128_t arg1);
    44         template <uint64_t sh> static IDISA_ALWAYS_INLINE bitblock128_t srai(bitblock128_t arg1);
     60        template <uint8_t sh> static IDISA_ALWAYS_INLINE bitblock128_t srai(bitblock128_t arg1);
    4561        static IDISA_ALWAYS_INLINE bitblock128_t lt(bitblock128_t arg1, bitblock128_t arg2);
    4662        static IDISA_ALWAYS_INLINE bitblock128_t add(bitblock128_t arg1, bitblock128_t arg2);
     
    5672        static IDISA_ALWAYS_INLINE bitblock128_t packss(bitblock128_t arg1, bitblock128_t arg2);
    5773        static IDISA_ALWAYS_INLINE bitblock128_t packh(bitblock128_t arg1, bitblock128_t arg2);
    58         static IDISA_ALWAYS_INLINE uint64_t signmask(bitblock128_t arg1);
     74        static IDISA_ALWAYS_INLINE typename FieldType<128/fw>::T signmask(bitblock128_t arg1);
    5975        static IDISA_ALWAYS_INLINE bitblock128_t packl(bitblock128_t arg1, bitblock128_t arg2);
    6076        static IDISA_ALWAYS_INLINE bitblock128_t min_hl(bitblock128_t arg1, bitblock128_t arg2);
     
    7894{
    7995public:
    80         template <uint64_t msk> static IDISA_ALWAYS_INLINE bitblock128_t shufflei(bitblock128_t arg1);
    81         template <uint64_t sh> static IDISA_ALWAYS_INLINE bitblock128_t dsrli(bitblock128_t arg1, bitblock128_t arg2);
    82         static IDISA_ALWAYS_INLINE bitblock128_t fill(uint64_t val1);
    83         template <uint64_t pos> static IDISA_ALWAYS_INLINE uint64_t extract(bitblock128_t arg1);
    84         template <uint64_t pos> static IDISA_ALWAYS_INLINE bitblock128_t splat(bitblock128_t arg1);
    85         template <uint64_t sh> static IDISA_ALWAYS_INLINE bitblock128_t slli(bitblock128_t arg1);
    86         static IDISA_ALWAYS_INLINE bitblock128_t fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4);
    87         template <uint64_t sh> static IDISA_ALWAYS_INLINE bitblock128_t srli(bitblock128_t arg1);
    88         static IDISA_ALWAYS_INLINE bitblock128_t fill2(uint64_t val1, uint64_t val2);
    89         template <uint64_t sh> static IDISA_ALWAYS_INLINE bitblock128_t dslli(bitblock128_t arg1, bitblock128_t arg2);
    90         static IDISA_ALWAYS_INLINE bitblock128_t fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8);
    91         static IDISA_ALWAYS_INLINE bitblock128_t fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16);
     96        template <uint8_t msk> static IDISA_ALWAYS_INLINE bitblock128_t shufflei(bitblock128_t arg1);
     97        template <uint8_t sh> static IDISA_ALWAYS_INLINE bitblock128_t dsrli(bitblock128_t arg1, bitblock128_t arg2);
     98        static IDISA_ALWAYS_INLINE bitblock128_t fill(typename FieldType<fw>::T val1);
     99        template <uint8_t pos> static IDISA_ALWAYS_INLINE typename FieldType<fw>::T extract(bitblock128_t arg1);
     100        template <uint8_t pos> static IDISA_ALWAYS_INLINE bitblock128_t splat(bitblock128_t arg1);
     101        template <uint8_t sh> static IDISA_ALWAYS_INLINE bitblock128_t slli(bitblock128_t arg1);
     102        static IDISA_ALWAYS_INLINE bitblock128_t fill4(typename FieldType<fw>::T val1, typename FieldType<fw>::T val2, typename FieldType<fw>::T val3, typename FieldType<fw>::T val4);
     103        template <uint8_t sh> static IDISA_ALWAYS_INLINE bitblock128_t srli(bitblock128_t arg1);
     104        static IDISA_ALWAYS_INLINE bitblock128_t fill2(typename FieldType<fw>::T val1, typename FieldType<fw>::T val2);
     105        template <uint8_t sh> static IDISA_ALWAYS_INLINE bitblock128_t dslli(bitblock128_t arg1, bitblock128_t arg2);
     106        static IDISA_ALWAYS_INLINE bitblock128_t fill8(typename FieldType<fw>::T val1, typename FieldType<fw>::T val2, typename FieldType<fw>::T val3, typename FieldType<fw>::T val4, typename FieldType<fw>::T val5, typename FieldType<fw>::T val6, typename FieldType<fw>::T val7, typename FieldType<fw>::T val8);
     107        static IDISA_ALWAYS_INLINE bitblock128_t fill16(typename FieldType<fw>::T val1, typename FieldType<fw>::T val2, typename FieldType<fw>::T val3, typename FieldType<fw>::T val4, typename FieldType<fw>::T val5, typename FieldType<fw>::T val6, typename FieldType<fw>::T val7, typename FieldType<fw>::T val8, typename FieldType<fw>::T val9, typename FieldType<fw>::T val10, typename FieldType<fw>::T val11, typename FieldType<fw>::T val12, typename FieldType<fw>::T val13, typename FieldType<fw>::T val14, typename FieldType<fw>::T val15, typename FieldType<fw>::T val16);
    92108};
    93109
     
    97113        static IDISA_ALWAYS_INLINE bitblock128_t sll(bitblock128_t arg1, bitblock128_t arg2);
    98114        static IDISA_ALWAYS_INLINE bitblock128_t load_unaligned(const bitblock128_t* arg1);
    99         template <uint64_t sh> static IDISA_ALWAYS_INLINE bitblock128_t srli(bitblock128_t arg1);
     115        template <uint8_t sh> static IDISA_ALWAYS_INLINE bitblock128_t srli(bitblock128_t arg1);
    100116        static IDISA_ALWAYS_INLINE bitblock128_t srl(bitblock128_t arg1, bitblock128_t arg2);
    101117        static IDISA_ALWAYS_INLINE void store_aligned(bitblock128_t arg1, bitblock128_t* arg2);
    102118        static IDISA_ALWAYS_INLINE bool all(bitblock128_t arg1);
    103119        static IDISA_ALWAYS_INLINE bool any(bitblock128_t arg1);
    104         static IDISA_ALWAYS_INLINE uint64_t popcount(bitblock128_t arg1);
    105         template <uint64_t sh> static IDISA_ALWAYS_INLINE bitblock128_t slli(bitblock128_t arg1);
     120        static IDISA_ALWAYS_INLINE uint8_t popcount(bitblock128_t arg1);
     121        template <uint8_t sh> static IDISA_ALWAYS_INLINE bitblock128_t slli(bitblock128_t arg1);
    106122        static IDISA_ALWAYS_INLINE bitblock128_t load_aligned(const bitblock128_t* arg1);
    107123        static IDISA_ALWAYS_INLINE void store_unaligned(bitblock128_t arg1, bitblock128_t* arg2);
     
    162178template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::lt(bitblock128_t arg1, bitblock128_t arg2);
    163179template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::lt(bitblock128_t arg1, bitblock128_t arg2);
    164 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::srli(bitblock128_t arg1);
    165 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::srli(bitblock128_t arg1);
    166 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::srli(bitblock128_t arg1);
    167 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::srli(bitblock128_t arg1);
    168 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::srli(bitblock128_t arg1);
    169 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srli(bitblock128_t arg1);
    170 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srli(bitblock128_t arg1);
     180template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::srli(bitblock128_t arg1);
     181template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::srli(bitblock128_t arg1);
     182template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::srli(bitblock128_t arg1);
     183template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::srli(bitblock128_t arg1);
     184template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::srli(bitblock128_t arg1);
     185template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srli(bitblock128_t arg1);
     186template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srli(bitblock128_t arg1);
    171187template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::ctz(bitblock128_t arg1);
    172188template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::ctz(bitblock128_t arg1);
     
    209225template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::neg(bitblock128_t arg1);
    210226template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::neg(bitblock128_t arg1);
    211 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::slli(bitblock128_t arg1);
    212 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::slli(bitblock128_t arg1);
    213 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::slli(bitblock128_t arg1);
    214 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::slli(bitblock128_t arg1);
    215 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::slli(bitblock128_t arg1);
    216 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::slli(bitblock128_t arg1);
    217 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::slli(bitblock128_t arg1);
     227template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::slli(bitblock128_t arg1);
     228template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::slli(bitblock128_t arg1);
     229template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::slli(bitblock128_t arg1);
     230template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::slli(bitblock128_t arg1);
     231template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::slli(bitblock128_t arg1);
     232template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::slli(bitblock128_t arg1);
     233template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::slli(bitblock128_t arg1);
    218234template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::ifh(bitblock128_t arg1, bitblock128_t arg2, bitblock128_t arg3);
    219235template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::ifh(bitblock128_t arg1, bitblock128_t arg2, bitblock128_t arg3);
     
    241257template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srl(bitblock128_t arg1, bitblock128_t shift_mask);
    242258template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srl(bitblock128_t arg1, bitblock128_t shift_mask);
    243 template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::constant();
    244 template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::constant();
    245 template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::constant();
    246 template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::constant();
    247 template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::constant();
    248 template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::constant();
    249 template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::constant();
    250 template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::constant();
     259template <> template <typename FieldType<1>::T val> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::constant();
     260template <> template <typename FieldType<2>::T val> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::constant();
     261template <> template <typename FieldType<4>::T val> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::constant();
     262template <> template <typename FieldType<8>::T val> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::constant();
     263template <> template <typename FieldType<16>::T val> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::constant();
     264template <> template <typename FieldType<32>::T val> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::constant();
     265template <> template <typename FieldType<64>::T val> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::constant();
     266template <> template <typename FieldType<128>::T val> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::constant();
    251267template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::min(bitblock128_t arg1, bitblock128_t arg2);
    252268template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::min(bitblock128_t arg1, bitblock128_t arg2);
     
    287303template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::eq(bitblock128_t arg1, bitblock128_t arg2);
    288304template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::eq(bitblock128_t arg1, bitblock128_t arg2);
    289 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::srai(bitblock128_t arg1);
    290 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::srai(bitblock128_t arg1);
    291 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::srai(bitblock128_t arg1);
    292 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::srai(bitblock128_t arg1);
    293 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::srai(bitblock128_t arg1);
    294 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srai(bitblock128_t arg1);
    295 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srai(bitblock128_t arg1);
     305template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::srai(bitblock128_t arg1);
     306template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::srai(bitblock128_t arg1);
     307template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::srai(bitblock128_t arg1);
     308template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::srai(bitblock128_t arg1);
     309template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::srai(bitblock128_t arg1);
     310template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srai(bitblock128_t arg1);
     311template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srai(bitblock128_t arg1);
    296312template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::himask();
    297313template <> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::himask();
     
    338354template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<64>::packss(bitblock128_t arg1, bitblock128_t arg2);
    339355template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<128>::packss(bitblock128_t arg1, bitblock128_t arg2);
    340 template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<4>::signmask(bitblock128_t arg1);
    341 template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<8>::signmask(bitblock128_t arg1);
    342 template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<16>::signmask(bitblock128_t arg1);
    343 template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<32>::signmask(bitblock128_t arg1);
    344 template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<64>::signmask(bitblock128_t arg1);
    345 template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<128>::signmask(bitblock128_t arg1);
     356template <> IDISA_ALWAYS_INLINE typename FieldType<32>::T hsimd128<4>::signmask(bitblock128_t arg1);
     357template <> IDISA_ALWAYS_INLINE typename FieldType<128/8>::T hsimd128<8>::signmask(bitblock128_t arg1);
     358template <> IDISA_ALWAYS_INLINE typename FieldType<128/16>::T hsimd128<16>::signmask(bitblock128_t arg1);
     359template <> IDISA_ALWAYS_INLINE typename FieldType<128/32>::T hsimd128<32>::signmask(bitblock128_t arg1);
     360template <> IDISA_ALWAYS_INLINE typename FieldType<128/64>::T hsimd128<64>::signmask(bitblock128_t arg1);
     361template <> IDISA_ALWAYS_INLINE typename FieldType<128/128>::T hsimd128<128>::signmask(bitblock128_t arg1);
    346362template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<2>::packl(bitblock128_t arg1, bitblock128_t arg2);
    347363template <> IDISA_ALWAYS_INLINE bitblock128_t hsimd128<4>::packl(bitblock128_t arg1, bitblock128_t arg2);
     
    414430template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<32>::signextendl(bitblock128_t arg1);
    415431template <> IDISA_ALWAYS_INLINE bitblock128_t esimd128<64>::signextendl(bitblock128_t arg1);
    416 template <> template <uint64_t msk> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::shufflei(bitblock128_t arg1);
    417 template <> template <uint64_t msk> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::shufflei(bitblock128_t arg1);
    418 template <> template <uint64_t msk> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::shufflei(bitblock128_t arg1);
    419 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::dsrli(bitblock128_t arg1, bitblock128_t arg2);
    420 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::dsrli(bitblock128_t arg1, bitblock128_t arg2);
    421 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::dsrli(bitblock128_t arg1, bitblock128_t arg2);
    422 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::dsrli(bitblock128_t arg1, bitblock128_t arg2);
    423 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::dsrli(bitblock128_t arg1, bitblock128_t arg2);
    424 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::dsrli(bitblock128_t arg1, bitblock128_t arg2);
    425 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::dsrli(bitblock128_t arg1, bitblock128_t arg2);
    426 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill(uint64_t val1);
    427 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::fill(uint64_t val1);
    428 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::fill(uint64_t val1);
    429 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::fill(uint64_t val1);
    430 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::fill(uint64_t val1);
    431 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::fill(uint64_t val1);
    432 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::fill(uint64_t val1);
    433 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::fill(uint64_t val1);
    434 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd128<1>::extract(bitblock128_t arg1);
    435 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd128<2>::extract(bitblock128_t arg1);
    436 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd128<4>::extract(bitblock128_t arg1);
    437 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd128<8>::extract(bitblock128_t arg1);
    438 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd128<16>::extract(bitblock128_t arg1);
    439 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd128<32>::extract(bitblock128_t arg1);
    440 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd128<64>::extract(bitblock128_t arg1);
    441 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::splat(bitblock128_t arg1);
    442 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::splat(bitblock128_t arg1);
    443 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::splat(bitblock128_t arg1);
    444 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::splat(bitblock128_t arg1);
    445 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::splat(bitblock128_t arg1);
    446 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::splat(bitblock128_t arg1);
    447 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::splat(bitblock128_t arg1);
    448 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::splat(bitblock128_t arg1);
    449 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16);
    450 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16);
    451 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16);
    452 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16);
    453 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4);
    454 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4);
    455 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4);
    456 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4);
    457 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4);
    458 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4);
    459 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::srli(bitblock128_t arg1);
    460 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::srli(bitblock128_t arg1);
    461 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::srli(bitblock128_t arg1);
    462 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::srli(bitblock128_t arg1);
    463 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::srli(bitblock128_t arg1);
    464 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::srli(bitblock128_t arg1);
    465 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::srli(bitblock128_t arg1);
    466 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill2(uint64_t val1, uint64_t val2);
    467 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::fill2(uint64_t val1, uint64_t val2);
    468 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::fill2(uint64_t val1, uint64_t val2);
    469 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::fill2(uint64_t val1, uint64_t val2);
    470 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::fill2(uint64_t val1, uint64_t val2);
    471 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::fill2(uint64_t val1, uint64_t val2);
    472 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::fill2(uint64_t val1, uint64_t val2);
    473 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::dslli(bitblock128_t arg1, bitblock128_t arg2);
    474 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::dslli(bitblock128_t arg1, bitblock128_t arg2);
    475 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::dslli(bitblock128_t arg1, bitblock128_t arg2);
    476 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::dslli(bitblock128_t arg1, bitblock128_t arg2);
    477 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::dslli(bitblock128_t arg1, bitblock128_t arg2);
    478 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::dslli(bitblock128_t arg1, bitblock128_t arg2);
    479 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::dslli(bitblock128_t arg1, bitblock128_t arg2);
    480 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::slli(bitblock128_t arg1);
    481 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::slli(bitblock128_t arg1);
    482 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::slli(bitblock128_t arg1);
    483 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::slli(bitblock128_t arg1);
    484 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::slli(bitblock128_t arg1);
    485 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::slli(bitblock128_t arg1);
    486 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::slli(bitblock128_t arg1);
    487 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8);
    488 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8);
    489 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8);
    490 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8);
    491 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8);
     432template <> template <uint8_t msk> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::shufflei(bitblock128_t arg1);
     433template <> template <uint8_t msk> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::shufflei(bitblock128_t arg1);
     434template <> template <uint8_t msk> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::shufflei(bitblock128_t arg1);
     435template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::dsrli(bitblock128_t arg1, bitblock128_t arg2);
     436template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::dsrli(bitblock128_t arg1, bitblock128_t arg2);
     437template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::dsrli(bitblock128_t arg1, bitblock128_t arg2);
     438template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::dsrli(bitblock128_t arg1, bitblock128_t arg2);
     439template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::dsrli(bitblock128_t arg1, bitblock128_t arg2);
     440template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::dsrli(bitblock128_t arg1, bitblock128_t arg2);
     441template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::dsrli(bitblock128_t arg1, bitblock128_t arg2);
     442template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill(typename FieldType<1>::T val1);
     443template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::fill(typename FieldType<2>::T val1);
     444template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::fill(typename FieldType<4>::T val1);
     445template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::fill(typename FieldType<8>::T val1);
     446template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::fill(typename FieldType<16>::T val1);
     447template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::fill(typename FieldType<32>::T val1);
     448template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::fill(typename FieldType<64>::T val1);
     449template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::fill(typename FieldType<128>::T val1);
     450template <> template <uint8_t pos> IDISA_ALWAYS_INLINE typename FieldType<1>::T mvmd128<1>::extract(bitblock128_t arg1);
     451template <> template <uint8_t pos> IDISA_ALWAYS_INLINE typename FieldType<2>::T mvmd128<2>::extract(bitblock128_t arg1);
     452template <> template <uint8_t pos> IDISA_ALWAYS_INLINE typename FieldType<4>::T mvmd128<4>::extract(bitblock128_t arg1);
     453template <> template <uint8_t pos> IDISA_ALWAYS_INLINE typename FieldType<8>::T mvmd128<8>::extract(bitblock128_t arg1);
     454template <> template <uint8_t pos> IDISA_ALWAYS_INLINE typename FieldType<16>::T mvmd128<16>::extract(bitblock128_t arg1);
     455template <> template <uint8_t pos> IDISA_ALWAYS_INLINE typename FieldType<32>::T mvmd128<32>::extract(bitblock128_t arg1);
     456template <> template <uint8_t pos> IDISA_ALWAYS_INLINE typename FieldType<64>::T mvmd128<64>::extract(bitblock128_t arg1);
     457template <> template <uint8_t pos> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::splat(bitblock128_t arg1);
     458template <> template <uint8_t pos> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::splat(bitblock128_t arg1);
     459template <> template <uint8_t pos> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::splat(bitblock128_t arg1);
     460template <> template <uint8_t pos> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::splat(bitblock128_t arg1);
     461template <> template <uint8_t pos> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::splat(bitblock128_t arg1);
     462template <> template <uint8_t pos> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::splat(bitblock128_t arg1);
     463template <> template <uint8_t pos> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::splat(bitblock128_t arg1);
     464template <> template <uint8_t pos> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::splat(bitblock128_t arg1);
     465template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill16(typename FieldType<1>::T val1, typename FieldType<1>::T val2, typename FieldType<1>::T val3, typename FieldType<1>::T val4, typename FieldType<1>::T val5, typename FieldType<1>::T val6, typename FieldType<1>::T val7, typename FieldType<1>::T val8, typename FieldType<1>::T val9, typename FieldType<1>::T val10, typename FieldType<1>::T val11, typename FieldType<1>::T val12, typename FieldType<1>::T val13, typename FieldType<1>::T val14, typename FieldType<1>::T val15, typename FieldType<1>::T val16);
     466template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::fill16(typename FieldType<2>::T val1, typename FieldType<2>::T val2, typename FieldType<2>::T val3, typename FieldType<2>::T val4, typename FieldType<2>::T val5, typename FieldType<2>::T val6, typename FieldType<2>::T val7, typename FieldType<2>::T val8, typename FieldType<2>::T val9, typename FieldType<2>::T val10, typename FieldType<2>::T val11, typename FieldType<2>::T val12, typename FieldType<2>::T val13, typename FieldType<2>::T val14, typename FieldType<2>::T val15, typename FieldType<2>::T val16);
     467template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::fill16(typename FieldType<4>::T val1, typename FieldType<4>::T val2, typename FieldType<4>::T val3, typename FieldType<4>::T val4, typename FieldType<4>::T val5, typename FieldType<4>::T val6, typename FieldType<4>::T val7, typename FieldType<4>::T val8, typename FieldType<4>::T val9, typename FieldType<4>::T val10, typename FieldType<4>::T val11, typename FieldType<4>::T val12, typename FieldType<4>::T val13, typename FieldType<4>::T val14, typename FieldType<4>::T val15, typename FieldType<4>::T val16);
     468template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::fill16(typename FieldType<8>::T val1, typename FieldType<8>::T val2, typename FieldType<8>::T val3, typename FieldType<8>::T val4, typename FieldType<8>::T val5, typename FieldType<8>::T val6, typename FieldType<8>::T val7, typename FieldType<8>::T val8, typename FieldType<8>::T val9, typename FieldType<8>::T val10, typename FieldType<8>::T val11, typename FieldType<8>::T val12, typename FieldType<8>::T val13, typename FieldType<8>::T val14, typename FieldType<8>::T val15, typename FieldType<8>::T val16);
     469template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill4(typename FieldType<1>::T val1, typename FieldType<1>::T val2, typename FieldType<1>::T val3, typename FieldType<1>::T val4);
     470template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::fill4(typename FieldType<2>::T val1, typename FieldType<2>::T val2, typename FieldType<2>::T val3, typename FieldType<2>::T val4);
     471template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::fill4(typename FieldType<4>::T val1, typename FieldType<4>::T val2, typename FieldType<4>::T val3, typename FieldType<4>::T val4);
     472template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::fill4(typename FieldType<8>::T val1, typename FieldType<8>::T val2, typename FieldType<8>::T val3, typename FieldType<8>::T val4);
     473template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::fill4(typename FieldType<16>::T val1, typename FieldType<16>::T val2, typename FieldType<16>::T val3, typename FieldType<16>::T val4);
     474template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::fill4(typename FieldType<32>::T val1, typename FieldType<32>::T val2, typename FieldType<32>::T val3, typename FieldType<32>::T val4);
     475template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::srli(bitblock128_t arg1);
     476template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::srli(bitblock128_t arg1);
     477template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::srli(bitblock128_t arg1);
     478template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::srli(bitblock128_t arg1);
     479template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::srli(bitblock128_t arg1);
     480template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::srli(bitblock128_t arg1);
     481template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::srli(bitblock128_t arg1);
     482template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill2(typename FieldType<1>::T val1, typename FieldType<1>::T val2);
     483template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::fill2(typename FieldType<2>::T val1, typename FieldType<2>::T val2);
     484template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::fill2(typename FieldType<4>::T val1, typename FieldType<4>::T val2);
     485template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::fill2(typename FieldType<8>::T val1, typename FieldType<8>::T val2);
     486template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::fill2(typename FieldType<16>::T val1, typename FieldType<16>::T val2);
     487template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::fill2(typename FieldType<32>::T val1, typename FieldType<32>::T val2);
     488template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::fill2(typename FieldType<64>::T val1, typename FieldType<64>::T val2);
     489template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::dslli(bitblock128_t arg1, bitblock128_t arg2);
     490template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::dslli(bitblock128_t arg1, bitblock128_t arg2);
     491template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::dslli(bitblock128_t arg1, bitblock128_t arg2);
     492template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::dslli(bitblock128_t arg1, bitblock128_t arg2);
     493template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::dslli(bitblock128_t arg1, bitblock128_t arg2);
     494template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::dslli(bitblock128_t arg1, bitblock128_t arg2);
     495template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::dslli(bitblock128_t arg1, bitblock128_t arg2);
     496template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::slli(bitblock128_t arg1);
     497template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::slli(bitblock128_t arg1);
     498template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::slli(bitblock128_t arg1);
     499template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::slli(bitblock128_t arg1);
     500template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::slli(bitblock128_t arg1);
     501template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::slli(bitblock128_t arg1);
     502template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::slli(bitblock128_t arg1);
     503template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill8(typename FieldType<1>::T val1, typename FieldType<1>::T val2, typename FieldType<1>::T val3, typename FieldType<1>::T val4, typename FieldType<1>::T val5, typename FieldType<1>::T val6, typename FieldType<1>::T val7, typename FieldType<1>::T val8);
     504template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::fill8(typename FieldType<2>::T val1, typename FieldType<2>::T val2, typename FieldType<2>::T val3, typename FieldType<2>::T val4, typename FieldType<2>::T val5, typename FieldType<2>::T val6, typename FieldType<2>::T val7, typename FieldType<2>::T val8);
     505template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::fill8(typename FieldType<4>::T val1, typename FieldType<4>::T val2, typename FieldType<4>::T val3, typename FieldType<4>::T val4, typename FieldType<4>::T val5, typename FieldType<4>::T val6, typename FieldType<4>::T val7, typename FieldType<4>::T val8);
     506template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::fill8(typename FieldType<8>::T val1, typename FieldType<8>::T val2, typename FieldType<8>::T val3, typename FieldType<8>::T val4, typename FieldType<8>::T val5, typename FieldType<8>::T val6, typename FieldType<8>::T val7, typename FieldType<8>::T val8);
     507template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::fill8(typename FieldType<16>::T val1, typename FieldType<16>::T val2, typename FieldType<16>::T val3, typename FieldType<16>::T val4, typename FieldType<16>::T val5, typename FieldType<16>::T val6, typename FieldType<16>::T val7, typename FieldType<16>::T val8);
    492508
    493509//Implementation Part
     
    900916
    901917//The total number of operations is 2.0
    902 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::srli(bitblock128_t arg1)
     918template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::srli(bitblock128_t arg1)
    903919{
    904920        return simd_and(simd128<32>::srli<sh>(arg1), simd128<2>::constant<((3)>>sh)>());
     
    906922
    907923//The total number of operations is 2.0
    908 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::srli(bitblock128_t arg1)
     924template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::srli(bitblock128_t arg1)
    909925{
    910926        return simd_and(simd128<32>::srli<sh>(arg1), simd128<4>::constant<((15)>>sh)>());
     
    912928
    913929//The total number of operations is 2.0
    914 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::srli(bitblock128_t arg1)
     930template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::srli(bitblock128_t arg1)
    915931{
    916932        return simd_and(simd128<32>::srli<sh>(arg1), simd128<8>::constant<((255)>>sh)>());
     
    918934
    919935//The total number of operations is 1.0
    920 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::srli(bitblock128_t arg1)
     936template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::srli(bitblock128_t arg1)
    921937{
    922938        return _mm_srli_epi16(arg1, (int32_t)(sh));
     
    924940
    925941//The total number of operations is 1.0
    926 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::srli(bitblock128_t arg1)
     942template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::srli(bitblock128_t arg1)
    927943{
    928944        return _mm_srli_epi32(arg1, (int32_t)(sh));
     
    930946
    931947//The total number of operations is 1.0
    932 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srli(bitblock128_t arg1)
     948template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srli(bitblock128_t arg1)
    933949{
    934950        return _mm_srli_epi64(arg1, (int32_t)(sh));
     
    936952
    937953//The total number of operations is 2.33333333333
    938 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srli(bitblock128_t arg1)
     954template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srli(bitblock128_t arg1)
    939955{
    940956        return (((sh%8) == 0) ? _mm_srli_si128(arg1, (int32_t)((sh/8))) : ((sh >= 64) ? simd128<64>::srli<(sh&63)>(_mm_srli_si128(arg1, (int32_t)(8))) : simd_or(simd128<64>::srli<sh>(arg1), _mm_srli_si128(simd128<64>::slli<((128-sh)&63)>(arg1), (int32_t)(8)))));
     
    11961212
    11971213//The total number of operations is 2.0
    1198 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::slli(bitblock128_t arg1)
     1214template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::slli(bitblock128_t arg1)
    11991215{
    12001216        return simd_and(simd128<32>::slli<sh>(arg1), simd128<2>::constant<(((3)<<sh)&(3))>());
     
    12021218
    12031219//The total number of operations is 2.0
    1204 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::slli(bitblock128_t arg1)
     1220template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::slli(bitblock128_t arg1)
    12051221{
    12061222        return simd_and(simd128<32>::slli<sh>(arg1), simd128<4>::constant<(((15)<<sh)&(15))>());
     
    12081224
    12091225//The total number of operations is 2.0
    1210 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::slli(bitblock128_t arg1)
     1226template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::slli(bitblock128_t arg1)
    12111227{
    12121228        return simd_and(simd128<32>::slli<sh>(arg1), simd128<8>::constant<(((255)<<sh)&(255))>());
     
    12141230
    12151231//The total number of operations is 1.0
    1216 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::slli(bitblock128_t arg1)
     1232template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::slli(bitblock128_t arg1)
    12171233{
    12181234        return _mm_slli_epi16(arg1, (int32_t)(sh));
     
    12201236
    12211237//The total number of operations is 1.0
    1222 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::slli(bitblock128_t arg1)
     1238template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::slli(bitblock128_t arg1)
    12231239{
    12241240        return _mm_slli_epi32(arg1, (int32_t)(sh));
     
    12261242
    12271243//The total number of operations is 1.0
    1228 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::slli(bitblock128_t arg1)
     1244template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::slli(bitblock128_t arg1)
    12291245{
    12301246        return _mm_slli_epi64(arg1, (int32_t)(sh));
     
    12321248
    12331249//The total number of operations is 2.33333333333
    1234 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::slli(bitblock128_t arg1)
     1250template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::slli(bitblock128_t arg1)
    12351251{
    12361252        return (((sh%8) == 0) ? _mm_slli_si128(arg1, (int32_t)((sh/8))) : ((sh >= 64) ? simd128<64>::slli<(sh&63)>(_mm_slli_si128(arg1, (int32_t)(8))) : simd_or(simd128<64>::slli<sh>(arg1), _mm_slli_si128(simd128<64>::srli<((128-sh)&63)>(arg1), (int32_t)(8)))));
     
    13931409
    13941410//The total number of operations is 0
    1395 template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::constant()
     1411template <> template <typename FieldType<1>::T val> IDISA_ALWAYS_INLINE bitblock128_t simd128<1>::constant()
    13961412{
    13971413        return simd128<32>::constant<(-1*val)>();
     
    13991415
    14001416//The total number of operations is 0
    1401 template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::constant()
     1417template <> template <typename FieldType<2>::T val> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::constant()
    14021418{
    14031419        return simd128<(4)>::constant<((val<<2)|(val&(3)))>();
     
    14051421
    14061422//The total number of operations is 0
    1407 template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::constant()
     1423template <> template <typename FieldType<4>::T val> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::constant()
    14081424{
    14091425        return simd128<(8)>::constant<((val<<4)|(val&(15)))>();
     
    14111427
    14121428//The total number of operations is 0
    1413 template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::constant()
     1429template <> template <typename FieldType<8>::T val> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::constant()
    14141430{
    14151431        return _mm_set1_epi8((int32_t)(val));
     
    14171433
    14181434//The total number of operations is 0
    1419 template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::constant()
     1435template <> template <typename FieldType<16>::T val> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::constant()
    14201436{
    14211437        return _mm_set1_epi16((int32_t)(val));
     
    14231439
    14241440//The total number of operations is 0
    1425 template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::constant()
     1441template <> template <typename FieldType<32>::T val> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::constant()
    14261442{
    14271443        return _mm_set1_epi32((int32_t)(val));
     
    14291445
    14301446//The total number of operations is 0
    1431 template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::constant()
     1447template <> template <typename FieldType<64>::T val> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::constant()
    14321448{
    14331449        return _mm_set_epi32((int32_t)((val>>32)), (int32_t)(val), (int32_t)((val>>32)), (int32_t)(val));
     
    14351451
    14361452//The total number of operations is 0
    1437 template <> template <uint64_t val> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::constant()
    1438 {
    1439         return _mm_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)((val>>32)), (int32_t)(val));
     1453template <> template <typename FieldType<128>::T val> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::constant()
     1454{
     1455        return _mm_set_epi32(0, 0, (int32_t)((val>>32)), (int32_t)(val));
    14401456}
    14411457
     
    16951711
    16961712//The total number of operations is 4.0
    1697 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::srai(bitblock128_t arg1)
     1713template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<2>::srai(bitblock128_t arg1)
    16981714{
    16991715        return ((sh == 0) ? arg1 : simd_or(simd_and(simd128<2>::himask(), arg1), simd128<2>::srli<1>(arg1)));
     
    17011717
    17021718//The total number of operations is 10.0
    1703 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::srai(bitblock128_t arg1)
     1719template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<4>::srai(bitblock128_t arg1)
    17041720{
    17051721        bitblock128_t tmp = simd128<4>::srli<((sh >= 4) ? (3) : ((sh < 0) ? 0 : sh))>(arg1);
     
    17081724
    17091725//The total number of operations is 5.0
    1710 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::srai(bitblock128_t arg1)
     1726template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<8>::srai(bitblock128_t arg1)
    17111727{
    17121728        bitblock128_t tmp = simd128<8>::srli<((sh >= 8) ? (7) : ((sh < 0) ? 0 : sh))>(arg1);
     
    17151731
    17161732//The total number of operations is 1.0
    1717 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::srai(bitblock128_t arg1)
     1733template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<16>::srai(bitblock128_t arg1)
    17181734{
    17191735        return _mm_srai_epi16(arg1, (int32_t)(sh));
     
    17211737
    17221738//The total number of operations is 1.0
    1723 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::srai(bitblock128_t arg1)
     1739template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<32>::srai(bitblock128_t arg1)
    17241740{
    17251741        return _mm_srai_epi32(arg1, (int32_t)(sh));
     
    17271743
    17281744//The total number of operations is 4.5
    1729 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srai(bitblock128_t arg1)
     1745template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<64>::srai(bitblock128_t arg1)
    17301746{
    17311747        return simd_or(simd_and(simd128<64>::himask(), simd128<(32)>::srai<((sh < (32)) ? sh : (32))>(arg1)), ((sh <= (32)) ? simd128<64>::srli<sh>(arg1) : simd128<(32)>::srai<(sh-(32))>(simd128<64>::srli<(32)>(arg1))));
     
    17331749
    17341750//The total number of operations is 11.0833333333
    1735 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srai(bitblock128_t arg1)
     1751template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t simd128<128>::srai(bitblock128_t arg1)
    17361752{
    17371753        return simd_or(simd_and(simd128<128>::himask(), simd128<(64)>::srai<((sh < (64)) ? sh : (64))>(arg1)), ((sh <= (64)) ? simd128<128>::srli<sh>(arg1) : simd128<(64)>::srai<(sh-(64))>(simd128<128>::srli<(64)>(arg1))));
     
    20252041
    20262042//The total number of operations is 24.0
    2027 template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<4>::signmask(bitblock128_t arg1)
     2043template <> IDISA_ALWAYS_INLINE typename FieldType<128/4>::T hsimd128<4>::signmask(bitblock128_t arg1)
    20282044{
    20292045        uint64_t tmpAns1 = hsimd128<(8)>::signmask(esimd128<4>::mergeh(arg1, simd128<4>::constant<0>()));
     
    20332049
    20342050//The total number of operations is 1.0
    2035 template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<8>::signmask(bitblock128_t arg1)
     2051template <> IDISA_ALWAYS_INLINE typename FieldType<128/8>::T hsimd128<8>::signmask(bitblock128_t arg1)
    20362052{
    20372053        return _mm_movemask_epi8(arg1);
     
    20392055
    20402056//The total number of operations is 2.0
    2041 template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<16>::signmask(bitblock128_t arg1)
     2057template <> IDISA_ALWAYS_INLINE typename FieldType<128/16>::T hsimd128<16>::signmask(bitblock128_t arg1)
    20422058{
    20432059        return hsimd128<(8)>::signmask(hsimd128<16>::packss(simd128<16>::constant<0>(), arg1));
     
    20452061
    20462062//The total number of operations is 1.0
    2047 template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<32>::signmask(bitblock128_t arg1)
     2063template <> IDISA_ALWAYS_INLINE typename FieldType<128/32>::T hsimd128<32>::signmask(bitblock128_t arg1)
    20482064{
    20492065        return _mm_movemask_ps(_mm_castsi128_ps(arg1));
     
    20512067
    20522068//The total number of operations is 1.0
    2053 template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<64>::signmask(bitblock128_t arg1)
     2069template <> IDISA_ALWAYS_INLINE typename FieldType<128/64>::T hsimd128<64>::signmask(bitblock128_t arg1)
    20542070{
    20552071        return _mm_movemask_pd(_mm_castsi128_pd(arg1));
     
    20572073
    20582074//The total number of operations is 6.33333333333
    2059 template <> IDISA_ALWAYS_INLINE uint64_t hsimd128<128>::signmask(bitblock128_t arg1)
     2075template <> IDISA_ALWAYS_INLINE typename FieldType<128/128>::T hsimd128<128>::signmask(bitblock128_t arg1)
    20602076{
    20612077        return hsimd128<(64)>::signmask(hsimd128<128>::packh(simd128<128>::constant<0>(), arg1));
     
    24982514
    24992515//The total number of operations is 13.6666666667
    2500 template <> template <uint64_t msk> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::shufflei(bitblock128_t arg1)
     2516template <> template <uint8_t msk> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::shufflei(bitblock128_t arg1)
    25012517{
    25022518        bitblock128_t tmphi = _mm_shufflehi_epi16(arg1, (int32_t)((shufflemask8_to_shufflemask4(msk)>>8)));
     
    25162532
    25172533//The total number of operations is 1.0
    2518 template <> template <uint64_t msk> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::shufflei(bitblock128_t arg1)
     2534template <> template <uint8_t msk> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::shufflei(bitblock128_t arg1)
    25192535{
    25202536        return _mm_shuffle_epi32(arg1, (int32_t)(msk));
     
    25222538
    25232539//The total number of operations is 1.0
    2524 template <> template <uint64_t msk> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::shufflei(bitblock128_t arg1)
     2540template <> template <uint8_t msk> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::shufflei(bitblock128_t arg1)
    25252541{
    25262542        return mvmd128<32>::shufflei<shufflemask4_from_shufflemask2(msk)>(arg1);
     
    25282544
    25292545//The total number of operations is 5.66666666667
    2530 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::dsrli(bitblock128_t arg1, bitblock128_t arg2)
     2546template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::dsrli(bitblock128_t arg1, bitblock128_t arg2)
    25312547{
    25322548        return simd_or(mvmd128<2>::srli<sh>(arg1), mvmd128<2>::slli<((64)-sh)>(arg2));
     
    25342550
    25352551//The total number of operations is 5.66666666667
    2536 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::dsrli(bitblock128_t arg1, bitblock128_t arg2)
     2552template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::dsrli(bitblock128_t arg1, bitblock128_t arg2)
    25372553{
    25382554        return simd_or(mvmd128<4>::srli<sh>(arg1), mvmd128<4>::slli<((32)-sh)>(arg2));
     
    25402556
    25412557//The total number of operations is 3.0
    2542 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::dsrli(bitblock128_t arg1, bitblock128_t arg2)
     2558template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::dsrli(bitblock128_t arg1, bitblock128_t arg2)
    25432559{
    25442560        return simd_or(mvmd128<8>::srli<sh>(arg1), mvmd128<8>::slli<((16)-sh)>(arg2));
     
    25462562
    25472563//The total number of operations is 3.0
    2548 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::dsrli(bitblock128_t arg1, bitblock128_t arg2)
     2564template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::dsrli(bitblock128_t arg1, bitblock128_t arg2)
    25492565{
    25502566        return simd_or(mvmd128<16>::srli<sh>(arg1), mvmd128<16>::slli<((8)-sh)>(arg2));
     
    25522568
    25532569//The total number of operations is 3.0
    2554 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::dsrli(bitblock128_t arg1, bitblock128_t arg2)
     2570template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::dsrli(bitblock128_t arg1, bitblock128_t arg2)
    25552571{
    25562572        return simd_or(mvmd128<32>::srli<sh>(arg1), mvmd128<32>::slli<((4)-sh)>(arg2));
     
    25582574
    25592575//The total number of operations is 3.0
    2560 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::dsrli(bitblock128_t arg1, bitblock128_t arg2)
     2576template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::dsrli(bitblock128_t arg1, bitblock128_t arg2)
    25612577{
    25622578        return simd_or(mvmd128<64>::srli<sh>(arg1), mvmd128<64>::slli<((2)-sh)>(arg2));
     
    25642580
    25652581//The total number of operations is 3.0
    2566 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::dsrli(bitblock128_t arg1, bitblock128_t arg2)
     2582template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::dsrli(bitblock128_t arg1, bitblock128_t arg2)
    25672583{
    25682584        return simd_or(mvmd128<128>::srli<sh>(arg1), mvmd128<128>::slli<((1)-sh)>(arg2));
     
    25702586
    25712587//The total number of operations is 1.0
    2572 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill(uint64_t val1)
     2588template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill(typename FieldType<1>::T val1)
    25732589{
    25742590        return mvmd128<32>::fill((-1*val1));
     
    25762592
    25772593//The total number of operations is 1.0
    2578 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::fill(uint64_t val1)
     2594template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::fill(typename FieldType<2>::T val1)
    25792595{
    25802596        return mvmd128<(4)>::fill(((val1<<2)|val1));
     
    25822598
    25832599//The total number of operations is 1.0
    2584 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::fill(uint64_t val1)
     2600template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::fill(typename FieldType<4>::T val1)
    25852601{
    25862602        return mvmd128<(8)>::fill(((val1<<4)|val1));
     
    25882604
    25892605//The total number of operations is 1.0
    2590 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::fill(uint64_t val1)
     2606template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::fill(typename FieldType<8>::T val1)
    25912607{
    25922608        return _mm_set1_epi8((int32_t)(val1));
     
    25942610
    25952611//The total number of operations is 1.0
    2596 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::fill(uint64_t val1)
     2612template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::fill(typename FieldType<16>::T val1)
    25972613{
    25982614        return _mm_set1_epi16((int32_t)(val1));
     
    26002616
    26012617//The total number of operations is 1.0
    2602 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::fill(uint64_t val1)
     2618template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::fill(typename FieldType<32>::T val1)
    26032619{
    26042620        return _mm_set1_epi32((int32_t)(val1));
     
    26062622
    26072623//The total number of operations is 1.0
    2608 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::fill(uint64_t val1)
     2624template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::fill(typename FieldType<64>::T val1)
    26092625{
    26102626        return _mm_set_epi32((int32_t)((val1>>32)), (int32_t)(val1), (int32_t)((val1>>32)), (int32_t)(val1));
     
    26122628
    26132629//The total number of operations is 1.0
    2614 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::fill(uint64_t val1)
     2630template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::fill(typename FieldType<128>::T val1)
    26152631{
    26162632        return _mm_set_epi32((int32_t)(0), (int32_t)(0), (int32_t)((val1>>32)), (int32_t)(val1));
     
    26182634
    26192635//The total number of operations is 1.0
    2620 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd128<1>::extract(bitblock128_t arg1)
     2636template <> template <uint8_t pos> IDISA_ALWAYS_INLINE typename FieldType<1>::T mvmd128<1>::extract(bitblock128_t arg1)
    26212637{
    26222638        return (((pos%2) == 0) ? (mvmd128<(2)>::extract<(pos/2)>(arg1)&(1)) : (mvmd128<(2)>::extract<(pos/2)>(arg1)>>1));
     
    26242640
    26252641//The total number of operations is 1.0
    2626 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd128<2>::extract(bitblock128_t arg1)
     2642template <> template <uint8_t pos> IDISA_ALWAYS_INLINE typename FieldType<2>::T mvmd128<2>::extract(bitblock128_t arg1)
    26272643{
    26282644        return (((pos%2) == 0) ? (mvmd128<(4)>::extract<(pos/2)>(arg1)&(3)) : (mvmd128<(4)>::extract<(pos/2)>(arg1)>>2));
     
    26302646
    26312647//The total number of operations is 1.0
    2632 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd128<4>::extract(bitblock128_t arg1)
     2648template <> template <uint8_t pos> IDISA_ALWAYS_INLINE typename FieldType<4>::T mvmd128<4>::extract(bitblock128_t arg1)
    26332649{
    26342650        return (((pos%2) == 0) ? (mvmd128<(8)>::extract<(pos/2)>(arg1)&(15)) : (mvmd128<(8)>::extract<(pos/2)>(arg1)>>4));
     
    26362652
    26372653//The total number of operations is 1.0
    2638 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd128<8>::extract(bitblock128_t arg1)
     2654template <> template <uint8_t pos> IDISA_ALWAYS_INLINE typename FieldType<8>::T mvmd128<8>::extract(bitblock128_t arg1)
    26392655{
    26402656        return (((pos%2) == 0) ? (mvmd128<(16)>::extract<(pos/2)>(arg1)&(255)) : (mvmd128<(16)>::extract<(pos/2)>(arg1)>>8));
     
    26422658
    26432659//The total number of operations is 1.0
    2644 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd128<16>::extract(bitblock128_t arg1)
     2660template <> template <uint8_t pos> IDISA_ALWAYS_INLINE typename FieldType<16>::T mvmd128<16>::extract(bitblock128_t arg1)
    26452661{
    26462662        return (65535&_mm_extract_epi16(arg1, (int32_t)(pos)));
     
    26482664
    26492665//The total number of operations is 2.0
    2650 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd128<32>::extract(bitblock128_t arg1)
     2666template <> template <uint8_t pos> IDISA_ALWAYS_INLINE typename FieldType<32>::T mvmd128<32>::extract(bitblock128_t arg1)
    26512667{
    26522668        return ((mvmd128<(16)>::extract<((2*pos)+1)>(arg1)<<(16))|mvmd128<(16)>::extract<(2*pos)>(arg1));
     
    26542670
    26552671//The total number of operations is 4.0
    2656 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE uint64_t mvmd128<64>::extract(bitblock128_t arg1)
    2657 {
    2658         return ((mvmd128<(32)>::extract<((2*pos)+1)>(arg1)<<(32))|mvmd128<(32)>::extract<(2*pos)>(arg1));
     2672template <> template <uint8_t pos> IDISA_ALWAYS_INLINE typename FieldType<64>::T mvmd128<64>::extract(bitblock128_t arg1)
     2673{
     2674        return (((typename FieldType<64>::T)(mvmd128<(32)>::extract<((2*pos)+1)>(arg1))<<(32))|mvmd128<(32)>::extract<(2*pos)>(arg1));
    26592675}
    26602676
    26612677//The total number of operations is 12.6666666667
    2662 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::splat(bitblock128_t arg1)
     2678template <> template <uint8_t pos> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::splat(bitblock128_t arg1)
    26632679{
    26642680        return simd128<128>::sub(simd128<128>::constant<0>(), simd_and(simd128<128>::constant<1>(), simd128<128>::srli<pos>(arg1)));
     
    26662682
    26672683//The total number of operations is 13.0
    2668 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::splat(bitblock128_t arg1)
     2684template <> template <uint8_t pos> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::splat(bitblock128_t arg1)
    26692685{
    26702686        bitblock128_t tmpArg = (((pos%2) == 0) ? simd128<(4)>::slli<2>(arg1) : simd128<(4)>::srli<2>(arg1));
     
    26742690
    26752691//The total number of operations is 9.0
    2676 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::splat(bitblock128_t arg1)
     2692template <> template <uint8_t pos> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::splat(bitblock128_t arg1)
    26772693{
    26782694        bitblock128_t tmpArg = (((pos%2) == 0) ? simd128<(8)>::slli<4>(arg1) : simd128<(8)>::srli<4>(arg1));
     
    26822698
    26832699//The total number of operations is 5.0
    2684 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::splat(bitblock128_t arg1)
     2700template <> template <uint8_t pos> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::splat(bitblock128_t arg1)
    26852701{
    26862702        bitblock128_t tmpArg = (((pos%2) == 0) ? simd128<(16)>::slli<8>(arg1) : simd128<(16)>::srli<8>(arg1));
     
    26902706
    26912707//The total number of operations is 2.0
    2692 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::splat(bitblock128_t arg1)
     2708template <> template <uint8_t pos> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::splat(bitblock128_t arg1)
    26932709{
    26942710        return mvmd128<16>::fill(_mm_extract_epi16(arg1, (int32_t)(pos)));
     
    26962712
    26972713//The total number of operations is 1.0
    2698 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::splat(bitblock128_t arg1)
     2714template <> template <uint8_t pos> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::splat(bitblock128_t arg1)
    26992715{
    27002716        return mvmd128<32>::shufflei<shufflemask4(pos, pos, pos, pos)>(arg1);
     
    27022718
    27032719//The total number of operations is 5.0
    2704 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::splat(bitblock128_t arg1)
     2720template <> template <uint8_t pos> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::splat(bitblock128_t arg1)
    27052721{
    27062722        return simd128<1>::ifh(simd128<64>::himask(), mvmd128<(32)>::splat<((2*pos)+1)>(arg1), mvmd128<(32)>::splat<(2*pos)>(arg1));
     
    27082724
    27092725//The total number of operations is 13.0
    2710 template <> template <uint64_t pos> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::splat(bitblock128_t arg1)
     2726template <> template <uint8_t pos> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::splat(bitblock128_t arg1)
    27112727{
    27122728        return simd128<1>::ifh(simd128<128>::himask(), mvmd128<(64)>::splat<((2*pos)+1)>(arg1), mvmd128<(64)>::splat<(2*pos)>(arg1));
     
    27142730
    27152731//The total number of operations is 15.0
    2716 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16)
     2732template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill16(typename FieldType<1>::T val1, typename FieldType<1>::T val2, typename FieldType<1>::T val3, typename FieldType<1>::T val4, typename FieldType<1>::T val5, typename FieldType<1>::T val6, typename FieldType<1>::T val7, typename FieldType<1>::T val8, typename FieldType<1>::T val9, typename FieldType<1>::T val10, typename FieldType<1>::T val11, typename FieldType<1>::T val12, typename FieldType<1>::T val13, typename FieldType<1>::T val14, typename FieldType<1>::T val15, typename FieldType<1>::T val16)
    27172733{
    27182734        return simd_or(mvmd128<(2)>::fill16((val1<<1), (val3<<1), (val5<<1), (val7<<1), (val9<<1), (val11<<1), (val13<<1), (val15<<1), (val1<<1), (val3<<1), (val5<<1), (val7<<1), (val9<<1), (val11<<1), (val13<<1), (val15<<1)), mvmd128<(2)>::fill16((val2&(1)), (val4&(1)), (val6&(1)), (val8&(1)), (val10&(1)), (val12&(1)), (val14&(1)), (val16&(1)), (val2&(1)), (val4&(1)), (val6&(1)), (val8&(1)), (val10&(1)), (val12&(1)), (val14&(1)), (val16&(1))));
     
    27202736
    27212737//The total number of operations is 7.0
    2722 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16)
     2738template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::fill16(typename FieldType<2>::T val1, typename FieldType<2>::T val2, typename FieldType<2>::T val3, typename FieldType<2>::T val4, typename FieldType<2>::T val5, typename FieldType<2>::T val6, typename FieldType<2>::T val7, typename FieldType<2>::T val8, typename FieldType<2>::T val9, typename FieldType<2>::T val10, typename FieldType<2>::T val11, typename FieldType<2>::T val12, typename FieldType<2>::T val13, typename FieldType<2>::T val14, typename FieldType<2>::T val15, typename FieldType<2>::T val16)
    27232739{
    27242740        return simd_or(mvmd128<(4)>::fill16((val1<<2), (val3<<2), (val5<<2), (val7<<2), (val9<<2), (val11<<2), (val13<<2), (val15<<2), (val1<<2), (val3<<2), (val5<<2), (val7<<2), (val9<<2), (val11<<2), (val13<<2), (val15<<2)), mvmd128<(4)>::fill16((val2&(3)), (val4&(3)), (val6&(3)), (val8&(3)), (val10&(3)), (val12&(3)), (val14&(3)), (val16&(3)), (val2&(3)), (val4&(3)), (val6&(3)), (val8&(3)), (val10&(3)), (val12&(3)), (val14&(3)), (val16&(3))));
     
    27262742
    27272743//The total number of operations is 3.0
    2728 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16)
     2744template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::fill16(typename FieldType<4>::T val1, typename FieldType<4>::T val2, typename FieldType<4>::T val3, typename FieldType<4>::T val4, typename FieldType<4>::T val5, typename FieldType<4>::T val6, typename FieldType<4>::T val7, typename FieldType<4>::T val8, typename FieldType<4>::T val9, typename FieldType<4>::T val10, typename FieldType<4>::T val11, typename FieldType<4>::T val12, typename FieldType<4>::T val13, typename FieldType<4>::T val14, typename FieldType<4>::T val15, typename FieldType<4>::T val16)
    27292745{
    27302746        return simd_or(mvmd128<(8)>::fill16((val1<<4), (val3<<4), (val5<<4), (val7<<4), (val9<<4), (val11<<4), (val13<<4), (val15<<4), (val1<<4), (val3<<4), (val5<<4), (val7<<4), (val9<<4), (val11<<4), (val13<<4), (val15<<4)), mvmd128<(8)>::fill16((val2&(15)), (val4&(15)), (val6&(15)), (val8&(15)), (val10&(15)), (val12&(15)), (val14&(15)), (val16&(15)), (val2&(15)), (val4&(15)), (val6&(15)), (val8&(15)), (val10&(15)), (val12&(15)), (val14&(15)), (val16&(15))));
     
    27322748
    27332749//The total number of operations is 1.0
    2734 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::fill16(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8, uint64_t val9, uint64_t val10, uint64_t val11, uint64_t val12, uint64_t val13, uint64_t val14, uint64_t val15, uint64_t val16)
     2750template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::fill16(typename FieldType<8>::T val1, typename FieldType<8>::T val2, typename FieldType<8>::T val3, typename FieldType<8>::T val4, typename FieldType<8>::T val5, typename FieldType<8>::T val6, typename FieldType<8>::T val7, typename FieldType<8>::T val8, typename FieldType<8>::T val9, typename FieldType<8>::T val10, typename FieldType<8>::T val11, typename FieldType<8>::T val12, typename FieldType<8>::T val13, typename FieldType<8>::T val14, typename FieldType<8>::T val15, typename FieldType<8>::T val16)
    27352751{
    27362752        return _mm_set_epi8((int32_t)(val1), (int32_t)(val2), (int32_t)(val3), (int32_t)(val4), (int32_t)(val5), (int32_t)(val6), (int32_t)(val7), (int32_t)(val8), (int32_t)(val9), (int32_t)(val10), (int32_t)(val11), (int32_t)(val12), (int32_t)(val13), (int32_t)(val14), (int32_t)(val15), (int32_t)(val16));
     
    27382754
    27392755//The total number of operations is 5.0
    2740 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4)
     2756template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill4(typename FieldType<1>::T val1, typename FieldType<1>::T val2, typename FieldType<1>::T val3, typename FieldType<1>::T val4)
    27412757{
    27422758        return simd128<1>::ifh(simd128<(4)>::himask(), mvmd128<1>::fill2(val1, val2), mvmd128<1>::fill2(val3, val4));
     
    27442760
    27452761//The total number of operations is 5.0
    2746 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4)
     2762template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::fill4(typename FieldType<2>::T val1, typename FieldType<2>::T val2, typename FieldType<2>::T val3, typename FieldType<2>::T val4)
    27472763{
    27482764        return simd128<1>::ifh(simd128<(8)>::himask(), mvmd128<2>::fill2(val1, val2), mvmd128<2>::fill2(val3, val4));
     
    27502766
    27512767//The total number of operations is 5.0
    2752 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4)
     2768template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::fill4(typename FieldType<4>::T val1, typename FieldType<4>::T val2, typename FieldType<4>::T val3, typename FieldType<4>::T val4)
    27532769{
    27542770        return simd128<1>::ifh(simd128<(16)>::himask(), mvmd128<4>::fill2(val1, val2), mvmd128<4>::fill2(val3, val4));
     
    27562772
    27572773//The total number of operations is 5.0
    2758 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4)
     2774template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::fill4(typename FieldType<8>::T val1, typename FieldType<8>::T val2, typename FieldType<8>::T val3, typename FieldType<8>::T val4)
    27592775{
    27602776        return simd128<1>::ifh(simd128<(32)>::himask(), mvmd128<8>::fill2(val1, val2), mvmd128<8>::fill2(val3, val4));
     
    27622778
    27632779//The total number of operations is 3.0
    2764 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4)
     2780template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::fill4(typename FieldType<16>::T val1, typename FieldType<16>::T val2, typename FieldType<16>::T val3, typename FieldType<16>::T val4)
    27652781{
    27662782        return simd_or(mvmd128<(32)>::fill4((val1<<16), (val3<<16), (val1<<16), (val3<<16)), mvmd128<(32)>::fill4((val2&(65535)), (val4&(65535)), (val2&(65535)), (val4&(65535))));
     
    27682784
    27692785//The total number of operations is 1.0
    2770 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::fill4(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4)
     2786template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::fill4(typename FieldType<32>::T val1, typename FieldType<32>::T val2, typename FieldType<32>::T val3, typename FieldType<32>::T val4)
    27712787{
    27722788        return _mm_set_epi32((int32_t)(val1), (int32_t)(val2), (int32_t)(val3), (int32_t)(val4));
     
    27742790
    27752791//The total number of operations is 2.33333333333
    2776 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::srli(bitblock128_t arg1)
     2792template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::srli(bitblock128_t arg1)
    27772793{
    27782794        return simd128<128>::srli<(sh*2)>(arg1);
     
    27802796
    27812797//The total number of operations is 2.33333333333
    2782 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::srli(bitblock128_t arg1)
     2798template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::srli(bitblock128_t arg1)
    27832799{
    27842800        return simd128<128>::srli<(sh*4)>(arg1);
     
    27862802
    27872803//The total number of operations is 1.0
    2788 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::srli(bitblock128_t arg1)
     2804template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::srli(bitblock128_t arg1)
    27892805{
    27902806        return _mm_srli_si128(arg1, (int32_t)(sh));
     
    27922808
    27932809//The total number of operations is 1.0
    2794 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::srli(bitblock128_t arg1)
     2810template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::srli(bitblock128_t arg1)
    27952811{
    27962812        return mvmd128<(8)>::srli<(sh*2)>(arg1);
     
    27982814
    27992815//The total number of operations is 1.0
    2800 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::srli(bitblock128_t arg1)
     2816template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::srli(bitblock128_t arg1)
    28012817{
    28022818        return mvmd128<(16)>::srli<(sh*2)>(arg1);
     
    28042820
    28052821//The total number of operations is 1.0
    2806 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::srli(bitblock128_t arg1)
     2822template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::srli(bitblock128_t arg1)
    28072823{
    28082824        return mvmd128<(32)>::srli<(sh*2)>(arg1);
     
    28102826
    28112827//The total number of operations is 1.0
    2812 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::srli(bitblock128_t arg1)
     2828template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::srli(bitblock128_t arg1)
    28132829{
    28142830        return mvmd128<(64)>::srli<(sh*2)>(arg1);
     
    28162832
    28172833//The total number of operations is 1.0
    2818 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill2(uint64_t val1, uint64_t val2)
     2834template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill2(typename FieldType<1>::T val1, typename FieldType<1>::T val2)
    28192835{
    28202836        return mvmd128<(2)>::fill(((val1<<1)|(val2&(1))));
     
    28222838
    28232839//The total number of operations is 1.0
    2824 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::fill2(uint64_t val1, uint64_t val2)
     2840template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::fill2(typename FieldType<2>::T val1, typename FieldType<2>::T val2)
    28252841{
    28262842        return mvmd128<(4)>::fill(((val1<<2)|(val2&(3))));
     
    28282844
    28292845//The total number of operations is 1.0
    2830 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::fill2(uint64_t val1, uint64_t val2)
     2846template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::fill2(typename FieldType<4>::T val1, typename FieldType<4>::T val2)
    28312847{
    28322848        return mvmd128<(8)>::fill(((val1<<4)|(val2&(15))));
     
    28342850
    28352851//The total number of operations is 1.0
    2836 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::fill2(uint64_t val1, uint64_t val2)
     2852template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::fill2(typename FieldType<8>::T val1, typename FieldType<8>::T val2)
    28372853{
    28382854        return mvmd128<(16)>::fill(((val1<<8)|(val2&(255))));
     
    28402856
    28412857//The total number of operations is 1.0
    2842 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::fill2(uint64_t val1, uint64_t val2)
     2858template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::fill2(typename FieldType<16>::T val1, typename FieldType<16>::T val2)
    28432859{
    28442860        return mvmd128<(32)>::fill(((val1<<16)|(val2&(65535))));
     
    28462862
    28472863//The total number of operations is 5.0
    2848 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::fill2(uint64_t val1, uint64_t val2)
     2864template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::fill2(typename FieldType<32>::T val1, typename FieldType<32>::T val2)
    28492865{
    28502866        return simd128<1>::ifh(simd128<(64)>::himask(), mvmd128<32>::fill(val1), mvmd128<32>::fill(val2));
     
    28522868
    28532869//The total number of operations is 5.0
    2854 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::fill2(uint64_t val1, uint64_t val2)
     2870template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::fill2(typename FieldType<64>::T val1, typename FieldType<64>::T val2)
    28552871{
    28562872        return simd128<1>::ifh(simd128<(128)>::himask(), mvmd128<64>::fill(val1), mvmd128<64>::fill(val2));
     
    28582874
    28592875//The total number of operations is 5.66666666667
    2860 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::dslli(bitblock128_t arg1, bitblock128_t arg2)
     2876template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::dslli(bitblock128_t arg1, bitblock128_t arg2)
    28612877{
    28622878        return simd_or(mvmd128<2>::slli<sh>(arg1), mvmd128<2>::srli<((64)-sh)>(arg2));
     
    28642880
    28652881//The total number of operations is 5.66666666667
    2866 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::dslli(bitblock128_t arg1, bitblock128_t arg2)
     2882template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::dslli(bitblock128_t arg1, bitblock128_t arg2)
    28672883{
    28682884        return simd_or(mvmd128<4>::slli<sh>(arg1), mvmd128<4>::srli<((32)-sh)>(arg2));
     
    28702886
    28712887//The total number of operations is 3.0
    2872 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::dslli(bitblock128_t arg1, bitblock128_t arg2)
     2888template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::dslli(bitblock128_t arg1, bitblock128_t arg2)
    28732889{
    28742890        return simd_or(mvmd128<8>::slli<sh>(arg1), mvmd128<8>::srli<((16)-sh)>(arg2));
     
    28762892
    28772893//The total number of operations is 3.0
    2878 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::dslli(bitblock128_t arg1, bitblock128_t arg2)
     2894template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::dslli(bitblock128_t arg1, bitblock128_t arg2)
    28792895{
    28802896        return simd_or(mvmd128<16>::slli<sh>(arg1), mvmd128<16>::srli<((8)-sh)>(arg2));
     
    28822898
    28832899//The total number of operations is 3.0
    2884 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::dslli(bitblock128_t arg1, bitblock128_t arg2)
     2900template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::dslli(bitblock128_t arg1, bitblock128_t arg2)
    28852901{
    28862902        return simd_or(mvmd128<32>::slli<sh>(arg1), mvmd128<32>::srli<((4)-sh)>(arg2));
     
    28882904
    28892905//The total number of operations is 3.0
    2890 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::dslli(bitblock128_t arg1, bitblock128_t arg2)
     2906template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::dslli(bitblock128_t arg1, bitblock128_t arg2)
    28912907{
    28922908        return simd_or(mvmd128<64>::slli<sh>(arg1), mvmd128<64>::srli<((2)-sh)>(arg2));
     
    28942910
    28952911//The total number of operations is 3.0
    2896 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::dslli(bitblock128_t arg1, bitblock128_t arg2)
     2912template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::dslli(bitblock128_t arg1, bitblock128_t arg2)
    28972913{
    28982914        return simd_or(mvmd128<128>::slli<sh>(arg1), mvmd128<128>::srli<((1)-sh)>(arg2));
     
    29002916
    29012917//The total number of operations is 2.33333333333
    2902 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::slli(bitblock128_t arg1)
     2918template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::slli(bitblock128_t arg1)
    29032919{
    29042920        return simd128<128>::slli<(sh*2)>(arg1);
     
    29062922
    29072923//The total number of operations is 2.33333333333
    2908 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::slli(bitblock128_t arg1)
     2924template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::slli(bitblock128_t arg1)
    29092925{
    29102926        return mvmd128<(2)>::slli<(sh*2)>(arg1);
     
    29122928
    29132929//The total number of operations is 1.0
    2914 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::slli(bitblock128_t arg1)
     2930template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::slli(bitblock128_t arg1)
    29152931{
    29162932        return _mm_slli_si128(arg1, (int32_t)(sh));
     
    29182934
    29192935//The total number of operations is 1.0
    2920 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::slli(bitblock128_t arg1)
     2936template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::slli(bitblock128_t arg1)
    29212937{
    29222938        return mvmd128<(8)>::slli<(sh*2)>(arg1);
     
    29242940
    29252941//The total number of operations is 1.0
    2926 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::slli(bitblock128_t arg1)
     2942template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<32>::slli(bitblock128_t arg1)
    29272943{
    29282944        return mvmd128<(16)>::slli<(sh*2)>(arg1);
     
    29302946
    29312947//The total number of operations is 1.0
    2932 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::slli(bitblock128_t arg1)
     2948template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<64>::slli(bitblock128_t arg1)
    29332949{
    29342950        return mvmd128<(32)>::slli<(sh*2)>(arg1);
     
    29362952
    29372953//The total number of operations is 1.0
    2938 template <> template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::slli(bitblock128_t arg1)
     2954template <> template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<128>::slli(bitblock128_t arg1)
    29392955{
    29402956        return mvmd128<(64)>::slli<(sh*2)>(arg1);
     
    29422958
    29432959//The total number of operations is 13.0
    2944 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8)
     2960template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<1>::fill8(typename FieldType<1>::T val1, typename FieldType<1>::T val2, typename FieldType<1>::T val3, typename FieldType<1>::T val4, typename FieldType<1>::T val5, typename FieldType<1>::T val6, typename FieldType<1>::T val7, typename FieldType<1>::T val8)
    29452961{
    29462962        return simd128<1>::ifh(simd128<(8)>::himask(), mvmd128<1>::fill4(val1, val2, val3, val4), mvmd128<1>::fill4(val5, val6, val7, val8));
     
    29482964
    29492965//The total number of operations is 13.0
    2950 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8)
     2966template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<2>::fill8(typename FieldType<2>::T val1, typename FieldType<2>::T val2, typename FieldType<2>::T val3, typename FieldType<2>::T val4, typename FieldType<2>::T val5, typename FieldType<2>::T val6, typename FieldType<2>::T val7, typename FieldType<2>::T val8)
    29512967{
    29522968        return simd128<1>::ifh(simd128<(16)>::himask(), mvmd128<2>::fill4(val1, val2, val3, val4), mvmd128<2>::fill4(val5, val6, val7, val8));
     
    29542970
    29552971//The total number of operations is 7.0
    2956 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8)
     2972template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<4>::fill8(typename FieldType<4>::T val1, typename FieldType<4>::T val2, typename FieldType<4>::T val3, typename FieldType<4>::T val4, typename FieldType<4>::T val5, typename FieldType<4>::T val6, typename FieldType<4>::T val7, typename FieldType<4>::T val8)
    29572973{
    29582974        return simd_or(mvmd128<(8)>::fill8((val1<<4), (val3<<4), (val5<<4), (val7<<4), (val1<<4), (val3<<4), (val5<<4), (val7<<4)), mvmd128<(8)>::fill8((val2&(15)), (val4&(15)), (val6&(15)), (val8&(15)), (val2&(15)), (val4&(15)), (val6&(15)), (val8&(15))));
     
    29602976
    29612977//The total number of operations is 3.0
    2962 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8)
     2978template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<8>::fill8(typename FieldType<8>::T val1, typename FieldType<8>::T val2, typename FieldType<8>::T val3, typename FieldType<8>::T val4, typename FieldType<8>::T val5, typename FieldType<8>::T val6, typename FieldType<8>::T val7, typename FieldType<8>::T val8)
    29632979{
    29642980        return simd_or(mvmd128<(16)>::fill8((val1<<8), (val3<<8), (val5<<8), (val7<<8), (val1<<8), (val3<<8), (val5<<8), (val7<<8)), mvmd128<(16)>::fill8((val2&(255)), (val4&(255)), (val6&(255)), (val8&(255)), (val2&(255)), (val4&(255)), (val6&(255)), (val8&(255))));
     
    29662982
    29672983//The total number of operations is 1.0
    2968 template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::fill8(uint64_t val1, uint64_t val2, uint64_t val3, uint64_t val4, uint64_t val5, uint64_t val6, uint64_t val7, uint64_t val8)
     2984template <> IDISA_ALWAYS_INLINE bitblock128_t mvmd128<16>::fill8(typename FieldType<16>::T val1, typename FieldType<16>::T val2, typename FieldType<16>::T val3, typename FieldType<16>::T val4, typename FieldType<16>::T val5, typename FieldType<16>::T val6, typename FieldType<16>::T val7, typename FieldType<16>::T val8)
    29692985{
    29702986        return _mm_set_epi16((int32_t)(val1), (int32_t)(val2), (int32_t)(val3), (int32_t)(val4), (int32_t)(val5), (int32_t)(val6), (int32_t)(val7), (int32_t)(val8));
     
    29843000
    29853001//The total number of operations is 2.33333333333
    2986 template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t bitblock128::srli(bitblock128_t arg1)
     3002template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t bitblock128::srli(bitblock128_t arg1)
    29873003{
    29883004        return simd128<128>::srli<sh>(arg1);
     
    29963012
    29973013//The total number of operations is 20.3333333333
    2998 IDISA_ALWAYS_INLINE uint64_t bitblock128::popcount(bitblock128_t arg1)
    2999 {
    3000         return mvmd128<64>::extract<0>(simd128<128>::popcount(arg1));
     3014IDISA_ALWAYS_INLINE uint8_t bitblock128::popcount(bitblock128_t arg1)
     3015{
     3016        return mvmd128<8>::extract<0>(simd128<128>::popcount(arg1));
    30013017}
    30023018
     
    30143030
    30153031//The total number of operations is 2.33333333333
    3016 template <uint64_t sh> IDISA_ALWAYS_INLINE bitblock128_t bitblock128::slli(bitblock128_t arg1)
     3032template <uint8_t sh> IDISA_ALWAYS_INLINE bitblock128_t bitblock128::slli(bitblock128_t arg1)
    30173033{
    30183034        return simd128<128>::slli<sh>(arg1);
Note: See TracChangeset for help on using the changeset viewer.