Ignore:
Timestamp:
Nov 18, 2013, 6:21:18 AM (6 years ago)
Author:
cameron
Message:

simd-lib updates

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icXML/icXML-devel/src/simd-lib/s2p.hpp

    r2720 r3567  
    107107   immediately convert back. */
    108108#ifdef USE_S2P_AVX
    109 #include "idisa_cpp/idisa_sse2.cpp"
    110 #define avx_select_lo128(x) \
    111         ((__m128i) _mm256_castps256_ps128(x))
    112 
    113 #define avx_select_hi128(x) \
    114         ((__m128i)(_mm256_extractf128_ps(x, 1)))
    115 
    116 #define avx_general_combine256(x, y) \
    117    (_mm256_insertf128_ps(_mm256_castps128_ps256((__m128) y), (__m128) x, 1))
    118 
    119109#define s2p_step(s0, s1, hi_mask, shift, p0, p1)  \
    120110  do {\
     
    138128#endif
    139129
     130#ifndef USE_S2P_AVX2
    140131#define s2p_bytepack(s0, s1, s2, s3, s4, s5, s6, s7, p0, p1, p2, p3, p4, p5, p6, p7) \
    141132  do {\
     
    157148        s2p_step(bit33337777_0,bit33337777_1,simd<8>::himask(),4,p3,p7);\
    158149  } while(0)
     150#endif
     151
     152#ifdef USE_S2P_AVX2
     /* s2p_step_shuf: AVX2 form of one s2p step on a pair of 256-bit blocks.
      *   shuf     - byte-shuffle control passed to _mm256_shuffle_epi8, which
      *              shuffles bytes independently within each 128-bit lane.
      *   s0, s1   - input blocks.
      *   hi_mask  - selection mask handed to simd<1>::ifh.
      *   shift    - compile-time shift count for simd<16>::srli / slli.
      *   p0, p1   - output blocks (assigned by the macro).
      * After the shuffle, _mm256_permute4x64_epi64 with 0xD8 reorders the four
      * 64-bit quadwords as 0,2,1,3 (swaps the middle two).  t0 then gathers the
      * upper 128-bit halves of x1 (into its low half) and x0 (into its high
      * half); t1 gathers the lower halves the same way.  With the shuf constant
      * built in s2p_bytepack (even-index bytes first, odd-index bytes second in
      * each lane) this leaves the odd-index bytes in t0 and the even-index
      * bytes in t1.
      * NOTE(review): presumably ifh(mask, a, b) takes bits of a where mask is
      * set and bits of b elsewhere -- confirm against the idisa library. */
     153#define s2p_step_shuf(shuf, s0, s1, hi_mask, shift, p0, p1)  \
     154  do {\
     155        BitBlock x0, x1, t0, t1;\
     156        x0 = _mm256_permute4x64_epi64(_mm256_shuffle_epi8(s0, shuf), 0xD8);\
     157        x1 = _mm256_permute4x64_epi64(_mm256_shuffle_epi8(s1, shuf), 0xD8);\
     158        t0 = _mm256_permute2x128_si256(x1, x0, 0x31);\
     159        t1 = _mm256_permute2x128_si256(x1, x0, 0x20);\
     160        p0 = simd<1>::ifh(hi_mask, t0, simd<16>::srli<shift>(t1));\
     161        p1 = simd<1>::ifh(hi_mask, simd<16>::slli<shift>(t0), t1);\
     162  } while(0)
     163
     /* s2p_bytepack (variant compiled when USE_S2P_AVX2 is defined).
      * Takes eight input blocks s0..s7 and assigns eight output blocks p0..p7
      * by applying s2p_step_shuf in three rounds of four steps each:
      *   round 1: simd<2>::himask(), shift 1 -> bit00224466_* / bit11335577_*
      *   round 2: simd<4>::himask(), shift 2 -> bit00004444_* / bit22226666_*
      *                                          bit11115555_* / bit33337777_*
      *   round 3: simd<8>::himask(), shift 4 -> p0..p7
      * The shuf constant is the same in both 128-bit lanes: it places the
      * even-index bytes (0,2,...,14) in the low 64 bits of the lane and the
      * odd-index bytes (1,3,...,15) in the high 64 bits. */
     164#define s2p_bytepack(s0, s1, s2, s3, s4, s5, s6, s7, p0, p1, p2, p3, p4, p5, p6, p7) \
     165  do {\
     166        BitBlock shuf = _mm256_set_epi32(0x0F0D0B09, 0x07050301, 0x0E0C0A08, 0x06040200, 0x0F0D0B09, 0x07050301, 0x0E0C0A08, 0x06040200);\
     167        BitBlock bit00224466_0,bit00224466_1,bit00224466_2,bit00224466_3;\
     168        BitBlock bit11335577_0,bit11335577_1,bit11335577_2,bit11335577_3;\
     169        BitBlock bit00004444_0,bit22226666_0,bit00004444_1,bit22226666_1;\
     170        BitBlock bit11115555_0,bit33337777_0,bit11115555_1,bit33337777_1;\
     171        s2p_step_shuf(shuf, s0,s1,simd<2>::himask(),1,bit00224466_0,bit11335577_0);\
     172        s2p_step_shuf(shuf, s2,s3,simd<2>::himask(),1,bit00224466_1,bit11335577_1);\
     173        s2p_step_shuf(shuf, s4,s5,simd<2>::himask(),1,bit00224466_2,bit11335577_2);\
     174        s2p_step_shuf(shuf, s6,s7,simd<2>::himask(),1,bit00224466_3,bit11335577_3);\
     175        s2p_step_shuf(shuf, bit00224466_0,bit00224466_1,simd<4>::himask(),2,bit00004444_0,bit22226666_0);\
     176        s2p_step_shuf(shuf, bit00224466_2,bit00224466_3,simd<4>::himask(),2,bit00004444_1,bit22226666_1);\
     177        s2p_step_shuf(shuf, bit11335577_0,bit11335577_1,simd<4>::himask(),2,bit11115555_0,bit33337777_0);\
     178        s2p_step_shuf(shuf, bit11335577_2,bit11335577_3,simd<4>::himask(),2,bit11115555_1,bit33337777_1);\
     179        s2p_step_shuf(shuf, bit00004444_0,bit00004444_1,simd<8>::himask(),4,p0,p4);\
     180        s2p_step_shuf(shuf, bit11115555_0,bit11115555_1,simd<8>::himask(),4,p1,p5);\
     181        s2p_step_shuf(shuf, bit22226666_0,bit22226666_1,simd<8>::himask(),4,p2,p6);\
     182        s2p_step_shuf(shuf, bit33337777_0,bit33337777_1,simd<8>::himask(),4,p3,p7);\
     183  } while(0)
     184
     185#endif
     186
     187
     188
    159189
    160190/* BitPack is selected by BLOCK_SIZE: uint16_t when BLOCK_SIZE == 128 (sizeof(BitBlock) = 16), uint32_t when BLOCK_SIZE == 256. NOTE(review): no BitPack is defined for any other BLOCK_SIZE -- confirm that is intended. */
     191#if BLOCK_SIZE == 128
    161192typedef uint16_t BitPack;
     193#endif
     194#if BLOCK_SIZE == 256
     195typedef uint32_t BitPack;
     196#endif
    162197
    163198#define movemask_step(s7, s6, s5, s4, s3, s2, s1, s0, p) \
Note: See TracChangeset for help on using the changeset viewer.