Changeset 338 for trunk


Ignore:
Timestamp:
Nov 20, 2009, 3:53:08 PM (10 years ago)
Author:
ksherdy
Message:

Implemented simd<8>::movemask, simd<8>::splat, simd<32>::pack, simd<64>::pack

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/sse_simd_t.h

    r337 r338  
    88#define SSE_SIMD_H
    99
    10 #include <stdio.h>
    11 
    1210/*
    1311  Replace the following SSE-version-specific include directives with the x86intrin.h meta-intrinsic header. This header is available with GCC 4.4.x.
     
    6058#include <pmmintrin.h>
    6159#endif
     60
     61#include <stdio.h>
     62
    6263typedef __m128i SIMD_type;
     64
     65void print_bit_block(const char * var_name, SIMD_type v);
     66
    6367/*------------------------------------------------------------*/
    6468/* I. SIMD bitwise logical operations */
     
    109113        /* Splat generator using the first field of a register. */
    110114        static inline SIMD_type splat(SIMD_type r);
     115  /* Move mask */
     116  static inline int movemask(SIMD_type r);
     117
    111118       
    112119        /* Shift immediate with the shift constant as a template parameter. */
     
    184191template<> template <int n> inline SIMD_type simd<2>::constant() {return simd<4>::constant<(n<<2|n)>();}
    185192
     193// Splat the first 8-bit int into all positions.
     194template <> inline SIMD_type simd<8>::splat(SIMD_type x) {
     195  return _mm_set1_epi8(*(uint8_t *)(&x));
     196}
     197
    186198// Splat the first 16-bit int into all positions.
    187199template <> inline SIMD_type simd<16>::splat(SIMD_type x) {
     
    199211  return _mm_shuffle_epi32(x,_MM_SHUFFLE(1,0,1,0));
    200212}
     213
     214// Move mask 8-bit
     215template <> inline int simd<8>::movemask(SIMD_type r) {
     216  return _mm_movemask_epi8(r);
     217}
     218
    201219
    202220/* Shift immediate operations with direct implementation by built-ins. */
     
    418436template<> inline SIMD_type simd<64>::eq(SIMD_type r1, SIMD_type r2) {
    419437
    420 #ifdef __SSE4_1__
    421   return _mm_cmpeq_epi64(r1, r2);
    422 #else
     438  #ifdef __SSE4_1__
     439    return _mm_cmpeq_epi64(r1, r2);
     440  #endif
     441
     442  // Fall back
    423443  SIMD_type t = _mm_cmpeq_epi32(r1, r2);
    424444  return simd_and(t, _mm_shuffle_epi32(t,_MM_SHUFFLE(2,3,0,1)));
    425 #endif
    426445
    427446}
     
    434453inline SIMD_type simd<16>::pack(SIMD_type r1, SIMD_type r2) {
    435454        return _mm_packus_epi16(simd_andc(r2, simd<16>::himask()), simd_andc(r1, simd<16>::himask()));
     455}
     456
     457template<>
     458inline SIMD_type simd<32>::pack(SIMD_type r1, SIMD_type r2) {
     459
     460  #ifdef __SSE4_1__
     461        return _mm_packus_epi32(simd_andc(r2, simd<32>::himask()), simd_andc(r1, simd<32>::himask()));
     462  #endif
     463
     464  #ifdef __SSSE3__
     465    return  simd_or (_mm_shuffle_epi8(simd_andc(r1, simd<32>::himask()), _mm_set_epi8(0,1,4,5,8,9,12,13,2,3,6,7,10,11,14,15)),
     466                     _mm_shuffle_epi8(simd_andc(r2, simd<32>::himask()), _mm_set_epi8(2,3,6,7,10,11,14,15,0,1,4,5,8,9,12,13)));
     467  #endif
     468
     469  // Fall back - Verify *saturated* pack implementation is ok.
     470  return _mm_packs_epi32(simd_andc(r2, simd<32>::himask()), simd_andc(r1, simd<32>::himask()));
     471
     472}
     473
     474template<>
     475inline SIMD_type simd<64>::pack(SIMD_type r1, SIMD_type r2) {
     476
     477
     478  return  simd_or(_mm_shuffle_epi32(simd_andc(r1, simd<64>::himask()), _MM_SHUFFLE(2,0,3,1)),
     479                  _mm_shuffle_epi32(simd_andc(r2, simd<64>::himask()), _MM_SHUFFLE(3,1,2,0))); // 1,3 contain 0
     480 
    436481}
    437482
Note: See TracChangeset for help on using the changeset viewer.