Changeset 1182


Ignore:
Timestamp:
Apr 29, 2011, 9:48:01 PM (8 years ago)
Author:
ksherdy
Message:

Add gather, scatter, and overload splat.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/sse_simd_t.h

    r736 r1182  
    101101 
    102102enum HOM_t {x,h,l};
     103
     104/* ****************************************************************************************************
     105   SYMBOL TABLE SUPPORT
     106   **************************************************************************************************** */
     107// Gather Scatter scale factor
     108typedef enum {SF1=1, SF2=2, SF4=4, SF8=8}  ScaleFactor;
     109
     110/* **************************************************************************************************** */
    103111
    104112/* simd<fw> is a template struct providing all the simd operations
     
    158166        template <HOM_t m1, HOM_t m2> static inline SIMD_type mergel(SIMD_type r1, SIMD_type r2);
    159167
     168/* ****************************************************************************************************
     169   SYMBOL TABLE SUPPORT
     170   **************************************************************************************************** */
     171       
     172        static inline SIMD_type splat(unsigned char * p);
     173
     174        /* Gather/Scatter operations. */       
     175        static inline SIMD_type gather(void * base, size_t * index, ScaleFactor sf);
     176        static inline void scatter(const SIMD_type * base, const unsigned char * index, ScaleFactor sf);
     177       
     178/* **************************************************************************************************** */     
     179
    160180};
    161181
    162 #define sisd_to_int(x) _mm_cvtsi128_si32(x)
    163 
    164 #define sisd_from_int(n) _mm_cvtsi32_si128(n)
    165 
    166 
    167 
    168 
    169 /* III.  Implementations of simd<fw> operations. */
    170 
    171 /* Constant generator functions for various field widths. */
    172 
    173 template<> inline SIMD_type simd<2>::himask() {return _mm_set1_epi32(0xAAAAAAAA);}
    174 
    175 template<> inline SIMD_type simd<4>::himask() {return _mm_set1_epi32(0xCCCCCCCC);}
    176 
    177 template<> inline SIMD_type simd<8>::himask() {return _mm_set1_epi32(0xF0F0F0F0);}
    178 
    179 template<> inline SIMD_type simd<16>::himask() {return _mm_set1_epi32(0xFF00FF00);}
    180 
    181 template<> inline SIMD_type simd<32>::himask() {return _mm_set1_epi32(0xFFFF0000);}
    182 
    183 template<> inline SIMD_type simd<64>::himask() {return _mm_set_epi32(-1,0,-1,0);}
    184 
    185 template<> inline SIMD_type simd<128>::himask() {return _mm_set_epi32(-1,-1,0,0);}
    186 
    187 template<> inline SIMD_type simd<16>::lomask() {return _mm_set1_epi32(0x00FF00FF);}
    188 
    189 template<> inline SIMD_type simd<32>::lomask() {return _mm_set1_epi32(0x0000FFFF);}
    190 
    191 template<> inline SIMD_type simd<64>::lomask() {return _mm_set_epi32(0,-1,0,-1);}
    192 
    193 template<> inline SIMD_type simd<128>::lomask() {return _mm_set_epi32(0,0,-1,-1);}
    194 
    195 template<> template <int n> inline SIMD_type simd<4>::constant() {return _mm_set1_epi8((n)<<4|(n));}
    196 
    197 template<> template <int n> inline SIMD_type simd<8>::constant() {return _mm_set1_epi8(n);}
    198 
    199 template<> template <int n> inline SIMD_type simd<16>::constant() {return _mm_set1_epi16(n);}
    200 
    201 template<> template <int n> inline SIMD_type simd<32>::constant() {return _mm_set1_epi32(n);}
    202 
    203 template<> template <> inline SIMD_type simd<1>::constant<0>() {return simd<32>::constant<0>();}
    204 template<> template <> inline SIMD_type simd<1>::constant<1>() {return simd<32>::constant<-1>();}
    205 
    206 template<> template <int n> inline SIMD_type simd<2>::constant() {return simd<4>::constant<(n<<2|n)>();}
     182/* ****************************************************************************************************
     183   SYMBOL TABLE SUPPORT
     184   **************************************************************************************************** */
     185
     186template <>
     187inline SIMD_type simd<8>::gather(void * base, size_t * index, ScaleFactor sf) {
     188
     189        SIMD_type rv;
     190       
     191        unsigned char temp_ary[sizeof(SIMD_type)];
     192/*     
     193        unsigned char v0 = *((unsigned char *) base + index[0] * sf);
     194        unsigned char v1 = *((unsigned char *) base + index[1] * sf);
     195        unsigned char v2 = *((unsigned char *) base + index[2] * sf);
     196        unsigned char v3 = *((unsigned char *) base + index[3] * sf);
     197
     198        unsigned char v4 = *((unsigned char *) base + index[4] * sf);
     199        unsigned char v5 = *((unsigned char *) base + index[5] * sf);
     200        unsigned char v6 = *((unsigned char *) base + index[6] * sf);
     201        unsigned char v7 = *((unsigned char *) base + index[7] * sf);
     202       
     203        unsigned char v8 = *((unsigned char *) base + index[8] * sf);
     204        unsigned char v9 = *((unsigned char *) base + index[9] * sf);
     205        unsigned char v10 = *((unsigned char *) base + index[10] * sf);
     206        unsigned char v11 = *((unsigned char *) base + index[11] * sf);
     207       
     208        unsigned char v12 = *((unsigned char *) base + index[12] * sf);
     209        unsigned char v13 = *((unsigned char *) base + index[13] * sf);
     210        unsigned char v14 = *((unsigned char *) base + index[14] * sf);
     211        unsigned char v15 = *((unsigned char *) base + index[15] * sf);
     212       
     213        rv = _mm_set_epi8(v15,v14,v13,v12,v11,v10,v9,v8,v7,v6,v5,v4,v3,v2,v1,v0);       
     214*/
     215        temp_ary[0] = *((unsigned char *) base + index[0]);
     216        temp_ary[1] = *((unsigned char *) base + index[1]);
     217        temp_ary[2] = *((unsigned char *) base + index[2]);
     218        temp_ary[3] = *((unsigned char *) base + index[3]);
     219       
     220        temp_ary[4] = *((unsigned char *) base + index[4]);
     221        temp_ary[5] = *((unsigned char *) base + index[5]);
     222        temp_ary[6] = *((unsigned char *) base + index[6]);
     223        temp_ary[7] = *((unsigned char *) base + index[7]);
     224       
     225        temp_ary[8] = *((unsigned char *) base + index[8]);
     226        temp_ary[9] = *((unsigned char *) base + index[9]);
     227        temp_ary[10] = *((unsigned char *) base + index[10]);
     228        temp_ary[11] = *((unsigned char *) base + index[11]);
     229       
     230        temp_ary[12] = *((unsigned char *) base + index[12]);
     231        temp_ary[13] = *((unsigned char *) base + index[13]);
     232        temp_ary[14] = *((unsigned char *) base + index[14]);
     233        temp_ary[15] = *((unsigned char *) base + index[15]);
     234       
     235        rv = _mm_loadu_si128((SIMD_type *)temp_ary);   
     236       
     237        return rv;
     238       
     239}
     240
     241/*
     242template <> inline void scatter(const void * base, const unsigned char * index, const size_t scale);
     243*/
     244
     245/* **************************************************************************************************** */
     246
     247/* ****************************************************************************************************
     248   SYMBOL TABLE SUPPORT
     249   **************************************************************************************************** */
    207250
    208251// Splat the first 8-bit int into all positions.
     
    226269  return _mm_shuffle_epi32(x,_MM_SHUFFLE(1,0,1,0));
    227270}
     271
     272template <> inline SIMD_type simd<8>::splat(unsigned char * p) {
     273        return _mm_set1_epi8(*(uint8_t *)p);
     274}
     275
     276template <> inline SIMD_type simd<16>::splat(unsigned char * p) {
     277        return _mm_set1_epi16(*(uint16_t *)p);
     278}
     279
     280/* **************************************************************************************************** */
     281
     282
     283#define sisd_to_int(x) _mm_cvtsi128_si32(x)
     284
     285#define sisd_from_int(n) _mm_cvtsi32_si128(n)
     286
     287
     288
     289
     290/* III.  Implementations of simd<fw> operations. */
     291
     292/* Constant generator functions for various field widths. */
     293
     294template<> inline SIMD_type simd<2>::himask() {return _mm_set1_epi32(0xAAAAAAAA);}
     295
     296template<> inline SIMD_type simd<4>::himask() {return _mm_set1_epi32(0xCCCCCCCC);}
     297
     298template<> inline SIMD_type simd<8>::himask() {return _mm_set1_epi32(0xF0F0F0F0);}
     299
     300template<> inline SIMD_type simd<16>::himask() {return _mm_set1_epi32(0xFF00FF00);}
     301
     302template<> inline SIMD_type simd<32>::himask() {return _mm_set1_epi32(0xFFFF0000);}
     303
     304template<> inline SIMD_type simd<64>::himask() {return _mm_set_epi32(-1,0,-1,0);}
     305
     306template<> inline SIMD_type simd<128>::himask() {return _mm_set_epi32(-1,-1,0,0);}
     307
     308template<> inline SIMD_type simd<16>::lomask() {return _mm_set1_epi32(0x00FF00FF);}
     309
     310template<> inline SIMD_type simd<32>::lomask() {return _mm_set1_epi32(0x0000FFFF);}
     311
     312template<> inline SIMD_type simd<64>::lomask() {return _mm_set_epi32(0,-1,0,-1);}
     313
     314template<> inline SIMD_type simd<128>::lomask() {return _mm_set_epi32(0,0,-1,-1);}
     315
     316template<> template <int n> inline SIMD_type simd<4>::constant() {return _mm_set1_epi8((n)<<4|(n));}
     317
     318template<> template <int n> inline SIMD_type simd<8>::constant() {return _mm_set1_epi8(n);}
     319
     320template<> template <int n> inline SIMD_type simd<16>::constant() {return _mm_set1_epi16(n);}
     321
     322template<> template <int n> inline SIMD_type simd<32>::constant() {return _mm_set1_epi32(n);}
     323
     324template<> template <> inline SIMD_type simd<1>::constant<0>() {return simd<32>::constant<0>();}
     325template<> template <> inline SIMD_type simd<1>::constant<1>() {return simd<32>::constant<-1>();}
     326
     327template<> template <int n> inline SIMD_type simd<2>::constant() {return simd<4>::constant<(n<<2|n)>();}
     328
     329/*
     330// Splat the first 8-bit int into all positions.
     331template <> inline SIMD_type simd<8>::splat(SIMD_type x) {
     332  return _mm_set1_epi8(*(uint8_t *)(&x));
     333}
     334
     335// Splat the first 16-bit int into all positions.
     336template <> inline SIMD_type simd<16>::splat(SIMD_type x) {
     337  SIMD_type t = _mm_shufflelo_epi16(x,0);
     338  return _mm_shuffle_epi32(t,0);
     339}
     340
     341// Splat the first 32-bit int into all positions.
     342template <> inline SIMD_type simd<32>::splat(SIMD_type x) {
     343  return _mm_shuffle_epi32(x,0);
     344}
     345
     346// Splat the first 64-bit int into all positions.
     347template <> inline SIMD_type simd<64>::splat(SIMD_type x) {
     348  return _mm_shuffle_epi32(x,_MM_SHUFFLE(1,0,1,0));
     349}
     350*/
    228351
    229352// Move mask 8-bit
Note: See TracChangeset for help on using the changeset viewer.