Changeset 626


Ignore:
Timestamp:
Sep 14, 2010, 6:00:47 PM (9 years ago)
Author:
cameron
Message:

s2p_movemask implementation

Location:
trunk/lib
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/lib/s2p.h

    r619 r626  
    2121*/
    2222
    23 /* Different algorithms may be selected. */
     23/*  1.  ALGORITHM Selection. 
     24        Choice of 3 algorithms: s2p_ideal, s2p_movemask, s2p_bytepack
     25        Default is s2p_bytepack.
     26        Compile with -DUSE_S2P_IDEAL or -DUSE_S2P_MOVEMASK to override the default.
     27*/
     28
    2429#ifdef USE_S2P_IDEAL
    2530#define S2P_ALGORITHM s2p_ideal
     
    3439#endif
    3540
     41
     42
     43#if (BYTE_ORDER == BIG_ENDIAN)
     44#define s2p(s0, s1, s2, s3, s4, s5, s6, s7, p0, p1, p2, p3, p4, p5, p6, p7)\
     45  S2P_ALGORITHM(s0, s1, s2, s3, s4, s5, s6, s7, p0, p1, p2, p3, p4, p5, p6, p7)
     46#endif
     47#if (BYTE_ORDER == LITTLE_ENDIAN)
     48#define s2p(s0, s1, s2, s3, s4, s5, s6, s7, p0, p1, p2, p3, p4, p5, p6, p7)\
     49  S2P_ALGORITHM(s7, s6, s5, s4, s3, s2, s1, s0, p0, p1, p2, p3, p4, p5, p6, p7)
     50#endif
    3651
    3752
     
    110125  } while(0)
    111126
    112 #if (BYTE_ORDER == BIG_ENDIAN)
    113 #define s2p(s0, s1, s2, s3, s4, s5, s6, s7, p0, p1, p2, p3, p4, p5, p6, p7)\
    114   S2P_ALGORITHM(s0, s1, s2, s3, s4, s5, s6, s7, p0, p1, p2, p3, p4, p5, p6, p7)
    115 #endif
    116 #if (BYTE_ORDER == LITTLE_ENDIAN)
    117 #define s2p(s0, s1, s2, s3, s4, s5, s6, s7, p0, p1, p2, p3, p4, p5, p6, p7)\
    118   S2P_ALGORITHM(s7, s6, s5, s4, s3, s2, s1, s0, p0, p1, p2, p3, p4, p5, p6, p7)
    119 #endif
     127/* For sizeof(SIMD_type) = 16 */
     128typedef uint16_t BitPack;
     129
     130#define movemask_step(s0, s1, s2, s3, s4, s5, s6, s7, p) \
     131  do { \
     132        union { BitPack bit_pack[8];\
     133                SIMD_type bit_block;\
     134              } b;\
     135        b.bit_pack[0] = simd_movemask_8(s7);\
     136        b.bit_pack[1] = simd_movemask_8(s6);\
     137        b.bit_pack[2] = simd_movemask_8(s5);\
     138        b.bit_pack[3] = simd_movemask_8(s4);\
     139        b.bit_pack[4] = simd_movemask_8(s3);\
     140        b.bit_pack[5] = simd_movemask_8(s2);\
     141        b.bit_pack[6] = simd_movemask_8(s1);\
     142        b.bit_pack[7] = simd_movemask_8(s0);\
     143        p = b.bit_block;\
     144   } while (0)
     145
     146#define bitshift_step(s0, s1, s2, s3, s4, s5, s6, s7) \
     147  do { \
     148        s0 = simd_add_8(s0, s0);\
     149        s1 = simd_add_8(s1, s1);\
     150        s2 = simd_add_8(s2, s2);\
     151        s3 = simd_add_8(s3, s3);\
     152        s4 = simd_add_8(s4, s4);\
     153        s5 = simd_add_8(s5, s5);\
     154        s6 = simd_add_8(s6, s6);\
     155        s7 = simd_add_8(s7, s7);\
     156  } while (0)
     157
     158
     159#define s2p_movemask(s0, s1, s2, s3, s4, s5, s6, s7, p0, p1, p2, p3, p4, p5, p6, p7) \
     160  do { \
     161        movemask_step(s0, s1, s2, s3, s4, s5, s6, s7, p0);\
     162        bitshift_step(s0, s1, s2, s3, s4, s5, s6, s7);\
     163        movemask_step(s0, s1, s2, s3, s4, s5, s6, s7, p1);\
     164        bitshift_step(s0, s1, s2, s3, s4, s5, s6, s7);\
     165        movemask_step(s0, s1, s2, s3, s4, s5, s6, s7, p2);\
     166        bitshift_step(s0, s1, s2, s3, s4, s5, s6, s7);\
     167        movemask_step(s0, s1, s2, s3, s4, s5, s6, s7, p3);\
     168        bitshift_step(s0, s1, s2, s3, s4, s5, s6, s7);\
     169        movemask_step(s0, s1, s2, s3, s4, s5, s6, s7, p4);\
     170        bitshift_step(s0, s1, s2, s3, s4, s5, s6, s7);\
     171        movemask_step(s0, s1, s2, s3, s4, s5, s6, s7, p5);\
     172        bitshift_step(s0, s1, s2, s3, s4, s5, s6, s7);\
     173        movemask_step(s0, s1, s2, s3, s4, s5, s6, s7, p6);\
     174        bitshift_step(s0, s1, s2, s3, s4, s5, s6, s7);\
     175        movemask_step(s0, s1, s2, s3, s4, s5, s6, s7, p7);\
     176  } while (0)
     177
    120178
    121179#endif
  • trunk/lib/sse_simd.h

    r533 r626  
    205205}
    206206
     207#define simd_movemask_8(v) _mm_movemask_epi8(v)
     208
    207209#define simd_all_eq_8(v1, v2) simd_all_true_8(_mm_cmpeq_epi8(v1, v2))
    208210#define simd_all_le_8(v1, v2) \
Note: See TracChangeset for help on using the changeset viewer.