source: u8u16/trunk/lib/mmx_simd_built_in.h @ 5877

Last change on this file since 5877 was 5877, checked in by cameron, 15 months ago

Adding old u8u16 for Teradata

  • Property svn:executable set to *
File size: 4.0 KB
Line 
/* Copyright (C) 2007 Dan Lin, Robert D. Cameron
   Licensed to International Characters Inc. and Simon Fraser University
              under the Academic Free License version 3.0.
   Licensed to the public under the Open Software License version 3.0.
*/

/*------------------------------------------------------------*/

/* MMX back end: every SIMD value handled by the macros in this file is a
   single 64-bit __m64 as declared by <mmintrin.h>. */
#include <mmintrin.h>
typedef __m64 SIMD_type;


/*------------------------------------------------------------*/
/* I. SIMD bitwise logical operations */

/* Bitwise OR, AND and XOR of two 64-bit values. */
#define simd_or(b1, b2) _mm_or_si64(b1, b2)
#define simd_and(b1, b2) _mm_and_si64(b1, b2)
#define simd_xor(b1, b2) _mm_xor_si64(b1, b2)
/* b1 AND (NOT b2).  _mm_andnot_si64 complements its FIRST operand, which
   is why the arguments are passed in swapped order. */
#define simd_andc(b1, b2) _mm_andnot_si64(b2, b1)
/* Bitwise select: result bits come from then_val where cond is 1 and from
   else_val where cond is 0.  cond is expanded twice, so it must not have
   side effects. */
#define simd_if(cond, then_val, else_val) \
  simd_or(simd_and(then_val, cond), simd_andc(else_val, cond))
/* Bitwise complement.  This applies the `~` operator directly to the
   __m64 value, so it depends on the compiler accepting `~` on that type.
   The argument is parenthesized so that an expression argument such as
   (x | y) is complemented as a whole. */
#define simd_not(b) (~(b))
/* NOT (a OR b). */
#define simd_nor(a,b) (~simd_or(a,b))


/*  Specific constants. */
/* simd_himask_N: in every N-bit field the upper N/2 bits are 1 and the
   lower N/2 bits are 0.  The literals exceed the range of the signed
   parameter types of the _mm_set1_* intrinsics, so the narrowing is made
   explicit with a cast instead of relying on an implicit conversion. */
#define simd_himask_2 _mm_set1_pi8((char)0xAA)
#define simd_himask_4 _mm_set1_pi8((char)0xCC)
#define simd_himask_8 _mm_set1_pi8((char)0xF0)
#define simd_himask_16 _mm_set1_pi16((short)0xFF00)
#define simd_himask_32 _mm_set1_pi32((int)0xFFFF0000)
#define simd_himask_64 _mm_set_pi32(-1,0)
/*  Assigned constants. */
/* simd_const_N(n): replicate n into every N-bit field of the register.
   For N < 8 the value is built by doubling the field (n | n << N), so n
   must fit in N bits; n is expanded several times and must not have side
   effects. */
#define simd_const_32(n) _mm_set1_pi32(n)
#define simd_const_16(n) _mm_set1_pi16(n)
#define simd_const_8(n) _mm_set1_pi8(n)
#define simd_const_4(n) _mm_set1_pi8((char)((n)<<4|(n)))
#define simd_const_2(n) simd_const_4((n)<<2|(n))
#define simd_const_1(n) simd_const_2((n)<<1|(n))

/*  Operations: add, subtract, multiply, shift, merge, pack
               in different field width*/

/* Field-wise addition and subtraction; the suffix is the field width in
   bits.
   NOTE(review): _mm_add_si64 / _mm_sub_si64 may be declared in
   <emmintrin.h> rather than <mmintrin.h> on some toolchains -- confirm
   for the target compiler. */
#define simd_add_8(a, b) _mm_add_pi8(a, b)
#define simd_add_16(a, b) _mm_add_pi16(a, b)
#define simd_add_32(a, b) _mm_add_pi32(a, b)
#define simd_add_64(a, b) _mm_add_si64(a, b)
#define simd_sub_8(a, b) _mm_sub_pi8(a, b)
#define simd_sub_16(a, b) _mm_sub_pi16(a, b)
#define simd_sub_32(a, b) _mm_sub_pi32(a, b)
#define simd_sub_64(a, b) _mm_sub_si64(a, b)
/* Low 16 bits of each 16x16-bit product. */
#define simd_mult_16(a, b) _mm_mullo_pi16(a, b)
/* Shifts by an integer count: slli = logical left, srli = logical right,
   srai = arithmetic right. */
#define simd_slli_16(r, shft) _mm_slli_pi16(r, shft)
#define simd_srli_16(r, shft) _mm_srli_pi16(r, shft)
#define simd_srai_16(r, shft) _mm_srai_pi16(r, shft)
#define simd_slli_32(r, shft) _mm_slli_pi32(r, shft)
#define simd_srli_32(r, shft) _mm_srli_pi32(r, shft)
#define simd_srai_32(r, shft) _mm_srai_pi32(r, shft)
#define simd_slli_64(r, shft) _mm_slli_si64(r, shft)
#define simd_srli_64(r, shft) _mm_srli_si64(r, shft)
/* 64-bit shifts whose count is itself held in a SIMD register. */
#define simd_sll_64(r, shft_reg) _mm_sll_si64(r, shft_reg)
#define simd_srl_64(r, shft_reg) _mm_srl_si64(r, shft_reg)
/* Merges interleave fields taken from the high (mergeh) or low (mergel)
   halves of a and b.  The operands are handed to the unpack intrinsics
   in swapped order, so within each interleaved pair the field from b
   occupies the lower position and the field from a the higher one. */
#define simd_mergeh_8(a, b) _mm_unpackhi_pi8(b, a)
#define simd_mergeh_16(a, b) _mm_unpackhi_pi16(b, a)
#define simd_mergeh_32(a, b) _mm_unpackhi_pi32(b, a)
#define simd_mergel_8(a, b) _mm_unpacklo_pi8(b, a)
#define simd_mergel_16(a, b) _mm_unpacklo_pi16(b, a)
#define simd_mergel_32(a, b) _mm_unpacklo_pi32(b, a)
/* Pack 16-bit fields down to bytes, keeping the low byte of each field:
   the high bytes are masked off first so the saturating pack cannot
   alter the values.  Bytes from b fill the low half of the result and
   bytes from a the high half. */
#define simd_pack_16(a, b) \
  _mm_packs_pu16(simd_andc(b, simd_himask_16), simd_andc(a, simd_himask_16))
71static inline SIMD_type simd_pack_32(SIMD_type a, SIMD_type b)
72{
73   SIMD_type a1, b1;
74   asm volatile("pshufw $8, %[a], %[a1]\n\t"
75                : [a1] "=y" (a1)
76                : [a] "y" (a));
77   asm volatile("pshufw $8, %[b], %[b1]\n\t"
78                : [b1] "=y" (b1)
79                : [b] "y" (b));
80   /* a1 = _mm_shuffle_pi16(a,8);
81      b1 = _mm_shuffle_pi16(b,8); */
82   return simd_mergel_32(a1,b1);
83}
/* Pack 64-bit fields down to 32 bits: the low 32 bits of a form the high
   half of the result and the low 32 bits of b the low half.  The original
   expansion named simd_mergel_32_ (trailing underscore), which is not
   defined in this header; simd_mergel_32 is the intended macro. */
#define simd_pack_64(a, b) \
  simd_mergel_32(a, b)
/* Field-wise equality: each field of the result is all ones where the
   corresponding fields of a and b are equal, and all zeros otherwise. */
#define simd_eq_8(a, b) _mm_cmpeq_pi8(a, b)
#define simd_eq_16(a, b) _mm_cmpeq_pi16(a, b)
#define simd_eq_32(a, b) _mm_cmpeq_pi32(a, b)

/*  Full block operations */
/* These treat the whole register as one value.  With a 64-bit register
   they are simply the 64-bit field operations under another name:
   sll/srl take the shift count in a register, slli/srli take an integer
   count. */
#define sisd_sll(r, shft) simd_sll_64(r, shft)
#define sisd_srl(r, shft) simd_srl_64(r, shft)
#define sisd_slli(r, shft) simd_slli_64(r, shft)
#define sisd_srli(r, shft) simd_srli_64(r, shft)
#define sisd_add(a, b) simd_add_64(a, b)
#define sisd_sub(a, b) simd_sub_64(a, b)


/* Store r to / load a SIMD_type from the memory at addr.  In this header
   the aligned and unaligned variants have identical expansions: both are
   a plain dereference of addr cast to SIMD_type *.  The store macros
   parenthesize r and the whole expansion so they behave as a single
   expression whatever is passed for r. */
#define sisd_store_aligned(r, addr) (*((SIMD_type *) (addr)) = (r))
#define sisd_store_unaligned(r, addr) (*((SIMD_type *) (addr)) = (r))
#define sisd_load_aligned(addr) ((SIMD_type) *((SIMD_type *) (addr)))
#define sisd_load_unaligned(addr) ((SIMD_type) *((SIMD_type *) (addr)))

/* Conversions between a register and a 32-bit int: sisd_to_int yields
   the low 32 bits of x; sisd_from_int places n in the low 32 bits of the
   register with the upper 32 bits zero. */
#define sisd_to_int(x) _mm_cvtsi64_si32(x)
#define sisd_from_int(n) _mm_cvtsi32_si64(n)
Note: See TracBrowser for help on using the repository browser.