source: u8u16/trunk/lib/mmx_simd.h @ 5877

Last change on this file since 5877 was 5877, checked in by cameron, 14 months ago

Adding old u8u16 for Teradata

File size: 4.1 KB
Line 
1/* Copyright (C) 2007 Robert D. Cameron, Dan Lin
2   Licensed to International Characters Inc. and Simon Fraser University
3              under the Academic Free License version 3.0.
4   Licensed to the public under the Open Software License version 3.0.
5*/
6#ifndef _MSC_VER
7#include <stdint.h>
8#endif
9#ifdef _MSC_VER
10#include "stdint.h"
11#define LITTLE_ENDIAN 1234
12#define BIG_ENDIAN 4321
13#define BYTE_ORDER LITTLE_ENDIAN
14#endif
15#include <limits.h>
16#ifndef LONG_BIT
17#define LONG_BIT (8* sizeof(unsigned long))
18#endif
19#include "mmx_simd_built_in.h"
20#include "mmx_simd_basic.h"
21#include "mmx_simd_modified.h"
22
23/* mmintrin.h does not provide access to pmaxub;
24   xmmintrin.h does via _mm_max_pu8(a, b), but also
25   requires SSE. */
26
27static inline SIMD_type simd_max_8(SIMD_type a, SIMD_type b) {
28  asm volatile(
29    "pmaxub %[rb], %[ra]\n\t"
30    : [ra] "+y" (a)
31    : [rb] "y" (b));
32  return a;
33}
34
/* 8-byte aligned constant byte patterns: every byte is 0x55 (01010101),
   0x33 (00110011) or 0x0F (00001111) respectively.
   NOTE(review): these are definitions with external linkage inside a
   header; including this header from more than one translation unit
   would produce duplicate symbols -- confirm it is only included once
   per program before changing the linkage. */
char mask_x55 [8] __attribute__ ((aligned(8))) =
    {0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55};
char mask_x33 [8] __attribute__ ((aligned(8))) =
    {0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33};
char mask_x0F [8] __attribute__ ((aligned(8))) =
    {0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F};
41
42static inline int simd_all_true_8(SIMD_type maskvector) {
43/*  return _mm_movemask_epi8(maskvector) == 0xFF;  */
44  int maskbyte;
45  asm volatile("pmovmskb %[maskvector], %[maskbyte]\n"
46               : [maskbyte] "=r" (maskbyte)
47               : [maskvector] "y" (maskvector));
48  return maskbyte == 0xFF; 
49}
50static inline int simd_any_true_8(SIMD_type maskvector) {
51/*  return _mm_movemask_epi8(maskvector) =!= 0;  */
52  int maskbyte;
53  asm volatile("pmovmskb %[maskvector], %[maskbyte]\n"
54               : [maskbyte] "=r" (maskbyte)
55               : [maskvector] "y" (maskvector));
56  return maskbyte != 0; 
57}
58
59static inline int simd_any_sign_bit_8(SIMD_type v) {
60/*  return _mm_movemask_epi8(v) =!= 0;  */
61  int signbyte;
62  asm volatile("pmovmskb %[v], %[signbyte]\n"
63               : [signbyte] "=r" (signbyte)
64               : [v] "y" (v));
65  return signbyte != 0; 
66}
67
/* Nonzero when every byte of v1 equals the corresponding byte of v2. */
#define simd_all_eq_8(v1, v2) simd_all_true_8(_mm_cmpeq_pi8((v1), (v2)))

/* Nonzero when every byte of v1 is <= the corresponding byte of v2,
   tested as max(v1, v2) == v2 using simd_max_8.
   Note: v2 is expanded twice, so it must not have side effects. */
#define simd_all_le_8(v1, v2) \
  simd_all_eq_8(simd_max_8((v1), (v2)), (v2))

/* Nonzero when every byte of v1 is greater than the corresponding byte
   of v2 under signed comparison (_mm_cmpgt_pi8). */
#define simd_all_signed_gt_8(v1, v2) simd_all_true_8(_mm_cmpgt_pi8((v1), (v2)))
73
74static inline int bitblock_has_bit(SIMD_type v) {
75  return !simd_all_true_8(simd_eq_8(v, simd_const_8(0)));
76}
77
78
/* Extract bit n of blk as an int (0 or 1).
   The block is shifted left by (BLOCKSIZE - 1) - n so that bit n lands
   in the top position, then shifted right by BLOCKSIZE - 1 so that it
   ends up alone in bit 0.  This assumes sisd_slli/sisd_srli are logical
   shifts over the whole block (they are defined elsewhere) and that
   n is in the range 0 .. BLOCKSIZE-1. */
#define bitblock_test_bit(blk, n) \
  sisd_to_int(sisd_srli(sisd_slli((blk), (BLOCKSIZE - 1) - (n)), (BLOCKSIZE - 1)))

/* Earlier formulation, kept for reference:
#define bitblock_test_bit(blk, n) \
((n) == 63 ? sisd_to_int(sisd_srli(blk, n)) : \
(sisd_to_int(sisd_srli(blk, n)) & 1)) */
85
86
87void print_bit_block(char * var_name, SIMD_type v) {
88  union {SIMD_type vec; unsigned char elems[8];} x;
89  x.vec = v;
90  unsigned char c, bit_reversed;
91  int i;
92  printf("%20s = ", var_name);
93  for (i = 0; i < sizeof(SIMD_type); i++) {
94    c = x.elems[i];
95     printf("%02X ", c); 
96  }
97  printf("\n");
98}
99
100static inline int bitblock_bit_count(SIMD_type v) {
101
102  SIMD_type cts_2 = simd_add_2_lh(v,v);
103  SIMD_type cts_4 = simd_add_4_lh(cts_2,cts_2);
104  SIMD_type cts_8 = simd_add_8_lh(cts_4,cts_4);
105  SIMD_type r = simd_const_8(0);
106
107  asm volatile("psadbw %[r_reg], %[cts8_reg]\n\t"
108               : [cts8_reg] "+y" (cts_8)
109               : [r_reg] "y" (r));     
110  return sisd_to_int(cts_8);
111}
112
113
114
115static inline int count_forward_zeroes(SIMD_type bits) {
116  union {SIMD_type vec; unsigned int elems[2];} v;
117  v.vec = bits;
118  if (v.elems[0] != 0) return __builtin_ctzl(v.elems[0]);
119  else if (v.elems[1] != 0) return 32 + __builtin_ctzl(v.elems[1]);
120  else return 64;
121}
122
123
124/* Scans for a 1 as long as it takes.  Use a sentinel to fence. */
125static inline int bitstream_scan(SIMD_type * stream, int bit_posn) {
126  unsigned int * bitstream_ptr = (unsigned int *) (((intptr_t) stream) + bit_posn/8);
127  unsigned int bitstream_slice = *bitstream_ptr & (-1 << bit_posn % 8);
128  unsigned int slice_scan;
129  if (bitstream_slice == 0) {
130    do {
131      bitstream_ptr++;
132      bitstream_slice = *bitstream_ptr;
133    } while (bitstream_slice == 0);
134  }
135  slice_scan = __builtin_ctz(bitstream_slice);
136  return 8*((intptr_t) bitstream_ptr - (intptr_t) stream) + slice_scan;
137}
138
139
140
141
Note: See TracBrowser for help on using the repository browser.