source: trunk/lib/lib_simd.h @ 890

Last change on this file since 890 was 742, checked in by ksherdy, 9 years ago

Add utility method to print array values, little-endian, right-to-left.

File size: 7.9 KB
Line 
1/*  lib_simd_h:  SIMD Library including idealized SIMD operations
2    Copyright (C) 2008, Robert D. Cameron
3    Licensed to the public under the Open Software License 3.0.
4    Licensed to International Characters Inc.
5       under the Academic Free License version 3.0.
6
7    This file contains generic architecture-independent definitions,
8    importing architecture-specific implementations from appropriate
9    files.
10*/
11
12/*------------------------------------------------------------*/
13#ifndef LIB_SIMD_H
14#define LIB_SIMD_H
15#include <sys/types.h>
16#include <limits.h>
17
/* Supply LONG_BIT (the width of long, in bits) when the system headers
   did not define it, by inspecting ULONG_MAX from <limits.h>. */
#if !defined(LONG_BIT)
#  if ULONG_MAX == 0xFFFFFFFF
#    define LONG_BIT 32
#  elif ULONG_MAX == 0xFFFFFFFFFFFFFFFF
#    define LONG_BIT 64
#  endif
#endif
26
27#if (defined(__i386) || defined(__x86_64))
28#ifdef TEMPLATED_SIMD_LIB
29#include "sse_simd_t.h"
30#endif
31#ifndef TEMPLATED_SIMD_LIB
32#include "sse_simd.h"
33#endif
34#endif
35#ifdef _ARCH_PPC
36#include "altivec_simd.h"
37#endif
38
39/* Useful definitions from Linux kernel*/
40#ifdef __GNUC__
41/*
42#define likely(x) __builtin_expect((x),1)
43#define unlikely(x) __builtin_expect((x),0)
44*/
/* Branch hint: hands x back unchanged while passing 1 to __builtin_expect
   as the value the compiler should expect. */
static inline long likely(long x) {
        const long hinted = __builtin_expect(x, 1);
        return hinted;
}
/* Branch hint: hands x back unchanged while passing 0 to __builtin_expect
   as the value the compiler should expect. */
static inline long unlikely(long x) {
        const long hinted = __builtin_expect(x, 0);
        return hinted;
}
51
52#endif
53#ifdef _MSC_VER
54#define inline __inline
55#include "lib/sse_simd.h"
56#define likely(x) (x)
57#define unlikely(x) (x)
58#endif
59
60#ifdef TEMPLATED_SIMD_LIB
61static inline SIMD_type sisd_sll(SIMD_type blk, SIMD_type n) {
62        return simd<128>::sll(blk, n);
63}
64static inline SIMD_type sisd_srl(SIMD_type blk, SIMD_type n) {
65        return simd<128>::srl(blk, n);
66}
67#define sisd_slli(blk, n) simd<128>::slli<n>(blk)
68#define sisd_srli(blk, n) simd<128>::srli<n>(blk)
69#endif
70
71
72/* Shift forward and back operations, based on endianness */
/* Exactly one of the two branches below is selected.

   The host is treated as big-endian when BYTE_ORDER (or, failing that, the
   compiler-provided __BYTE_ORDER__) says so.  Every comparison is guarded
   with defined(): an undefined macro evaluates to 0 inside #if, so the
   unguarded test "BYTE_ORDER == BIG_ENDIAN" is true (0 == 0) on platforms
   that define neither macro, and so is the LITTLE_ENDIAN test, which would
   activate both branches and redefine every macro.  When no byte-order
   macro is available the little-endian definitions are used.

   sisd_sfl/sisd_sbl   shift a whole block forward/back by a register count
   sisd_sfli/sisd_sbli shift a whole block forward/back by an immediate
   sf_op/sb_op         shift a scalar word forward/back
   cfzl/cbzl           count forward/backward zeroes of an unsigned long;
                       the argument must be nonzero */
#if (defined(BYTE_ORDER) && defined(BIG_ENDIAN) && (BYTE_ORDER == BIG_ENDIAN)) || \
    (!defined(BYTE_ORDER) && defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
     (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))
#define sisd_sfl(blk, n) sisd_srl(blk, n)
#define sisd_sbl(blk, n) sisd_sll(blk, n)
#define sisd_sfli(blk, n) sisd_srli(blk, n)
#define sisd_sbli(blk, n) sisd_slli(blk, n)
#define sb_op(x, n) ((x)<<(n))
#define sf_op(x, n) ((x)>>(n))
#define cfzl __builtin_clzl
#define cbzl __builtin_ctzl
#else
#define sisd_sfl(blk, n) sisd_sll(blk, n)
#define sisd_sbl(blk, n) sisd_srl(blk, n)
#define sisd_sfli(blk, n) sisd_slli(blk, n)
#define sisd_sbli(blk, n) sisd_srli(blk, n)
#define sb_op(x, n) ((x)>>(n))
#define sf_op(x, n) ((x)<<(n))
#if defined(__GNUC__)
#define cfzl __builtin_ctzl
#define cbzl __builtin_clzl
#elif defined(_MSC_VER)
#include <intrin.h>
#pragma intrinsic(_BitScanForward)
#pragma intrinsic(_BitScanReverse)
/* Number of zero bits below the lowest set bit.  precondition: x > 0 */
static inline unsigned long cfzl(unsigned long x) {
        unsigned long lowest_set = 0;
        _BitScanForward(&lowest_set, x);
        return lowest_set;
}
/* Number of zero bits above the highest set bit.  precondition: x > 0
   _BitScanReverse yields the INDEX of the highest set bit, so it is
   subtracted from the top bit index to obtain the same leading-zero count
   that __builtin_clzl produces in the GNU branch. */
static inline unsigned long cbzl(unsigned long x) {
        unsigned long highest_set = 0;
        _BitScanReverse(&highest_set, x);
        return (unsigned long) (CHAR_BIT * sizeof(unsigned long) - 1) - highest_set;
}
#endif
#endif
111
112
113static inline int count_forward_zeroes(SIMD_type bits) {
114  union {SIMD_type vec; unsigned long elems[sizeof(SIMD_type)/sizeof(long)];} v;
115  v.vec = bits;
116  if (v.elems[0] != 0) return cfzl(v.elems[0]);
117  else if (v.elems[1] != 0) return LONG_BIT + cfzl(v.elems[1]);
118#ifdef _MSC_VER
119  else if (v.elems[2] != 0) return 2*LONG_BIT + cfzl(v.elems[2]);
120  else if (v.elems[3] != 0) return 3*LONG_BIT + cfzl(v.elems[3]);
121#endif
122#ifndef _MSC_VER
123#if LONG_BIT < 64
124  else if (v.elems[2] != 0) return 2*LONG_BIT + cfzl(v.elems[2]);
125  else if (v.elems[3] != 0) return 3*LONG_BIT + cfzl(v.elems[3]);
126#endif
127#endif
128  else return 8*sizeof(SIMD_type);
129}
130
131static inline int count_backward_zeroes(SIMD_type bits) {
132  union {SIMD_type vec; unsigned long elems[sizeof(SIMD_type)/sizeof(long)];} v;
133  v.vec = bits;
134#if LONG_BIT == 64
135  if (v.elems[1] != 0) return cbzl(v.elems[1]);
136  else if (v.elems[0] != 0) return LONG_BIT + cbzl(v.elems[0]);
137#endif
138#if LONG_BIT < 64
139  if (v.elems[3] != 0) return cbzl(v.elems[3]);
140  else if (v.elems[2] != 0) return LONG_BIT + cbzl(v.elems[2]);
141  else if (v.elems[1] != 0) return 2*LONG_BIT + cbzl(v.elems[1]);
142  else if (v.elems[0] != 0) return 3*LONG_BIT + cbzl(v.elems[0]);
143#endif
144  else return 8*sizeof(SIMD_type);
145}
146
147static inline unsigned long bitstream_segment_from(SIMD_type * stream, int bit_posn) {
148  unsigned long * bitstream_ptr = (unsigned long *) (((intptr_t) stream) + bit_posn/8);
149  return sb_op(*bitstream_ptr, bit_posn % 8);
150}
151
152/* Scans for a 1 as long as it takes.  Use a sentinel to fence.
153   Works for either endianness.  */
154static inline int bitstream_scan(SIMD_type * stream, int bit_posn) {
155  unsigned long * bitstream_ptr = (unsigned long *) (((intptr_t) stream) + bit_posn/8);
156  unsigned long bitstream_slice = sb_op(*bitstream_ptr, bit_posn % 8);
157  if (bitstream_slice != 0) return bit_posn + cfzl(bitstream_slice);
158  else {
159    do {
160      bitstream_ptr++;
161      bitstream_slice = *bitstream_ptr;
162    } while (bitstream_slice == 0);
163    int base_posn = 8*((intptr_t) bitstream_ptr - (intptr_t) stream);
164    return base_posn + cfzl(bitstream_slice);
165  }
166}
167
168static inline int bitstream_scan0(SIMD_type * stream) {
169  unsigned long * bitstream_ptr = (unsigned long *) stream;
170  unsigned long bitstream_slice = *bitstream_ptr;
171  int base_posn = 0;
172  while (bitstream_slice == 0) {
173    bitstream_ptr++;
174    bitstream_slice = *bitstream_ptr;
175  }
176  base_posn = 8*((intptr_t) bitstream_ptr - (intptr_t) stream);
177  return base_posn + cfzl(bitstream_slice);
178}
179
180
181/* Allocator for arrays of aligned SIMD data values.
182   Ideally the new operator could be used to allocate arrays
183   of vector data aligned on the required boundaries
184   (16-byte for SSE or Altivec).  But since this alignment
185   is not guaranteed except on Mac OS X, the following routine
186   is used. */
187
188static inline SIMD_type * simd_new(size_t SIMD_packs) {
189#ifdef __APPLE__
190        return new SIMD_type [SIMD_packs];
191#endif
192#ifdef _MSC_VER
193        SIMD_type * v = (SIMD_type*)_aligned_malloc(sizeof(SIMD_type) * SIMD_packs, sizeof(SIMD_type));
194        if (v != 0) return v;
195        else {
196                printf("Failed to allocated new array of %i SIMD packs.\n", SIMD_packs);
197                exit(-1);
198        }
199#endif
200#if !defined(__APPLE__) && !defined(_MSC_VER)
201        SIMD_type * v;
202        int rslt = posix_memalign((void **) &v,
203                                  sizeof(SIMD_type),
204                                  sizeof(SIMD_type) * SIMD_packs);
205        if (rslt == 0) return v;
206        else {
207                printf("Failed to allocated new array of %zu SIMD packs.\n", SIMD_packs);
208                exit(-1);
209        }
210#endif
211}
212
213static inline void simd_delete(SIMD_type * blk_ptr) {
214#ifdef __APPLE__
215  delete [] blk_ptr;
216#endif
217#ifndef __APPLE__
218  free((void *) blk_ptr);
219#endif
220}
221
222static void print_bit_block(char * var_name, SIMD_type v) {
223  union {SIMD_type vec; unsigned char elems[8];} x;
224  x.vec = v;
225  unsigned char c;
226  int i;
227  printf("%20s = ", var_name);
228  for (i = 0; i < sizeof(SIMD_type); i++) {
229    c = x.elems[i];
230     printf("%02X ", c); 
231  }
232  printf("\n");
233}
234
/* Prints the register representation of a 32 bit value: var_name
   (right-aligned in 30 columns) followed by the four bytes of v as hex,
   most significant byte first.

   The bytes are extracted by shifting rather than by reading v through a
   byte pointer, so the output is the same on little- and big-endian hosts
   (walking memory from the highest address down only yields
   most-significant-first on little-endian machines). */
static void print_general_register_32(const char * var_name, uint32_t v) {
        printf("%30s = ", var_name);
        for(int shift = 8*((int) sizeof(uint32_t) - 1); shift >= 0; shift -= 8) {
                unsigned char c = (unsigned char) ((v >> shift) & 0xFFu);
                printf("%02X ", c);
        }
        printf("\n");
}
245
/* Prints the register representation of a 64 bit value: var_name
   (right-aligned in 30 columns) followed by the eight bytes of v as hex,
   most significant byte first.

   The bytes are extracted by shifting rather than by reading v through a
   byte pointer, so the output is the same on little- and big-endian hosts
   (walking memory from the highest address down only yields
   most-significant-first on little-endian machines). */
static void print_general_register_64(const char * var_name, uint64_t v) {
        printf("%30s = ", var_name);
        for(int shift = 8*((int) sizeof(uint64_t) - 1); shift >= 0; shift -= 8) {
                unsigned char c = (unsigned char) ((v >> shift) & 0xFFu);
                printf("%02X ", c);
        }
        printf("\n");
}
256
257/* Prints the SIMD register representation of a SIMD value. */
258static void print_simd_register(const char * var_name, SIMD_type v) {
259  union {SIMD_type vec; unsigned char elems[8];} x;
260  x.vec = v;
261  unsigned char c;
262  printf("%30s = ", var_name);
263  for(int i=sizeof(SIMD_type)-1; i>=0; i--) {
264    c = x.elems[i];
265    printf("%02X ", c); 
266  }
267  printf("\n");
268}
269
/* Prints the array values little endian, right to left: var_name
   (right-aligned in 30 columns), then buf[size-1] down to buf[0] as
   characters, with a space after every element whose index is a multiple
   of 8.  A zero byte is shown as the character '0'. */
static void print_array_le(const char * var_name, char * buf, int size) {
  int idx = size;
  printf("%30s = ", var_name);
  while (idx-- > 0) {
    const unsigned char byte = buf[idx];
    if (byte != 0) {
      printf("%c", byte);
    } else {
      printf("0");      // print zero for UTF code point 0
    }
    if (idx%8 == 0) {
      printf(" ");      // print a single space separator
    }
  }
  printf("\n");
}
287
288#endif
289
Note: See TracBrowser for help on using the repository browser.