source: trunk/lib/lib_simd.h @ 403

Last change on this file since 403 was 403, checked in by ksherdy, 9 years ago

Add debug print functions that display the register view of a value.

File size: 6.7 KB
Line 
1/*  lib_simd_h:  SIMD Library including idealized SIMD operations
2    Copyright (C) 2008, Robert D. Cameron
3    Licensed to the public under the Open Software License 3.0.
4    Licensed to International Characters Inc.
5       under the Academic Free License version 3.0.
6
7    This file contains generic architecture-independent definitions,
8    importing architecture-specific implementations from appropriate
9    files.
10*/
11
12/*------------------------------------------------------------*/
13#ifndef LIB_SIMD_H
14#define LIB_SIMD_H
#include <limits.h>
#include <stdint.h>
#include <string.h>
#include <sys/types.h>
17
18#if (defined(__i386) || defined(__x86_64))
19#ifdef TEMPLATED_SIMD_LIB
20#include "sse_simd_t.h"
21#endif
22#ifndef TEMPLATED_SIMD_LIB
23#include "sse_simd.h"
24#endif
25#endif
26#ifdef _ARCH_PPC
27#include "altivec_simd.h"
28#endif
29
30/* Useful definitions from Linux kernel*/
31#ifdef __GNUC__
32/*
33#define likely(x) __builtin_expect((x),1)
34#define unlikely(x) __builtin_expect((x),0)
35*/
36static inline long likely(long x) {
37        return __builtin_expect(x, 1);
38}
39static inline long unlikely(long x) {
40        return __builtin_expect(x, 0);
41}
42
43#endif
44#ifdef _MSC_VER
45#define likely(x) (x)
46#define unlikely(x) (x)
47#endif
48
49/* Shift forward and back operations, based on endianness */
59#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef TEMPLATED_SIMD_LIB
/* Little-endian, templated library: shift-forward by a variable amount
   forwards to simd<128>::sll. */
static inline SIMD_type sisd_sfl(SIMD_type blk, SIMD_type n) {
        SIMD_type shifted = simd<128>::sll(blk, n);
        return shifted;
}

/* Little-endian, templated library: shift-back by a variable amount
   forwards to simd<128>::srl. */
static inline SIMD_type sisd_sbl(SIMD_type blk, SIMD_type n) {
        SIMD_type shifted = simd<128>::srl(blk, n);
        return shifted;
}
/* Immediate-count variants; n is used as a template argument. */
#define sisd_sfli(blk, n) simd<128>::slli<n>(blk)
#define sisd_sbli(blk, n) simd<128>::srli<n>(blk)
#endif
71#ifndef TEMPLATED_SIMD_LIB
72static inline SIMD_type sisd_sfl(SIMD_type blk, SIMD_type n) {
73        return sisd_sll(blk, n);
74}
75static inline SIMD_type sisd_sbl(SIMD_type blk, SIMD_type n) {
76        return sisd_srl(blk, n);
77}
78#define sisd_sfli(blk, n) sisd_slli(blk, n)
79#define sisd_sbli(blk, n) sisd_srli(blk, n)
80#endif
/* Little-endian scalar helpers: shifting back is >>, shifting forward
   is <<. */
#define sb_op(x, n) ((x)>>(n))
#define sf_op(x, n) ((x)<<(n))
/* cfzl: count of zero bits before the first 1 bit of an unsigned long;
   on little-endian this is the trailing-zero count. */
#ifdef __GNUC__
#define cfzl __builtin_ctzl
#endif
#ifdef _MSC_VER
#include <intrin.h>
#pragma intrinsic(_BitScanForward)
/* precondition: x > 0 (the result is left unset by the intrinsic when
   x is 0) */
static inline unsigned long cfzl(unsigned long x) {
        unsigned long first_set_index;
        _BitScanForward(&first_set_index, x);
        return first_set_index;
}
#endif
96#endif
97
98
99static inline int count_forward_zeroes(SIMD_type bits) {
100  union {SIMD_type vec; unsigned long elems[sizeof(SIMD_type)/sizeof(long)];} v;
101  v.vec = bits;
102  if (v.elems[0] != 0) return cfzl(v.elems[0]);
103  else if (v.elems[1] != 0) return LONG_BIT + cfzl(v.elems[1]);
104#ifdef _MSC_VER
105  else if (v.elems[2] != 0) return 2*LONG_BIT + cfzl(v.elems[2]);
106  else if (v.elems[3] != 0) return 3*LONG_BIT + cfzl(v.elems[3]);
107#endif
108#ifndef _MSC_VER
109#if LONG_BIT < 64
110  else if (v.elems[2] != 0) return 2*LONG_BIT + cfzl(v.elems[2]);
111  else if (v.elems[3] != 0) return 3*LONG_BIT + cfzl(v.elems[3]);
112#endif
113#endif
114  else return 8*sizeof(SIMD_type);
115}
116
117static inline unsigned long bitstream_segment_from(SIMD_type * stream, int bit_posn) {
118  unsigned long * bitstream_ptr = (unsigned long *) (((intptr_t) stream) + bit_posn/8);
119  return sb_op(*bitstream_ptr, bit_posn % 8);
120}
121
122/* Scans for a 1 as long as it takes.  Use a sentinel to fence.
123   Works for either endianness.  */
124static inline int bitstream_scan(SIMD_type * stream, int bit_posn) {
125  unsigned long * bitstream_ptr = (unsigned long *) (((intptr_t) stream) + bit_posn/8);
126  unsigned long bitstream_slice = sb_op(*bitstream_ptr, bit_posn % 8);
127  if (bitstream_slice != 0) return bit_posn + cfzl(bitstream_slice);
128  else {
129    do {
130      bitstream_ptr++;
131      bitstream_slice = *bitstream_ptr;
132    } while (bitstream_slice == 0);
133    int base_posn = 8*((intptr_t) bitstream_ptr - (intptr_t) stream);
134    return base_posn + cfzl(bitstream_slice);
135  }
136}
137
138static inline int bitstream_scan0(SIMD_type * stream) {
139  unsigned long * bitstream_ptr = (unsigned long *) stream;
140  unsigned long bitstream_slice = *bitstream_ptr;
141  int base_posn = 0;
142  while (bitstream_slice == 0) {
143    bitstream_ptr++;
144    bitstream_slice = *bitstream_ptr;
145  }
146  base_posn = 8*((intptr_t) bitstream_ptr - (intptr_t) stream);
147  return base_posn + cfzl(bitstream_slice);
148}
149
150
151/* Allocator for arrays of aligned SIMD data values.
152   Ideally the new operator could be used to allocate arrays
153   of vector data aligned on the required boundaries
154   (16-byte for SSE or Altivec).  But since this alignment
155   is not guaranteed except on Mac OS X, the following routine
156   is used. */
157
158static inline SIMD_type * simd_new(size_t SIMD_packs) {
159#ifdef __APPLE__
160        return new SIMD_type [SIMD_packs];
161#endif
162#ifdef _MSC_VER
163        SIMD_type * v = (SIMD_type*)_aligned_malloc(sizeof(SIMD_type) * SIMD_packs, sizeof(SIMD_type));
164        if (v != 0) return v;
165        else {
166                printf("Failed to allocated new array of %i SIMD packs.\n", SIMD_packs);
167                exit(-1);
168        }
169#endif
170#if !defined(__APPLE__) && !defined(_MSC_VER)
171        SIMD_type * v;
172        int rslt = posix_memalign((void **) &v,
173                                  sizeof(SIMD_type),
174                                  sizeof(SIMD_type) * SIMD_packs);
175        if (rslt == 0) return v;
176        else {
177                printf("Failed to allocated new array of %zu SIMD packs.\n", SIMD_packs);
178                exit(-1);
179        }
180#endif
181}
182
183static inline void simd_delete(SIMD_type * blk_ptr) {
184#ifdef __APPLE__
185  delete [] blk_ptr;
186#endif
187#ifndef __APPLE__
188  free((void *) blk_ptr);
189#endif
190}
191
192void print_bit_block(char * var_name, SIMD_type v) {
193  union {SIMD_type vec; unsigned char elems[8];} x;
194  x.vec = v;
195  unsigned char c, bit_reversed;
196  int i;
197  printf("%20s = ", var_name);
198  for (i = 0; i < sizeof(SIMD_type); i++) {
199    c = x.elems[i];
200     printf("%02X ", c); 
201  }
202  printf("\n");
203}
204
/* Prints the register representation of a 32 bit value.
   var_name is printed right-aligned in a 30-character field, followed
   by the bytes of v as two-digit hex from the highest address down to
   the lowest. */
static void print_general_register_32(const char * var_name, uint32_t v) {
        const unsigned char * bytes = (const unsigned char *) &v;
        printf("%30s = ", var_name);
        for (int i = (int) sizeof(v) - 1; i >= 0; i--) {
                printf("%02X ", bytes[i]);
        }
        printf("\n");
}
216
/* Prints the register representation of a 64 bit value.
   var_name is printed right-aligned in a 30-character field, followed
   by all eight bytes of v as two-digit hex from the highest address
   down to the lowest.  The parameter is a uint64_t so the upper half of
   the value is no longer truncated away before printing. */
static void print_general_register_64(const char * var_name, uint64_t v) {
        const unsigned char * bytes = (const unsigned char *) &v;
        printf("%30s = ", var_name);
        for (int i = (int) sizeof(v) - 1; i >= 0; i--) {
                printf("%02X ", bytes[i]);
        }
        printf("\n");
}
228
229/* Prints the SIMD register representation of a SIMD value. */
230static void print_simd_register(const char * var_name, SIMD_type v) {
231  union {SIMD_type vec; unsigned char elems[8];} x;
232  x.vec = v;
233  unsigned char c, bit_reversed;
234  int i;
235  printf("%30s = ", var_name);
236  for(int i=sizeof(SIMD_type)-1; i>=0; i--) {
237    c = x.elems[i];
238    printf("%02X ", c); 
239  }
240  printf("\n");
241}
242
243#endif
244
Note: See TracBrowser for help on using the repository browser.