source: trunk/lib/lib_simd.h @ 90

Last change on this file since 90 was 90, checked in by cameron, 11 years ago

Temp. workaround for MSVC

File size: 4.5 KB
Line 
1/*  lib_simd_h:  SIMD Library including idealized SIMD operations
2    Copyright (C) 2008, Robert D. Cameron
3    Licensed to the public under the Open Software License 3.0.
4    Licensed to International Characters Inc.
5       under the Academic Free License version 3.0.
6
7    This file contains generic architecture-independent definitions,
8    importing architecture-specific implementations from appropriate
9    files.
10*/
11
12/*------------------------------------------------------------*/
13#ifndef LIB_SIMD_H
14#define LIB_SIMD_H
15#include <sys/types.h>
16#include <limits.h>
17
18#if (defined(__i386) || defined(__x86_64))
19#include "sse_simd.h"
20#endif
21#ifdef _ARCH_PPC
22#include "altivec_simd.h"
23#endif
24
/* Branch-prediction hints modelled on the Linux kernel's likely()/unlikely().
   Both macros evaluate to their argument; with GCC-compatible compilers the
   value is additionally passed through __builtin_expect so the optimizer is
   told which outcome to expect.  Every other compiler (MSVC included) gets a
   plain pass-through, so the macros are always defined. */
#if defined(__GNUC__)
#define likely(x) __builtin_expect((x),1)
#define unlikely(x) __builtin_expect((x),0)
#else
#define likely(x) (x)
#define unlikely(x) (x)
#endif
34
/* Shift forward and back operations, based on endianness.

   sisd_sfl/sisd_sfli and sf_op shift "forward", sisd_sbl/sisd_sbli and sb_op
   shift "back"; cfzl counts the zero bits in front of the first 1 bit of an
   unsigned long (precondition: argument is nonzero).
     big-endian:    forward = right shift, back = left shift, cfzl = count leading zeros
     little-endian: forward = left shift,  back = right shift, cfzl = count trailing zeros

   The byte order is taken from BYTE_ORDER when the system headers provide
   it, otherwise from the compiler's __BYTE_ORDER__, otherwise from known
   little-endian targets (MSVC, x86).  Each test is guarded with defined():
   an undefined macro evaluates to 0 inside #if, so an unguarded
   "BYTE_ORDER == BIG_ENDIAN" would be true (0 == 0) for BOTH byte orders
   whenever the macros are missing.  Exactly one branch is ever selected. */
#if (defined(BYTE_ORDER) && defined(BIG_ENDIAN) && (BYTE_ORDER == BIG_ENDIAN)) || \
    (!defined(BYTE_ORDER) && defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
     (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))
#define sisd_sfl(blk, n) sisd_srl(blk, n)
#define sisd_sbl(blk, n) sisd_sll(blk, n)
#define sisd_sfli(blk, n) sisd_srli(blk, n)
#define sisd_sbli(blk, n) sisd_slli(blk, n)
#define sb_op(x, n) ((x)<<(n))
#define sf_op(x, n) ((x)>>(n))
#define cfzl __builtin_clzl
#elif (defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && (BYTE_ORDER == LITTLE_ENDIAN)) || \
      (!defined(BYTE_ORDER) && defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
       (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) || \
      (!defined(BYTE_ORDER) && !defined(__BYTE_ORDER__) && \
       (defined(_MSC_VER) || defined(__i386) || defined(__x86_64)))
#define sisd_sfl(blk, n) sisd_sll(blk, n)
#define sisd_sbl(blk, n) sisd_srl(blk, n)
#define sisd_sfli(blk, n) sisd_slli(blk, n)
#define sisd_sbli(blk, n) sisd_srli(blk, n)
#define sb_op(x, n) ((x)>>(n))
#define sf_op(x, n) ((x)<<(n))
#ifdef __GNUC__
#define cfzl __builtin_ctzl
#endif
#ifdef _MSC_VER
#include <intrin.h>
#pragma intrinsic(_BitScanForward)
/* Index of the lowest set bit of x, i.e. the number of trailing zeros.
   precondition: x > 0 */
static inline unsigned long cfzl(unsigned long x) {
        unsigned long zeroes;
        _BitScanForward(&zeroes, x);
        return zeroes;
}
#endif
#else
#error "lib_simd.h: unable to determine the target byte order"
#endif
66
67
68static inline int count_forward_zeroes(SIMD_type bits) {
69  union {SIMD_type vec; unsigned long elems[sizeof(SIMD_type)/sizeof(long)];} v;
70  v.vec = bits;
71  if (v.elems[0] != 0) return cfzl(v.elems[0]);
72  else if (v.elems[1] != 0) return LONG_BIT + cfzl(v.elems[1]);
73#ifdef _MSC_VER
74  else if (v.elems[2] != 0) return 2*LONG_BIT + cfzl(v.elems[2]);
75  else if (v.elems[3] != 0) return 3*LONG_BIT + cfzl(v.elems[3]);
76#endif
77#ifndef _MSC_VER
78#if LONG_BIT < 64
79  else if (v.elems[2] != 0) return 2*LONG_BIT + cfzl(v.elems[2]);
80  else if (v.elems[3] != 0) return 3*LONG_BIT + cfzl(v.elems[3]);
81#endif
82#endif
83  else return 8*sizeof(SIMD_type);
84}
85
86
87/* Scans for a 1 as long as it takes.  Use a sentinel to fence.
88   Works for either endianness.  */
89static inline int bitstream_scan(SIMD_type * stream, int bit_posn) {
90  unsigned long * bitstream_ptr = (unsigned long *) (((intptr_t) stream) + bit_posn/8);
91  unsigned long bitstream_slice = sb_op(*bitstream_ptr, bit_posn % 8);
92  if (bitstream_slice != 0) return bit_posn + cfzl(bitstream_slice);
93  else {
94    do {
95      bitstream_ptr++;
96      bitstream_slice = *bitstream_ptr;
97    } while (bitstream_slice == 0);
98    int base_posn = 8*((intptr_t) bitstream_ptr - (intptr_t) stream);
99    return base_posn + cfzl(bitstream_slice);
100  }
101}
102
103static inline int bitstream_scan0(SIMD_type * stream) {
104  unsigned long * bitstream_ptr = (unsigned long *) stream;
105  unsigned long bitstream_slice = *bitstream_ptr;
106  int base_posn = 0;
107  while (bitstream_slice == 0) {
108    bitstream_ptr++;
109    bitstream_slice = *bitstream_ptr;
110  }
111  base_posn = 8*((intptr_t) bitstream_ptr - (intptr_t) stream);
112  return base_posn + cfzl(bitstream_slice);
113}
114
115
116/* Allocator for arrays of aligned SIMD data values.
117   Ideally the new operator could be used to allocate arrays
118   of vector data aligned on the required boundaries
119   (16-byte for SSE or Altivec).  But since this alignment
120   is not guaranteed except on Mac OS X, the following routine
121   is used. */
122
123static inline SIMD_type * simd_new(size_t SIMD_packs) {
124#ifdef __APPLE__
125        return new SIMD_type [SIMD_packs];
126#endif
127#ifdef _MSC_VER
128        SIMD_type * v = (SIMD_type*)_aligned_malloc(sizeof(SIMD_type) * SIMD_packs, sizeof(SIMD_type));
129        if (v != 0) return v;
130        else {
131                printf("Failed to allocated new array of %i SIMD packs.\n", SIMD_packs);
132                exit(-1);
133        }
134#endif
135#if !defined(__APPLE__) && !defined(_MSC_VER)
136        SIMD_type * v;
137        int rslt = posix_memalign((void **) &v,
138                                  sizeof(SIMD_type),
139                                  sizeof(SIMD_type) * SIMD_packs);
140        if (rslt == 0) return v;
141        else {
142                printf("Failed to allocated new array of %i SIMD packs.\n", SIMD_packs);
143                exit(-1);
144        }
145#endif
146}
147
148static inline void simd_delete(SIMD_type * blk_ptr) {
149#ifdef __APPLE__
150  delete [] blk_ptr;
151#endif
152#ifndef __APPLE__
153  free((void *) blk_ptr);
154#endif
155}
156
157#endif
158
Note: See TracBrowser for help on using the repository browser.