source: proto/parabix2/compiled/block_carry.h @ 452

Last change on this file since 452 was 452, checked in by cameron, 9 years ago

Fix typedef

File size: 3.6 KB
Line 
1/*  Block Addition, Subtraction and Shifts with Carry
2    Copyright (C) 2010, Robert D. Cameron
3    Licensed to the public under the Open Software License 3.0.
4    Licensed to International Characters Inc.
5       under the Academic Free License version 3.0.
6*/
7#ifndef BLOCK_CARRY_H
8#define BLOCK_CARRY_H
9
10/*------------------------------------------------------------*/
11#include "sse_simd.h"
12
/* Available carry-handling strategies.  CARRY_STRATEGY selects which set of
 * definitions (CarryType, Carry0, adc128, sbb128, advance_with_carry) the
 * rest of this header provides:
 *   ADC_64     - x86 adc/sbb instructions on the two 64-bit halves of a block.
 *   SIMD_CARRY - carries computed with SIMD operations only.
 * The default is ADC_64; a build may override it by defining CARRY_STRATEGY
 * before including this header (e.g. -DCARRY_STRATEGY=SIMD_CARRY).
 */
#define ADC_64 1
#define SIMD_CARRY 2
#ifndef CARRY_STRATEGY
#define CARRY_STRATEGY ADC_64
#endif
16
17#if (CARRY_STRATEGY == ADC_64)
/* Carry state for the ADC_64 strategy.  The adc/sbb macros of this strategy
 * pass the value through the accumulator register and exchange it with the
 * CPU flags using sahf/lahf, so the carry flag is kept in bit 8 of the
 * value (bit 0 of AH), not in bit 0.  Treat it as opaque.
 */
typedef uint64_t CarryType;

/* Initial "no carry" value, usable as an ordinary expression:
 *     CarryType c = Carry0;
 */
#define Carry0 0
21
/* double_int64_adc(x1, x2, y1, y2, rslt1, rslt2, carry)
 *
 * Two chained 64-bit add-with-carry operations (x86, GCC extended asm):
 *     rslt1 = x1 + y1 + carry-in
 *     rslt2 = x2 + y2 + carry-out of the first addition
 * x1, x2, y1, y2 are 64-bit input values; rslt1 and rslt2 are 64-bit
 * lvalues receiving the sums.
 *
 * carry is a 64-bit lvalue used for both carry-in and carry-out.  It is
 * placed in the accumulator: sahf loads the flags from AH before the
 * additions and lahf stores them back into AH afterwards, so the carry
 * flag travels in bit 8 of the value.  A value of 0 means "no carry in".
 *
 * z1 is marked earlyclobber ("=&r") because it is written by the first adc
 * while e2 is still to be read by the second; without the marker the
 * compiler is free to place y2 in the same register as rslt1 when y2 and
 * x1 hold the same value.
 */
#define double_int64_adc(x1, x2, y1, y2, rslt1, rslt2, carry) \
  __asm__  ("sahf\n\t" \
        "adc %[e1], %[z1]\n\t" \
        "adc %[e2], %[z2]\n\t" \
        "lahf\n\t" \
     : [z1] "=&r" (rslt1), [z2] "=r" (rslt2), [carryflag] "=a" (carry) \
         : "[z1]" (x1), "[z2]" (x2), \
           [e1] "r" (y1), [e2] "r" (y2), \
           "[carryflag]" (carry) \
         : "cc")
32
/* adc128(first, second, carry, sum)
 *
 * Adds two __m128i blocks as two chained 64-bit halves using
 * double_int64_adc: element [0] of each block is added first (with the
 * incoming carry), element [1] second (with the carry out of element [0]).
 *   first, second - __m128i operands (each evaluated once)
 *   carry         - CarryType lvalue: carry-in on entry, carry-out on exit
 *   sum           - __m128i lvalue receiving the result
 *
 * The temporaries carry an adc128_ prefix and trailing underscore so that
 * they cannot capture caller arguments with short names such as x, y or
 * rslt.
 */
#define adc128(first, second, carry, sum) \
do { \
  union {__m128i bitblock; \
         uint64_t int64[2];} adc128_a_, adc128_b_, adc128_r_; \
\
  adc128_a_.bitblock = (first); \
  adc128_b_.bitblock = (second); \
\
  double_int64_adc(adc128_a_.int64[0], adc128_a_.int64[1], \
                   adc128_b_.int64[0], adc128_b_.int64[1], \
                   adc128_r_.int64[0], adc128_r_.int64[1], carry); \
  (sum) = adc128_r_.bitblock; \
} while(0)
51
/* advance_with_carry(cursor, carry, rslt)
 *
 * Computes cursor + cursor + carry-in with double_int64_adc, i.e. doubles
 * the block (a one-bit shift towards the high end across both 64-bit
 * halves) with the incoming carry added at the low end.
 *   cursor - __m128i operand (evaluated once)
 *   carry  - CarryType lvalue: carry-in on entry, carry-out on exit
 *   rslt   - __m128i lvalue receiving the result
 *
 * The temporaries are prefixed so they cannot capture caller arguments
 * named x or z.
 */
#define advance_with_carry(cursor, carry, rslt) \
do { \
  union {__m128i bitblock; \
         uint64_t int64[2];} advance_in_, advance_out_; \
\
  advance_in_.bitblock = (cursor); \
\
  double_int64_adc(advance_in_.int64[0], advance_in_.int64[1], \
                   advance_in_.int64[0], advance_in_.int64[1], \
                   advance_out_.int64[0], advance_out_.int64[1], carry); \
  (rslt) = advance_out_.bitblock; \
} while(0)
65
/* double_int64_sbb(x1, x2, y1, y2, rslt1, rslt2, carry)
 *
 * Two chained 64-bit subtract-with-borrow operations (x86, GCC extended
 * asm):
 *     rslt1 = x1 - y1 - borrow-in
 *     rslt2 = x2 - y2 - borrow-out of the first subtraction
 * x1, x2, y1, y2 are 64-bit input values; rslt1 and rslt2 are 64-bit
 * lvalues receiving the differences.
 *
 * carry is a 64-bit lvalue used for both borrow-in and borrow-out.  It is
 * placed in the accumulator: sahf loads the flags from AH before the
 * subtractions and lahf stores them back into AH afterwards, so the
 * borrow (carry flag) travels in bit 8 of the value.  A value of 0 means
 * "no borrow in".
 *
 * z1 is marked earlyclobber ("=&r") because it is written by the first sbb
 * while e2 is still to be read by the second; without the marker the
 * compiler is free to place y2 in the same register as rslt1 when y2 and
 * x1 hold the same value.
 */
#define double_int64_sbb(x1, x2, y1, y2, rslt1, rslt2, carry) \
  __asm__  ("sahf\n\t" \
        "sbb %[e1], %[z1]\n\t" \
        "sbb %[e2], %[z2]\n\t" \
        "lahf\n\t" \
     : [z1] "=&r" (rslt1), [z2] "=r" (rslt2), [carryflag] "=a" (carry) \
         : "[z1]" (x1), "[z2]" (x2), \
           [e1] "r" (y1), [e2] "r" (y2), \
           "[carryflag]" (carry) \
         : "cc")
76
/* sbb128(first, second, carry, sum)
 *
 * Subtracts second from first as two chained 64-bit halves using
 * double_int64_sbb: element [0] of each block is processed first (with the
 * incoming borrow), element [1] second (with the borrow out of
 * element [0]).
 *   first, second - __m128i operands (each evaluated once)
 *   carry         - CarryType lvalue: borrow-in on entry, borrow-out on exit
 *   sum           - __m128i lvalue receiving the difference first - second
 *
 * The temporaries carry an sbb128_ prefix and trailing underscore so that
 * they cannot capture caller arguments with short names such as x, y or
 * rslt.
 */
#define sbb128(first, second, carry, sum) \
do { \
  union {__m128i bitblock; \
         uint64_t int64[2];} sbb128_a_, sbb128_b_, sbb128_r_; \
\
  sbb128_a_.bitblock = (first); \
  sbb128_b_.bitblock = (second); \
\
  double_int64_sbb(sbb128_a_.int64[0], sbb128_a_.int64[1], \
                   sbb128_b_.int64[0], sbb128_b_.int64[1], \
                   sbb128_r_.int64[0], sbb128_r_.int64[1], carry); \
  (sum) = sbb128_r_.bitblock; \
} while(0)
95
96#endif
97
98#if (CARRY_STRATEGY == SIMD_CARRY)
99
100#typedef SIMD_type CarryType;
101
102#define Carry0 simd_const_1(0)
103
/* adc128(x, y, carry, sum) -- SIMD_CARRY strategy
 *
 * 128-bit addition with carry built from 64-bit lane additions.
 *   x, y  - SIMD_type operands (each evaluated exactly once)
 *   carry - CarryType lvalue: carry-in on entry, carry-out on exit
 *   sum   - SIMD_type lvalue receiving the result
 *
 * Method: gen marks bit positions that generate a carry, prop those that
 * propagate one.  A lane carries out when bit 63 of
 * (gen | (prop & ~lane_sum)) is set.  The carry out of the low lane is
 * moved to the bottom of the high lane and added in; the carry out of the
 * high lane becomes the new carry.
 * NOTE(review): this reading assumes simd_andc(a, b) is a & ~b and that the
 * sisd_ shifts act on the whole 128-bit register -- confirm in sse_simd.h.
 *
 * Temporaries are prefixed so they cannot capture caller arguments named
 * gen, prop, partial or c1.
 */
#define adc128(x, y, carry,  sum) \
do { \
  SIMD_type adc128_x_ = (x); \
  SIMD_type adc128_y_ = (y); \
  SIMD_type adc128_gen_ = simd_and(adc128_x_, adc128_y_); \
  SIMD_type adc128_prop_ = simd_or(adc128_x_, adc128_y_); \
  SIMD_type adc128_partial_ = simd_add_64(simd_add_64(adc128_x_, adc128_y_), carry); \
  SIMD_type adc128_c1_ = sisd_slli(simd_srli_64(simd_or(adc128_gen_, simd_andc(adc128_prop_, adc128_partial_)), 63), 64); \
  SIMD_type adc128_sum_ = simd_add_64(adc128_c1_, adc128_partial_); \
  (sum) = adc128_sum_; \
  (carry) = sisd_srli(simd_or(adc128_gen_, simd_andc(adc128_prop_, adc128_sum_)), 127); \
} while(0)
113
114
/* sbb128(x, y, borrow, difference) -- SIMD_CARRY strategy
 *
 * 128-bit subtraction with borrow (x - y - borrow) built from 64-bit lane
 * subtractions.
 *   x, y       - SIMD_type operands (each evaluated exactly once)
 *   borrow     - CarryType lvalue: borrow-in on entry, borrow-out on exit
 *   difference - SIMD_type lvalue receiving the result
 *
 * Method: gen marks bit positions that generate a borrow, prop those that
 * propagate one.  A lane borrows out when bit 63 of
 * (gen | (prop & lane_difference)) is set.  The borrow out of the low lane
 * is moved to the bottom of the high lane and subtracted; the borrow out
 * of the high lane becomes the new borrow.
 * NOTE(review): this reading assumes simd_andc(a, b) is a & ~b and that the
 * sisd_ shifts act on the whole 128-bit register -- confirm in sse_simd.h.
 *
 * Temporaries are prefixed so they cannot capture caller arguments named
 * gen, prop, partial or b1.
 */
#define sbb128(x, y, borrow, difference) \
do { \
  SIMD_type sbb128_x_ = (x); \
  SIMD_type sbb128_y_ = (y); \
  SIMD_type sbb128_gen_ = simd_andc(sbb128_y_, sbb128_x_); \
  SIMD_type sbb128_prop_ = simd_not(simd_xor(sbb128_x_, sbb128_y_)); \
  SIMD_type sbb128_partial_ = simd_sub_64(simd_sub_64(sbb128_x_, sbb128_y_), borrow); \
  SIMD_type sbb128_b1_ = sisd_slli(simd_srli_64(simd_or(sbb128_gen_, simd_and(sbb128_prop_, sbb128_partial_)), 63), 64); \
  SIMD_type sbb128_diff_ = simd_sub_64(sbb128_partial_, sbb128_b1_); \
  (difference) = sbb128_diff_; \
  (borrow) = sisd_srli(simd_or(sbb128_gen_, simd_and(sbb128_prop_, sbb128_diff_)), 127); \
} while(0)
124
125
/* advance_with_carry(cursor, carry, rslt) -- SIMD_CARRY strategy
 *
 * Doubles each 64-bit lane of cursor (cursor + cursor) and ORs in the bits
 * that have to enter at the bottom of the lanes.
 *   cursor - SIMD_type operand (evaluated exactly once)
 *   carry  - CarryType lvalue: carry-in on entry, carry-out on exit
 *   rslt   - SIMD_type lvalue receiving the result
 *
 * shift_out holds the top bit of each lane moved down to the lane's bit 0.
 * low_bits combines shift_out with the incoming carry via simd_mergel_64;
 * presumably this places the incoming carry in the low lane and the low
 * lane's shifted-out bit in the high lane -- TODO confirm the operand
 * order of simd_mergel_64 in sse_simd.h.
 * The incoming carry must be consumed (low_bits) before carry is
 * overwritten with the bit shifted out of the high lane.
 *
 * Temporaries are prefixed so they cannot capture caller arguments named
 * shift_out or low_bits.
 */
#define advance_with_carry(cursor, carry, rslt) \
do { \
  SIMD_type advance_cursor_ = (cursor); \
  SIMD_type advance_shift_out_ = simd_srli_64(advance_cursor_, 63); \
  SIMD_type advance_low_bits_ = simd_mergel_64(advance_shift_out_, carry); \
  (carry) = sisd_srli(advance_shift_out_, 64); \
  (rslt) = simd_or(simd_add_64(advance_cursor_, advance_cursor_), advance_low_bits_); \
} while(0)
133
134#endif
135#endif
136
137
Note: See TracBrowser for help on using the repository browser.