source: proto/parabix2/compiled/block_carry.h @ 459

Last change on this file since 459 was 459, checked in by cameron, 9 years ago

carry_or defined, updates for MSVC

File size: 6.3 KB
Line 
1/*  Block Addition, Subtraction and Shifts with Carry
2    Copyright (C) 2010, Robert D. Cameron
3    Licensed to the public under the Open Software License 3.0.
4    Licensed to International Characters Inc.
5       under the Academic Free License version 3.0.
6
This file defines addition, subtraction and shift operations on
128-bit blocks.   Different versions of the operations are
selectable with the CARRY_STRATEGY preprocessor constant.
10
11Each implementation defines the following "abstract data type"
12for block operations with carry.
13
14Typename:   CarryType
15Constant:   Carry0  represents a value of 0 for the carry bit.
16Predicate:  test_carry(x) returns nonzero if a carry bit is 1, 0 otherwise.
17Function:   carry_or(carry1, carry2) forms the logical or of two carries.
Function:   adc128(x, y, carry, sum) computes (carry, sum) = x + y + carry.
19Function:   advance_with_carry(cursor, carry, rslt)
20                 computes (carry, rslt) = cursor + cursor + carry
Function:   sbb128(x, y, borrow, diff)
                 computes (borrow, diff) = x - y - borrow
23
24*/
25#ifndef BLOCK_CARRY_H
26#define BLOCK_CARRY_H
27
28
29
30
31
32/*------------------------------------------------------------*/
33#include "sse_simd.h"
34
/* Identifiers for the available carry-handling strategies. */
#define ADC64 1
#define SIMD_CARRY 2
#define ADC64_SAHF 3

/* Strategy used by this build.  ADC64_SAHF is the default; define
   CARRY_STRATEGY before this header is included (for example with
   -DCARRY_STRATEGY=SIMD_CARRY) to select a different one. */
#ifndef CARRY_STRATEGY
#define CARRY_STRATEGY ADC64_SAHF
#endif
40
41#if (CARRY_STRATEGY == ADC64)
/* Carry representation for the ADC64 strategy: a 64-bit integer. */
typedef uint64_t CarryType;

/* The carry value that stands for "no carry". */
#define Carry0 0

/* Nonzero when x holds a set carry, 0 otherwise. */
#define test_carry(x) ((x) > 0)

/* Logical OR of two carries.  Both arguments are parenthesized so that
   an argument containing a lower-precedence operator (for example a
   conditional expression) is combined as a unit. */
#define carry_or(carry1, carry2) ((carry1) | (carry2))
49
/* double_int64_adc(x1, x2, y1, y2, rslt1, rslt2, carry)
   Two-word add with carry: (carry, rslt2:rslt1) = x2:x1 + y2:y1 + carry,
   where the "1" operands are the low words and the "2" operands the
   high words.  carry is read as "nonzero means set" and written back
   as exactly 0 or 1.

   The incoming carry is moved into the CPU carry flag with NEG (which
   sets CF exactly when its operand is nonzero) instead of being added
   into y1: adding it into y1 modified an input-only operand and lost
   the carry whenever y1 + carry wrapped around.

   All outputs are earlyclobber ("&") because they are written before
   the remaining inputs have been read, so they must not share a
   register with y1 or y2. */
#define double_int64_adc(x1, x2, y1, y2, rslt1, rslt2, carry) \
  __asm__  ("neg %[carryflag]\n\t" \
        "adc %[e1], %[z1]\n\t" \
        "adc %[e2], %[z2]\n\t" \
        "mov $0, %[carryflag]\n\t" \
        "adc $0, %[carryflag]\n\t" \
        : [z1] "=&r" (rslt1), [z2] "=&r" (rslt2), [carryflag] "=&r" (carry) \
        : "[z1]" (x1), "[z2]" (x2), \
           [e1] "r" (y1), [e2] "r" (y2), \
           "[carryflag]" (carry) \
        : "cc")
61
/* adc128(first, second, carry, sum)
   128-bit add with carry: (carry, sum) = first + second + carry.
   first and second are __m128i values, sum is an __m128i lvalue and
   carry is a CarryType lvalue that is both read and updated.

   Each block is viewed as two 64-bit words through a union; word 0 is
   handed to double_int64_adc as the low word and word 1 as the high
   word.

   The temporaries carry an adc128_ prefix so that a caller whose own
   variables are called x, y or rslt is not silently captured by the
   macro's locals. */
#define adc128(first, second, carry, sum) \
do\
{\
  union {__m128i bitblock;\
         uint64_t int64[2];} adc128_lhs_, adc128_rhs_, adc128_sum_;\
\
  adc128_lhs_.bitblock = (first);\
  adc128_rhs_.bitblock = (second);\
\
  double_int64_adc(adc128_lhs_.int64[0], adc128_lhs_.int64[1], \
                   adc128_rhs_.int64[0], adc128_rhs_.int64[1], \
                   adc128_sum_.int64[0], adc128_sum_.int64[1], carry);\
  sum = adc128_sum_.bitblock;\
}while(0)
80
81
82
/* advance_with_carry(cursor, carry, rslt)
   (carry, rslt) = cursor + cursor + carry, obtained by adding the
   cursor block to itself with adc128. */
#define advance_with_carry(cursor, carry, rslt) adc128(cursor, cursor, carry, rslt)
85
86
/* double_int64_sbb(x1, x2, y1, y2, rslt1, rslt2, carry)
   Two-word subtract with borrow:
   (carry, rslt2:rslt1) = x2:x1 - y2:y1 - carry,
   where the "1" operands are the low words and the "2" operands the
   high words.  carry is read as "nonzero means set" and written back
   as exactly 0 or 1, the same representation double_int64_adc uses in
   this strategy.

   The borrow is moved into the CPU carry flag with NEG and read back
   with MOV/ADC rather than with SAHF/LAHF: LAHF stores a flags image
   whose bit 1 is always set, so the stored borrow was never zero and
   this strategy's test_carry(x), defined as ((x) > 0), reported a
   borrow after every subtraction.

   All outputs are earlyclobber ("&") because they are written before
   the remaining inputs have been read. */
#define double_int64_sbb(x1, x2, y1, y2, rslt1, rslt2, carry) \
  __asm__  ("neg %[carryflag]\n\t" \
        "sbb %[e1], %[z1]\n\t" \
        "sbb %[e2], %[z2]\n\t" \
        "mov $0, %[carryflag]\n\t" \
        "adc $0, %[carryflag]\n\t" \
     : [z1] "=&r" (rslt1), [z2] "=&r" (rslt2), [carryflag] "=&r" (carry) \
         : "[z1]" (x1), "[z2]" (x2), \
           [e1] "r" (y1), [e2] "r" (y2), \
           "[carryflag]" (carry) \
         : "cc")
97
/* sbb128(first, second, borrow, diff)
   128-bit subtract with borrow: (borrow, diff) = first - second - borrow.
   first and second are __m128i values, diff is an __m128i lvalue and
   borrow is a CarryType lvalue that is both read and updated.

   Each block is viewed as two 64-bit words through a union; word 0 is
   handed to double_int64_sbb as the low word and word 1 as the high
   word.

   The temporaries carry an sbb128_ prefix so that a caller whose own
   variables are called x, y or rslt is not silently captured by the
   macro's locals. */
#define sbb128(first, second, borrow, diff) \
do\
{ union {__m128i bitblock;\
         uint64_t int64[2];} sbb128_lhs_, sbb128_rhs_, sbb128_diff_;\
\
  sbb128_lhs_.bitblock = (first);\
  sbb128_rhs_.bitblock = (second);\
\
  double_int64_sbb(sbb128_lhs_.int64[0], sbb128_lhs_.int64[1], \
                   sbb128_rhs_.int64[0], sbb128_rhs_.int64[1], \
                   sbb128_diff_.int64[0], sbb128_diff_.int64[1], borrow);\
  diff = sbb128_diff_.bitblock;\
}while(0)
116
117#endif
118
119#if (CARRY_STRATEGY == ADC64_SAHF)
/* Carry representation for the ADC64_SAHF strategy: a 64-bit integer
   whose bit 8 (value 256) is the carry bit examined by test_carry. */
typedef uint64_t CarryType;

/* The carry value that stands for "no carry". */
#define Carry0 0

/* Nonzero when bit 8 of x is set, 0 otherwise. */
#define test_carry(x) (((x)&256) > 0)

/* Logical OR of two carries.  Both arguments are parenthesized so that
   an argument containing a lower-precedence operator (for example a
   conditional expression) is combined as a unit. */
#define carry_or(carry1, carry2) ((carry1) | (carry2))
127
/* double_int64_adc(x1, x2, y1, y2, rslt1, rslt2, carry)
   Two-word add with carry: (carry, rslt2:rslt1) = x2:x1 + y2:y1 + carry,
   where the "1" operands are the low words and the "2" operands the
   high words.

   carry lives in the A register.  SAHF loads the CPU flags from AH
   before the additions and LAHF stores them back into AH afterwards,
   so the carry flag travels in bit 8 of carry; the other bits of the
   stored value are a flags image and carry no meaning here.

   rslt1 is earlyclobber ("&"): it is written by the first ADC while y2
   has not been read yet, so it must not share a register with y2. */
#define double_int64_adc(x1, x2, y1, y2, rslt1, rslt2, carry) \
  __asm__  ("sahf\n\t" \
        "adc %[e1], %[z1]\n\t" \
        "adc %[e2], %[z2]\n\t" \
        "lahf\n\t" \
     : [z1] "=&r" (rslt1), [z2] "=r" (rslt2), [carryflag] "=a" (carry) \
         : "[z1]" (x1), "[z2]" (x2), \
           [e1] "r" (y1), [e2] "r" (y2), \
           "[carryflag]" (carry) \
         : "cc")
138
/* adc128(first, second, carry, sum)
   128-bit add with carry: (carry, sum) = first + second + carry.
   first and second are __m128i values, sum is an __m128i lvalue and
   carry is a CarryType lvalue that is both read and updated.

   Each block is viewed as two 64-bit words through a union; word 0 is
   handed to double_int64_adc as the low word and word 1 as the high
   word.

   The temporaries carry an adc128_ prefix so that a caller whose own
   variables are called x, y or rslt is not silently captured by the
   macro's locals. */
#define adc128(first, second, carry, sum) \
do\
{\
  union {__m128i bitblock;\
         uint64_t int64[2];} adc128_lhs_, adc128_rhs_, adc128_sum_;\
\
  adc128_lhs_.bitblock = (first);\
  adc128_rhs_.bitblock = (second);\
\
  double_int64_adc(adc128_lhs_.int64[0], adc128_lhs_.int64[1], \
                   adc128_rhs_.int64[0], adc128_rhs_.int64[1], \
                   adc128_sum_.int64[0], adc128_sum_.int64[1], carry);\
  sum = adc128_sum_.bitblock;\
}while(0)
157
158
159
/* advance_with_carry(cursor, carry, rslt)
   (carry, rslt) = cursor + cursor + carry, obtained by adding the
   cursor block to itself with adc128. */
#define advance_with_carry(cursor, carry, rslt) adc128(cursor, cursor, carry, rslt)
162
163
/* double_int64_sbb(x1, x2, y1, y2, rslt1, rslt2, carry)
   Two-word subtract with borrow:
   (carry, rslt2:rslt1) = x2:x1 - y2:y1 - carry,
   where the "1" operands are the low words and the "2" operands the
   high words.

   carry lives in the A register.  SAHF loads the CPU flags from AH
   before the subtractions and LAHF stores them back into AH
   afterwards, so the borrow travels in bit 8 of carry; the other bits
   of the stored value are a flags image and carry no meaning here.

   rslt1 is earlyclobber ("&"): it is written by the first SBB while y2
   has not been read yet, so it must not share a register with y2. */
#define double_int64_sbb(x1, x2, y1, y2, rslt1, rslt2, carry) \
  __asm__  ("sahf\n\t" \
        "sbb %[e1], %[z1]\n\t" \
        "sbb %[e2], %[z2]\n\t" \
        "lahf\n\t" \
     : [z1] "=&r" (rslt1), [z2] "=r" (rslt2), [carryflag] "=a" (carry) \
         : "[z1]" (x1), "[z2]" (x2), \
           [e1] "r" (y1), [e2] "r" (y2), \
           "[carryflag]" (carry) \
         : "cc")
174
/* sbb128(first, second, borrow, diff)
   128-bit subtract with borrow: (borrow, diff) = first - second - borrow.
   first and second are __m128i values, diff is an __m128i lvalue and
   borrow is a CarryType lvalue that is both read and updated.

   Each block is viewed as two 64-bit words through a union; word 0 is
   handed to double_int64_sbb as the low word and word 1 as the high
   word.

   The temporaries carry an sbb128_ prefix so that a caller whose own
   variables are called x, y or rslt is not silently captured by the
   macro's locals. */
#define sbb128(first, second, borrow, diff) \
do\
{ union {__m128i bitblock;\
         uint64_t int64[2];} sbb128_lhs_, sbb128_rhs_, sbb128_diff_;\
\
  sbb128_lhs_.bitblock = (first);\
  sbb128_rhs_.bitblock = (second);\
\
  double_int64_sbb(sbb128_lhs_.int64[0], sbb128_lhs_.int64[1], \
                   sbb128_rhs_.int64[0], sbb128_rhs_.int64[1], \
                   sbb128_diff_.int64[0], sbb128_diff_.int64[1], borrow);\
  diff = sbb128_diff_.bitblock;\
}while(0)
193
194#endif
195
196
197
198#if (CARRY_STRATEGY == SIMD_CARRY)
199
200typedef SIMD_type CarryType;
201
202#define Carry0 simd_const_1(0)
203
204#define test_carry(x) bitblock_has_bit(x)
205
206#define carry_or(carry1, carry2) simd_or(carry1, carry2)
207
/* adc128(x, y, carry, sum)
   128-bit add with carry built from 64-bit SIMD additions:
   (carry, sum) = x + y + carry.  sum and carry must be lvalues; carry
   is both read and updated.

   x and y are copied into locals first so that each argument
   expression is evaluated exactly once, and every temporary carries an
   adc128_ prefix so that caller variables with short names such as
   gen or prop cannot be captured.

   Outline (relies on the simd_ and sisd_ helpers behaving as their names
   suggest -- NOTE(review): confirm against sse_simd.h):
     gen      positions where both inputs have a 1 bit
     prop     positions where at least one input has a 1 bit
     partial  per-64-bit-field sum of x, y and the incoming carry
     mid      carry out of the low field, moved to the high field
     sum      partial with that middle carry added in
     carry    carry out of the top bit, moved down to bit 0 */
#define adc128(x, y, carry,  sum) \
do{ \
  SIMD_type adc128_x_ = (x); \
  SIMD_type adc128_y_ = (y); \
  SIMD_type adc128_gen_ = simd_and(adc128_x_, adc128_y_); \
  SIMD_type adc128_prop_ = simd_or(adc128_x_, adc128_y_); \
  SIMD_type adc128_partial_ = simd_add_64(simd_add_64(adc128_x_, adc128_y_), carry); \
  SIMD_type adc128_mid_ = sisd_slli(simd_srli_64(simd_or(adc128_gen_, simd_andc(adc128_prop_, adc128_partial_)), 63), 64); \
  sum = simd_add_64(adc128_mid_, adc128_partial_); \
  carry = sisd_srli(simd_or(adc128_gen_, simd_andc(adc128_prop_, sum)), 127); \
} while(0)
217
218
/* sbb128(x, y, borrow, difference)
   128-bit subtract with borrow built from 64-bit SIMD subtractions:
   (borrow, difference) = x - y - borrow.  difference and borrow must
   be lvalues; borrow is both read and updated.

   x and y are copied into locals first so that each argument
   expression is evaluated exactly once, and every temporary carries an
   sbb128_ prefix so that caller variables with short names such as
   gen or prop cannot be captured.

   Outline (relies on the simd_ and sisd_ helpers behaving as their names
   suggest -- NOTE(review): confirm against sse_simd.h):
     gen         positions where y has a 1 bit and x does not
     prop        positions where x and y agree
     partial     per-64-bit-field x - y - incoming borrow
     mid         borrow out of the low field, moved to the high field
     difference  partial with that middle borrow subtracted
     borrow      borrow out of the top bit, moved down to bit 0 */
#define sbb128(x, y, borrow, difference) \
do {\
  SIMD_type sbb128_x_ = (x); \
  SIMD_type sbb128_y_ = (y); \
  SIMD_type sbb128_gen_ = simd_andc(sbb128_y_, sbb128_x_); \
  SIMD_type sbb128_prop_ = simd_not(simd_xor(sbb128_x_, sbb128_y_)); \
  SIMD_type sbb128_partial_ = simd_sub_64(simd_sub_64(sbb128_x_, sbb128_y_), borrow); \
  SIMD_type sbb128_mid_ = sisd_slli(simd_srli_64(simd_or(sbb128_gen_, simd_and(sbb128_prop_, sbb128_partial_)), 63), 64); \
  difference = simd_sub_64(sbb128_partial_, sbb128_mid_); \
  borrow = sisd_srli(simd_or(sbb128_gen_, simd_and(sbb128_prop_, difference)), 127); \
}while(0)
228
229
/* advance_with_carry(cursor, carry, rslt)
   (carry, rslt) = cursor + cursor + carry.  rslt and carry must be
   lvalues; carry is both read and updated.

   cursor is copied into a local first so that the argument expression
   is evaluated exactly once, and every temporary carries an awc_
   prefix so that caller variables cannot be captured.

   Outline (relies on the simd_ and sisd_ helpers behaving as their names
   suggest -- NOTE(review): confirm against sse_simd.h):
     shift_out  top bit of each 64-bit field, moved to that field's bit 0
     low_bits   the bits entering the bottom of each field: the incoming
                carry for the low field, the low field's shift_out for
                the high field
     carry      the high field's shift_out, moved to the low field
     rslt       each field doubled, with low_bits merged in */
#define advance_with_carry(cursor, carry, rslt)\
do{\
  SIMD_type awc_cursor_ = (cursor);\
  SIMD_type awc_shift_out_ = simd_srli_64(awc_cursor_, 63);\
  SIMD_type awc_low_bits_ = simd_mergel_64(awc_shift_out_, carry);\
  carry = sisd_srli(awc_shift_out_, 64);\
  rslt = simd_or(simd_add_64(awc_cursor_, awc_cursor_), awc_low_bits_);\
}while(0)
237
238#endif
239#endif
240
241
Note: See TracBrowser for help on using the repository browser.