source: trunk/lib/block_carry.h @ 531

Last change on this file since 531 was 473, checked in by ksherdy, 9 years ago

Fixed int64_adc for llvm-g++.

File size: 7.1 KB
Line 
/*  Block Addition, Subtraction and Shifts with Carry
    Copyright (C) 2010, Robert D. Cameron
    Licensed to the public under the Open Software License 3.0.
    Licensed to International Characters Inc.
       under the Academic Free License version 3.0.

This file defines addition, subtraction and shift operations on
128-bit blocks.   Different versions of the operations are
selectable with the CARRY_STRATEGY preprocessor constant.

Each implementation defines the following "abstract data type"
for block operations with carry.

Typename:   CarryType
Constant:   Carry0  represents a value of 0 for the carry bit.
Predicate:  test_carry(x) returns nonzero if a carry bit is 1, 0 otherwise.
Function:   carry_or(carry1, carry2) forms the logical or of two carries.
Function:   adc128(x, y, carry, sum) computes (carry, sum) = x + y + carry.
Function:   advance_with_carry(cursor, carry, rslt)
                 computes (carry, rslt) = cursor + cursor + carry
Function:   sbb128(x, y, borrow, diff)
                 computes (borrow, diff) = x - y - borrow

*/
25#ifndef BLOCK_CARRY_H
26#define BLOCK_CARRY_H
27
28
29
30
31
32/*------------------------------------------------------------*/
33#include "sse_simd.h"
34
/* Identifiers for the available carry implementations. */
#define SIMD_CARRY_STRATEGY 1
#define ADC64_STRATEGY 2
#define ADC64_SAHF_STRATEGY 3

/* Select the implementation.
   A CARRY_STRATEGY supplied by the includer (for example
   -DCARRY_STRATEGY=ADC64_STRATEGY) is honoured.  Otherwise the choice is
   derived from the feature macros:
     ADC64 and SAHFLAHF defined  -> ADC64_SAHF_STRATEGY
     only ADC64 defined          -> ADC64_STRATEGY
     neither                     -> SIMD_CARRY_STRATEGY                    */
#ifndef CARRY_STRATEGY
#if defined(ADC64) && defined(SAHFLAHF)
#define CARRY_STRATEGY ADC64_SAHF_STRATEGY
#elif defined(ADC64)
#define CARRY_STRATEGY ADC64_STRATEGY
#else
#define CARRY_STRATEGY SIMD_CARRY_STRATEGY
#endif
#endif

/* Reject values that would leave CarryType and the operations undefined. */
#if (CARRY_STRATEGY != SIMD_CARRY_STRATEGY) && \
    (CARRY_STRATEGY != ADC64_STRATEGY) && \
    (CARRY_STRATEGY != ADC64_SAHF_STRATEGY)
#error "CARRY_STRATEGY must be SIMD_CARRY_STRATEGY, ADC64_STRATEGY or ADC64_SAHF_STRATEGY"
#endif
48
49#if (CARRY_STRATEGY == ADC64_STRATEGY)
/* Carry abstract data type for the ADC64 strategy.
   A carry is a plain 64-bit integer; zero means "no carry". */
typedef uint64_t CarryType;

/* The carry value representing 0. */
#define Carry0 0

/* Nonzero when the carry x is set, 0 otherwise. */
#define test_carry(x) ((x) > 0)

/* Bitwise OR of two carries.  Each argument is parenthesized so that an
   argument containing a lower-precedence operator (such as ?:) is combined
   as a unit instead of being re-associated with the |. */
#define carry_or(carry1, carry2) ((carry1) | (carry2))
57
/* double_int64_adc(x1, x2, y1, y2, rslt1, rslt2, carry)
   128-bit add with carry built from two 64-bit adc instructions (x86-64).

   x1/y1 are the low 64-bit words and x2/y2 the high words of the two
   addends; rslt1/rslt2 receive the low/high words of the sum.  carry is
   read as the carry-in (only bit 0 is examined, so it must be 0 or 1) and
   is overwritten with the carry-out, 0 or 1.

   The carry-in is moved into CF with bt, which changes no register, so the
   first adc adds x1 + y1 + carry in a single step.  Adding the carry to the
   low word with a separate add would lose the wrap-around that occurs when
   y1 is all ones and carry is 1.

   The low result is marked early-clobber because it is written before the
   high word of x is read. */
#define double_int64_adc(x1, x2, y1, y2, rslt1, rslt2, carry) \
   __asm__ __volatile__ ("bt $0, %[carryin]\n\t" \
         "adc %[e1], %[z1]\n\t" \
         "adc %[e2], %[z2]\n\t" \
         "mov $0, %[carryout]\n\t" \
         "adc $0, %[carryout]\n\t" \
         : [z1] "=&a" (rslt1), [z2] "=r" (rslt2), [carryout] "=r" (carry) \
         : "0" (y1), "1" (y2), [carryin] "2" (carry), \
                   [e1] "b" (x1), [e2] "r" (x2)\
         : "cc")
68
/* adc128(first, second, carry, sum)
   Computes (carry, sum) = first + second + carry on 128-bit blocks.

   first and second are __m128i values; each is copied into a union so its
   two 64-bit halves can be handed to double_int64_adc (int64[0] first,
   then int64[1]).  carry is both the carry-in and the carry-out; sum
   receives the 128-bit result.

   The temporaries carry an adc128_ prefix so that they cannot shadow the
   caller's own variables: with short names such as x, y or rslt, a call
   like adc128(x, y, carry, sum) would refer to the macro's local instead
   of the caller's argument. */
#define adc128(first, second, carry, sum) \
do\
{\
  union {__m128i bitblock;\
         uint64_t int64[2];} adc128_x_, adc128_y_, adc128_sum_;\
\
  adc128_x_.bitblock = (first);\
  adc128_y_.bitblock = (second);\
\
  double_int64_adc(adc128_x_.int64[0], adc128_x_.int64[1], \
                   adc128_y_.int64[0], adc128_y_.int64[1], \
                   adc128_sum_.int64[0], adc128_sum_.int64[1], carry);\
  sum = adc128_sum_.bitblock;\
}while(0)
87
88
89
/* advance_with_carry(cursor, carry, rslt)
   Computes (carry, rslt) = cursor + cursor + carry by passing cursor to
   adc128 as both addends. */
#define advance_with_carry(cursor, carry, rslt) \
  adc128(cursor, cursor, carry, rslt)
92
93
/* double_int64_sbb(x1, x2, y1, y2, rslt1, rslt2, carry)
   128-bit subtract with borrow built from two 64-bit sbb instructions
   (x86-64): (carry, rslt2:rslt1) = x2:x1 - y2:y1 - carry.

   x1/y1 are the low words and x2/y2 the high words; rslt1/rslt2 receive
   the low/high words of the difference.  carry is read as the borrow-in
   (only bit 0 is examined, so it must be 0 or 1) and is overwritten with
   the borrow-out, 0 or 1.

   This strategy represents carries as 0/1 integers tested with (x) > 0, so
   the borrow is moved into CF with bt and read back with mov/adc.  A
   sahf/lahf pair is not usable here: sahf takes CF from bit 8 of the
   register, and the value stored by lahf is never zero.

   The low result is marked early-clobber because it is written before the
   high word of y is read. */
#define double_int64_sbb(x1, x2, y1, y2, rslt1, rslt2, carry) \
  __asm__  ("bt $0, %[borrowin]\n\t" \
        "sbb %[e1], %[z1]\n\t" \
        "sbb %[e2], %[z2]\n\t" \
        "mov $0, %[borrowout]\n\t" \
        "adc $0, %[borrowout]\n\t" \
     : [z1] "=&r" (rslt1), [z2] "=r" (rslt2), [borrowout] "=r" (carry) \
         : "0" (x1), "1" (x2), [borrowin] "2" (carry), \
           [e1] "r" (y1), [e2] "r" (y2) \
         : "cc")
104
/* sbb128(first, second, borrow, diff)
   Computes (borrow, diff) = first - second - borrow on 128-bit blocks.

   first and second are __m128i values; each is copied into a union so its
   two 64-bit halves can be handed to double_int64_sbb (int64[0] first,
   then int64[1]).  borrow is both the borrow-in and the borrow-out; diff
   receives the 128-bit result.

   The temporaries carry an sbb128_ prefix so that they cannot shadow the
   caller's own variables: with short names such as x, y or rslt, a call
   like sbb128(x, y, borrow, diff) would refer to the macro's local instead
   of the caller's argument. */
#define sbb128(first, second, borrow, diff) \
do\
{\
  union {__m128i bitblock;\
         uint64_t int64[2];} sbb128_x_, sbb128_y_, sbb128_diff_;\
\
  sbb128_x_.bitblock = (first);\
  sbb128_y_.bitblock = (second);\
\
  double_int64_sbb(sbb128_x_.int64[0], sbb128_x_.int64[1], \
                   sbb128_y_.int64[0], sbb128_y_.int64[1], \
                   sbb128_diff_.int64[0], sbb128_diff_.int64[1], borrow);\
  diff = sbb128_diff_.bitblock;\
}while(0)
123
124#endif
125
126#if (CARRY_STRATEGY == ADC64_SAHF_STRATEGY)
/* Carry abstract data type for the ADC64 + SAHF/LAHF strategy.
   A carry is a 64-bit integer whose bit 8 (mask 256) is the carry bit;
   the asm helpers of this strategy exchange it with the flags through
   sahf/lahf on the "a" register. */
typedef uint64_t CarryType;

/* The carry value representing 0. */
#define Carry0 0

/* Nonzero when bit 8 of x is set, 0 otherwise; other bits are ignored. */
#define test_carry(x) (((x)&256) > 0)

/* Bitwise OR of two carries.  Each argument is parenthesized so that an
   argument containing a lower-precedence operator (such as ?:) is combined
   as a unit instead of being re-associated with the |. */
#define carry_or(carry1, carry2) ((carry1) | (carry2))
134
/* double_int64_adc(x1, x2, y1, y2, rslt1, rslt2, carry)
   128-bit add with carry for the SAHF/LAHF strategy (x86-64).

   sahf loads the flags from the AH byte of carry, the two adc instructions
   add y1 into x1 and then y2 into x2 with the carry chained between them,
   and lahf stores the resulting flags back into the AH byte of carry.
   rslt1/rslt2 receive the low/high words of the sum. */
#define double_int64_adc(x1, x2, y1, y2, rslt1, rslt2, carry) \
  __asm__ ( \
      "sahf\n\t" \
      "adc %[add_lo], %[sum_lo]\n\t" \
      "adc %[add_hi], %[sum_hi]\n\t" \
      "lahf\n\t" \
      : [sum_lo] "=r" (rslt1), \
        [sum_hi] "=r" (rslt2), \
        [flag_image] "=a" (carry) \
      : "[sum_lo]" (x1), \
        "[sum_hi]" (x2), \
        [add_lo] "r" (y1), \
        [add_hi] "r" (y2), \
        "[flag_image]" (carry) \
      : "cc")
145
/* adc128(first, second, carry, sum)
   Computes (carry, sum) = first + second + carry on 128-bit blocks.

   first and second are __m128i values; each is copied into a union so its
   two 64-bit halves can be handed to double_int64_adc (int64[0] first,
   then int64[1]).  carry is both the carry-in and the carry-out; sum
   receives the 128-bit result.

   The temporaries carry an adc128_ prefix so that they cannot shadow the
   caller's own variables: with short names such as x, y or rslt, a call
   like adc128(x, y, carry, sum) would refer to the macro's local instead
   of the caller's argument. */
#define adc128(first, second, carry, sum) \
do\
{\
  union {__m128i bitblock;\
         uint64_t int64[2];} adc128_x_, adc128_y_, adc128_sum_;\
\
  adc128_x_.bitblock = (first);\
  adc128_y_.bitblock = (second);\
\
  double_int64_adc(adc128_x_.int64[0], adc128_x_.int64[1], \
                   adc128_y_.int64[0], adc128_y_.int64[1], \
                   adc128_sum_.int64[0], adc128_sum_.int64[1], carry);\
  sum = adc128_sum_.bitblock;\
}while(0)
164
165
166
/* double_int64_advance(x1, x2, rslt1, rslt2, carry)
   128-bit "x + x + carry" for the SAHF/LAHF strategy (x86-64).

   sahf loads the flags from the AH byte of carry, each adc adds a word to
   itself with the carry chained from the low word (x1) to the high word
   (x2), and lahf stores the resulting flags back into the AH byte of
   carry.  rslt1/rslt2 receive the low/high words of the result. */
#define double_int64_advance(x1, x2, rslt1, rslt2, carry) \
  __asm__ ( \
      "sahf\n\t" \
      "adc %[word_lo], %[word_lo]\n\t" \
      "adc %[word_hi], %[word_hi]\n\t" \
      "lahf\n\t" \
      : [word_lo] "=r" (rslt1), \
        [word_hi] "=r" (rslt2), \
        [flag_image] "=a" (carry) \
      : "[word_lo]" (x1), \
        "[word_hi]" (x2), \
        "[flag_image]" (carry) \
      : "cc")
176
177
/* advance_with_carry(cursor, carry, rslt)
   Computes (carry, rslt) = cursor + cursor + carry on a 128-bit block.

   cursor is an __m128i value; it is copied into a union so its two 64-bit
   halves can be handed to double_int64_advance (int64[0] first, then
   int64[1]).  carry is both the carry-in and the carry-out; rslt receives
   the 128-bit result.

   The temporaries carry an advance_ prefix so that they cannot shadow the
   caller's own variables: with short names such as x or z, a call like
   advance_with_carry(x, carry, z) would refer to the macro's local instead
   of the caller's argument. */
#define advance_with_carry(cursor, carry, rslt)\
do\
{\
  union {__m128i bitblock;\
         uint64_t int64[2];} advance_in_, advance_out_;\
\
  advance_in_.bitblock = (cursor);\
\
  double_int64_advance(advance_in_.int64[0], advance_in_.int64[1], \
                       advance_out_.int64[0], advance_out_.int64[1], carry);\
  rslt = advance_out_.bitblock;\
}while(0)
192
193
194
195
/* double_int64_sbb(x1, x2, y1, y2, rslt1, rslt2, carry)
   128-bit subtract with borrow for the SAHF/LAHF strategy (x86-64):
   x2:x1 - y2:y1 - borrow.

   sahf loads the flags from the AH byte of carry, the two sbb instructions
   subtract y1 from x1 and then y2 from x2 with the borrow chained between
   them, and lahf stores the resulting flags back into the AH byte of
   carry.  rslt1/rslt2 receive the low/high words of the difference. */
#define double_int64_sbb(x1, x2, y1, y2, rslt1, rslt2, carry) \
  __asm__ ( \
      "sahf\n\t" \
      "sbb %[sub_lo], %[diff_lo]\n\t" \
      "sbb %[sub_hi], %[diff_hi]\n\t" \
      "lahf\n\t" \
      : [diff_lo] "=r" (rslt1), \
        [diff_hi] "=r" (rslt2), \
        [flag_image] "=a" (carry) \
      : "[diff_lo]" (x1), \
        "[diff_hi]" (x2), \
        [sub_lo] "r" (y1), \
        [sub_hi] "r" (y2), \
        "[flag_image]" (carry) \
      : "cc")
206
/* sbb128(first, second, borrow, diff)
   Computes (borrow, diff) = first - second - borrow on 128-bit blocks.

   first and second are __m128i values; each is copied into a union so its
   two 64-bit halves can be handed to double_int64_sbb (int64[0] first,
   then int64[1]).  borrow is both the borrow-in and the borrow-out; diff
   receives the 128-bit result.

   The temporaries carry an sbb128_ prefix so that they cannot shadow the
   caller's own variables: with short names such as x, y or rslt, a call
   like sbb128(x, y, borrow, diff) would refer to the macro's local instead
   of the caller's argument. */
#define sbb128(first, second, borrow, diff) \
do\
{\
  union {__m128i bitblock;\
         uint64_t int64[2];} sbb128_x_, sbb128_y_, sbb128_diff_;\
\
  sbb128_x_.bitblock = (first);\
  sbb128_y_.bitblock = (second);\
\
  double_int64_sbb(sbb128_x_.int64[0], sbb128_x_.int64[1], \
                   sbb128_y_.int64[0], sbb128_y_.int64[1], \
                   sbb128_diff_.int64[0], sbb128_diff_.int64[1], borrow);\
  diff = sbb128_diff_.bitblock;\
}while(0)
225
226#endif
227
228
229
230#if (CARRY_STRATEGY == SIMD_CARRY_STRATEGY)
231
232typedef SIMD_type CarryType;
233
234#define Carry0 simd_const_1(0)
235
236#define test_carry(x) bitblock_has_bit(x)
237
238#define carry_or(carry1, carry2) simd_or(carry1, carry2)
239
/* adc128(x, y, carry, sum)
   Computes (carry, sum) = x + y + carry using only SIMD operations.

   NOTE(review): the step descriptions assume the sse_simd.h helpers behave
   as their names suggest (simd_*_64 per 64-bit half, sisd_* on the whole
   128-bit value, simd_andc(a, b) = a AND NOT b) -- confirm there.

   1. Add the two halves independently, then add the incoming carry.
   2. From the generate (x AND y) and propagate (x OR y) masks, recover the
      carry out of bit 63 of each half and move the low half's carry up by
      64 bits.
   3. Add that carry to the partial sum to obtain sum.
   4. Recompute the carry expression on the final sum and shift bit 127
      down to bit 0 to obtain the outgoing carry. */
#define adc128(x, y, carry,  sum) \
do{ \
  SIMD_type adc_generate = simd_and(x, y); \
  SIMD_type adc_propagate = simd_or(x, y); \
  SIMD_type adc_halves = simd_add_64(x, y); \
  SIMD_type adc_partial = simd_add_64(adc_halves, carry); \
  SIMD_type adc_half_carry = \
      simd_or(adc_generate, simd_andc(adc_propagate, adc_partial)); \
  SIMD_type adc_ripple = sisd_slli(simd_srli_64(adc_half_carry, 63), 64); \
  sum = simd_add_64(adc_ripple, adc_partial); \
  carry = sisd_srli( \
      simd_or(adc_generate, simd_andc(adc_propagate, sum)), 127); \
} while(0)
249
250
/* sbb128(x, y, borrow, difference)
   Computes (borrow, difference) = x - y - borrow using only SIMD
   operations.

   NOTE(review): the step descriptions assume the sse_simd.h helpers behave
   as their names suggest (simd_*_64 per 64-bit half, sisd_* on the whole
   128-bit value, simd_andc(a, b) = a AND NOT b) -- confirm there.

   1. Subtract the two halves independently, then subtract the incoming
      borrow.
   2. From the generate (y AND NOT x) and propagate (NOT (x XOR y)) masks,
      recover the borrow out of bit 63 of each half and move the low half's
      borrow up by 64 bits.
   3. Subtract that borrow from the partial difference.
   4. Recompute the borrow expression on the final difference and shift
      bit 127 down to bit 0 to obtain the outgoing borrow. */
#define sbb128(x, y, borrow, difference) \
do {\
  SIMD_type sbb_generate = simd_andc(y, x); \
  SIMD_type sbb_propagate = simd_not(simd_xor(x, y)); \
  SIMD_type sbb_halves = simd_sub_64(x, y); \
  SIMD_type sbb_partial = simd_sub_64(sbb_halves, borrow); \
  SIMD_type sbb_half_borrow = \
      simd_or(sbb_generate, simd_and(sbb_propagate, sbb_partial)); \
  SIMD_type sbb_ripple = sisd_slli(simd_srli_64(sbb_half_borrow, 63), 64); \
  difference = simd_sub_64(sbb_partial, sbb_ripple); \
  borrow = sisd_srli( \
      simd_or(sbb_generate, simd_and(sbb_propagate, difference)), 127); \
}while(0)
260
261
/* advance_with_carry(cursor, carry, rslt)
   Computes (carry, rslt) = cursor + cursor + carry using only SIMD
   operations.

   NOTE(review): the step descriptions assume the sse_simd.h helpers behave
   as their names suggest (simd_*_64 per 64-bit half, sisd_srli on the whole
   128-bit value, simd_mergel_64 pairing the low halves of its arguments)
   -- confirm there.

   The top bit of each half is extracted first.  The incoming carry is
   combined with those bits before carry is overwritten, so the statement
   order matters.  The doubled cursor is then OR-ed with the bits shifted
   in at the bottom of each half. */
#define advance_with_carry(cursor, carry, rslt)\
do{\
  SIMD_type adv_top_bits = simd_srli_64(cursor, 63);\
  SIMD_type adv_fill_bits = simd_mergel_64(adv_top_bits, carry);\
  carry = sisd_srli(adv_top_bits, 64);\
  SIMD_type adv_doubled = simd_add_64(cursor, cursor);\
  rslt = simd_or(adv_doubled, adv_fill_bits);\
}while(0)
269
270#endif
271#endif
272
273
Note: See TracBrowser for help on using the repository browser.