source: trunk/lib/block_carry.h @ 575

Last change on this file since 575 was 534, checked in by ksherdy, 9 years ago

Add inline asm macros for clc and adc.

File size: 7.4 KB
Line 
/*  Block Addition, Subtraction and Shifts with Carry
    Copyright (C) 2010, Robert D. Cameron
    Licensed to the public under the Open Software License 3.0.
    Licensed to International Characters Inc.
       under the Academic Free License version 3.0.

This file defines addition, subtraction and shift operations on
128-bit blocks.   Different versions of the operations are
selected through the CARRY_STRATEGY preprocessor constant, which is
derived from the ADC64 and SAHFLAHF build flags.

Each implementation defines the following "abstract data type"
for block operations with carry.

Typename:   CarryType
Constant:   Carry0  represents a value of 0 for the carry bit.
Predicate:  test_carry(x) returns nonzero if a carry bit is 1, 0 otherwise.
Function:   carry_or(carry1, carry2) forms the logical or of two carries.
Function:   adc128(x, y, carry, sum)
                 computes (carry, sum) = x + y + carry
Function:   advance_with_carry(cursor, carry, rslt)
                 computes (carry, rslt) = cursor + cursor + carry
Function:   sbb128(x, y, borrow, diff)
                 computes (borrow, diff) = x - y - borrow

*/
25#ifndef BLOCK_CARRY_H
26#define BLOCK_CARRY_H
27
28
29
30
31
32/*------------------------------------------------------------*/
33#include "sse_simd.h"
34
/* Identifiers for the available carry implementations. */
#define SIMD_CARRY_STRATEGY 1
#define ADC64_STRATEGY 2
#define ADC64_SAHF_STRATEGY 3

/* Choose the implementation.  A CARRY_STRATEGY supplied by the build
   (for example -DCARRY_STRATEGY=2) is honoured as-is; otherwise it is
   derived from the ADC64 and SAHFLAHF flags:
     ADC64 and SAHFLAHF defined -> ADC64_SAHF_STRATEGY
     only ADC64 defined         -> ADC64_STRATEGY
     ADC64 not defined          -> SIMD_CARRY_STRATEGY                    */
#ifndef CARRY_STRATEGY
#if defined(ADC64) && defined(SAHFLAHF)
#define CARRY_STRATEGY ADC64_SAHF_STRATEGY
#elif defined(ADC64)
#define CARRY_STRATEGY ADC64_STRATEGY
#else
#define CARRY_STRATEGY SIMD_CARRY_STRATEGY
#endif
#endif
48
49#if (CARRY_STRATEGY == ADC64_STRATEGY)
/* ADC64 strategy: a carry is held in an ordinary 64-bit integer. */
typedef uint64_t CarryType;

/* The "no carry" value. */
#define Carry0 0

/* Nonzero when the carry x is set (any value greater than zero). */
#define test_carry(x) ((x) > 0)

/* Logical or of two carries.  Both arguments are parenthesized so that an
   argument containing a lower-precedence operator (?:, ||, &&, ...) is
   or-ed as a whole instead of being re-associated with the '|'. */
#define carry_or(carry1, carry2) ((carry1) | (carry2))
57
/* Clear the processor carry flag.
   The flags register is listed as clobbered, as in every other asm
   statement in this file.
   NOTE(review): the compiler does not promise that flags survive from one
   asm statement to the next, so a clc() cannot be relied upon to seed the
   carry of a later, separate asm statement -- confirm how callers use it. */
#define clc() \
  __asm__ __volatile__ ("clc" : : : "cc")
60
/* adc(x, y, carry, sum): add with carry on one machine word.
     sum   = low word of x + y + carry
     carry = carry out of that addition (0 or 1)
   x and y are only read; carry is read and then overwritten; sum is
   written.  x, y and sum must have the same width.

   Instruction sequence:
     neg carry      sets CF exactly when carry was nonzero
     adc y, sum     sum (preloaded with x) += y + CF, CF = carry out
     mov $0, carry  zeroes the register; mov does not alter CF
     adc $0, carry  carry = CF

   The previous version added carry into the register holding y (an
   input-only operand) and then used the overflow of that addition as the
   carry-in of the main adc, which produced a wrong sum and lost the carry
   whenever y + carry wrapped around.  Outputs are early-clobber because
   they are written before y has been read. */
#define adc(x,y,carry,sum) \
  __asm__ __volatile__ ("neg %[carryflag]\n\t" \
        "adc %[e], %[z]\n\t" \
        "mov $0, %[carryflag]\n\t" \
        "adc $0, %[carryflag]\n\t" \
        : [z] "=&r" (sum), [carryflag] "=&r" (carry) \
        : "[z]" (x), [e] "r" (y), "[carryflag]" (carry) \
        : "cc")
69
/* double_int64_adc(x1, x2, y1, y2, rslt1, rslt2, carry):
   two-word add with carry, word 1 being the less significant one.
     (carry, rslt2, rslt1) = (x2, x1) + (y2, y1) + carry
   carry is read as the carry in (nonzero means 1) and overwritten with the
   carry out (0 or 1).

   Instruction sequence:
     neg carry        sets CF exactly when carry was nonzero
     adc x1, rslt1    rslt1 (preloaded with y1) += x1 + CF
     adc x2, rslt2    rslt2 (preloaded with y2) += x2 + CF
     mov $0, carry    zeroes the register; mov does not alter CF
     adc $0, carry    carry = CF

   The previous version started with "add carry, rslt1"; when y1 + carry
   wrapped, the resulting flag was consumed as a second carry-in by the
   following adc, so the low word came out one too large and the real carry
   was lost.  Outputs are early-clobber because they are written before all
   inputs have been read. */
#define double_int64_adc(x1, x2, y1, y2, rslt1, rslt2, carry) \
   __asm__ __volatile__ ("neg %[carryout]\n\t" \
         "adc %[e1], %[z1]\n\t" \
         "adc %[e2], %[z2]\n\t" \
         "mov $0, %[carryout]\n\t" \
         "adc $0, %[carryout]\n\t" \
         : [z1] "=&r" (rslt1), [z2] "=&r" (rslt2), [carryout] "=&r" (carry) \
         : "[z1]" (y1), "[z2]" (y2), "[carryout]" (carry), \
                   [e1] "r" (x1), [e2] "r" (x2)\
         : "cc")
80
/* adc128(first, second, carry, sum): 128-bit addition with carry.
   Both blocks are split into two 64-bit words through a union and handed to
   double_int64_adc, with int64[0] passed as the first (less significant)
   word; the two result words are reassembled into sum.  carry is passed
   through to double_int64_adc, which reads and updates it.

   The temporaries use adc128_-prefixed names: with the former names
   (x, y, rslt) a call such as adc128(x, y, c, s) expanded to
   "x.bitblock = x" and silently referred to the macro's own local. */
#define adc128(first, second, carry, sum) \
do\
{\
  union {__m128i bitblock;\
         uint64_t int64[2];} adc128_a_, adc128_b_, adc128_r_;\
\
  adc128_a_.bitblock = (first);\
  adc128_b_.bitblock = (second);\
\
  double_int64_adc(adc128_a_.int64[0], adc128_a_.int64[1], \
                   adc128_b_.int64[0], adc128_b_.int64[1], \
                   adc128_r_.int64[0], adc128_r_.int64[1], carry);\
  (sum) = adc128_r_.bitblock;\
}while(0)
99
100
101
/* advance_with_carry(cursor, carry, rslt):
   (carry, rslt) = cursor + cursor + carry, obtained by adding the cursor
   block to itself with adc128.  The cursor argument is expanded twice. */
#define advance_with_carry(cursor, carry, rslt) \
do { \
  adc128(cursor, cursor, carry, rslt); \
} while (0)
104
105
/* double_int64_sbb(x1, x2, y1, y2, rslt1, rslt2, carry):
   two-word subtract with borrow, word 1 being the less significant one.
     (carry, rslt2, rslt1) = (x2, x1) - (y2, y1) - carry
   carry is read as the borrow in (nonzero means 1) and overwritten with the
   borrow out (0 or 1), matching this strategy's CarryType convention
   (Carry0 == 0, test_carry(x) == (x > 0)).

   Instruction sequence:
     neg carry        sets CF exactly when carry was nonzero
     sbb y1, rslt1    rslt1 (preloaded with x1) -= y1 + CF
     sbb y2, rslt2    rslt2 (preloaded with x2) -= y2 + CF
     mov $0, carry    zeroes the register; mov does not alter CF
     adc $0, carry    carry = CF

   The previous version used sahf/lahf, i.e. the flag-image encoding of the
   ADC64_SAHF strategy: a borrow-in of 1 was not seen (sahf takes CF from
   bit 8) and the borrow-out was a flags image that is never zero, so
   test_carry() reported a borrow after every subtraction.  Outputs are
   early-clobber because they are written before all inputs are read. */
#define double_int64_sbb(x1, x2, y1, y2, rslt1, rslt2, carry) \
  __asm__  ("neg %[carryflag]\n\t" \
        "sbb %[e1], %[z1]\n\t" \
        "sbb %[e2], %[z2]\n\t" \
        "mov $0, %[carryflag]\n\t" \
        "adc $0, %[carryflag]\n\t" \
     : [z1] "=&r" (rslt1), [z2] "=&r" (rslt2), [carryflag] "=&r" (carry) \
         : "[z1]" (x1), "[z2]" (x2), \
           [e1] "r" (y1), [e2] "r" (y2), \
           "[carryflag]" (carry) \
         : "cc")
116
/* sbb128(first, second, borrow, diff): 128-bit subtraction with borrow,
   diff = first - second - borrow.
   Both blocks are split into two 64-bit words through a union and handed to
   double_int64_sbb, with int64[0] passed as the first (less significant)
   word; the two result words are reassembled into diff.  borrow is passed
   through to double_int64_sbb, which reads and updates it.

   The temporaries use sbb128_-prefixed names: with the former names
   (x, y, rslt) a call such as sbb128(x, y, b, d) expanded to
   "x.bitblock = x" and silently referred to the macro's own local. */
#define sbb128(first, second, borrow, diff) \
do\
{ union {__m128i bitblock;\
         uint64_t int64[2];} sbb128_a_, sbb128_b_, sbb128_r_;\
\
  sbb128_a_.bitblock = (first);\
  sbb128_b_.bitblock = (second);\
\
  double_int64_sbb(sbb128_a_.int64[0], sbb128_a_.int64[1], \
                   sbb128_b_.int64[0], sbb128_b_.int64[1], \
                   sbb128_r_.int64[0], sbb128_r_.int64[1], borrow);\
  (diff) = sbb128_r_.bitblock;\
}while(0)
135
136#endif
137
138#if (CARRY_STRATEGY == ADC64_SAHF_STRATEGY)
/* ADC64_SAHF strategy: a carry is a 64-bit integer holding the flag image
   that the asm macros of this strategy exchange with the processor through
   sahf/lahf. */
typedef uint64_t CarryType;

/* The "no carry" value. */
#define Carry0 0

/* Nonzero when the carry x is set.  Only bit 8 (value 256) is examined:
   lahf stores the flags in bits 8-15 of the accumulator, with the carry
   flag in the lowest of those bits. */
#define test_carry(x) (((x)&256) > 0)

/* Logical or of two carries.  Both arguments are parenthesized so that an
   argument containing a lower-precedence operator (?:, ||, &&, ...) is
   or-ed as a whole instead of being re-associated with the '|'. */
#define carry_or(carry1, carry2) ((carry1) | (carry2))
146
/* double_int64_adc(x1, x2, y1, y2, rslt1, rslt2, carry):
   two-word add with carry, word 1 being the less significant one.
     (rslt2, rslt1) = (x2, x1) + (y2, y1) + carry-in
   carry lives in the accumulator: sahf loads the flags from it before the
   additions and lahf stores the resulting flags back into it afterwards, so
   the carry out is bit 8 of the value left in carry.

   rslt1 is marked early-clobber: it is written by the first adc while y2
   (operand e2) is still to be read by the second, so the compiler must not
   place e2 in the same register. */
#define double_int64_adc(x1, x2, y1, y2, rslt1, rslt2, carry) \
  __asm__  ("sahf\n\t" \
        "adc %[e1], %[z1]\n\t" \
        "adc %[e2], %[z2]\n\t" \
        "lahf\n\t" \
     : [z1] "=&r" (rslt1), [z2] "=r" (rslt2), [carryflag] "=a" (carry) \
         : "[z1]" (x1), "[z2]" (x2), \
           [e1] "r" (y1), [e2] "r" (y2), \
           "[carryflag]" (carry) \
         : "cc")
157
/* adc128(first, second, carry, sum): 128-bit addition with carry.
   Both blocks are split into two 64-bit words through a union and handed to
   double_int64_adc, with int64[0] passed as the first (less significant)
   word; the two result words are reassembled into sum.  carry is passed
   through to double_int64_adc, which reads and updates it.

   The temporaries use adc128_-prefixed names: with the former names
   (x, y, rslt) a call such as adc128(x, y, c, s) expanded to
   "x.bitblock = x" and silently referred to the macro's own local. */
#define adc128(first, second, carry, sum) \
do\
{\
  union {__m128i bitblock;\
         uint64_t int64[2];} adc128_a_, adc128_b_, adc128_r_;\
\
  adc128_a_.bitblock = (first);\
  adc128_b_.bitblock = (second);\
\
  double_int64_adc(adc128_a_.int64[0], adc128_a_.int64[1], \
                   adc128_b_.int64[0], adc128_b_.int64[1], \
                   adc128_r_.int64[0], adc128_r_.int64[1], carry);\
  (sum) = adc128_r_.bitblock;\
}while(0)
176
177
178
/* double_int64_advance(x1, x2, rslt1, rslt2, carry):
   doubles the two-word value (x2, x1), word 1 being the less significant
   one, and adds the incoming carry:
     (rslt2, rslt1) = (x2, x1) + (x2, x1) + carry-in
   sahf loads the flags from the accumulator holding carry, each word is
   added to itself with adc, and lahf stores the resulting flags back into
   the accumulator. */
#define double_int64_advance(x1, x2, rslt1, rslt2, carry) \
  __asm__ ("sahf\n\t" \
           "adc %[lo], %[lo]\n\t" \
           "adc %[hi], %[hi]\n\t" \
           "lahf\n\t" \
           : [lo] "=r" (rslt1), [hi] "=r" (rslt2), [flags] "=a" (carry) \
           : "[lo]" (x1), "[hi]" (x2), "[flags]" (carry) \
           : "cc")
188
189
/* advance_with_carry(cursor, carry, rslt):
   (carry, rslt) = cursor + cursor + carry.
   The cursor block is split into two 64-bit words through a union and
   handed to double_int64_advance, with int64[0] passed as the first (less
   significant) word; the two result words are reassembled into rslt.

   The temporaries use advance_-prefixed names: with the former names
   (x, z) a call such as advance_with_carry(x, c, z) expanded to
   "x.bitblock = x" and silently referred to the macro's own local. */
#define advance_with_carry(cursor, carry, rslt)\
do\
{\
  union {__m128i bitblock;\
         uint64_t int64[2];} advance_in_, advance_out_;\
\
  advance_in_.bitblock = (cursor);\
\
  double_int64_advance(advance_in_.int64[0], advance_in_.int64[1], \
                       advance_out_.int64[0], advance_out_.int64[1], carry);\
  (rslt) = advance_out_.bitblock;\
}while(0)
204
205
206
207
/* double_int64_sbb(x1, x2, y1, y2, rslt1, rslt2, carry):
   two-word subtract with borrow, word 1 being the less significant one.
     (rslt2, rslt1) = (x2, x1) - (y2, y1) - borrow-in
   carry lives in the accumulator: sahf loads the flags from it before the
   subtractions and lahf stores the resulting flags back into it afterwards,
   so the borrow out is bit 8 of the value left in carry.

   rslt1 is marked early-clobber: it is written by the first sbb while y2
   (operand e2) is still to be read by the second, so the compiler must not
   place e2 in the same register. */
#define double_int64_sbb(x1, x2, y1, y2, rslt1, rslt2, carry) \
  __asm__  ("sahf\n\t" \
        "sbb %[e1], %[z1]\n\t" \
        "sbb %[e2], %[z2]\n\t" \
        "lahf\n\t" \
     : [z1] "=&r" (rslt1), [z2] "=r" (rslt2), [carryflag] "=a" (carry) \
         : "[z1]" (x1), "[z2]" (x2), \
           [e1] "r" (y1), [e2] "r" (y2), \
           "[carryflag]" (carry) \
         : "cc")
218
/* sbb128(first, second, borrow, diff): 128-bit subtraction with borrow,
   diff = first - second - borrow.
   Both blocks are split into two 64-bit words through a union and handed to
   double_int64_sbb, with int64[0] passed as the first (less significant)
   word; the two result words are reassembled into diff.  borrow is passed
   through to double_int64_sbb, which reads and updates it.

   The temporaries use sbb128_-prefixed names: with the former names
   (x, y, rslt) a call such as sbb128(x, y, b, d) expanded to
   "x.bitblock = x" and silently referred to the macro's own local. */
#define sbb128(first, second, borrow, diff) \
do\
{ union {__m128i bitblock;\
         uint64_t int64[2];} sbb128_a_, sbb128_b_, sbb128_r_;\
\
  sbb128_a_.bitblock = (first);\
  sbb128_b_.bitblock = (second);\
\
  double_int64_sbb(sbb128_a_.int64[0], sbb128_a_.int64[1], \
                   sbb128_b_.int64[0], sbb128_b_.int64[1], \
                   sbb128_r_.int64[0], sbb128_r_.int64[1], borrow);\
  (diff) = sbb128_r_.bitblock;\
}while(0)
237
238#endif
239
240
241
242#if (CARRY_STRATEGY == SIMD_CARRY_STRATEGY)
243
244typedef SIMD_type CarryType;
245
246#define Carry0 simd_const_1(0)
247
248#define test_carry(x) bitblock_has_bit(x)
249
250#define carry_or(carry1, carry2) simd_or(carry1, carry2)
251
/* adc128(x, y, carry, sum): (carry, sum) = x + y + carry, built from
   64-bit field additions.
     1. Add x, y and the incoming carry field by field.
     2. Recover the carry out of each field as generate | (propagate & ~sum)
        at the field's top bit, bring it down to the field's bottom bit and
        shift the register left by 64 so the low field's carry lands in the
        high field.
     3. Add that rippled carry to obtain the final sum.
     4. Apply the same carry-out formula to the final sum and shift right
        by 127 to leave the outgoing carry.
   NOTE(review): assumes simd_andc(a, b) is a & ~b and that the sisd_ shifts
   act on the full 128-bit register -- confirm against sse_simd.h.
   x and y are expanded several times; pass side-effect-free expressions. */
#define adc128(x, y, carry,  sum) \
do { \
  SIMD_type adc_generate = simd_and(x, y); \
  SIMD_type adc_propagate = simd_or(x, y); \
  SIMD_type adc_fieldsum = simd_add_64(simd_add_64(x, y), carry); \
  SIMD_type adc_fieldcarry = \
      simd_srli_64(simd_or(adc_generate, simd_andc(adc_propagate, adc_fieldsum)), 63); \
  SIMD_type adc_ripple = sisd_slli(adc_fieldcarry, 64); \
  sum = simd_add_64(adc_ripple, adc_fieldsum); \
  carry = sisd_srli(simd_or(adc_generate, simd_andc(adc_propagate, sum)), 127); \
} while (0)
261
262
/* sbb128(x, y, borrow, difference): (borrow, difference) = x - y - borrow,
   built from 64-bit field subtractions.
     1. Subtract y and the incoming borrow from x field by field.
     2. Recover the borrow out of each field as
        generate | (propagate & difference) at the field's top bit, bring it
        down to the field's bottom bit and shift the register left by 64 so
        the low field's borrow lands in the high field.
     3. Subtract that rippled borrow to obtain the final difference.
     4. Apply the same formula to the final difference and shift right by
        127 to leave the outgoing borrow.
   NOTE(review): assumes simd_andc(a, b) is a & ~b and that the sisd_ shifts
   act on the full 128-bit register -- confirm against sse_simd.h.
   x and y are expanded several times; pass side-effect-free expressions. */
#define sbb128(x, y, borrow, difference) \
do { \
  SIMD_type sbb_generate = simd_andc(y, x); \
  SIMD_type sbb_propagate = simd_not(simd_xor(x, y)); \
  SIMD_type sbb_fielddiff = simd_sub_64(simd_sub_64(x, y), borrow); \
  SIMD_type sbb_fieldborrow = \
      simd_srli_64(simd_or(sbb_generate, simd_and(sbb_propagate, sbb_fielddiff)), 63); \
  SIMD_type sbb_ripple = sisd_slli(sbb_fieldborrow, 64); \
  difference = simd_sub_64(sbb_fielddiff, sbb_ripple); \
  borrow = sisd_srli(simd_or(sbb_generate, simd_and(sbb_propagate, difference)), 127); \
} while (0)
272
273
/* advance_with_carry(cursor, carry, rslt):
   (carry, rslt) = cursor + cursor + carry.
   The top bit of each 64-bit field is extracted first; simd_mergel_64 then
   combines those bits with the incoming carry to form the bits that must be
   or-ed into the doubled fields.  carry has to be consumed by the merge
   before it is overwritten with the new outgoing carry.
   NOTE(review): presumably simd_mergel_64(a, b) places the low field of a
   above the low field of b -- confirm against sse_simd.h.
   cursor is expanded several times; pass a side-effect-free expression. */
#define advance_with_carry(cursor, carry, rslt) \
do { \
  SIMD_type adv_topbits = simd_srli_64(cursor, 63); \
  SIMD_type adv_fill = simd_mergel_64(adv_topbits, carry); \
  carry = sisd_srli(adv_topbits, 64); \
  rslt = simd_or(simd_add_64(cursor, cursor), adv_fill); \
} while (0)
281
282#endif
283#endif
284
285
Note: See TracBrowser for help on using the repository browser.