source: trunk/lib/block_carry.h @ 726

Last change on this file since 726 was 726, checked in by cameron, 8 years ago

Fix ADC64 version of double_int64_sbb to use sbb not sahf/lahf

File size: 7.7 KB
Line 
1/*  Block Addition, Subtraction and Shifts with Carry
2    Copyright (C) 2010, Robert D. Cameron
3    Licensed to the public under the Open Software License 3.0.
4    Licensed to International Characters Inc.
5       under the Academic Free License version 3.0.
6
This file defines addition, subtraction and shift operations on
128-bit blocks.  The implementation is chosen by the CARRY_STRATEGY
preprocessor constant, which is derived below from the ADC64 and
SAHFLAHF macros.
10
11Each implementation defines the following "abstract data type"
12for block operations with carry.
13
14Typename:   CarryType
15Constant:   Carry0  represents a value of 0 for the carry bit.
16Predicate:  test_carry(x) returns nonzero if a carry bit is 1, 0 otherwise.
17Function:   carry_or(carry1, carry2) forms the logical or of two carries.
Function:   adc128(x, y, carry, sum) computes (carry, sum) = x + y + carry.
19Function:   advance_with_carry(cursor, carry, rslt)
20                 computes (carry, rslt) = cursor + cursor + carry
Function:   sbb128(x, y, borrow, diff)
                 computes (borrow, diff) = x - y - borrow
23
24*/
25#ifndef BLOCK_CARRY_H
26#define BLOCK_CARRY_H
27
28
29typedef union {SIMD_type bitblock; uint64_t int64[2];} BitBlock_int64;
30
31
32
33/*------------------------------------------------------------*/
34#include "sse_simd.h"
35
/* Identifiers of the available carry implementations. */
#define SIMD_CARRY_STRATEGY 1   /* carry is a SIMD_type value              */
#define ADC64_STRATEGY 2        /* carry is a 64-bit integer, adc/sbb asm  */
#define ADC64_SAHF_STRATEGY 3   /* carry is a flags image, sahf/lahf asm   */

/* Select the implementation.  A CARRY_STRATEGY supplied by the includer
   (e.g. -DCARRY_STRATEGY=2) is honoured; otherwise it is derived from the
   ADC64 and SAHFLAHF macros, defaulting to the SIMD implementation. */
#ifndef CARRY_STRATEGY
#if defined(ADC64) && defined(SAHFLAHF)
#define CARRY_STRATEGY ADC64_SAHF_STRATEGY
#elif defined(ADC64)
#define CARRY_STRATEGY ADC64_STRATEGY
#else
#define CARRY_STRATEGY SIMD_CARRY_STRATEGY
#endif
#endif
49
50#if (CARRY_STRATEGY == ADC64_STRATEGY)
/* ADC64 strategy: a carry is a 64-bit integer.  The asm routines of this
   strategy produce 0 or 1. */
typedef uint64_t CarryType;

/* The "no carry" value. */
#define Carry0 0

/* Nonzero iff the carry is set. */
#define test_carry(x) ((x) > 0)

/* Bitwise or of two carries.  Both arguments are parenthesized so that
   argument expressions binding less tightly than `|` (&&, ||, ?:) are
   combined as the caller wrote them. */
#define carry_or(carry1, carry2) ((carry1) | (carry2))
58
/* Clear the processor carry flag.
   The flag change is declared through the "cc" clobber.  Note that the
   compiler may emit flag-modifying instructions between two separate asm
   statements, so the cleared flag cannot be relied on by a later asm
   statement. */
#define clc() \
  __asm__ __volatile__ ("clc\n\t" : : : "cc")
61
/* adc(x, y, carry, sum): 64-bit add with carry,
     (carry, sum) = x + y + carry.
   carry is read as 0/1 (its bit 0) and overwritten with the carry out.

   The incoming carry is copied into CF with `bt`, which writes no register.
   Folding the carry into y with a separate `add` would modify an input-only
   operand and would lose the carry whenever y + carry itself wrapped.
   `mov $0` is used to zero the carry register because it leaves CF intact. */
#define adc(x,y,carry,sum) \
  __asm__ __volatile__ ("bt $0, %[carryflag]\n\t" \
        "adc %[e], %[z]\n\t" \
        "mov $0, %[carryflag]\n\t" \
        "adc $0, %[carryflag]\n\t" \
        : [z] "=r" (sum), [carryflag] "=a" (carry) \
        : "[z]" (x), [e] "r" (y), "[carryflag]" (carry) \
        : "cc")
70
/* double_int64_adc(x1, x2, y1, y2, rslt1, rslt2, carry)
   128-bit addition from two 64-bit adc instructions:
     (carry, rslt2:rslt1) = x2:x1 + y2:y1 + carry
   x1, y1 and rslt1 are the low words.  carry is read as 0/1 (its bit 0) and
   overwritten with the carry out (0 or 1).

   The incoming carry is copied into CF with `bt`, which writes no register.
   Adding the carry to the low word with a separate `add` would lose it
   whenever y1 + carry wrapped to zero.
   rslt1 is marked early-clobber because it is written before x2 is read.
   `mov $0` zeroes the carry register without disturbing CF. */
#define double_int64_adc(x1, x2, y1, y2, rslt1, rslt2, carry) \
   __asm__ __volatile__ ("bt $0, %[carryin]\n\t" \
         "adc %[e1], %[z1]\n\t" \
         "adc %[e2], %[z2]\n\t" \
         "mov $0, %[carryout]\n\t" \
         "adc $0, %[carryout]\n\t" \
         : [z1] "=&a" (rslt1), [z2] "=r" (rslt2), [carryout] "=r" (carry) \
         : "0" (y1), "1" (y2), [carryin] "2" (carry), \
                   [e1] "b" (x1), [e2] "r" (x2)\
         : "cc")
81
/* adc128(first, second, carry, sum): 128-bit add with carry,
     (carry, sum) = first + second + carry.
   The blocks are split into 64-bit halves through BitBlock_int64 and added
   with double_int64_adc; int64[0] is passed as the low word.
   The temporaries carry an adc128_ prefix so that caller variables named
   x, y or rslt are not captured by the expansion. */
#define adc128(first, second, carry, sum) \
do {\
  BitBlock_int64 adc128_rslt_, adc128_x_, adc128_y_;\
  adc128_x_.bitblock = (first);\
  adc128_y_.bitblock = (second);\
  double_int64_adc(adc128_x_.int64[0], adc128_x_.int64[1], \
                   adc128_y_.int64[0], adc128_y_.int64[1], \
                   adc128_rslt_.int64[0], adc128_rslt_.int64[1], carry);\
  (sum) = adc128_rslt_.bitblock;\
} while(0)
90
91
92
/* double_int64_advance(x1, x2, rslt1, rslt2, carry)
   Doubles the 128-bit value x2:x1 and adds the incoming carry:
     (carry, rslt2:rslt1) = x2:x1 + x2:x1 + carry
   x1 and rslt1 are the low words.

   The low word is doubled first, so its bit 0 is clear and the carry can be
   added with `lea`, which does not touch the flags produced by the high-word
   adc.  `setc %al` then writes only the low byte of the carry register
   (constraint "a"); its other bytes keep the bits of the incoming carry. */
#define double_int64_advance(x1, x2, rslt1, rslt2, carry) \
  __asm__  (\
        "add %[lo], %[lo]\n\t" \
        "adc %[hi], %[hi]\n\t" \
        "lea 0(%[cy], %[lo]), %[lo]\n\t" \
        "setc %%al\n\t" \
         : [lo] "=r" (rslt1), [hi] "=r" (rslt2), [cy] "=a" (carry) \
         : "[lo]" (x1), "[hi]" (x2), "[cy]" (carry) \
         : "cc")
103
104/*  Slow
105#define double_int64_advance(x1, x2, rslt1, rslt2, carry) \
106  __asm__  (\
107        "shld $1, %[z1], %[z2]\n\t" \
108        "lea 0(%[carryflag], %[z1], 2), %[z1]\n\t" \
109        "setc %%al\n\t" \
110         : [z1] "=r" (rslt1), [z2] "=r" (rslt2), [carryflag] "=a" (carry) \
111         : "[z1]" (x1), "[z2]" (x2), \
112           "[carryflag]" (carry) \
113         : "cc")
114*/
115
/* advance_with_carry(cursor, carry, rslt):
     (carry, rslt) = cursor + cursor + carry
   The block is split into 64-bit halves through BitBlock_int64 and handed to
   double_int64_advance; int64[0] is passed as the low word.
   The temporaries carry an awc_ prefix so that caller variables named x or z
   are not captured by the expansion. */
#define advance_with_carry(cursor, carry, rslt)\
do {\
  BitBlock_int64 awc_in_, awc_out_;\
  awc_in_.bitblock = (cursor);\
  double_int64_advance(awc_in_.int64[0], awc_in_.int64[1], \
                       awc_out_.int64[0], awc_out_.int64[1], carry);\
  (rslt) = awc_out_.bitblock;\
} while(0)
123
124
/* double_int64_sbb(x1, x2, y1, y2, rslt1, rslt2, borrow)
   128-bit subtraction from two 64-bit sbb instructions:
     (borrow, rslt2:rslt1) = x2:x1 - y2:y1 - borrow
   x1, y1 and rslt1 are the low words.  borrow is read as 0/1 (its bit 0) and
   overwritten with the borrow out (0 or 1).  The operand order (x minus y)
   matches the sahf/lahf variant of this macro.

   The incoming borrow is copied into CF with `bt`, which writes no register.
   Subtracting it from the low word with a separate `sub` would lose it
   whenever the low minuend was zero.
   rslt1 is marked early-clobber because it is written before y2 is read.
   `mov $0` followed by `adc $0` materializes CF as 0/1 without disturbing
   it beforehand. */
#define double_int64_sbb(x1, x2, y1, y2, rslt1, rslt2, borrow) \
   __asm__ __volatile__ ("bt $0, %[borrowin]\n\t" \
         "sbb %[e1], %[z1]\n\t" \
         "sbb %[e2], %[z2]\n\t" \
         "mov $0, %[borrowout]\n\t" \
         "adc $0, %[borrowout]\n\t" \
         : [z1] "=&a" (rslt1), [z2] "=r" (rslt2), [borrowout] "=r" (borrow) \
         : "0" (x1), "1" (x2), [borrowin] "2" (borrow), \
                   [e1] "b" (y1), [e2] "r" (y2)\
         : "cc")
135
136
/* sbb128(first, second, borrow, diff): 128-bit subtract with borrow.
   The blocks are split into 64-bit halves through BitBlock_int64 and passed
   to double_int64_sbb as (first, second); int64[0] is the low word.
   The temporaries carry an sbb128_ prefix so that caller variables named
   x, y or rslt are not captured by the expansion. */
#define sbb128(first, second, borrow, diff) \
do {\
  BitBlock_int64 sbb128_rslt_, sbb128_x_, sbb128_y_;\
  sbb128_x_.bitblock = (first);\
  sbb128_y_.bitblock = (second);\
  double_int64_sbb(sbb128_x_.int64[0], sbb128_x_.int64[1], \
                   sbb128_y_.int64[0], sbb128_y_.int64[1], \
                   sbb128_rslt_.int64[0], sbb128_rslt_.int64[1], borrow);\
  (diff) = sbb128_rslt_.bitblock;\
} while(0)
146
147#endif
148
149#if (CARRY_STRATEGY == ADC64_SAHF_STRATEGY)
/* sahf/lahf strategy: a carry is the value left in the "a" register by
   lahf, i.e. a flags image in bits 8..15. */
typedef uint64_t CarryType;

/* The "no carry" value. */
#define Carry0 0

/* Nonzero iff the carry is set: bit 8 (value 256) is the carry flag within
   the flags image. */
#define test_carry(x) (((x)&256) > 0)

/* Bitwise or of two carries.  Both arguments are parenthesized so that
   argument expressions binding less tightly than `|` (&&, ||, ?:) are
   combined as the caller wrote them. */
#define carry_or(carry1, carry2) ((carry1) | (carry2))
157
/* double_int64_adc(x1, x2, y1, y2, rslt1, rslt2, carry)
   128-bit addition from two 64-bit adc instructions:
     (carry, rslt2:rslt1) = x2:x1 + y2:y1 + carry
   x1, y1 and rslt1 are the low words.  carry is a flags image in the "a"
   register: sahf loads it into the flags before the additions and lahf
   stores the resulting flags back afterwards.

   rslt1 is marked early-clobber because it is written by the first adc
   while y2 has not been read yet; without it the compiler could place y2 in
   the same register. */
#define double_int64_adc(x1, x2, y1, y2, rslt1, rslt2, carry) \
  __asm__  ("sahf\n\t" \
        "adc %[e1], %[z1]\n\t" \
        "adc %[e2], %[z2]\n\t" \
        "lahf\n\t" \
     : [z1] "=&r" (rslt1), [z2] "=r" (rslt2), [carryflag] "=a" (carry) \
         : "[z1]" (x1), "[z2]" (x2), \
           [e1] "r" (y1), [e2] "r" (y2), \
           "[carryflag]" (carry) \
         : "cc")
168
/* adc128(first, second, carry, sum): 128-bit add with carry,
     (carry, sum) = first + second + carry.
   The blocks are split into 64-bit halves through BitBlock_int64 and added
   with double_int64_adc; int64[0] is passed as the low word.
   The temporaries carry an adc128_ prefix so that caller variables named
   x, y or rslt are not captured by the expansion. */
#define adc128(first, second, carry, sum) \
do {\
  BitBlock_int64 adc128_rslt_, adc128_x_, adc128_y_;\
  adc128_x_.bitblock = (first);\
  adc128_y_.bitblock = (second);\
  double_int64_adc(adc128_x_.int64[0], adc128_x_.int64[1], \
                   adc128_y_.int64[0], adc128_y_.int64[1], \
                   adc128_rslt_.int64[0], adc128_rslt_.int64[1], carry);\
  (sum) = adc128_rslt_.bitblock;\
}while(0)
177
178
179
/* double_int64_advance(x1, x2, rslt1, rslt2, carry)
   Doubles the 128-bit value x2:x1 and adds the incoming carry:
     (carry, rslt2:rslt1) = x2:x1 + x2:x1 + carry
   x1 and rslt1 are the low words.  carry is a flags image in the "a"
   register: sahf loads it into the flags, each word is added to itself with
   adc, and lahf stores the resulting flags back. */
#define double_int64_advance(x1, x2, rslt1, rslt2, carry) \
  __asm__  ("sahf\n\t" \
        "adc %[lo], %[lo]\n\t" \
        "adc %[hi], %[hi]\n\t" \
        "lahf\n\t" \
     : [lo] "=r" (rslt1), [hi] "=r" (rslt2), [flags] "=a" (carry) \
         : "[lo]" (x1), "[hi]" (x2), "[flags]" (carry) \
         : "cc")
189
190
/* advance_with_carry(cursor, carry, rslt):
     (carry, rslt) = cursor + cursor + carry
   The block is split into 64-bit halves through BitBlock_int64 and handed to
   double_int64_advance; int64[0] is passed as the low word.
   The temporaries carry an awc_ prefix so that caller variables named x or z
   are not captured by the expansion. */
#define advance_with_carry(cursor, carry, rslt)\
do {\
  BitBlock_int64 awc_in_, awc_out_;\
  awc_in_.bitblock = (cursor);\
  double_int64_advance(awc_in_.int64[0], awc_in_.int64[1], \
                       awc_out_.int64[0], awc_out_.int64[1], carry);\
  (rslt) = awc_out_.bitblock;\
} while(0)
198
199
200
201
/* double_int64_sbb(x1, x2, y1, y2, rslt1, rslt2, carry)
   128-bit subtraction from two 64-bit sbb instructions:
     (carry, rslt2:rslt1) = x2:x1 - y2:y1 - carry
   x1, y1 and rslt1 are the low words.  carry is a flags image in the "a"
   register: sahf loads it into the flags before the subtractions and lahf
   stores the resulting flags back afterwards.

   rslt1 is marked early-clobber because it is written by the first sbb
   while y2 has not been read yet; without it the compiler could place y2 in
   the same register. */
#define double_int64_sbb(x1, x2, y1, y2, rslt1, rslt2, carry) \
  __asm__  ("sahf\n\t" \
        "sbb %[e1], %[z1]\n\t" \
        "sbb %[e2], %[z2]\n\t" \
        "lahf\n\t" \
     : [z1] "=&r" (rslt1), [z2] "=r" (rslt2), [carryflag] "=a" (carry) \
         : "[z1]" (x1), "[z2]" (x2), \
           [e1] "r" (y1), [e2] "r" (y2), \
           "[carryflag]" (carry) \
         : "cc")
212
/* sbb128(first, second, borrow, diff): 128-bit subtract with borrow.
   The blocks are split into 64-bit halves through BitBlock_int64 and passed
   to double_int64_sbb as (first, second); int64[0] is the low word.
   The temporaries carry an sbb128_ prefix so that caller variables named
   x, y or rslt are not captured by the expansion. */
#define sbb128(first, second, borrow, diff) \
do {\
  BitBlock_int64 sbb128_rslt_, sbb128_x_, sbb128_y_;\
  sbb128_x_.bitblock = (first);\
  sbb128_y_.bitblock = (second);\
  double_int64_sbb(sbb128_x_.int64[0], sbb128_x_.int64[1], \
                   sbb128_y_.int64[0], sbb128_y_.int64[1], \
                   sbb128_rslt_.int64[0], sbb128_rslt_.int64[1], borrow);\
  (diff) = sbb128_rslt_.bitblock;\
}while(0)
222
223#endif
224
225
226
227#if (CARRY_STRATEGY == SIMD_CARRY_STRATEGY)
228
229typedef SIMD_type CarryType;
230
231#define Carry0 simd_const_1(0)
232
233#define test_carry(x) bitblock_has_bit(x)
234
235#define carry_or(carry1, carry2) simd_or(carry1, carry2)
236
/* adc128(x, y, carry, sum): (carry, sum) = x + y + carry.
   The two 64-bit fields are added independently and the carry out of the low
   field is then added into the high field.  A carry out of a bit position is
   recovered as generate | (propagate & ~sum)
   (assuming simd_andc(a, b) is a & ~b -- confirm against sse_simd.h).
   x, y and carry are each expanded more than once. */
#define adc128(x, y, carry,  sum) \
do{ \
  SIMD_type adc_generate = simd_and(x, y); \
  SIMD_type adc_propagate = simd_or(x, y); \
  SIMD_type adc_fields = simd_add_64(simd_add_64(x, y), carry); \
  SIMD_type adc_field_carries = simd_or(adc_generate, simd_andc(adc_propagate, adc_fields)); \
  SIMD_type adc_mid_carry = sisd_slli(simd_srli_64(adc_field_carries, 63), 64); \
  sum = simd_add_64(adc_mid_carry, adc_fields); \
  carry = sisd_srli(simd_or(adc_generate, simd_andc(adc_propagate, sum)), 127); \
} while(0)
246
247
/* sbb128(x, y, borrow, difference): the fields are formed as x - y - borrow.
   The two 64-bit fields are subtracted independently and the borrow out of
   the low field is then subtracted from the high field.  A borrow out of a
   bit position is recovered as generate | (propagate & difference), with
   generate = simd_andc(y, x) and propagate = ~(x ^ y).
   x, y and borrow are each expanded more than once. */
#define sbb128(x, y, borrow, difference) \
do {\
  SIMD_type sbb_generate = simd_andc(y, x); \
  SIMD_type sbb_propagate = simd_not(simd_xor(x, y)); \
  SIMD_type sbb_fields = simd_sub_64(simd_sub_64(x, y), borrow); \
  SIMD_type sbb_field_borrows = simd_or(sbb_generate, simd_and(sbb_propagate, sbb_fields)); \
  SIMD_type sbb_mid_borrow = sisd_slli(simd_srli_64(sbb_field_borrows, 63), 64); \
  difference = simd_sub_64(sbb_fields, sbb_mid_borrow); \
  borrow = sisd_srli(simd_or(sbb_generate, simd_and(sbb_propagate, difference)), 127); \
}while(0)
257
258
/* advance_with_carry(cursor, carry, rslt):
     (carry, rslt) = cursor + cursor + carry
   The top bit of each 64-bit field is extracted first.  Those bits and the
   incoming carry are merged with simd_mergel_64 and or-ed into the doubled
   fields; the extracted bits shifted right by 64 become the new carry.
   carry is overwritten before rslt is assigned, and cursor is expanded more
   than once. */
#define advance_with_carry(cursor, carry, rslt)\
do {\
  SIMD_type awc_top_bits = simd_srli_64(cursor, 63);\
  SIMD_type awc_shift_in = simd_mergel_64(awc_top_bits, carry);\
  carry = sisd_srli(awc_top_bits, 64);\
  rslt = simd_or(simd_add_64(cursor, cursor), awc_shift_in);\
} while(0)
266
267#endif
268#endif
269
270
Note: See TracBrowser for help on using the repository browser.