source: trunk/lib/block_carry.h @ 890

Last change on this file since 890 was 759, checked in by cameron, 9 years ago

Revert to uin64_t CarryType? for now.

File size: 7.8 KB
Line 
1/*  Block Addition, Subtraction and Shifts with Carry
2    Copyright (C) 2010, Robert D. Cameron
3    Licensed to the public under the Open Software License 3.0.
4    Licensed to International Characters Inc.
5       under the Academic Free License version 3.0.
6
This file defines addition, subtraction and shift operations on
128-bit blocks.   Different versions of the operations are
selected by the CARRY_STRATEGY preprocessor constant, which is
derived below from the ADC64 and SAHFLAHF build flags.
10
11Each implementation defines the following "abstract data type"
12for block operations with carry.
13
14Typename:   CarryType
15Constant:   Carry0  represents a value of 0 for the carry bit.
16Predicate:  test_carry(x) returns nonzero if a carry bit is 1, 0 otherwise.
17Function:   carry_or(carry1, carry2) forms the logical or of two carries.
Function:   adc128(x, y, carry, sum) computes (carry, sum) = x + y + carry.
19Function:   advance_with_carry(cursor, carry, rslt)
20                 computes (carry, rslt) = cursor + cursor + carry
Function:   sbb128(x, y, borrow, diff)
                 computes (borrow, diff) = x - y - borrow
23
24*/
25#ifndef BLOCK_CARRY_H
26#define BLOCK_CARRY_H
27
28
29typedef union {SIMD_type bitblock; uint64_t int64[2];} BitBlock_int64;
30
31
32
33/*------------------------------------------------------------*/
34#include "sse_simd.h"
35
/* Identifiers for the carry implementations this header can provide. */
#define SIMD_CARRY_STRATEGY 1
#define ADC64_STRATEGY 2
#define ADC64_SAHF_STRATEGY 3

/*
 * Choose CARRY_STRATEGY from the build flags:
 *   SAHFLAHF defined (with or without ADC64) -> ADC64_SAHF_STRATEGY
 *   only ADC64 defined                       -> ADC64_STRATEGY
 *   neither defined                          -> SIMD_CARRY_STRATEGY
 */
#if defined(SAHFLAHF)
#define CARRY_STRATEGY ADC64_SAHF_STRATEGY
#elif defined(ADC64)
#define CARRY_STRATEGY ADC64_STRATEGY
#else
#define CARRY_STRATEGY SIMD_CARRY_STRATEGY
#endif
53
54#if (CARRY_STRATEGY == ADC64_STRATEGY)
/* ADC64 strategy: a carry is kept in a 64-bit unsigned integer. */
typedef uint64_t CarryType;

/* The carry value meaning "no carry". */
#define Carry0 0

/* Evaluates to 1 when the carry value is greater than zero, else 0. */
#define test_carry(x) (0 < (x))
60
/*
 * carry_or(carry1, carry2): bitwise OR of two carry values.
 * Each argument is parenthesized so that an argument containing a
 * lower-precedence operator (e.g. ?:) is combined as a unit.
 */
#define carry_or(carry1, carry2) ((carry1) | (carry2))
62
/*
 * clc(): execute the x86 CLC instruction (clear the carry flag).
 * The flags register is listed as clobbered ("cc"), matching the other
 * inline-asm macros in this file.
 * NOTE(review): the compiler gives no guarantee that the flag state
 * survives until a later, separate asm statement.
 */
#define clc() \
  __asm__ __volatile__ ("clc\n\t" : : : "cc")
65
/*
 * adc(x, y, carry, sum): (carry, sum) = x + y + carry on one machine word.
 *
 *   x, y   : addends (read only)
 *   carry  : lvalue; any nonzero value counts as a carry-in,
 *            and it is set to exactly 0 or 1 on exit
 *   sum    : lvalue receiving the low word of the result
 *
 * Sequence (same scheme as double_int64_adc):
 *   neg carry      -> CF = (carry != 0)
 *   adc y, sum     -> sum = x + y + CF
 *   mov $0, carry  -> mov leaves the flags untouched
 *   adc $0, carry  -> carry = CF
 *
 * The previous sequence first added the carry into y's register, which
 * modified an input-only operand and lost the carry when y + carry
 * wrapped to 0.  Both outputs are earlyclobber ('&') because they are
 * written before y has been read.
 */
#define adc(x,y,carry,sum) \
  __asm__ __volatile__ ("neg %[carryflag]\n\t" \
        "adc %[e], %[z]\n\t" \
        "mov $0, %[carryflag]\n\t" \
        "adc $0, %[carryflag]\n\t" \
        : [z] "=&r" (sum), [carryflag] "=&a" (carry) \
        : "[z]" (x), [e] "r" (y), "[carryflag]" (carry) \
        : "cc")
74
/*
 * double_int64_adc(x1, x2, y1, y2, rslt1, rslt2, carry):
 *   (carry, rslt2:rslt1) = x2:x1 + y2:y1 + carry
 * where x1/y1/rslt1 are the low words and x2/y2/rslt2 the high words.
 *
 *   neg carry        -> CF = (carry != 0)
 *   adc y1, rslt1    -> low word plus carry-in
 *   adc y2, rslt2    -> high word plus carry from the low word
 *   mov $0, carry    -> mov does not alter the flags
 *   adc $0, carry    -> carry = CF, i.e. exactly 0 or 1
 *
 * All three outputs are marked earlyclobber ('&'): each is written while
 * y1 and/or y2 are still to be read, so the compiler must not place an
 * input in the same register as one of them.
 */
#define double_int64_adc(x1, x2, y1, y2, rslt1, rslt2, carry) \
   __asm__ __volatile__ ("neg %[carryflag]\n\t" \
         "adc %[e1], %[z1]\n\t" \
         "adc %[e2], %[z2]\n\t" \
         "mov $0, %[carryflag]\n\t" \
         "adc $0, %[carryflag]\n\t" \
     : [z1] "=&r" (rslt1), [z2] "=&r" (rslt2), [carryflag] "=&r" (carry) \
         : "[z1]" (x1), "[z2]" (x2), \
           [e1] "r" (y1), [e2] "r" (y2), \
           "[carryflag]" (carry) \
         : "cc")
86
87
/*
 * adc128(first, second, carry, sum):
 *   (carry, sum) = first + second + carry on whole blocks.
 *
 *   first, second : block values to add
 *   carry         : CarryType lvalue, carry-in on entry, carry-out on exit
 *   sum           : block lvalue receiving the result
 *
 * The blocks are split into 64-bit halves through BitBlock_int64 and the
 * halves are added with double_int64_adc.  The temporaries carry an
 * adc128_ prefix so they cannot capture caller variables that happen to
 * be called x, y or rslt.
 */
#define adc128(first, second, carry, sum) \
do {\
  BitBlock_int64 adc128_rslt_, adc128_x_, adc128_y_;\
  adc128_x_.bitblock = (first);\
  adc128_y_.bitblock = (second);\
  double_int64_adc(adc128_x_.int64[0], adc128_x_.int64[1], \
                   adc128_y_.int64[0], adc128_y_.int64[1], \
                   adc128_rslt_.int64[0], adc128_rslt_.int64[1], carry);\
  (sum) = adc128_rslt_.bitblock;\
} while(0)
96
97
98
/*
 * double_int64_advance(x1, x2, rslt1, rslt2, carry):
 *   (carry, rslt2:rslt1) = 2 * (x2:x1) + carry
 * i.e. a one-bit left shift of the 128-bit value with carry-in/carry-out.
 *
 *   add lo, lo          -> lo doubled, CF = old top bit of lo
 *   adc hi, hi          -> hi doubled plus that bit, CF = old top bit of hi
 *   lea 0(cf, lo), lo   -> add the carry-in to lo (lea leaves the flags alone)
 *   setc %al            -> low byte of the carry register = CF
 *
 * The carry operand is pinned to the "a" register because the final
 * instruction names %al directly.  The carry-in is added as a number, so
 * it is expected to be 0 or 1.
 */
#define double_int64_advance(x1, x2, rslt1, rslt2, carry) \
  __asm__ ( \
        "add %[lo], %[lo]\n\t" \
        "adc %[hi], %[hi]\n\t" \
        "lea 0(%[cf], %[lo]), %[lo]\n\t" \
        "setc %%al\n\t" \
        : [lo] "=r" (rslt1), [hi] "=r" (rslt2), [cf] "=a" (carry) \
        : "[lo]" (x1), "[hi]" (x2), "[cf]" (carry) \
        : "cc")
109
110/*  Slow
111#define double_int64_advance(x1, x2, rslt1, rslt2, carry) \
112  __asm__  (\
113        "shld $1, %[z1], %[z2]\n\t" \
114        "lea 0(%[carryflag], %[z1], 2), %[z1]\n\t" \
115        "setc %%al\n\t" \
116         : [z1] "=r" (rslt1), [z2] "=r" (rslt2), [carryflag] "=a" (carry) \
117         : "[z1]" (x1), "[z2]" (x2), \
118           "[carryflag]" (carry) \
119         : "cc")
120*/
121
/*
 * advance_with_carry(cursor, carry, rslt):
 *   (carry, rslt) = cursor + cursor + carry
 *
 *   cursor : block value to advance
 *   carry  : CarryType lvalue, carry-in on entry, carry-out on exit
 *   rslt   : block lvalue receiving the result
 *
 * The block is split into 64-bit halves through BitBlock_int64 and
 * handed to double_int64_advance.  The temporaries carry an awc_ prefix
 * so they cannot capture caller variables named x or z.
 */
#define advance_with_carry(cursor, carry, rslt)\
do {\
  BitBlock_int64 awc_in_, awc_out_;\
  awc_in_.bitblock = (cursor);\
  double_int64_advance(awc_in_.int64[0], awc_in_.int64[1], \
                       awc_out_.int64[0], awc_out_.int64[1], carry);\
  (rslt) = awc_out_.bitblock;\
} while(0)
129
/*
 * double_int64_sbb(x1, x2, y1, y2, rslt1, rslt2, brw):
 *   (brw, rslt2:rslt1) = x2:x1 - y2:y1 - brw
 * where x1/y1/rslt1 are the low words and x2/y2/rslt2 the high words.
 *
 *   neg brw          -> CF = (brw != 0)
 *   sbb y1, rslt1    -> low word minus borrow-in
 *   sbb y2, rslt2    -> high word minus borrow from the low word
 *   mov $0, brw      -> mov does not alter the flags
 *   adc $0, brw      -> brw = CF, i.e. exactly 0 or 1
 *
 * The borrow-out is produced with adc (0 + 0 + CF) rather than sbb, so
 * it is 1 instead of all-ones.  That keeps it in the same 0/1 form that
 * double_int64_adc produces and that double_int64_advance relies on.
 *
 * All three outputs are marked earlyclobber ('&'): each is written while
 * y1 and/or y2 are still to be read.
 */
#define double_int64_sbb(x1, x2, y1, y2, rslt1, rslt2, brw) \
  __asm__  ("neg %[borrowflag]\n\t" \
        "sbb %[e1], %[z1]\n\t" \
        "sbb %[e2], %[z2]\n\t" \
         "mov $0, %[borrowflag]\n\t" \
         "adc $0, %[borrowflag]\n\t" \
     : [z1] "=&r" (rslt1), [z2] "=&r" (rslt2), [borrowflag] "=&a" (brw) \
         : "[z1]" (x1), "[z2]" (x2), \
           [e1] "r" (y1), [e2] "r" (y2), \
           "[borrowflag]" (brw) \
         : "cc")
141
/*
 * sbb128(first, second, borrow, diff):
 *   (borrow, diff) = first - second - borrow on whole blocks.
 *
 *   first, second : minuend and subtrahend block values
 *   borrow        : CarryType lvalue, borrow-in on entry, borrow-out on exit
 *   diff          : block lvalue receiving the result
 *
 * The blocks are split into 64-bit halves through BitBlock_int64 and the
 * halves are subtracted with double_int64_sbb.  The temporaries carry an
 * sbb128_ prefix so they cannot capture caller variables that happen to
 * be called x, y or rslt.
 */
#define sbb128(first, second, borrow, diff) \
do {\
  BitBlock_int64 sbb128_rslt_, sbb128_x_, sbb128_y_;\
  sbb128_x_.bitblock = (first);\
  sbb128_y_.bitblock = (second);\
  double_int64_sbb(sbb128_x_.int64[0], sbb128_x_.int64[1], \
                   sbb128_y_.int64[0], sbb128_y_.int64[1], \
                   sbb128_rslt_.int64[0], sbb128_rslt_.int64[1], borrow);\
  (diff) = sbb128_rslt_.bitblock;\
} while(0)
151
152
153#endif
154
155#if (CARRY_STRATEGY == ADC64_SAHF_STRATEGY)
/*
 * SAHF/LAHF strategy: a carry is a 64-bit word holding a saved copy of
 * the flags as written by LAHF into AH (bits 8..15 of the word).
 */
typedef uint64_t CarryType;

/* The carry value meaning "no carry" (all saved flags clear). */
#define Carry0 0

/* Evaluates to 1 when bit 8 (mask 256) of the carry word is set, else 0. */
#define test_carry(x) (0 < ((x) & 256))
161
/*
 * carry_or(carry1, carry2): bitwise OR of two carry values.
 * Each argument is parenthesized so that an argument containing a
 * lower-precedence operator (e.g. ?:) is combined as a unit.
 */
#define carry_or(carry1, carry2) ((carry1) | (carry2))
163
/*
 * double_int64_adc(x1, x2, y1, y2, rslt1, rslt2, carry):
 *   (carry, rslt2:rslt1) = x2:x1 + y2:y1 + carry
 * where x1/y1/rslt1 are the low words and x2/y2/rslt2 the high words.
 *
 *   sahf             -> restore the flags (including CF) from AH
 *   adc y1, rslt1    -> low word plus carry-in
 *   adc y2, rslt2    -> high word plus carry from the low word
 *   lahf             -> save the resulting flags back into AH
 *
 * The carry operand is pinned to the "a" register because SAHF/LAHF use
 * AH implicitly.  rslt1 and rslt2 are marked earlyclobber ('&') because
 * rslt1 is written before y2 has been read, so an input must not share
 * its register.
 */
#define double_int64_adc(x1, x2, y1, y2, rslt1, rslt2, carry) \
  __asm__  ("sahf\n\t" \
        "adc %[e1], %[z1]\n\t" \
        "adc %[e2], %[z2]\n\t" \
        "lahf\n\t" \
     : [z1] "=&r" (rslt1), [z2] "=&r" (rslt2), [carryflag] "=a" (carry) \
         : "[z1]" (x1), "[z2]" (x2), \
           [e1] "r" (y1), [e2] "r" (y2), \
           "[carryflag]" (carry) \
         : "cc")
174
/*
 * adc128(first, second, carry, sum):
 *   (carry, sum) = first + second + carry on whole blocks.
 *
 *   first, second : block values to add
 *   carry         : CarryType lvalue, carry-in on entry, carry-out on exit
 *   sum           : block lvalue receiving the result
 *
 * The blocks are split into 64-bit halves through BitBlock_int64 and the
 * halves are added with double_int64_adc.  The temporaries carry an
 * adc128_ prefix so they cannot capture caller variables that happen to
 * be called x, y or rslt.
 */
#define adc128(first, second, carry, sum) \
do {\
  BitBlock_int64 adc128_rslt_, adc128_x_, adc128_y_;\
  adc128_x_.bitblock = (first);\
  adc128_y_.bitblock = (second);\
  double_int64_adc(adc128_x_.int64[0], adc128_x_.int64[1], \
                   adc128_y_.int64[0], adc128_y_.int64[1], \
                   adc128_rslt_.int64[0], adc128_rslt_.int64[1], carry);\
  (sum) = adc128_rslt_.bitblock;\
}while(0)
183
184
185
/*
 * double_int64_advance(x1, x2, rslt1, rslt2, carry):
 *   (carry, rslt2:rslt1) = 2 * (x2:x1) + carry
 * i.e. a one-bit left shift of the 128-bit value with carry-in/carry-out.
 *
 *   sahf          -> restore the flags (including CF) from AH
 *   adc lo, lo    -> lo doubled plus carry-in, CF = old top bit of lo
 *   adc hi, hi    -> hi doubled plus that bit, CF = old top bit of hi
 *   lahf          -> save the resulting flags back into AH
 *
 * The carry operand is pinned to the "a" register because SAHF/LAHF use
 * AH implicitly.
 */
#define double_int64_advance(x1, x2, rslt1, rslt2, carry) \
  __asm__ ( \
        "sahf\n\t" \
        "adc %[lo], %[lo]\n\t" \
        "adc %[hi], %[hi]\n\t" \
        "lahf\n\t" \
        : [lo] "=r" (rslt1), [hi] "=r" (rslt2), [cf] "=a" (carry) \
        : "[lo]" (x1), "[hi]" (x2), "[cf]" (carry) \
        : "cc")
195
196
/*
 * advance_with_carry(cursor, carry, rslt):
 *   (carry, rslt) = cursor + cursor + carry
 *
 *   cursor : block value to advance
 *   carry  : CarryType lvalue, carry-in on entry, carry-out on exit
 *   rslt   : block lvalue receiving the result
 *
 * The block is split into 64-bit halves through BitBlock_int64 and
 * handed to double_int64_advance.  The temporaries carry an awc_ prefix
 * so they cannot capture caller variables named x or z.
 */
#define advance_with_carry(cursor, carry, rslt)\
do {\
  BitBlock_int64 awc_in_, awc_out_;\
  awc_in_.bitblock = (cursor);\
  double_int64_advance(awc_in_.int64[0], awc_in_.int64[1], \
                       awc_out_.int64[0], awc_out_.int64[1], carry);\
  (rslt) = awc_out_.bitblock;\
} while(0)
204
205
206
207
/*
 * double_int64_sbb(x1, x2, y1, y2, rslt1, rslt2, carry):
 *   (carry, rslt2:rslt1) = x2:x1 - y2:y1 - carry
 * where x1/y1/rslt1 are the low words and x2/y2/rslt2 the high words,
 * and "carry" holds the borrow as a saved flags byte.
 *
 *   sahf             -> restore the flags (including CF) from AH
 *   sbb y1, rslt1    -> low word minus borrow-in
 *   sbb y2, rslt2    -> high word minus borrow from the low word
 *   lahf             -> save the resulting flags back into AH
 *
 * The carry operand is pinned to the "a" register because SAHF/LAHF use
 * AH implicitly.  rslt1 and rslt2 are marked earlyclobber ('&') because
 * rslt1 is written before y2 has been read, so an input must not share
 * its register.
 */
#define double_int64_sbb(x1, x2, y1, y2, rslt1, rslt2, carry) \
  __asm__  ("sahf\n\t" \
        "sbb %[e1], %[z1]\n\t" \
        "sbb %[e2], %[z2]\n\t" \
        "lahf\n\t" \
     : [z1] "=&r" (rslt1), [z2] "=&r" (rslt2), [carryflag] "=a" (carry) \
         : "[z1]" (x1), "[z2]" (x2), \
           [e1] "r" (y1), [e2] "r" (y2), \
           "[carryflag]" (carry) \
         : "cc")
218
/*
 * sbb128(first, second, borrow, diff):
 *   (borrow, diff) = first - second - borrow on whole blocks.
 *
 *   first, second : minuend and subtrahend block values
 *   borrow        : CarryType lvalue, borrow-in on entry, borrow-out on exit
 *   diff          : block lvalue receiving the result
 *
 * The blocks are split into 64-bit halves through BitBlock_int64 and the
 * halves are subtracted with double_int64_sbb.  The temporaries carry an
 * sbb128_ prefix so they cannot capture caller variables that happen to
 * be called x, y or rslt.
 */
#define sbb128(first, second, borrow, diff) \
do {\
  BitBlock_int64 sbb128_rslt_, sbb128_x_, sbb128_y_;\
  sbb128_x_.bitblock = (first);\
  sbb128_y_.bitblock = (second);\
  double_int64_sbb(sbb128_x_.int64[0], sbb128_x_.int64[1], \
                   sbb128_y_.int64[0], sbb128_y_.int64[1], \
                   sbb128_rslt_.int64[0], sbb128_rslt_.int64[1], borrow);\
  (diff) = sbb128_rslt_.bitblock;\
}while(0)
228
229#endif
230
231
232
233#if (CARRY_STRATEGY == SIMD_CARRY_STRATEGY)
234
235typedef SIMD_type CarryType;
236
237#define Carry0 simd_const_1(0)
238
239#define test_carry(x) bitblock_has_bit(x)
240
241#define carry_or(carry1, carry2) simd_or(carry1, carry2)
242
/*
 * adc128(x, y, carry, sum): (carry, sum) = x + y + carry on whole blocks,
 * built from 64-bit lane additions.
 *
 *   x, y   : block values to add (each evaluated once)
 *   carry  : CarryType lvalue, carry-in on entry, carry-out on exit
 *   sum    : block lvalue receiving the result
 *
 * Steps:
 *   gen     = x AND y, prop = x OR y
 *   partial = lane-wise x + y + carry (no carry between the lanes yet)
 *   c1      = bit 63 of (gen OR (prop ANDC partial)), moved up 64 bits,
 *             i.e. the carry out of the low lane placed in the high lane
 *   sum     = partial + c1
 *   carry   = bit 127 of (gen OR (prop ANDC sum)), shifted down to bit 0
 *
 * All temporaries carry an adc128_ prefix so they cannot capture caller
 * variables named gen, prop, partial or c1, and x and y are copied once
 * so argument expressions are not re-evaluated.
 */
#define adc128(x, y, carry,  sum) \
do{ \
  SIMD_type adc128_x_ = (x); \
  SIMD_type adc128_y_ = (y); \
  SIMD_type adc128_gen_ = simd_and(adc128_x_, adc128_y_); \
  SIMD_type adc128_prop_ = simd_or(adc128_x_, adc128_y_); \
  SIMD_type adc128_partial_ = simd_add_64(simd_add_64(adc128_x_, adc128_y_), carry); \
  SIMD_type adc128_c1_ = sisd_slli(simd_srli_64(simd_or(adc128_gen_, simd_andc(adc128_prop_, adc128_partial_)), 63), 64); \
  SIMD_type adc128_sum_ = simd_add_64(adc128_c1_, adc128_partial_); \
  (sum) = adc128_sum_; \
  (carry) = sisd_srli(simd_or(adc128_gen_, simd_andc(adc128_prop_, adc128_sum_)), 127); \
} while(0)
252
253
/*
 * sbb128(x, y, borrow, difference):
 *   (borrow, difference) = x - y - borrow on whole blocks,
 * built from 64-bit lane subtractions.
 *
 *   x, y        : minuend and subtrahend block values (each evaluated once)
 *   borrow      : CarryType lvalue, borrow-in on entry, borrow-out on exit
 *   difference  : block lvalue receiving the result
 *
 * Steps:
 *   gen     = y ANDC x, prop = NOT (x XOR y)
 *   partial = lane-wise x - y - borrow (no borrow between the lanes yet)
 *   b1      = bit 63 of (gen OR (prop AND partial)), moved up 64 bits,
 *             i.e. the borrow out of the low lane placed in the high lane
 *   difference = partial - b1
 *   borrow  = bit 127 of (gen OR (prop AND difference)), shifted to bit 0
 *
 * All temporaries carry an sbb128_ prefix so they cannot capture caller
 * variables named gen, prop, partial or b1, and x and y are copied once
 * so argument expressions are not re-evaluated.
 */
#define sbb128(x, y, borrow, difference) \
do {\
  SIMD_type sbb128_x_ = (x); \
  SIMD_type sbb128_y_ = (y); \
  SIMD_type sbb128_gen_ = simd_andc(sbb128_y_, sbb128_x_); \
  SIMD_type sbb128_prop_ = simd_not(simd_xor(sbb128_x_, sbb128_y_)); \
  SIMD_type sbb128_partial_ = simd_sub_64(simd_sub_64(sbb128_x_, sbb128_y_), borrow); \
  SIMD_type sbb128_b1_ = sisd_slli(simd_srli_64(simd_or(sbb128_gen_, simd_and(sbb128_prop_, sbb128_partial_)), 63), 64); \
  SIMD_type sbb128_diff_ = simd_sub_64(sbb128_partial_, sbb128_b1_); \
  (difference) = sbb128_diff_; \
  (borrow) = sisd_srli(simd_or(sbb128_gen_, simd_and(sbb128_prop_, sbb128_diff_)), 127); \
}while(0)
263
264
/*
 * advance_with_carry(cursor, carry, rslt):
 *   (carry, rslt) = cursor + cursor + carry
 * i.e. a one-bit shift of the whole block toward the high end, with
 * carry-in and carry-out.
 *
 *   cursor : block value to advance (evaluated once)
 *   carry  : CarryType lvalue, carry-in on entry, carry-out on exit
 *   rslt   : block lvalue receiving the result
 *
 * Steps:
 *   shift_out = top bit of each 64-bit lane, moved to that lane's bit 0
 *   low_bits  = simd_mergel_64(shift_out, carry): the bits to OR into the
 *               doubled lanes (presumably the low lane's shifted-out bit
 *               for the high lane and the carry-in for the low lane;
 *               confirm against simd_mergel_64's definition)
 *   carry     = shift_out moved down 64 bits (the high lane's top bit)
 *   rslt      = lane-wise cursor + cursor, OR low_bits
 *
 * All temporaries carry an awc_ prefix so they cannot capture caller
 * variables named shift_out or low_bits, and cursor is copied once so
 * its expression is not re-evaluated.
 */
#define advance_with_carry(cursor, carry, rslt)\
do {\
  SIMD_type awc_cursor_ = (cursor);\
  SIMD_type awc_shift_out_ = simd_srli_64(awc_cursor_, 63);\
  SIMD_type awc_low_bits_ = simd_mergel_64(awc_shift_out_, carry);\
  (carry) = sisd_srli(awc_shift_out_, 64);\
  (rslt) = simd_or(simd_add_64(awc_cursor_, awc_cursor_), awc_low_bits_);\
} while(0)
272
273#endif
274#endif
275
276
Note: See TracBrowser for help on using the repository browser.