source: trunk/lib/block_carry.h @ 462

Last change on this file since 462 was 462, checked in by cameron, 9 years ago

perflib and block_carry.h

File size: 6.9 KB
Line 
/*  Block Addition, Subtraction and Shifts with Carry
    Copyright (C) 2010, Robert D. Cameron
    Licensed to the public under the Open Software License 3.0.
    Licensed to International Characters Inc.
       under the Academic Free License version 3.0.

This file defines addition, subtraction and shift operations on
128-bit blocks.   Different versions of the operations are
selectable with the CARRY_STRATEGY preprocessor constant.

Each implementation defines the following "abstract data type"
for block operations with carry.

Typename:   CarryType
Constant:   Carry0  represents a value of 0 for the carry bit.
Predicate:  test_carry(x) returns nonzero if a carry bit is 1, 0 otherwise.
Function:   carry_or(carry1, carry2) forms the logical or of two carries.
Function:   adc128(x, y, carry, sum)
                 computes (carry, sum) = x + y + carry
Function:   advance_with_carry(cursor, carry, rslt)
                 computes (carry, rslt) = cursor + cursor + carry
Function:   sbb128(x, y, borrow, diff)
                 computes (borrow, diff) = x - y - borrow

*/
25#ifndef BLOCK_CARRY_H
26#define BLOCK_CARRY_H
27
28
29
30
31
32/*------------------------------------------------------------*/
33#include "sse_simd.h"
34
/* Identifiers for the available carry implementations.  Exactly one of the
   "#if (CARRY_STRATEGY == ...)" sections of this header is compiled. */
#define ADC64 1
#define SIMD_CARRY 2
#define ADC64_SAHF 3

/* Strategy selection.  ADC64_SAHF is the default; defining CARRY_STRATEGY
   before this point (for example on the compiler command line with
   -DCARRY_STRATEGY=2) selects another implementation without editing
   this file. */
#ifndef CARRY_STRATEGY
#define CARRY_STRATEGY ADC64_SAHF
#endif
40
41#if (CARRY_STRATEGY == ADC64)
/* Carry representation for the ADC64 strategy: a 64-bit unsigned integer. */
typedef uint64_t CarryType;

/* The "no carry" value. */
#define Carry0 0

/* Evaluates to 1 when the carry value x is greater than zero, else 0. */
#define test_carry(x) ((x) > 0)

/* Bitwise OR of two carries.  Each argument is parenthesized so that an
   argument containing a lower-precedence operator (such as ?:) is combined
   as a unit instead of being re-associated by the expansion. */
#define carry_or(carry1, carry2) ((carry1) | (carry2))
49
/* double_int64_adc(x1, x2, y1, y2, rslt1, rslt2, carry)
 *
 * Two-word add with carry using x86-64 inline assembly:
 *     (carry, rslt2:rslt1) = x2:x1 + y2:y1 + carry
 * x1/y1/rslt1 are the less significant words.  carry is both read and
 * written and must be an lvalue; any nonzero value counts as a carry-in,
 * and the carry-out is stored as exactly 0 or 1.
 *
 * Instruction sequence:
 *   neg  carry      sets CF to (carry != 0) without touching any addend
 *   adc  y1, z1     low words plus carry-in
 *   adc  y2, z2     high words plus carry from the low words
 *   mov  $0, carry  mov leaves the flags untouched
 *   adc  $0, carry  carry = CF
 *
 * The carry-in must not be folded into y1 first: that would modify an
 * input-only operand and loses the carry when y1 + carry wraps to zero.
 *
 * carry and z1 are written before all inputs have been read, so they carry
 * the early-clobber modifier (&) to keep the compiler from placing y1/y2 in
 * the same register.
 */
#define double_int64_adc(x1, x2, y1, y2, rslt1, rslt2, carry) \
  __asm__  ("neg %[carryflag]\n\t" \
        "adc %[e1], %[z1]\n\t" \
        "adc %[e2], %[z2]\n\t" \
        "mov $0, %[carryflag]\n\t" \
        "adc $0, %[carryflag]\n\t" \
        : [z1] "=&r" (rslt1), [z2] "=&r" (rslt2), [carryflag] "=&r" (carry) \
        : "[z1]" (x1), "[z2]" (x2), \
          [e1] "r" (y1), [e2] "r" (y2), \
          "[carryflag]" (carry) \
        : "cc")
61
/* adc128(first, second, carry, sum)
 *
 * 128-bit add with carry built on double_int64_adc:
 *     (carry, sum) = first + second + carry
 * first and second are __m128i values; sum is an __m128i lvalue; carry is a
 * CarryType lvalue that is read and then overwritten.
 *
 * Each block is viewed as two uint64_t words through a union; word 0 is
 * passed as the less significant word (its carry-out feeds word 1).
 *
 * The temporaries use bc_adc_-prefixed names so that callers may pass
 * variables called x, y or rslt without them being captured by the macro.
 */
#define adc128(first, second, carry, sum) \
do { \
  union { __m128i bitblock; \
          uint64_t int64[2]; } bc_adc_lhs, bc_adc_rhs, bc_adc_out; \
\
  bc_adc_lhs.bitblock = (first); \
  bc_adc_rhs.bitblock = (second); \
\
  double_int64_adc(bc_adc_lhs.int64[0], bc_adc_lhs.int64[1], \
                   bc_adc_rhs.int64[0], bc_adc_rhs.int64[1], \
                   bc_adc_out.int64[0], bc_adc_out.int64[1], carry); \
  (sum) = bc_adc_out.bitblock; \
} while (0)
80
81
82
/* advance_with_carry(cursor, carry, rslt)
 *
 * Forwards to adc128 with cursor supplied as both addends, i.e.
 *     (carry, rslt) = cursor + cursor + carry
 * Note that the cursor argument is expanded twice.
 */
#define advance_with_carry(cursor, carry, rslt) \
  adc128(cursor, cursor, carry, rslt)
85
86
/* double_int64_sbb(x1, x2, y1, y2, rslt1, rslt2, carry)
 *
 * Two-word subtract with borrow using x86-64 inline assembly:
 *     (carry, rslt2:rslt1) = x2:x1 - y2:y1 - carry
 * x1/y1/rslt1 are the less significant words.  carry (the borrow) is both
 * read and written and must be an lvalue.
 *
 * The borrow uses the same convention as the rest of the ADC64 strategy
 * (Carry0 is 0 and test_carry(x) is (x) > 0): any nonzero value is a
 * borrow-in and the borrow-out is stored as exactly 0 or 1.  Saving the
 * flags with sahf/lahf instead would put the borrow in bit 8 of the value,
 * which neither Carry0 nor test_carry of this strategy understands.
 *
 * Instruction sequence:
 *   neg  carry      sets CF to (carry != 0)
 *   sbb  y1, z1     low words minus borrow-in
 *   sbb  y2, z2     high words minus borrow from the low words
 *   mov  $0, carry  mov leaves the flags untouched
 *   adc  $0, carry  carry = CF
 *
 * carry and z1 are written before all inputs have been read, hence the
 * early-clobber modifier (&) on the outputs.
 */
#define double_int64_sbb(x1, x2, y1, y2, rslt1, rslt2, carry) \
  __asm__  ("neg %[carryflag]\n\t" \
        "sbb %[e1], %[z1]\n\t" \
        "sbb %[e2], %[z2]\n\t" \
        "mov $0, %[carryflag]\n\t" \
        "adc $0, %[carryflag]\n\t" \
        : [z1] "=&r" (rslt1), [z2] "=&r" (rslt2), [carryflag] "=&r" (carry) \
        : "[z1]" (x1), "[z2]" (x2), \
          [e1] "r" (y1), [e2] "r" (y2), \
          "[carryflag]" (carry) \
        : "cc")
97
/* sbb128(first, second, borrow, diff)
 *
 * 128-bit subtract with borrow built on double_int64_sbb:
 *     (borrow, diff) = first - second - borrow
 * first and second are __m128i values; diff is an __m128i lvalue; borrow is
 * a CarryType lvalue that is handed to double_int64_sbb and updated by it.
 *
 * Each block is viewed as two uint64_t words through a union; word 0 is
 * passed as the less significant word.
 *
 * The temporaries use bc_sbb_-prefixed names so that callers may pass
 * variables called x, y or rslt without them being captured by the macro.
 */
#define sbb128(first, second, borrow, diff) \
do { \
  union { __m128i bitblock; \
          uint64_t int64[2]; } bc_sbb_lhs, bc_sbb_rhs, bc_sbb_out; \
\
  bc_sbb_lhs.bitblock = (first); \
  bc_sbb_rhs.bitblock = (second); \
\
  double_int64_sbb(bc_sbb_lhs.int64[0], bc_sbb_lhs.int64[1], \
                   bc_sbb_rhs.int64[0], bc_sbb_rhs.int64[1], \
                   bc_sbb_out.int64[0], bc_sbb_out.int64[1], borrow); \
  (diff) = bc_sbb_out.bitblock; \
} while (0)
116
117#endif
118
119#if (CARRY_STRATEGY == ADC64_SAHF)
/* Carry representation for the ADC64_SAHF strategy: a 64-bit unsigned
   integer holding a flags image as stored by lahf (the assembly macros in
   this section bind the carry to the "a" register and use sahf/lahf). */
typedef uint64_t CarryType;

/* The "no carry" value. */
#define Carry0 0

/* Evaluates to 1 when bit 8 (value 256) of x is set, else 0.  lahf stores
   the carry flag in bit 0 of AH, which is bit 8 of the full register. */
#define test_carry(x) (((x)&256) > 0)

/* Bitwise OR of two carries.  Each argument is parenthesized so that an
   argument containing a lower-precedence operator (such as ?:) is combined
   as a unit instead of being re-associated by the expansion. */
#define carry_or(carry1, carry2) ((carry1) | (carry2))
127
/* double_int64_adc(x1, x2, y1, y2, rslt1, rslt2, carry)
 *
 * Two-word add with carry using x86-64 inline assembly:
 *     (carry, rslt2:rslt1) = x2:x1 + y2:y1 + carry
 * x1/y1/rslt1 are the less significant words.  carry is both read and
 * written, must be an lvalue, and is bound to the "a" register: sahf loads
 * the flags (including CF) from AH before the additions and lahf stores
 * them back into AH afterwards, so the carry lives in bit 8 of the value.
 *
 * z1 is written by the first adc while y2 has not been read yet, so it is
 * marked early-clobber (&); without it the compiler is allowed to place an
 * input with the same value in z1's register.
 */
#define double_int64_adc(x1, x2, y1, y2, rslt1, rslt2, carry) \
  __asm__  ("sahf\n\t" \
        "adc %[e1], %[z1]\n\t" \
        "adc %[e2], %[z2]\n\t" \
        "lahf\n\t" \
        : [z1] "=&r" (rslt1), [z2] "=r" (rslt2), [carryflag] "=a" (carry) \
        : "[z1]" (x1), "[z2]" (x2), \
          [e1] "r" (y1), [e2] "r" (y2), \
          "[carryflag]" (carry) \
        : "cc")
138
/* adc128(first, second, carry, sum)
 *
 * 128-bit add with carry built on double_int64_adc:
 *     (carry, sum) = first + second + carry
 * first and second are __m128i values; sum is an __m128i lvalue; carry is a
 * CarryType lvalue that is read and then overwritten.
 *
 * Each block is viewed as two uint64_t words through a union; word 0 is
 * passed as the less significant word (its carry-out feeds word 1).
 *
 * The temporaries use bc_adc_-prefixed names so that callers may pass
 * variables called x, y or rslt without them being captured by the macro.
 */
#define adc128(first, second, carry, sum) \
do { \
  union { __m128i bitblock; \
          uint64_t int64[2]; } bc_adc_lhs, bc_adc_rhs, bc_adc_out; \
\
  bc_adc_lhs.bitblock = (first); \
  bc_adc_rhs.bitblock = (second); \
\
  double_int64_adc(bc_adc_lhs.int64[0], bc_adc_lhs.int64[1], \
                   bc_adc_rhs.int64[0], bc_adc_rhs.int64[1], \
                   bc_adc_out.int64[0], bc_adc_out.int64[1], carry); \
  (sum) = bc_adc_out.bitblock; \
} while (0)
157
158
159
/* double_int64_advance(x1, x2, rslt1, rslt2, carry)
 *
 * Doubles the two-word value x2:x1 and adds the incoming carry:
 *     (carry, rslt2:rslt1) = x2:x1 + x2:x1 + carry
 * x1/rslt1 are the less significant words.  Each word is added to itself
 * with adc, so the carry out of the low word enters the high word.
 *
 * carry is read and written, must be an lvalue, and is bound to the "a"
 * register: sahf loads the flags from AH beforehand and lahf stores them
 * back into AH afterwards.
 */
#define double_int64_advance(x1, x2, rslt1, rslt2, carry) \
  __asm__ ( \
      "sahf\n\t" \
      "adc %[z1], %[z1]\n\t" \
      "adc %[z2], %[z2]\n\t" \
      "lahf\n\t" \
      : [z1] "=r" (rslt1), \
        [z2] "=r" (rslt2), \
        [carryflag] "=a" (carry) \
      : "[z1]" (x1), \
        "[z2]" (x2), \
        "[carryflag]" (carry) \
      : "cc")
169
170
/* advance_with_carry(cursor, carry, rslt)
 *
 * 128-bit "shift left by one with carry-in" built on double_int64_advance:
 *     (carry, rslt) = cursor + cursor + carry
 * cursor is an __m128i value; rslt is an __m128i lvalue; carry is a
 * CarryType lvalue that is read and then overwritten.
 *
 * The block is viewed as two uint64_t words through a union; word 0 is
 * passed as the less significant word.
 *
 * The temporaries use bc_adv_-prefixed names so that callers may pass
 * variables called x or z without them being captured by the macro.
 */
#define advance_with_carry(cursor, carry, rslt) \
do { \
  union { __m128i bitblock; \
          uint64_t int64[2]; } bc_adv_in, bc_adv_out; \
\
  bc_adv_in.bitblock = (cursor); \
\
  double_int64_advance(bc_adv_in.int64[0], bc_adv_in.int64[1], \
                       bc_adv_out.int64[0], bc_adv_out.int64[1], carry); \
  (rslt) = bc_adv_out.bitblock; \
} while (0)
185
186
187
188
/* double_int64_sbb(x1, x2, y1, y2, rslt1, rslt2, carry)
 *
 * Two-word subtract with borrow using x86-64 inline assembly:
 *     (carry, rslt2:rslt1) = x2:x1 - y2:y1 - carry
 * x1/y1/rslt1 are the less significant words.  carry (the borrow) is both
 * read and written, must be an lvalue, and is bound to the "a" register:
 * sahf loads the flags (including CF) from AH before the subtractions and
 * lahf stores them back into AH afterwards, so the borrow lives in bit 8
 * of the value.
 *
 * z1 is written by the first sbb while y2 has not been read yet, so it is
 * marked early-clobber (&); without it the compiler is allowed to place an
 * input with the same value in z1's register.
 */
#define double_int64_sbb(x1, x2, y1, y2, rslt1, rslt2, carry) \
  __asm__  ("sahf\n\t" \
        "sbb %[e1], %[z1]\n\t" \
        "sbb %[e2], %[z2]\n\t" \
        "lahf\n\t" \
        : [z1] "=&r" (rslt1), [z2] "=r" (rslt2), [carryflag] "=a" (carry) \
        : "[z1]" (x1), "[z2]" (x2), \
          [e1] "r" (y1), [e2] "r" (y2), \
          "[carryflag]" (carry) \
        : "cc")
199
/* sbb128(first, second, borrow, diff)
 *
 * 128-bit subtract with borrow built on double_int64_sbb:
 *     (borrow, diff) = first - second - borrow
 * first and second are __m128i values; diff is an __m128i lvalue; borrow is
 * a CarryType lvalue that is read and then overwritten.
 *
 * Each block is viewed as two uint64_t words through a union; word 0 is
 * passed as the less significant word (its borrow-out feeds word 1).
 *
 * The temporaries use bc_sbb_-prefixed names so that callers may pass
 * variables called x, y or rslt without them being captured by the macro.
 */
#define sbb128(first, second, borrow, diff) \
do { \
  union { __m128i bitblock; \
          uint64_t int64[2]; } bc_sbb_lhs, bc_sbb_rhs, bc_sbb_out; \
\
  bc_sbb_lhs.bitblock = (first); \
  bc_sbb_rhs.bitblock = (second); \
\
  double_int64_sbb(bc_sbb_lhs.int64[0], bc_sbb_lhs.int64[1], \
                   bc_sbb_rhs.int64[0], bc_sbb_rhs.int64[1], \
                   bc_sbb_out.int64[0], bc_sbb_out.int64[1], borrow); \
  (diff) = bc_sbb_out.bitblock; \
} while (0)
218
219#endif
220
221
222
223#if (CARRY_STRATEGY == SIMD_CARRY)
224
225typedef SIMD_type CarryType;
226
227#define Carry0 simd_const_1(0)
228
229#define test_carry(x) bitblock_has_bit(x)
230
231#define carry_or(carry1, carry2) simd_or(carry1, carry2)
232
/* adc128(x, y, carry, sum)
 *
 * 128-bit add with carry assembled from two 64-bit lane additions:
 *     (carry, sum) = x + y + carry
 * sum and carry are assigned, so both must be lvalues; x and y are expanded
 * several times.
 *
 * Steps (helper semantics come from sse_simd.h, which is not visible here;
 * this assumes simd_andc(a, b) is a AND NOT b, the *_64 helpers work on
 * each 64-bit half separately and the sisd_* helpers shift the whole
 * 128-bit value - confirm against that header):
 *   1. generate/propagate masks are x AND y and x OR y.
 *   2. The lanes are added independently, with carry added on top.
 *   3. The carry out of each lane is recovered from bit 63 of
 *      generate OR (propagate AND NOT lane_sum); shifting the whole block
 *      left by 64 moves the low half's carry into bit 0 of the high half.
 *   4. That cross-lane carry is added in to give sum.
 *   5. The same formula on the final sum, shifted right by 127, leaves the
 *      carry out of the top bit in the lowest bit of carry.
 */
#define adc128(x, y, carry,  sum) \
do { \
  SIMD_type adc_generate = simd_and(x, y); \
  SIMD_type adc_propagate = simd_or(x, y); \
  SIMD_type adc_lane_sum = simd_add_64(simd_add_64(x, y), carry); \
  SIMD_type adc_lane_out = \
      simd_or(adc_generate, simd_andc(adc_propagate, adc_lane_sum)); \
  SIMD_type adc_cross = sisd_slli(simd_srli_64(adc_lane_out, 63), 64); \
  sum = simd_add_64(adc_cross, adc_lane_sum); \
  carry = sisd_srli( \
      simd_or(adc_generate, simd_andc(adc_propagate, sum)), 127); \
} while (0)
242
243
/* sbb128(x, y, borrow, difference)
 *
 * 128-bit subtract with borrow assembled from two 64-bit lane subtractions:
 *     (borrow, difference) = x - y - borrow
 * difference and borrow are assigned, so both must be lvalues; x and y are
 * expanded several times.
 *
 * Steps (helper semantics come from sse_simd.h, which is not visible here;
 * this assumes simd_andc(a, b) is a AND NOT b, the *_64 helpers work on
 * each 64-bit half separately and the sisd_* helpers shift the whole
 * 128-bit value - confirm against that header):
 *   1. The borrow-generate mask is y AND NOT x; the propagate mask is
 *      NOT (x XOR y).
 *   2. The lanes are subtracted independently, then borrow is subtracted.
 *   3. The borrow out of each lane is recovered from bit 63 of
 *      generate OR (propagate AND lane_diff); shifting the whole block left
 *      by 64 moves the low half's borrow into bit 0 of the high half.
 *   4. That cross-lane borrow is subtracted to give difference.
 *   5. The same formula on the final difference, shifted right by 127,
 *      leaves the borrow out of the top bit in the lowest bit of borrow.
 */
#define sbb128(x, y, borrow, difference) \
do { \
  SIMD_type sbb_generate = simd_andc(y, x); \
  SIMD_type sbb_propagate = simd_not(simd_xor(x, y)); \
  SIMD_type sbb_lane_diff = simd_sub_64(simd_sub_64(x, y), borrow); \
  SIMD_type sbb_lane_out = \
      simd_or(sbb_generate, simd_and(sbb_propagate, sbb_lane_diff)); \
  SIMD_type sbb_cross = sisd_slli(simd_srli_64(sbb_lane_out, 63), 64); \
  difference = simd_sub_64(sbb_lane_diff, sbb_cross); \
  borrow = sisd_srli( \
      simd_or(sbb_generate, simd_and(sbb_propagate, difference)), 127); \
} while (0)
253
254
/* advance_with_carry(cursor, carry, rslt)
 *
 * Doubles cursor and brings in the incoming carry:
 *     (carry, rslt) = cursor + cursor + carry
 * carry and rslt are assigned, so both must be lvalues; cursor is expanded
 * more than once.
 *
 * The top bit of each 64-bit lane is isolated with a right shift by 63.
 * simd_mergel_64 then combines those bits with the incoming carry to form
 * the bits OR-ed into the doubled lanes (NOTE(review): presumably the old
 * carry goes to the low half and the low half's shifted-out bit to the high
 * half - confirm simd_mergel_64's operand order in sse_simd.h).  The old
 * carry must be consumed before carry is overwritten, so the statement
 * order matters.
 */
#define advance_with_carry(cursor, carry, rslt) \
do { \
  SIMD_type adv_top_bits = simd_srli_64(cursor, 63); \
  SIMD_type adv_fill = simd_mergel_64(adv_top_bits, carry); \
  carry = sisd_srli(adv_top_bits, 64); \
  rslt = simd_or(simd_add_64(cursor, cursor), adv_fill); \
} while (0)
262
263#endif
264#endif
265
266
Note: See TracBrowser for help on using the repository browser.