source: trunk/lib/block_carry.h @ 1519

Last change on this file since 1519 was 1519, checked in by cameron, 8 years ago

Eliminate SAHFLAHF strategy

File size: 5.6 KB
Line 
1/*  Block Addition, Subtraction and Shifts with Carry
2    Copyright (C) 2010, Robert D. Cameron
3    Licensed to the public under the Open Software License 3.0.
4    Licensed to International Characters Inc.
5       under the Academic Free License version 3.0.
6
7This file defines addition, subtraction and shift operations on
8128-bit blocks.   Different versions of the operations are
9selectable with the CARRY_STRATEGY preprocessor constant.
10
11Each implementation defines the following "abstract data type"
12for block operations with carry.
13
14Typename:   CarryType
15Constant:   Carry0  represents a value of 0 for the carry bit.
16Predicate:  test_carry(x) returns nonzero if a carry bit is 1, 0 otherwise.
17Function:   carry_or(carry1, carry2) forms the logical or of two carries.
18Function:   adc128(x, y, carry, sum) computes (carry, sum) = x + y + carry,
19Function:   advance_with_carry(cursor, carry, rslt)
20                 computes (carry, rslt) = cursor + cursor + carry
21Function:   sbb128(x, y, borrow, diff)
22                 computes (borrow, diff) = x - y - borrow
23
24*/
25#ifndef BLOCK_CARRY_H
26#define BLOCK_CARRY_H
27
28
29typedef union {SIMD_type bitblock; uint64_t int64[2];} BitBlock_int64;
30
31
32
33/*------------------------------------------------------------*/
34#include "sse_simd.h"
35
/* Identifiers of the available carry strategies. */
#define SIMD_CARRY_STRATEGY 1
#define ADC64_STRATEGY 2

/* Strategy selection.  A CARRY_STRATEGY supplied by the build (for
   example -DCARRY_STRATEGY=ADC64_STRATEGY) is respected instead of being
   redefined.  Otherwise defining ADC64 selects the ADC64 strategy and
   the SIMD strategy is the default. */
#ifndef CARRY_STRATEGY
#ifdef ADC64
#define CARRY_STRATEGY ADC64_STRATEGY
#else
#define CARRY_STRATEGY SIMD_CARRY_STRATEGY
#endif
#endif

/* Reject values for which no implementation section would be compiled. */
#if (CARRY_STRATEGY != SIMD_CARRY_STRATEGY) && (CARRY_STRATEGY != ADC64_STRATEGY)
#error "CARRY_STRATEGY must be SIMD_CARRY_STRATEGY or ADC64_STRATEGY"
#endif
44
45#if (CARRY_STRATEGY == ADC64_STRATEGY)
/* ADC64 strategy: a carry is held in a 64-bit unsigned integer. */
typedef uint64_t CarryType;

/* The carry value representing "no carry". */
#define Carry0 0

/* Toggle the lowest bit of a carry (turns 0 into 1 and 1 into 0). */
#define carry_flip(c) ((c) ^ 1)

/* Nonzero when the carry is set, 0 otherwise. */
#define test_carry(x) ((x) > 0)

/* Bitwise OR of two carries.  Both arguments are parenthesized so that
   expression arguments with lower precedence than '|' (for example a
   conditional expression) are combined as written by the caller. */
#define carry_or(carry1, carry2) ((carry1) | (carry2))
55
/* adc(x, y, carry, sum): computes (carry, sum) = x + y + carry on
   integer operands using the x86 carry flag.

   neg   sets CF when the incoming carry is nonzero;
   adc   adds y and CF into the register that starts out holding x;
   mov   clears the carry register without changing the flags;
   adc   then stores the outgoing CF (0 or 1) in it.

   The input y is only read, never written.  The carry output is marked
   early-clobber ('&') because it is overwritten before y is read. */
#define adc(x,y,carry,sum) \
  __asm__ ("neg %[carryflag]\n\t" \
        "adc %[e], %[z]\n\t" \
        "mov $0, %[carryflag]\n\t" \
        "adc $0, %[carryflag]\n\t" \
        : [z] "=r" (sum), [carryflag] "=&r" (carry) \
        : "[z]" (x), [e] "r" (y), "[carryflag]" (carry) \
        : "cc")
64
/* double_int64_adc(x1, x2, y1, y2, rslt1, rslt2, carry):
   adds the two-word value (x2:x1) to (y2:y1) plus the incoming carry,
   storing the two result words in rslt1 (first word) and rslt2 (second
   word) and the outgoing carry (0 or 1) in carry.

   neg   sets CF when the incoming carry is nonzero;
   adc   adds the first words, then the second words, chaining CF;
   mov   clears the carry register without changing the flags;
   adc   then stores the final CF in it.

   rslt1 and carry are marked early-clobber ('&') because they are
   written before every input register has been read; without it the
   compiler may place an unrelated input in the same register.  The asm
   depends only on its operands, so it is not declared volatile. */
#define double_int64_adc(x1, x2, y1, y2, rslt1, rslt2, carry) \
   __asm__ ("neg %[carryflag]\n\t" \
         "adc %[e1], %[z1]\n\t" \
         "adc %[e2], %[z2]\n\t" \
         "mov $0, %[carryflag]\n\t" \
         "adc $0, %[carryflag]\n\t" \
         : [z1] "=&r" (rslt1), [z2] "=r" (rslt2), [carryflag] "=&r" (carry) \
         : "[z1]" (x1), "[z2]" (x2), \
           [e1] "r" (y1), [e2] "r" (y2), \
           "[carryflag]" (carry) \
         : "cc")
76
77
/* adc128(first, second, carry, sum): computes
   (carry, sum) = first + second + carry on 128-bit blocks by splitting
   each block into two 64-bit words and delegating to double_int64_adc.

   The temporaries carry an adc128_ prefix so that they cannot shadow
   caller variables passed as arguments (for example variables named
   x or y). */
#define adc128(first, second, carry, sum) \
do {\
  BitBlock_int64 adc128_x_, adc128_y_, adc128_sum_;\
  adc128_x_.bitblock = (first);\
  adc128_y_.bitblock = (second);\
  double_int64_adc(adc128_x_.int64[0], adc128_x_.int64[1], \
                   adc128_y_.int64[0], adc128_y_.int64[1], \
                   adc128_sum_.int64[0], adc128_sum_.int64[1], carry);\
  (sum) = adc128_sum_.bitblock;\
} while(0)
86
87
88
/* double_int64_advance(x1, x2, rslt1, rslt2, carry):
   shifts the two-word value (x2:x1) left by one bit, bringing the
   incoming carry into the vacated lowest bit, and reports the bit
   shifted out of x2 as the outgoing carry.

   add   doubles the first word, leaving its top bit in CF;
   adc   doubles the second word and adds that CF;
   lea   adds the incoming carry to the first word without changing
         the flags;
   setc  writes the CF left by adc into %al, which is why the carry
         operand is constrained to the "a" register.

   Only the low byte of the carry register is rewritten by setc, so the
   incoming carry is expected to be 0 or 1. */
#define double_int64_advance(x1, x2, rslt1, rslt2, carry) \
  __asm__ ( \
        "add %[lo], %[lo]\n\t" \
        "adc %[hi], %[hi]\n\t" \
        "lea 0(%[cy], %[lo]), %[lo]\n\t" \
        "setc %%al\n\t" \
        : [lo] "=r" (rslt1), [hi] "=r" (rslt2), [cy] "=a" (carry) \
        : "[lo]" (x1), "[hi]" (x2), "[cy]" (carry) \
        : "cc")
99
100/*  Slow
101#define double_int64_advance(x1, x2, rslt1, rslt2, carry) \
102  __asm__  (\
103        "shld $1, %[z1], %[z2]\n\t" \
104        "lea 0(%[carryflag], %[z1], 2), %[z1]\n\t" \
105        "setc %%al\n\t" \
106         : [z1] "=r" (rslt1), [z2] "=r" (rslt2), [carryflag] "=a" (carry) \
107         : "[z1]" (x1), "[z2]" (x2), \
108           "[carryflag]" (carry) \
109         : "cc")
110*/
111
/* advance_with_carry(cursor, carry, rslt): computes
   (carry, rslt) = cursor + cursor + carry on a 128-bit block by
   splitting the block into two 64-bit words and delegating to
   double_int64_advance.

   The temporaries carry an advance_ prefix so that they cannot shadow
   caller variables passed as arguments (for example variables named
   x or z). */
#define advance_with_carry(cursor, carry, rslt)\
do {\
  BitBlock_int64 advance_in_, advance_out_;\
  advance_in_.bitblock = (cursor);\
  double_int64_advance(advance_in_.int64[0], advance_in_.int64[1], \
                       advance_out_.int64[0], advance_out_.int64[1], carry);\
  (rslt) = advance_out_.bitblock;\
} while(0)
119
/* double_int64_sbb(x1, x2, y1, y2, rslt1, rslt2, brw):
   subtracts the two-word value (y2:y1) and the incoming borrow from
   (x2:x1), storing the two result words in rslt1 (first word) and rslt2
   (second word) and the outgoing borrow in brw.

   neg   sets CF when the incoming borrow is nonzero;
   sbb   subtracts the first words, then the second words, chaining CF;
   mov   clears the borrow register without changing the flags;
   adc   then stores the final CF in it, so the outgoing borrow is
         0 or 1 (a trailing sbb here would produce all-ones instead,
         which disagrees with carry_flip and with the carry consumed by
         double_int64_advance).

   rslt1 and brw are marked early-clobber ('&') because they are written
   before every input register has been read. */
#define double_int64_sbb(x1, x2, y1, y2, rslt1, rslt2, brw) \
  __asm__  ("neg %[borrowflag]\n\t" \
        "sbb %[e1], %[z1]\n\t" \
        "sbb %[e2], %[z2]\n\t" \
        "mov $0, %[borrowflag]\n\t" \
        "adc $0, %[borrowflag]\n\t" \
        : [z1] "=&r" (rslt1), [z2] "=r" (rslt2), [borrowflag] "=&r" (brw) \
        : "[z1]" (x1), "[z2]" (x2), \
          [e1] "r" (y1), [e2] "r" (y2), \
          "[borrowflag]" (brw) \
        : "cc")
131
/* sbb128(first, second, borrow, diff): computes
   (borrow, diff) = first - second - borrow on 128-bit blocks by
   splitting each block into two 64-bit words and delegating to
   double_int64_sbb.

   The temporaries carry an sbb128_ prefix so that they cannot shadow
   caller variables passed as arguments (for example variables named
   x or y). */
#define sbb128(first, second, borrow, diff) \
do {\
  BitBlock_int64 sbb128_x_, sbb128_y_, sbb128_diff_;\
  sbb128_x_.bitblock = (first);\
  sbb128_y_.bitblock = (second);\
  double_int64_sbb(sbb128_x_.int64[0], sbb128_x_.int64[1], \
                   sbb128_y_.int64[0], sbb128_y_.int64[1], \
                   sbb128_diff_.int64[0], sbb128_diff_.int64[1], borrow);\
  (diff) = sbb128_diff_.bitblock;\
} while(0)
141
142
143#endif
144
145
146
147#if (CARRY_STRATEGY == SIMD_CARRY_STRATEGY)
148
149typedef SIMD_type CarryType;
150
151#define Carry0 simd_const_1(0)
152
153#define carry_flip(c) simd_xor(c, sisd_from_int(1))
154
155#define test_carry(x) bitblock_has_bit(x)
156
157#define carry_or(carry1, carry2) simd_or(carry1, carry2)
158
/* adc128(x, y, carry, sum): computes (carry, sum) = x + y + carry on
   128-bit blocks using 64-bit lane additions.

   The lanes are first added independently (with the incoming carry),
   the carry out of the low lane is recovered from the generate and
   propagate terms and added into the high lane, and the same terms
   then give the carry out of the whole block.

   NOTE(review): assumes simd_andc(a, b) computes a AND NOT b, that the
   simd_*_64 operations act on each 64-bit lane, and that the sisd_*
   shifts act on the full 128-bit register; confirm against sse_simd.h. */
#define adc128(x, y, carry,  sum) \
do{ \
  SIMD_type adc_generate = simd_and(x, y); \
  SIMD_type adc_propagate = simd_or(x, y); \
  SIMD_type adc_lane_sums = simd_add_64(simd_add_64(x, y), carry); \
  SIMD_type adc_lane_carries = simd_or(adc_generate, simd_andc(adc_propagate, adc_lane_sums)); \
  SIMD_type adc_low_carry = sisd_slli(simd_srli_64(adc_lane_carries, 63), 64); \
  sum = simd_add_64(adc_low_carry, adc_lane_sums); \
  carry = sisd_srli(simd_or(adc_generate, simd_andc(adc_propagate, sum)), 127); \
} while(0)
168
169
/* sbb128(x, y, borrow, difference): computes
   (borrow, difference) = x - y - borrow on 128-bit blocks using 64-bit
   lane subtractions.

   The lanes are first subtracted independently (with the incoming
   borrow), the borrow out of the low lane is recovered from the
   generate and propagate terms and subtracted from the high lane, and
   the same terms then give the borrow out of the whole block.

   NOTE(review): assumes simd_andc(a, b) computes a AND NOT b, that the
   simd_*_64 operations act on each 64-bit lane, and that the sisd_*
   shifts act on the full 128-bit register; confirm against sse_simd.h. */
#define sbb128(x, y, borrow, difference) \
do {\
  SIMD_type sbb_generate = simd_andc(y, x); \
  SIMD_type sbb_propagate = simd_not(simd_xor(x, y)); \
  SIMD_type sbb_lane_diffs = simd_sub_64(simd_sub_64(x, y), borrow); \
  SIMD_type sbb_lane_borrows = simd_or(sbb_generate, simd_and(sbb_propagate, sbb_lane_diffs)); \
  SIMD_type sbb_low_borrow = sisd_slli(simd_srli_64(sbb_lane_borrows, 63), 64); \
  difference = simd_sub_64(sbb_lane_diffs, sbb_low_borrow); \
  borrow = sisd_srli(simd_or(sbb_generate, simd_and(sbb_propagate, difference)), 127); \
}while(0)
179
180
/* advance_with_carry(cursor, carry, rslt): computes
   (carry, rslt) = cursor + cursor + carry on a 128-bit block.

   The top bit of each 64-bit lane is extracted first; those bits and the
   incoming carry are merged into the lowest bit positions of the doubled
   lanes, and the top bit of the high lane becomes the outgoing carry.

   NOTE(review): assumes simd_mergel_64(a, b) places the low lane of a
   above the low lane of b, and that sisd_srli shifts the full 128-bit
   register; confirm against sse_simd.h. */
#define advance_with_carry(cursor, carry, rslt)\
do {\
  SIMD_type adv_top_bits = simd_srli_64(cursor, 63);\
  SIMD_type adv_fill_bits = simd_mergel_64(adv_top_bits, carry);\
  carry = sisd_srli(adv_top_bits, 64);\
  SIMD_type adv_doubled = simd_add_64(cursor, cursor);\
  rslt = simd_or(adv_doubled, adv_fill_bits);\
} while(0)
188
189#endif
190#endif
191
192
Note: See TracBrowser for help on using the repository browser.