Changeset 454 for proto/Compiler/workspace/sse_simd.h
 Timestamp:
 Jul 9, 2010, 3:37:39 PM (9 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

proto/Compiler/workspace/sse_simd.h
r433 r454 27 27 #endif 28 28 typedef __m128i SIMD_type; 29 30 31 #define double_int64_adc(x1, x2, y1, y2, rslt1, rslt2, carry) \32 __asm__ ("sahf\n\t" \33 "adc %[e1], %[z1]\n\t" \34 "adc %[e2], %[z2]\n\t" \35 "lahf\n\t" \36 : [z1] "=r" (rslt1), [z2] "=r" (rslt2), [carryflag] "=a" (carry) \37 : "[z1]" (x1), "[z2]" (x2), \38 [e1] "r" (y1), [e2] "r" (y2), \39 "[carryflag]" (carry) \40 : "cc")41 42 #define adc128(first, second, carry, sum) \43 do\44 {\45 union {__m128i bitblock;\46 uint64_t int64[2];} rslt;\47 \48 union {__m128i bitblock;\49 uint64_t int64[2];} x;\50 \51 union {__m128i bitblock;\52 uint64_t int64[2];} y;\53 \54 x.bitblock = first;\55 y.bitblock = second;\56 \57 double_int64_adc(x.int64[0], x.int64[1], y.int64[0], y.int64[1], rslt.int64[0], rslt.int64[1], carry);\58 sum = rslt.bitblock;\59 }while(0)60 61 62 63 #define double_int64_sbb(x1, x2, y1, y2, rslt1, rslt2, carry) \64 __asm__ ("sahf\n\t" \65 "sbb %[e1], %[z1]\n\t" \66 "sbb %[e2], %[z2]\n\t" \67 "lahf\n\t" \68 : [z1] "=r" (rslt1), [z2] "=r" (rslt2), [carryflag] "=a" (carry) \69 : "[z1]" (x1), "[z2]" (x2), \70 [e1] "r" (y1), [e2] "r" (y2), \71 "[carryflag]" (carry) \72 : "cc")73 74 #define sbb128(first, second, carry, sum) \75 do\76 { union {__m128i bitblock;\77 uint64_t int64[2];} rslt;\78 \79 union {__m128i bitblock;\80 uint64_t int64[2];} x;\81 \82 union {__m128i bitblock;\83 uint64_t int64[2];} y;\84 \85 x.bitblock = first;\86 y.bitblock = second;\87 \88 double_int64_sbb(x.int64[0], x.int64[1], y.int64[0], y.int64[1], \89 rslt.int64[0], rslt.int64[1], carry);\90 sum = rslt.bitblock;\91 }while(0)92 93 94 95 #define adc128_simd(x, y, carry, sum) \96 do{ \97 SIMD_type gen = simd_and(x, y); \98 SIMD_type prop = simd_or(x, y); \99 SIMD_type partial = simd_add_64(simd_add_64(x, y), carry); \100 SIMD_type c1 = sisd_slli(simd_srli_64(simd_or(gen, simd_andc(prop, partial)), 63), 64); \101 sum = simd_add_64(c1, partial); \102 carry = sisd_srli(simd_or(gen, simd_andc(prop, sum)), 127); \103 } while(0)104 


/*
 * sbb128_simd -- 128-bit subtract with borrow built from the simd_ / sisd_
 * helper operations (defined elsewhere in this header).
 *
 * x, y       : SIMD_type minuend and subtrahend (each evaluated once).
 * borrow     : SIMD_type lvalue; read as the incoming borrow and overwritten
 *              with the outgoing one.
 * difference : SIMD_type lvalue receiving the result.
 *
 * NOTE(review): assumes simd_andc(a, b) is a AND NOT b, that the _64 helpers
 * act on each 64-bit field independently, and that the sisd_ shifts act on
 * the whole 128-bit register -- confirm against the helper definitions.
 * Under those assumptions gen | (prop & partial) has bit 63 of a field set
 * when that field borrowed; b1 moves the low field's borrow to bit 64 so it
 * can be subtracted from the high field, and the final borrow is the high
 * field's borrow shifted down to bit 0.
 *
 * Locals carry an sbb128_simd_ prefix so that caller variables named gen,
 * prop, partial or b1 are not captured by the expansion.
 */
#define sbb128_simd(x, y, borrow, difference) \
do { \
  SIMD_type sbb128_simd_x_ = (x); \
  SIMD_type sbb128_simd_y_ = (y); \
  SIMD_type sbb128_simd_gen_ = simd_andc(sbb128_simd_y_, sbb128_simd_x_); \
  SIMD_type sbb128_simd_prop_ = \
      simd_not(simd_xor(sbb128_simd_x_, sbb128_simd_y_)); \
  SIMD_type sbb128_simd_partial_ = \
      simd_sub_64(simd_sub_64(sbb128_simd_x_, sbb128_simd_y_), (borrow)); \
  SIMD_type sbb128_simd_b1_ = \
      sisd_slli(simd_srli_64(simd_or(sbb128_simd_gen_, \
                                     simd_and(sbb128_simd_prop_, \
                                              sbb128_simd_partial_)), 63), 64); \
  (difference) = simd_sub_64(sbb128_simd_partial_, sbb128_simd_b1_); \
  (borrow) = sisd_srli(simd_or(sbb128_simd_gen_, \
                               simd_and(sbb128_simd_prop_, (difference))), 127); \
} while(0)


/*
 * advance_with_carry -- shift a 128-bit cursor left by one bit position,
 * shifting the incoming carry in and producing the bit shifted out.
 *
 * cursor : SIMD_type value to advance (evaluated once).
 * carry  : SIMD_type lvalue; read as the bit to shift in and overwritten
 *          with the bit shifted out.
 * rslt   : SIMD_type lvalue receiving the advanced value.
 *
 * NOTE(review): assumes simd_mergel_64(a, b) yields the low 64-bit field of
 * a in the high half and the low 64-bit field of b in the low half, and that
 * sisd_srli shifts the whole 128-bit register -- confirm against the helper
 * definitions.  Under those assumptions low_bits holds the incoming carry at
 * bit 0 and the low field's shifted-out bit at bit 64, and cursor + cursor
 * doubles (shifts left by one) each 64-bit field.
 *
 * Locals carry an advance_ prefix so that caller variables named shift_out
 * or low_bits are not captured by the expansion.
 */
#define advance_with_carry(cursor, carry, rslt) \
do { \
  SIMD_type advance_cursor_ = (cursor); \
  SIMD_type advance_shift_out_ = simd_srli_64(advance_cursor_, 63); \
  SIMD_type advance_low_bits_ = simd_mergel_64(advance_shift_out_, (carry)); \
  (carry) = sisd_srli(advance_shift_out_, 64); \
  (rslt) = simd_or(simd_add_64(advance_cursor_, advance_cursor_), \
                   advance_low_bits_); \
} while(0)


Note: See TracChangeset
for help on using the changeset viewer.