[1544] | 1 | #ifndef CARRYQ_HPP_ |
---|
| 2 | #define CARRYQ_HPP_ |
---|
| 3 | |
---|
[1950] | 4 | /*============================================================================= |
---|
[2005] | 5 | carryQ.hpp - Pablo compiler support for carry introduction. |
---|
| 6 | Ken Herdy, Robert D. Cameron |
---|
[2719] | 7 | Copyright (C) 2012, Robert D. Cameron, Kenneth S. Herdy. |
---|
| 8 | Licensed to the public under the Open Software License 3.0. |
---|
| 9 | Licensed to International Characters Inc. |
---|
| 10 | under the Academic Free License version 3.0. |
---|
[2005] | 11 | April 2012 |
---|
[1950] | 12 | =============================================================================*/ |
---|
| 13 | |
---|
[2958] | 14 | #include <string.h> |
---|
[1544] | 15 | #include <stdint.h> |
---|
| 16 | #include <iostream> |
---|
| 17 | |
---|
| 18 | #include "bitblock.hpp" |
---|
| 19 | #include "stdio.h" |
---|
| 20 | |
---|
[1950] | 21 | /////////////////////////////////////////////////////////////////////////////// |
---|
| 22 | // |
---|
| 23 | // Carry method variants. |
---|
| 24 | // |
---|
| 25 | // BitBlock_op_ci_co() - non-while-loop statements in a standard block; also used in the final block when the carry out is ignored |
---|
| 26 | // BitBlock_op_co() - while-loop statements in a standard block; also used in final-block while loops when the carry out is ignored |
---|
| 27 | // BitBlock_op_ci() - non-while-loop statements in the final block |
---|
| 28 | // BitBlock_op() - while-loop statements in the final block |
---|
| 29 | // |
---|
| 30 | // The BitBlock_op_ci() and BitBlock_op() methods are not implemented, in order to reduce |
---|
| 31 | // the total number of methods and the complexity of the Pablo compiler. |
---|
| 32 | // |
---|
| 33 | /////////////////////////////////////////////////////////////////////////////// |
---|
[1544] | 34 | |
---|
[1694] | 35 | #define interpose32(x,y,pos) interpose32_<pos>(x,y) |
---|
[1663] | 36 | template<uint32_t n> |
---|
[1694] | 37 | IDISA_ALWAYS_INLINE BitBlock interpose32_(BitBlock s, BitBlock s32) { |
---|
[1662] | 38 | return simd_or(simd<32>::slli<n>(s), simd<32>::srli<32-n>(s32)); |
---|
[1659] | 39 | } |
---|
| 40 | |
---|
[2207] | 41 | template<uint32_t n> |
---|
| 42 | IDISA_ALWAYS_INLINE BitBlock interpose64_(BitBlock s, BitBlock s64) { |
---|
| 43 | return simd_or(simd<64>::slli<n>(s), simd<64>::srli<64-n>(s64)); |
---|
| 44 | } |
---|
| 45 | |
---|
// Forward declaration of the carry array class template.
template <uint16_t CarryCount, uint16_t AdvanceNCount> class CarryArray;

// CarryDeclare(name, carry1_count, carryN_count): declares an object `name`
// of type CarryArray<carry1_count, carryN_count>. The expansion already ends
// with a semicolon.
#define CarryDeclare(name, carry1_count, carryN_count)\
CarryArray<carry1_count, carryN_count> name;
---|
[1544] | 50 | |
---|
| 51 | // Array of BitBlock implementation. |
---|
[2207] | 52 | template <uint16_t CarryCount, uint16_t AdvanceNCount> |
---|
[2213] | 53 | class CarryArray { |
---|
[1544] | 54 | |
---|
| 55 | public: |
---|
[1933] | 56 | |
---|
| 57 | #define Carry0 simd<BLOCK_SIZE>::constant<0>() |
---|
| 58 | #define Carry1 simd<BLOCK_SIZE>::constant<1>() |
---|
| 59 | |
---|
[2221] | 60 | BitBlock cq[CarryCount + AdvanceNCount]; |
---|
| 61 | //BitBlock pending64[AdvanceNCount]; |
---|
[1544] | 62 | CarryArray() |
---|
| 63 | { |
---|
[2221] | 64 | memset (cq, 0, sizeof(BitBlock) * (CarryCount + AdvanceNCount)); |
---|
| 65 | //memset(pending64, 0, sizeof(BitBlock) * AdvanceNCount); |
---|
[1544] | 66 | } |
---|
| 67 | ~CarryArray() {} |
---|
| 68 | |
---|
[1996] | 69 | IDISA_ALWAYS_INLINE BitBlock BitBlock_advance_ci_co(BitBlock strm, BitBlock carryin, uint16_t carryno) |
---|
| 70 | { |
---|
| 71 | BitBlock rslt; |
---|
| 72 | advance_with_carry(strm, carryin, cq[carryno], rslt); |
---|
| 73 | return rslt; |
---|
| 74 | } |
---|
| 75 | |
---|
| 76 | IDISA_ALWAYS_INLINE BitBlock BitBlock_add_ci_co(BitBlock strm1, BitBlock strm2, BitBlock carryin, const uint16_t carryno) |
---|
| 77 | { |
---|
| 78 | BitBlock sum; |
---|
| 79 | adc(strm1, strm2, carryin, cq[carryno], sum); |
---|
| 80 | return sum; |
---|
| 81 | } |
---|
| 82 | |
---|
| 83 | IDISA_ALWAYS_INLINE BitBlock BitBlock_sub_ci_co(BitBlock strm1, BitBlock strm2, BitBlock carryin, uint16_t carryno) |
---|
| 84 | { |
---|
| 85 | BitBlock diff; |
---|
| 86 | sbb(strm1, strm2, carryin, cq[carryno], diff); |
---|
| 87 | return diff; |
---|
| 88 | } |
---|
| 89 | |
---|
| 90 | IDISA_ALWAYS_INLINE BitBlock BitBlock_scantofirst(BitBlock charclass, BitBlock carryin, uint16_t carryno) |
---|
| 91 | { |
---|
| 92 | BitBlock marker; |
---|
[2261] | 93 | // BitBlock c = carry_flip(carryin); |
---|
| 94 | adc(simd<BLOCK_SIZE>::constant<0>(), simd_not(charclass), carryin, cq[carryno], marker); |
---|
| 95 | // cq[carryno] = carry_flip(cq[carryno]); |
---|
[1996] | 96 | return simd_and(marker, charclass); |
---|
| 97 | } |
---|
| 98 | |
---|
| 99 | IDISA_ALWAYS_INLINE BitBlock BitBlock_scanthru_ci_co(BitBlock markers0, BitBlock charclass, BitBlock carryin, uint16_t carryno) |
---|
| 100 | { |
---|
| 101 | BitBlock markers1; |
---|
| 102 | adc(markers0, charclass, carryin, cq[carryno], markers1); |
---|
| 103 | return simd_andc(markers1, charclass); |
---|
| 104 | } |
---|
| 105 | |
---|
[2042] | 106 | IDISA_ALWAYS_INLINE BitBlock BitBlock_advance_then_scanthru(BitBlock markers0, BitBlock charclass, BitBlock carryin, uint16_t carryno) |
---|
| 107 | { |
---|
| 108 | BitBlock markers1; |
---|
[2207] | 109 | //assert(!bitblock::any(simd_and(markers0, charclass))); |
---|
[2042] | 110 | adc(markers0, simd_or(charclass, markers0), carryin, cq[carryno], markers1); |
---|
| 111 | return simd_andc(markers1, charclass); |
---|
| 112 | } |
---|
| 113 | |
---|
| 114 | IDISA_ALWAYS_INLINE BitBlock BitBlock_span_upto(BitBlock starts, BitBlock follows, BitBlock carryin, uint16_t carryno) |
---|
| 115 | { |
---|
| 116 | BitBlock span; |
---|
| 117 | sbb(follows, starts, carryin, cq[carryno], span); |
---|
| 118 | return span; |
---|
| 119 | } |
---|
| 120 | |
---|
| 121 | IDISA_ALWAYS_INLINE BitBlock BitBlock_inclusive_span(BitBlock starts, BitBlock ends, BitBlock carryin, uint16_t carryno) |
---|
| 122 | { |
---|
| 123 | BitBlock span; |
---|
| 124 | sbb(ends, starts, carryin, cq[carryno], span); |
---|
| 125 | return simd_or(span, ends); |
---|
| 126 | } |
---|
| 127 | |
---|
| 128 | IDISA_ALWAYS_INLINE BitBlock BitBlock_exclusive_span(BitBlock starts, BitBlock ends, BitBlock carryin, uint16_t carryno) |
---|
| 129 | { |
---|
| 130 | BitBlock span; |
---|
| 131 | sbb(ends, starts, carryin, cq[carryno], span); |
---|
| 132 | return simd_andc(span, starts); |
---|
| 133 | } |
---|
| 134 | |
---|
| 135 | |
---|
[1996] | 136 | IDISA_ALWAYS_INLINE BitBlock BitBlock_advance32_ci_co(BitBlock strm, uint32_t pending_in, uint32_t & pending_out) |
---|
| 137 | { |
---|
| 138 | pending_out = (uint32_t) mvmd<32>::extract< (sizeof(BitBlock)/sizeof(pending_out))-1 >(strm); |
---|
| 139 | return simd_or(simd<BLOCK_SIZE>::slli<32>(strm), mvmd<BLOCK_SIZE>::fill((uint64_t)pending_in)); |
---|
| 140 | } |
---|
| 141 | |
---|
[2207] | 142 | template <int n> IDISA_ALWAYS_INLINE BitBlock BitBlock_advance_n_(BitBlock strm, BitBlock pending_in, uint16_t pendingno) |
---|
| 143 | { |
---|
| 144 | BitBlock half_block_shifted = esimd<BLOCK_SIZE/2>::mergel(strm, pending_in); |
---|
[2221] | 145 | cq[CarryCount + pendingno] = bitblock::srli<BLOCK_SIZE/2>(strm); |
---|
| 146 | //pending64[pendingno] = bitblock::srli<BLOCK_SIZE/2>(strm); |
---|
[2207] | 147 | BitBlock result = simd_or(simd<BLOCK_SIZE/2>::srli<(BLOCK_SIZE/2)-n>(half_block_shifted), |
---|
| 148 | simd<BLOCK_SIZE/2>::slli<n>(strm)); |
---|
| 149 | return result; |
---|
| 150 | } |
---|
| 151 | |
---|
[1544] | 152 | IDISA_ALWAYS_INLINE bool CarryTest(uint16_t carryno, uint16_t carry_count) |
---|
| 153 | { |
---|
| 154 | BitBlock c1 = cq[carryno]; |
---|
| 155 | int ubound = carryno + carry_count; |
---|
| 156 | for (int i = carryno + 1; i < ubound ; i++) { |
---|
| 157 | c1 = carry_or(c1, cq[i]); |
---|
| 158 | } |
---|
| 159 | return test_carry(c1); |
---|
| 160 | } |
---|
| 161 | |
---|
[2794] | 162 | // Experimental |
---|
| 163 | IDISA_ALWAYS_INLINE BitBlock PabloJCarryTest(uint16_t carryno, uint16_t carry_count) |
---|
| 164 | { |
---|
| 165 | BitBlock c1 = cq[carryno]; |
---|
| 166 | int ubound = carryno + carry_count; |
---|
| 167 | for (int i = carryno + 1; i < ubound ; i++) { |
---|
| 168 | c1 = carry_or(c1, cq[i]); |
---|
| 169 | } |
---|
| 170 | return c1; |
---|
| 171 | } |
---|
| 172 | |
---|
[1544] | 173 | IDISA_ALWAYS_INLINE void CarryDequeueEnqueue(uint16_t carryno, uint16_t carry_count) |
---|
| 174 | { |
---|
| 175 | return; |
---|
| 176 | } |
---|
| 177 | |
---|
| 178 | IDISA_ALWAYS_INLINE void CarryQ_Adjust(uint16_t carry_count) |
---|
| 179 | { |
---|
| 180 | return; |
---|
| 181 | } |
---|
| 182 | |
---|
[2213] | 183 | IDISA_ALWAYS_INLINE void CarryCombine(BitBlock local_cq[], uint16_t carryno, uint16_t carry_count) |
---|
[1544] | 184 | { |
---|
| 185 | for (int i = 0; i < carry_count; i++) { |
---|
| 186 | cq[carryno+i] = carry_or(cq[carryno+i], local_cq[i]); |
---|
| 187 | } |
---|
| 188 | } |
---|
| 189 | |
---|
[2268] | 190 | IDISA_ALWAYS_INLINE void CarryCombine1(uint16_t carryno, uint16_t carry2) |
---|
| 191 | { |
---|
| 192 | cq[carryno] = carry_or(cq[carryno], cq[carry2]); |
---|
| 193 | cq[carry2] = Carry0; |
---|
| 194 | } |
---|
| 195 | |
---|
[1996] | 196 | IDISA_ALWAYS_INLINE BitBlock get_carry_in(uint16_t carryno) const |
---|
| 197 | { |
---|
| 198 | return carry2bitblock(cq[carryno]); |
---|
| 199 | } |
---|
| 200 | |
---|
[2221] | 201 | // IDISA_ALWAYS_INLINE BitBlock get_pending64(uint16_t advance_n_blkno) const |
---|
| 202 | // { |
---|
| 203 | // return pending64[advance_n_blkno]; |
---|
| 204 | // } |
---|
| 205 | |
---|
| 206 | IDISA_ALWAYS_INLINE BitBlock get_pending64(uint16_t advance_n_blkno) const |
---|
| 207 | { |
---|
| 208 | return cq[CarryCount + advance_n_blkno]; |
---|
| 209 | } |
---|
| 210 | |
---|
[2261] | 211 | //private: |
---|
[1544] | 212 | // helpers |
---|
| 213 | IDISA_ALWAYS_INLINE BitBlock carry_flip(BitBlock carry) const |
---|
| 214 | { |
---|
| 215 | return simd_xor(carry, Carry1); |
---|
| 216 | } |
---|
| 217 | |
---|
| 218 | IDISA_ALWAYS_INLINE bool test_carry(BitBlock carry) const |
---|
| 219 | { |
---|
[1826] | 220 | return bitblock::any(carry); |
---|
[1544] | 221 | } |
---|
| 222 | |
---|
| 223 | IDISA_ALWAYS_INLINE BitBlock carry_or(BitBlock carry1, BitBlock carry2) const |
---|
| 224 | { |
---|
| 225 | return simd_or(carry1, carry2); |
---|
| 226 | } |
---|
[1933] | 227 | |
---|
| 228 | #undef Carry0 |
---|
| 229 | #undef Carry1 |
---|
[1544] | 230 | |
---|
| 231 | }; |
---|
| 232 | |
---|
[1950] | 233 | #endif // CARRYQ_HPP_ |
---|