[3909] | 1 | #ifndef CARRYQ_HPP_ |
---|
| 2 | #define CARRYQ_HPP_ |
---|
| 3 | |
---|
| 4 | /*============================================================================= |
---|
| 5 | carryQ.hpp - Pablo compiler support for carry introduction. |
---|
| 6 | Ken Herdy, Robert D. Cameron |
---|
| 7 | Copyright (C) 2012, Robert D. Cameron, Kenneth S. Herdy. |
---|
| 8 | Licensed to the public under the Open Software License 3.0. |
---|
| 9 | Licensed to International Characters Inc. |
---|
| 10 | under the Academic Free License version 3.0. |
---|
| 11 | April 2012 |
---|
| 12 | =============================================================================*/ |
---|
| 13 | |
---|
| 14 | #include <string.h> |
---|
| 15 | #include <stdint.h> |
---|
| 16 | #include <iostream> |
---|
| 17 | |
---|
| 18 | #include "bitblock.hpp" |
---|
| 19 | #include "stdio.h" |
---|
| 20 | |
---|
| 21 | /////////////////////////////////////////////////////////////////////////////// |
---|
| 22 | // |
---|
| 23 | // Carry method variants. |
---|
| 24 | // |
---|
| 25 | // BitBlock_op_ci_co() - standard block non while loop statement and in final block if ignore the carry out |
---|
| 26 | // BitBlock_op_co() - standard block while loop and in final block while loop if ignore carry out |
---|
| 27 | // BitBlock_op_ci() - final block non while loop statement |
---|
| 28 | // BitBlock_op() - final while loop statement |
---|
| 29 | // |
---|
| 30 | // The BitBlock_op_ci() and BitBlock_op() methods are not implemented, in order to reduce the |
---|
| 31 | // total number of methods and the complexity of the Pablo compiler. |
---|
| 32 | // |
---|
| 33 | /////////////////////////////////////////////////////////////////////////////// |
---|
| 34 | |
---|
| 35 | #define interpose32(x,y,pos) interpose32_<pos>(x,y) |
---|
| 36 | template<uint32_t n> |
---|
| 37 | IDISA_ALWAYS_INLINE BitBlock interpose32_(BitBlock s, BitBlock s32) { |
---|
| 38 | return simd_or(simd<32>::slli<n>(s), simd<32>::srli<32-n>(s32)); |
---|
| 39 | } |
---|
| 40 | |
---|
| 41 | template<uint32_t n> |
---|
| 42 | IDISA_ALWAYS_INLINE BitBlock interpose64_(BitBlock s, BitBlock s64) { |
---|
| 43 | return simd_or(simd<64>::slli<n>(s), simd<64>::srli<64-n>(s64)); |
---|
| 44 | } |
---|
| 45 | |
---|
// Forward declaration so the declaration macros below can name the template.
template <uint16_t CarryCount, uint16_t AdvanceNCount> class CarryArray;

// ORs the first `carryCount` entries of localCarry's array into carrySet,
// starting at slot `carryNo` of carrySet (see CarryArray::CarryCombine).
// The expansion already ends with a semicolon.
#define LocalCarryCombine(carrySet, localCarry, carryNo, carryCount) carrySet.CarryCombine(localCarry.cq, carryNo, carryCount);

// Declares a CarryArray called `name` with `carry1_count` carry slots and
// `carryN_count` additional advance-n slots. The expansion ends with a semicolon.
#define CarryDeclare(name, carry1_count, carryN_count) CarryArray<carry1_count, carryN_count> name;

// Declares a CarryArray called `name` with `count` carry slots and no
// advance-n slots. The expansion ends with a semicolon.
#define LocalCarryDeclare(name, count) CarryArray<count, 0> name;
---|
| 56 | |
---|
| 57 | // Array of BitBlock implementation. |
---|
| 58 | template <uint16_t CarryCount, uint16_t AdvanceNCount> |
---|
| 59 | class CarryArray { |
---|
| 60 | |
---|
| 61 | public: |
---|
| 62 | |
---|
| 63 | #ifndef CARRY64 |
---|
| 64 | #define carry_value_0 simd<BLOCK_SIZE>::constant<0>() |
---|
| 65 | #define carry_value_1 simd<BLOCK_SIZE>::constant<1>() |
---|
| 66 | #endif |
---|
| 67 | #ifdef CARRY64 |
---|
| 68 | #define carry_value_0 0 |
---|
| 69 | #define carry_value_1 1 |
---|
| 70 | #endif |
---|
| 71 | |
---|
| 72 | carry_t cq[CarryCount + AdvanceNCount]; |
---|
| 73 | //BitBlock pending64[AdvanceNCount]; |
---|
| 74 | CarryArray() |
---|
| 75 | { |
---|
| 76 | memset (cq, 0, sizeof(carry_t) * (CarryCount + AdvanceNCount)); |
---|
| 77 | //memset(pending64, 0, sizeof(BitBlock) * AdvanceNCount); |
---|
| 78 | } |
---|
| 79 | ~CarryArray() {} |
---|
| 80 | |
---|
| 81 | IDISA_ALWAYS_INLINE BitBlock BitBlock_advance_ci_co(BitBlock strm, carry_t carryin, uint16_t carryno) |
---|
| 82 | { |
---|
| 83 | BitBlock rslt; |
---|
| 84 | advance_with_carry(strm, carryin, cq[carryno], rslt); |
---|
| 85 | return rslt; |
---|
| 86 | } |
---|
| 87 | |
---|
| 88 | IDISA_ALWAYS_INLINE BitBlock BitBlock_add_ci_co(BitBlock strm1, BitBlock strm2, carry_t carryin, const uint16_t carryno) |
---|
| 89 | { |
---|
| 90 | BitBlock sum; |
---|
| 91 | adc(strm1, strm2, carryin, cq[carryno], sum); |
---|
| 92 | return sum; |
---|
| 93 | } |
---|
| 94 | |
---|
| 95 | IDISA_ALWAYS_INLINE BitBlock BitBlock_sub_ci_co(BitBlock strm1, BitBlock strm2, carry_t carryin, uint16_t carryno) |
---|
| 96 | { |
---|
| 97 | BitBlock diff; |
---|
| 98 | sbb(strm1, strm2, carryin, cq[carryno], diff); |
---|
| 99 | return diff; |
---|
| 100 | } |
---|
| 101 | |
---|
| 102 | IDISA_ALWAYS_INLINE BitBlock BitBlock_scantofirst(BitBlock charclass, carry_t carryin, uint16_t carryno) |
---|
| 103 | { |
---|
| 104 | BitBlock marker; |
---|
| 105 | // BitBlock c = carry_flip(carryin); |
---|
| 106 | adc(simd<BLOCK_SIZE>::constant<0>(), simd_not(charclass), carryin, cq[carryno], marker); |
---|
| 107 | // cq[carryno] = carry_flip(cq[carryno]); |
---|
| 108 | return simd_and(marker, charclass); |
---|
| 109 | } |
---|
| 110 | |
---|
| 111 | IDISA_ALWAYS_INLINE BitBlock BitBlock_scanthru_ci_co(BitBlock markers0, BitBlock charclass, carry_t carryin, uint16_t carryno) |
---|
| 112 | { |
---|
| 113 | BitBlock markers1; |
---|
| 114 | adc(markers0, charclass, carryin, cq[carryno], markers1); |
---|
| 115 | return simd_andc(markers1, charclass); |
---|
| 116 | } |
---|
| 117 | |
---|
| 118 | IDISA_ALWAYS_INLINE BitBlock BitBlock_advance_then_scanthru(BitBlock markers0, BitBlock charclass, carry_t carryin, uint16_t carryno) |
---|
| 119 | { |
---|
| 120 | BitBlock markers1; |
---|
| 121 | //assert(!bitblock::any(simd_and(markers0, charclass))); |
---|
| 122 | adc(markers0, simd_or(charclass, markers0), carryin, cq[carryno], markers1); |
---|
| 123 | return simd_andc(markers1, charclass); |
---|
| 124 | } |
---|
| 125 | |
---|
| 126 | IDISA_ALWAYS_INLINE BitBlock BitBlock_span_upto(BitBlock starts, BitBlock follows, carry_t carryin, uint16_t carryno) |
---|
| 127 | { |
---|
| 128 | BitBlock span; |
---|
| 129 | sbb(follows, starts, carryin, cq[carryno], span); |
---|
| 130 | return span; |
---|
| 131 | } |
---|
| 132 | |
---|
| 133 | IDISA_ALWAYS_INLINE BitBlock BitBlock_inclusive_span(BitBlock starts, BitBlock ends, carry_t carryin, uint16_t carryno) |
---|
| 134 | { |
---|
| 135 | BitBlock span; |
---|
| 136 | sbb(ends, starts, carryin, cq[carryno], span); |
---|
| 137 | return simd_or(span, ends); |
---|
| 138 | } |
---|
| 139 | |
---|
| 140 | IDISA_ALWAYS_INLINE BitBlock BitBlock_exclusive_span(BitBlock starts, BitBlock ends, carry_t carryin, uint16_t carryno) |
---|
| 141 | { |
---|
| 142 | BitBlock span; |
---|
| 143 | sbb(ends, starts, carryin, cq[carryno], span); |
---|
| 144 | return simd_andc(span, starts); |
---|
| 145 | } |
---|
| 146 | |
---|
| 147 | /* |
---|
| 148 | IDISA_ALWAYS_INLINE BitBlock BitBlock_advance32_ci_co(BitBlock strm, uint32_t pending_in, uint32_t & pending_out) |
---|
| 149 | { |
---|
| 150 | pending_out = (uint32_t) mvmd<32>::extract< (sizeof(BitBlock)/sizeof(pending_out))-1 >(strm); |
---|
| 151 | return simd_or(simd<BLOCK_SIZE>::slli<32>(strm), mvmd<BLOCK_SIZE>::fill((uint64_t)pending_in)); |
---|
| 152 | } |
---|
| 153 | */ |
---|
| 154 | |
---|
| 155 | /* |
---|
| 156 | template <int n> IDISA_ALWAYS_INLINE BitBlock BitBlock_advance_n_(BitBlock strm, carry_t pending_in, uint16_t pendingno) |
---|
| 157 | { |
---|
| 158 | BitBlock half_block_shifted = esimd<BLOCK_SIZE/2>::mergel(strm, pending_in); |
---|
| 159 | cq[CarryCount + pendingno] = bitblock::srli<BLOCK_SIZE/2>(strm); |
---|
| 160 | //pending64[pendingno] = bitblock::srli<BLOCK_SIZE/2>(strm); |
---|
| 161 | BitBlock result = simd_or(simd<BLOCK_SIZE/2>::srli<(BLOCK_SIZE/2)-n>(half_block_shifted), |
---|
| 162 | simd<BLOCK_SIZE/2>::slli<n>(strm)); |
---|
| 163 | return result; |
---|
| 164 | } |
---|
| 165 | */ |
---|
| 166 | |
---|
| 167 | IDISA_ALWAYS_INLINE bool CarryTest(uint16_t carryno, uint16_t carry_count) |
---|
| 168 | { |
---|
| 169 | carry_t c1 = cq[carryno]; |
---|
| 170 | int ubound = carryno + carry_count; |
---|
| 171 | for (int i = carryno + 1; i < ubound ; i++) { |
---|
| 172 | c1 = carry_or(c1, cq[i]); |
---|
| 173 | } |
---|
| 174 | return test_carry(c1); |
---|
| 175 | } |
---|
| 176 | |
---|
| 177 | IDISA_ALWAYS_INLINE carry_t CarryRange(uint16_t carryno, uint16_t carry_count) |
---|
| 178 | { |
---|
| 179 | carry_t c1 = cq[carryno]; |
---|
| 180 | int ubound = carryno + carry_count; |
---|
| 181 | for (int i = carryno + 1; i < ubound ; i++) { |
---|
| 182 | c1 = carry_or(c1, cq[i]); |
---|
| 183 | } |
---|
| 184 | return c1; |
---|
| 185 | } |
---|
| 186 | |
---|
| 187 | IDISA_ALWAYS_INLINE void CarryDequeueEnqueue(uint16_t carryno, uint16_t carry_count) |
---|
| 188 | { |
---|
| 189 | return; |
---|
| 190 | } |
---|
| 191 | |
---|
| 192 | // Deprecated (renamed) |
---|
| 193 | IDISA_ALWAYS_INLINE void CarryQ_Adjust(uint16_t carry_count) |
---|
| 194 | { |
---|
| 195 | return; |
---|
| 196 | } |
---|
| 197 | |
---|
| 198 | IDISA_ALWAYS_INLINE void CarryAdjust(uint16_t carry_count) |
---|
| 199 | { |
---|
| 200 | return; |
---|
| 201 | } |
---|
| 202 | |
---|
| 203 | IDISA_ALWAYS_INLINE void CarryCombine(carry_t local_cq[], uint16_t carryno, uint16_t carry_count) |
---|
| 204 | { |
---|
| 205 | for (int i = 0; i < carry_count; i++) { |
---|
| 206 | cq[carryno+i] = carry_or(cq[carryno+i], local_cq[i]); |
---|
| 207 | } |
---|
| 208 | } |
---|
| 209 | |
---|
| 210 | IDISA_ALWAYS_INLINE void CarryCombine1(uint16_t carryno, uint16_t carry2) |
---|
| 211 | { |
---|
| 212 | cq[carryno] = carry_or(cq[carryno], cq[carry2]); |
---|
| 213 | cq[carry2] = carry_value_0; |
---|
| 214 | } |
---|
| 215 | |
---|
| 216 | IDISA_ALWAYS_INLINE carry_t get_carry_in(uint16_t carryno) const |
---|
| 217 | { |
---|
| 218 | return cq[carryno]; |
---|
| 219 | } |
---|
| 220 | |
---|
| 221 | // Deprecated (renamed) |
---|
| 222 | IDISA_ALWAYS_INLINE carry_t GetCarry(uint16_t carryno) const |
---|
| 223 | { |
---|
| 224 | return cq[carryno]; |
---|
| 225 | } |
---|
| 226 | |
---|
| 227 | IDISA_ALWAYS_INLINE void SetCarry(carry_t carryVal, uint16_t carryno) |
---|
| 228 | { |
---|
| 229 | cq[carryno] = carryVal; |
---|
| 230 | } |
---|
| 231 | |
---|
| 232 | |
---|
| 233 | // Deprecated in PabloJ, retained for legacy compiler. |
---|
| 234 | IDISA_ALWAYS_INLINE carry_t get_pending64(uint16_t advance_n_blkno) const |
---|
| 235 | { |
---|
| 236 | return cq[CarryCount + advance_n_blkno]; |
---|
| 237 | } |
---|
| 238 | |
---|
| 239 | IDISA_ALWAYS_INLINE carry_t Pending64(uint16_t advance_n_blkno) const |
---|
| 240 | { |
---|
| 241 | return cq[CarryCount + advance_n_blkno]; |
---|
| 242 | } |
---|
| 243 | |
---|
| 244 | //private: |
---|
| 245 | // helpers |
---|
| 246 | #ifndef CARRY64 |
---|
| 247 | // Deprecated (renamed) |
---|
| 248 | IDISA_ALWAYS_INLINE carry_t carry_flip(carry_t carry) const |
---|
| 249 | { |
---|
| 250 | return simd_xor(carry, carry_value_1); |
---|
| 251 | } |
---|
| 252 | |
---|
| 253 | IDISA_ALWAYS_INLINE BitBlock CarryFlip(carry_t carry) const |
---|
| 254 | { |
---|
| 255 | return simd_xor(carry, carry_value_1); |
---|
| 256 | } |
---|
| 257 | |
---|
| 258 | IDISA_ALWAYS_INLINE bool test_carry(carry_t carry) const |
---|
| 259 | { |
---|
| 260 | return bitblock::any(carry); |
---|
| 261 | } |
---|
| 262 | |
---|
| 263 | IDISA_ALWAYS_INLINE carry_t carry_or(carry_t carry1, carry_t carry2) const |
---|
| 264 | { |
---|
| 265 | return simd_or(carry1, carry2); |
---|
| 266 | } |
---|
| 267 | #endif |
---|
| 268 | #ifdef CARRY64 |
---|
| 269 | // Deprecated (renamed) |
---|
| 270 | IDISA_ALWAYS_INLINE carry_t carry_flip(carry_t carry) const |
---|
| 271 | { |
---|
| 272 | return carry ^ 1; |
---|
| 273 | } |
---|
| 274 | |
---|
| 275 | IDISA_ALWAYS_INLINE BitBlock CarryFlip(carry_t carry) const |
---|
| 276 | { |
---|
| 277 | return carry ^ 1; |
---|
| 278 | } |
---|
| 279 | |
---|
| 280 | IDISA_ALWAYS_INLINE bool test_carry(carry_t carry) const |
---|
| 281 | { |
---|
| 282 | return carry != 0; |
---|
| 283 | } |
---|
| 284 | |
---|
| 285 | IDISA_ALWAYS_INLINE carry_t carry_or(carry_t carry1, carry_t carry2) const |
---|
| 286 | { |
---|
| 287 | return carry1 | carry2; |
---|
| 288 | } |
---|
| 289 | #endif |
---|
| 290 | |
---|
| 291 | }; |
---|
| 292 | |
---|
| 293 | #endif // CARRYQ_HPP_ |
---|