 Timestamp:
 Mar 10, 2011, 6:28:29 PM (8 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

trunk/lib/carryQ.h
r925 r928 160 160 SIMD_type carryQ_co_mask = sisd_slli(carryQ_ci_mask, 127); 161 161 162 #define carryQ_adc128_ci_co(x, y, carryQ, sum) \163 do{ \164 SIMD_type gen = simd_and(x, y); \165 SIMD_type prop = simd_or(x, y); \166 SIMD_type partial = simd_add_64(simd_add_64(x, y), simd_and(carryQ, carryQ_ci_mask)); \167 carryQ = simd_srli_64(carryQ, 1); \168 SIMD_type c1 = sisd_slli(simd_srli_64(simd_or(gen, simd_andc(prop, partial)), 63), 64); \169 sum = simd_add_64(c1, partial); \170 carryQ = simd_or(carryQ, simd_and(simd_or(gen, simd_andc(prop, sum)), carryQ_co_mask)); \171 } while(0)172 173 #define carryQ_adc128_co(x, y, carryQ, sum) \174 do{ \175 SIMD_type gen = simd_and(x, y); \176 SIMD_type prop = simd_or(x, y); \177 SIMD_type partial = simd_add_64(x, y); \178 carryQ = simd_srli_64(carryQ, 1); \179 SIMD_type c1 = sisd_slli(simd_srli_64(simd_or(gen, simd_andc(prop, partial)), 63), 64); \180 sum = simd_add_64(c1, partial); \181 carryQ = simd_or(carryQ, simd_and(simd_or(gen, simd_andc(prop, sum)), carryQ_co_mask)); \182 } while(0)183 184 #define carryQ_sbb128_ci_co(x, y, carryQ, difference) \185 do {\186 SIMD_type gen = simd_andc(y, x); \187 SIMD_type prop = simd_not(simd_xor(x, y)); \188 SIMD_type partial = simd_sub_64(simd_sub_64(x, y), simd_and(carryQ, carryQ_ci_mask)); \189 carryQ = simd_srli_64(carryQ, 1); \190 SIMD_type b1 = sisd_slli(simd_srli_64(simd_or(gen, simd_and(prop, partial)), 63), 64); \191 difference = simd_sub_64(partial, b1); \192 carryQ = simd_or(carryQ, simd_and(simd_or(gen, simd_and(prop, difference)), carryQ_co_mask)); \193 }while(0)194 195 #define carryQ_sbb128_co(x, y, carryQ, difference) \196 do {\197 SIMD_type gen = simd_andc(y, x); \198 SIMD_type prop = simd_not(simd_xor(x, y)); \199 SIMD_type partial = simd_sub_64(x, y); \200 carryQ = simd_srli_64(carryQ, 1); \201 SIMD_type b1 = sisd_slli(simd_srli_64(simd_or(gen, simd_and(prop, partial)), 63), 64); \202 difference = simd_sub_64(partial, b1); \203 carryQ = simd_or(carryQ, simd_and(simd_or(gen, simd_and(prop, difference)), carryQ_co_mask)); \204 }while(0)205 206 #define carryQ_advance_with_carry_ci_co(cursor, carryQ, rslt)\207 do {\208 SIMD_type carry_out = simd_and(cursor, carryQ_co_mask);\209 SIMD_type carry_in = simd_and(carryQ, carryQ_ci_mask);\210 carryQ = simd_or(simd_srli_64(carryQ, 1), carry_out); \211 SIMD_type shift_out = simd_srli_64(cursor, 63);\212 SIMD_type low_bits = simd_mergel_64(shift_out, carry_in);\213 rslt = simd_or(simd_add_64(cursor, cursor), low_bits);\214 } while(0)215 216 #define carryQ_advance_with_carry_co(cursor, carryQ, rslt)\217 do {\218 SIMD_type carry_out = simd_and(cursor, carryQ_co_mask);\219 carryQ = simd_or(simd_srli_64(carryQ, 1), carry_out); \220 SIMD_type shift_out = simd_srli_64(cursor, 63);\221 SIMD_type low_bits = simd_mergel_64(shift_out, simd_const_1(0));\222 rslt = simd_or(simd_add_64(cursor, cursor), low_bits);\223 } while(0)224 225 226 162 static inline BitBlock BitBlock_advance_ci_co(BitBlock strm, CarryQtype & cq, const int carryno) __attribute__ ((always_inline)); 227 163 … … 264 200 static inline void CarryCombine(CarryQtype & cq, CarryQtype & local_cq, const int carryno, const int carry_count) __attribute__ ((always_inline)); 265 201 266 267 268 269 270 static inline BitBlock BitBlock_advance_ci_co(BitBlock strm, CarryQtype & cq, const int carryno) { 271 BitBlock rslt; 272 carryQ_advance_with_carry_ci_co(strm, cq, rslt); 273 return rslt; 274 } 275 276 static inline BitBlock BitBlock_advance_co(BitBlock strm, CarryQtype & cq, const int carryno) { 277 BitBlock rslt; 278 carryQ_advance_with_carry_co(strm, cq, rslt); 279 return rslt; 280 } 281 282 static inline BitBlock BitBlock_add_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, const int carryno) { 283 BitBlock sum; 284 carryQ_adc128_ci_co(strm1, strm2, cq, sum); 285 return sum; 286 } 287 288 static inline BitBlock BitBlock_add_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, const int carryno) { 289 BitBlock sum; 290 carryQ_adc128_co(strm1, strm2, cq, sum); 291 return sum; 292 } 293 294 static inline BitBlock BitBlock_sub_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, const int carryno) { 295 BitBlock diff; 296 carryQ_sbb128_ci_co(strm1, strm2, cq, diff); 297 return diff; 298 } 299 300 static inline BitBlock BitBlock_sub_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, const int carryno) { 301 BitBlock diff; 302 carryQ_sbb128_co(strm1, strm2, cq, diff); 303 return diff; 202 static inline BitBlock BitBlock_advance_ci_co(BitBlock cursor, CarryQtype & carryQ, const int carryno) { 203 SIMD_type carry_out = simd_and(cursor, carryQ_co_mask); 204 SIMD_type carry_in = simd_and(carryQ, carryQ_ci_mask); 205 carryQ = simd_or(simd_srli_64(carryQ, 1), carry_out); 206 SIMD_type shift_out = simd_srli_64(cursor, 63); 207 SIMD_type low_bits = simd_mergel_64(shift_out, carry_in); 208 return simd_or(simd_add_64(cursor, cursor), low_bits); 209 } 210 211 static inline BitBlock BitBlock_advance_co(BitBlock cursor, CarryQtype & carryQ, const int carryno) { 212 SIMD_type carry_out = simd_and(cursor, carryQ_co_mask); 213 carryQ = simd_or(simd_srli_64(carryQ, 1), carry_out); 214 SIMD_type shift_out = simd_srli_64(cursor, 63); 215 SIMD_type low_bits = simd_mergel_64(shift_out, simd_const_1(0)); 216 return simd_or(simd_add_64(cursor, cursor), low_bits); 217 } 218 219 static inline BitBlock BitBlock_advance_ci(BitBlock cursor, CarryQtype & carryQ, const int carryno) { 220 SIMD_type carry_in = simd_and(carryQ, carryQ_ci_mask); 221 carryQ = simd_srli_64(carryQ, 1); 222 SIMD_type shift_out = simd_srli_64(cursor, 63); 223 SIMD_type low_bits = simd_mergel_64(shift_out, carry_in); 224 return simd_or(simd_add_64(cursor, cursor), low_bits); 225 } 226 227 static inline BitBlock BitBlock_advance(BitBlock cursor) { 228 return sisd_srli(cursor, 1); 229 } 230 231 232 static inline BitBlock BitBlock_add_ci_co(BitBlock x, BitBlock y, CarryQtype & carryQ, const int carryno) { 233 BitBlock sum; 234 SIMD_type gen = simd_and(x, y); 235 SIMD_type prop = simd_or(x, y); 236 SIMD_type partial = simd_add_64(simd_add_64(x, y), simd_and(carryQ, carryQ_ci_mask)); 237 carryQ = simd_srli_64(carryQ, 1); 238 SIMD_type c1 = sisd_slli(simd_srli_64(simd_or(gen, simd_andc(prop, partial)), 63), 64); 239 sum = simd_add_64(c1, partial); 240 carryQ = simd_or(carryQ, simd_and(simd_or(gen, simd_andc(prop, sum)), carryQ_co_mask)); 241 return sum; 242 } 243 244 static inline BitBlock BitBlock_add_co(BitBlock x, BitBlock y, CarryQtype & carryQ, const int carryno) { 245 BitBlock sum; 246 SIMD_type gen = simd_and(x, y); 247 SIMD_type prop = simd_or(x, y); 248 SIMD_type partial = simd_add_64(x, y); 249 carryQ = simd_srli_64(carryQ, 1); 250 SIMD_type c1 = sisd_slli(simd_srli_64(simd_or(gen, simd_andc(prop, partial)), 63), 64); 251 sum = simd_add_64(c1, partial); 252 carryQ = simd_or(carryQ, simd_and(simd_or(gen, simd_andc(prop, sum)), carryQ_co_mask)); 253 return sum; 254 } 255 256 static inline BitBlock BitBlock_add_ci(BitBlock x, BitBlock y, CarryQtype & carryQ, const int carryno) { 257 BitBlock sum; 258 SIMD_type gen = simd_and(x, y); 259 SIMD_type prop = simd_or(x, y); 260 SIMD_type partial = simd_add_64(simd_add_64(x, y), simd_and(carryQ, carryQ_ci_mask)); 261 carryQ = simd_srli_64(carryQ, 1); 262 SIMD_type c1 = sisd_slli(simd_srli_64(simd_or(gen, simd_andc(prop, partial)), 63), 64); 263 sum = simd_add_64(c1, partial); 264 return sum; 265 } 266 267 static inline BitBlock BitBlock_add(BitBlock x, BitBlock y) { 268 return simd_add_128(x, y); 269 } 270 271 static inline BitBlock BitBlock_sub_ci_co(BitBlock x, BitBlock y, CarryQtype & carryQ, const int carryno) { 272 BitBlock diff; 273 SIMD_type gen = simd_andc(y, x); 274 SIMD_type prop = simd_not(simd_xor(x, y)); 275 SIMD_type partial = simd_sub_64(simd_sub_64(x, y), simd_and(carryQ, carryQ_ci_mask)); 276 carryQ = simd_srli_64(carryQ, 1); 277 SIMD_type b1 = sisd_slli(simd_srli_64(simd_or(gen, simd_and(prop, partial)), 63), 64); 278 diff = simd_sub_64(partial, b1); 279 carryQ = simd_or(carryQ, simd_and(simd_or(gen, simd_and(prop, diff)), carryQ_co_mask)); 280 return diff; 281 } 282 283 static inline BitBlock BitBlock_sub_co(BitBlock x, BitBlock y, CarryQtype & carryQ, const int carryno) { 284 BitBlock diff; 285 SIMD_type gen = simd_andc(y, x); 286 SIMD_type prop = simd_not(simd_xor(x, y)); 287 SIMD_type partial = simd_sub_64(x, y); 288 carryQ = simd_srli_64(carryQ, 1); 289 SIMD_type b1 = sisd_slli(simd_srli_64(simd_or(gen, simd_and(prop, partial)), 63), 64); 290 diff = simd_sub_64(partial, b1); 291 carryQ = simd_or(carryQ, simd_and(simd_or(gen, simd_and(prop, diff)), carryQ_co_mask)); 292 return diff; 293 } 294 295 static inline BitBlock BitBlock_sub_ci(BitBlock x, BitBlock y, CarryQtype & carryQ, const int carryno) { 296 BitBlock diff; 297 SIMD_type gen = simd_andc(y, x); 298 SIMD_type prop = simd_not(simd_xor(x, y)); 299 SIMD_type partial = simd_sub_64(simd_sub_64(x, y), simd_and(carryQ, carryQ_ci_mask)); 300 carryQ = simd_srli_64(carryQ, 1); 301 SIMD_type b1 = sisd_slli(simd_srli_64(simd_or(gen, simd_and(prop, partial)), 63), 64); 302 diff = simd_sub_64(partial, b1); 303 return diff; 304 } 305 306 static inline BitBlock BitBlock_sub(BitBlock x, BitBlock y) { 307 BitBlock diff; 308 SIMD_type gen = simd_andc(y, x); 309 SIMD_type prop = simd_not(simd_xor(x, y)); 310 SIMD_type partial = simd_sub_64(x, y); 311 SIMD_type b1 = sisd_slli(simd_srli_64(simd_or(gen, simd_and(prop, partial)), 63), 64); 312 diff = simd_sub_64(partial, b1); 313 return diff; 304 314 } 305 315 306 316 static inline BitBlock BitBlock_scanthru_ci_co(BitBlock markers0, BitBlock charclass, CarryQtype & cq, const int carryno) { 307 BitBlock markers1; 308 carryQ_adc128_ci_co(markers0, charclass, cq, markers1); 309 return simd_andc(markers1, charclass); 317 return simd_andc(BitBlock_add_ci_co(markers0, charclass, cq, carryno), charclass); 310 318 } 311 319 312 320 static inline BitBlock BitBlock_scanthru_co(BitBlock markers0, BitBlock charclass, CarryQtype & cq, const int carryno) { 313 BitBlock markers1; 314 carryQ_adc128_co(markers0, charclass, cq, markers1); 315 return simd_andc(markers1, charclass); 321 return simd_andc(BitBlock_add_co(markers0, charclass, cq, carryno), charclass); 322 } 323 324 static inline BitBlock BitBlock_scanthru_ci(BitBlock markers0, BitBlock charclass, CarryQtype & cq, const int carryno) { 325 return simd_andc(BitBlock_add_ci(markers0, charclass, cq, carryno), charclass); 326 } 327 328 static inline BitBlock BitBlock_scanthru(BitBlock markers0, BitBlock charclass) { 329 return simd_andc(BitBlock_add(markers0, charclass), charclass); 316 330 } 317 331 … … 539 553 540 554 541 static inline BitBlock BitBlock_advance_ci_co(BitBlock strm, CarryQtype & cq, const int carryno) { 542 BitBlock rslt; 543 carryQ_advance_with_carry_ci_co(strm, cq, rslt); 544 return rslt; 545 } 546 547 static inline BitBlock BitBlock_advance_co(BitBlock strm, CarryQtype & cq, const int carryno) { 548 BitBlock rslt; 549 carryQ_advance_with_carry_co(strm, cq, rslt); 550 return rslt; 551 } 555 static inline BitBlock BitBlock_advance_ci_co(BitBlock strm, CarryQtype & carryQ, const int carryno) { 556 BitBlock_int64 x, z; 557 x.bitblock = strm; 558 __asm__ __volatile__ ("add %[cq], %[cq]\n\t" 559 "adc %[z1], %[z1]\n\t" 560 "adc %[z2], %[z2]\n\t" 561 "adc $0, %[cq]\n\t" 562 : [z1] "=r" (z.int64[0]), [z2] "=r" (z.int64[1]), [cq] "=r" (carryQ) 563 : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]), 564 "[cq]" (carryQ) 565 : "cc"); 566 return z.bitblock; 567 } 568 569 static inline BitBlock BitBlock_advance_co(BitBlock strm, CarryQtype & carryQ, const int carryno) { 570 BitBlock_int64 x, z; 571 x.bitblock = strm; 572 __asm__ __volatile__ ("add %[cq], %[cq]\n\t" 573 "add %[z1], %[z1]\n\t" 574 "adc %[z2], %[z2]\n\t" 575 "adc $0, %[cq]\n\t" 576 : [z1] "=r" (z.int64[0]), [z2] "=r" (z.int64[1]), [cq] "=r" (carryQ) 577 : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]), 578 "[cq]" (carryQ) 579 : "cc"); 580 return z.bitblock; 581 } 582 583 static inline BitBlock BitBlock_advance_ci(BitBlock strm, CarryQtype & carryQ, const int carryno) { 584 BitBlock_int64 x, z; 585 x.bitblock = strm; 586 __asm__ __volatile__ ("add %[cq], %[cq]\n\t" 587 "adc %[z1], %[z1]\n\t" 588 "add %[z2], %[z2]\n\t" 589 : [z1] "=r" (z.int64[0]), [z2] "=r" (z.int64[1]), [cq] "=r" (carryQ) 590 : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]), 591 "[cq]" (carryQ) 592 : "cc"); 593 return z.bitblock; 594 } 595 596 static inline BitBlock BitBlock_advance(BitBlock strm) { 597 BitBlock_int64 x, z; 598 x.bitblock = strm; 599 __asm__ __volatile__ ( 600 "add %[z1], %[z1]\n\t" 601 "adc %[z2], %[z2]\n\t" 602 : [z1] "=r" (z.int64[0]), [z2] "=r" (z.int64[1]) 603 : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]) 604 : "cc"); 605 return z.bitblock; 606 } 607 552 608 553 609 static inline BitBlock BitBlock_add_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, const int carryno) {
Note: See TracChangeset
for help on using the changeset viewer.