 Timestamp:
 Mar 10, 2011, 8:57:52 PM (8 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

trunk/lib/carryQ.h
r928 r929 315 315 316 316 static inline BitBlock BitBlock_scanthru_ci_co(BitBlock markers0, BitBlock charclass, CarryQtype & cq, const int carryno) { 317 317 return simd_andc(BitBlock_add_ci_co(markers0, charclass, cq, carryno), charclass); 318 318 } 319 319 320 320 static inline BitBlock BitBlock_scanthru_co(BitBlock markers0, BitBlock charclass, CarryQtype & cq, const int carryno) { 321 321 return simd_andc(BitBlock_add_co(markers0, charclass, cq, carryno), charclass); 322 322 } 323 323 324 324 static inline BitBlock BitBlock_scanthru_ci(BitBlock markers0, BitBlock charclass, CarryQtype & cq, const int carryno) { 325 325 return simd_andc(BitBlock_add_ci(markers0, charclass, cq, carryno), charclass); 326 326 } 327 327 328 328 static inline BitBlock BitBlock_scanthru(BitBlock markers0, BitBlock charclass) { 329 return simd_andc(BitBlock_add(markers0, charclass), charclass); 330 } 329 return simd_andc(BitBlock_add(markers0, charclass), charclass); 330 } 331 331 332 332 333 typedef union {SIMD_type bitblock; uint64_t int64[2];} BitBlock_int64; … … 390 391 typedef union {SIMD_type bitblock; uint64_t int64[2];} BitBlock_int64; 391 392 392 #define double_int64_adc_ci_co(x1, x2, y1, y2, rslt1, rslt2, carryQ) \393 __asm__ __volatile__ ("add %[cq], %[cq]\n\t" \394 "adc %[e1], %[z1]\n\t" \395 "adc %[e2], %[z2]\n\t" \396 "adc $0, %[cq]\n\t" \397 : [z1] "=r" (rslt1), [z2] "=r" (rslt2), [cq] "=r" (carryQ) \398 : "[z1]" (x1), "[z2]" (x2), \399 [e1] "r" (y1), [e2] "r" (y2), \400 "[cq]" (carryQ) \401 : "cc")402 403 404 #define carryQ_adc128_ci_co(first, second, carryQ, sum) \405 do {\406 BitBlock_int64 rslt, x, y;\407 x.bitblock = first;\408 y.bitblock = second;\409 double_int64_adc_ci_co(x.int64[0], x.int64[1], y.int64[0], y.int64[1], rslt.int64[0], rslt.int64[1], carryQ);\410 sum = rslt.bitblock;\411 } while(0)412 413 #define double_int64_adc_co(x1, x2, y1, y2, rslt1, rslt2, carryQ) \414 __asm__ __volatile__ ("add %[cq], %[cq]\n\t" \415 "add %[e1], %[z1]\n\t" \416 "adc %[e2], %[z2]\n\t" \417 "adc $0, %[cq]\n\t" \418 : [z1] "=r" (rslt1), [z2] "=r" (rslt2), [cq] "=r" (carryQ) \419 : "[z1]" (x1), "[z2]" (x2), \420 [e1] "r" (y1), [e2] "r" (y2), \421 "[cq]" (carryQ) \422 : "cc")423 424 425 #define carryQ_adc128_co(first, second, carryQ, sum) \426 do {\427 BitBlock_int64 rslt, x, y;\428 x.bitblock = first;\429 y.bitblock = second;\430 double_int64_adc_co(x.int64[0], x.int64[1], y.int64[0], y.int64[1], rslt.int64[0], rslt.int64[1], carryQ);\431 sum = rslt.bitblock;\432 } while(0)433 434 435 #define double_int64_sbb_ci_co(x1, x2, y1, y2, rslt1, rslt2, brwQ) \436 __asm__ __volatile__ ("add %[cq], %[cq]\n\t" \437 "sbb %[e1], %[z1]\n\t" \438 "sbb %[e2], %[z2]\n\t" \439 "adc $0, %[cq]\n\t" \440 : [z1] "=r" (rslt1), [z2] "=r" (rslt2), [cq] "=r" (brwQ) \441 : "[z1]" (x1), "[z2]" (x2), \442 [e1] "r" (y1), [e2] "r" (y2), \443 "[cq]" (brwQ) \444 : "cc")445 446 #define carryQ_sbb128_ci_co(first, second, borrowQ, diff) \447 do {\448 BitBlock_int64 rslt, x, y;\449 x.bitblock = first;\450 y.bitblock = second;\451 double_int64_sbb_ci_co(x.int64[0], x.int64[1], y.int64[0], y.int64[1], \452 rslt.int64[0], rslt.int64[1], borrowQ);\453 diff = rslt.bitblock;\454 } while(0)455 456 #define double_int64_sbb_co(x1, x2, y1, y2, rslt1, rslt2, brwQ) \457 __asm__ __volatile__ ("add %[cq], %[cq]\n\t" \458 "sub %[e1], %[z1]\n\t" \459 "sbb %[e2], %[z2]\n\t" \460 "adc $0, %[cq]\n\t" \461 : [z1] "=r" (rslt1), [z2] "=r" (rslt2), [cq] "=r" (brwQ) \462 : "[z1]" (x1), "[z2]" (x2), \463 [e1] "r" (y1), [e2] "r" (y2), \464 "[cq]" (brwQ) \465 : "cc")466 467 #define carryQ_sbb128_co(first, second, borrowQ, diff) \468 do {\469 BitBlock_int64 rslt, x, y;\470 x.bitblock = first;\471 y.bitblock = second;\472 double_int64_sbb_co(x.int64[0], x.int64[1], y.int64[0], y.int64[1], \473 rslt.int64[0], rslt.int64[1], borrowQ);\474 diff = rslt.bitblock;\475 } while(0)476 477 #define double_int64_advance_ci_co(x1, x2, rslt1, rslt2, carryQ) \478 __asm__ __volatile__ ("add %[cq], %[cq]\n\t" \479 "adc %[z1], %[z1]\n\t" \480 "adc %[z2], %[z2]\n\t" \481 "adc $0, %[cq]\n\t" \482 : [z1] "=r" (rslt1), [z2] "=r" (rslt2), [cq] "=r" (carryQ) \483 : "[z1]" (x1), "[z2]" (x2), \484 "[cq]" (carryQ) \485 : "cc")486 487 #define carryQ_advance_with_carry_ci_co(cursor, carryQ, rslt)\488 do {\489 BitBlock_int64 x, z;\490 x.bitblock = cursor;\491 double_int64_advance_ci_co(x.int64[0], x.int64[1], z.int64[0], z.int64[1], carryQ);\492 rslt = z.bitblock;\493 } while(0)494 495 496 #define double_int64_advance_co(x1, x2, rslt1, rslt2, carryQ) \497 __asm__ __volatile__ ("add %[cq], %[cq]\n\t" \498 "add %[z1], %[z1]\n\t" \499 "adc %[z2], %[z2]\n\t" \500 "adc $0, %[cq]\n\t" \501 : [z1] "=r" (rslt1), [z2] "=r" (rslt2), [cq] "=r" (carryQ) \502 : "[z1]" (x1), "[z2]" (x2), \503 "[cq]" (carryQ) \504 : "cc")505 506 #define carryQ_advance_with_carry_co(cursor, carryQ, rslt)\507 do {\508 BitBlock_int64 x, z;\509 x.bitblock = cursor;\510 double_int64_advance_co(x.int64[0], x.int64[1], z.int64[0], z.int64[1], carryQ);\511 rslt = z.bitblock;\512 } while(0)513 514 393 static inline BitBlock BitBlock_advance_ci_co(BitBlock strm, CarryQtype & cq, const int carryno) __attribute__ ((always_inline)); 515 394 … … 554 433 555 434 static inline BitBlock BitBlock_advance_ci_co(BitBlock strm, CarryQtype & carryQ, const int carryno) { 556 557 558 559 560 561 562 563 564 565 566 435 BitBlock_int64 x, z; 436 x.bitblock = strm; 437 __asm__ __volatile__ ("add %[cq], %[cq]\n\t" 438 "adc %[z1], %[z1]\n\t" 439 "adc %[z2], %[z2]\n\t" 440 "adc $0, %[cq]\n\t" 441 : [z1] "=r" (z.int64[0]), [z2] "=r" (z.int64[1]), [cq] "=r" (carryQ) 442 : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]), 443 "[cq]" (carryQ) 444 : "cc"); 445 return z.bitblock; 567 446 } 568 447 569 448 static inline BitBlock BitBlock_advance_co(BitBlock strm, CarryQtype & carryQ, const int carryno) { 570 571 572 573 574 575 576 577 578 579 580 449 BitBlock_int64 x, z; 450 x.bitblock = strm; 451 __asm__ __volatile__ ("add %[cq], %[cq]\n\t" 452 "add %[z1], %[z1]\n\t" 453 "adc %[z2], %[z2]\n\t" 454 "adc $0, %[cq]\n\t" 455 : [z1] "=r" (z.int64[0]), [z2] "=r" (z.int64[1]), [cq] "=r" (carryQ) 456 : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]), 457 "[cq]" (carryQ) 458 : "cc"); 459 return z.bitblock; 581 460 } 582 461 583 462 static inline BitBlock BitBlock_advance_ci(BitBlock strm, CarryQtype & carryQ, const int carryno) { 584 585 586 587 588 "add%[z2], %[z2]\n\t"589 590 591 592 593 463 BitBlock_int64 x, z; 464 x.bitblock = strm; 465 __asm__ __volatile__ ("add %[cq], %[cq]\n\t" 466 "adc %[z1], %[z1]\n\t" 467 "adc %[z2], %[z2]\n\t" 468 : [z1] "=r" (z.int64[0]), [z2] "=r" (z.int64[1]), [cq] "=r" (carryQ) 469 : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]), 470 "[cq]" (carryQ) 471 : "cc"); 472 return z.bitblock; 594 473 } 595 474 596 475 static inline BitBlock BitBlock_advance(BitBlock strm) { 597 BitBlock_int64 x, z; 598 x.bitblock = strm; 599 __asm__ __volatile__ ( 600 "add %[z1], %[z1]\n\t" 601 "adc %[z2], %[z2]\n\t" 602 : [z1] "=r" (z.int64[0]), [z2] "=r" (z.int64[1]) 603 : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]) 604 : "cc"); 605 return z.bitblock; 606 } 607 608 609 static inline BitBlock BitBlock_add_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, const int carryno) { 610 BitBlock sum; 611 carryQ_adc128_ci_co(strm1, strm2, cq, sum); 612 return sum; 613 } 614 615 static inline BitBlock BitBlock_add_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, const int carryno) { 616 BitBlock sum; 617 carryQ_adc128_co(strm1, strm2, cq, sum); 618 return sum; 619 } 620 621 static inline BitBlock BitBlock_sub_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, const int carryno) { 622 BitBlock diff; 623 carryQ_sbb128_ci_co(strm1, strm2, cq, diff); 624 return diff; 625 } 626 627 static inline BitBlock BitBlock_sub_co(BitBlock strm1, BitBlock strm2, CarryQtype & cq, const int carryno) { 628 BitBlock diff; 629 carryQ_sbb128_co(strm1, strm2, cq, diff); 630 return diff; 631 } 476 BitBlock_int64 x, z; 477 x.bitblock = strm; 478 __asm__ __volatile__ ("add %[z1], %[z1]\n\t" 479 "adc %[z2], %[z2]\n\t" 480 : [z1] "=r" (z.int64[0]), [z2] "=r" (z.int64[1]) 481 : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]) 482 : "cc"); 483 return z.bitblock; 484 } 485 486 487 static inline BitBlock BitBlock_add_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype & carryQ, const int carryno) { 488 BitBlock_int64 rslt, x, y; 489 x.bitblock = strm1; 490 y.bitblock = strm2; 491 __asm__ __volatile__ ("add %[cq], %[cq]\n\t" 492 "adc %[e1], %[z1]\n\t" 493 "adc %[e2], %[z2]\n\t" 494 "adc $0, %[cq]\n\t" 495 : [z1] "=r" (rslt.int64[0]), [z2] "=r" (rslt.int64[1]), [cq] "=r" (carryQ) 496 : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]), 497 [e1] "r" (y.int64[0]), [e2] "r" (y.int64[1]), 498 "[cq]" (carryQ) 499 : "cc"); 500 return rslt.bitblock; 501 } 502 503 static inline BitBlock BitBlock_add_co(BitBlock strm1, BitBlock strm2, CarryQtype & carryQ, const int carryno) { 504 BitBlock_int64 rslt, x, y; 505 x.bitblock = strm1; 506 y.bitblock = strm2; 507 __asm__ __volatile__ ("add %[cq], %[cq]\n\t" 508 "add %[e1], %[z1]\n\t" 509 "adc %[e2], %[z2]\n\t" 510 "adc $0, %[cq]\n\t" 511 : [z1] "=r" (rslt.int64[0]), [z2] "=r" (rslt.int64[1]), [cq] "=r" (carryQ) 512 : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]), 513 [e1] "r" (y.int64[0]), [e2] "r" (y.int64[1]), 514 "[cq]" (carryQ) 515 : "cc"); 516 return rslt.bitblock; 517 } 518 519 static inline BitBlock BitBlock_add_ci(BitBlock strm1, BitBlock strm2, CarryQtype & carryQ, const int carryno) { 520 BitBlock_int64 rslt, x, y; 521 x.bitblock = strm1; 522 y.bitblock = strm2; 523 __asm__ __volatile__ ("add %[cq], %[cq]\n\t" 524 "adc %[e1], %[z1]\n\t" 525 "adc %[e2], %[z2]\n\t" 526 : [z1] "=r" (rslt.int64[0]), [z2] "=r" (rslt.int64[1]), [cq] "=r" (carryQ) 527 : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]), 528 [e1] "r" (y.int64[0]), [e2] "r" (y.int64[1]), 529 "[cq]" (carryQ) 530 : "cc"); 531 return rslt.bitblock; 532 } 533 534 static inline BitBlock BitBlock_add(BitBlock strm1, BitBlock strm2) { 535 BitBlock_int64 rslt, x, y; 536 x.bitblock = strm1; 537 y.bitblock = strm2; 538 __asm__ __volatile__ ("add %[e1], %[z1]\n\t" 539 "adc %[e2], %[z2]\n\t" 540 : [z1] "=r" (rslt.int64[0]), [z2] "=r" (rslt.int64[1]) 541 : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]), 542 [e1] "r" (y.int64[0]), [e2] "r" (y.int64[1]) 543 : "cc"); 544 return rslt.bitblock; 545 } 546 547 548 549 static inline BitBlock BitBlock_sub_ci_co(BitBlock strm1, BitBlock strm2, CarryQtype & carryQ, const int carryno) { 550 BitBlock_int64 rslt, x, y; 551 x.bitblock = strm1; 552 y.bitblock = strm2; 553 __asm__ __volatile__ ("add %[cq], %[cq]\n\t" 554 "sbb %[e1], %[z1]\n\t" 555 "sbb %[e2], %[z2]\n\t" 556 "adc $0, %[cq]\n\t" 557 : [z1] "=r" (rslt.int64[0]), [z2] "=r" (rslt.int64[1]), [cq] "=r" (carryQ) 558 : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]), 559 [e1] "r" (y.int64[0]), [e2] "r" (y.int64[1]), 560 "[cq]" (carryQ) 561 : "cc"); 562 return rslt.bitblock; 563 } 564 565 566 static inline BitBlock BitBlock_sub_co(BitBlock strm1, BitBlock strm2, CarryQtype & carryQ, const int carryno) { 567 BitBlock_int64 rslt, x, y; 568 x.bitblock = strm1; 569 y.bitblock = strm2; 570 __asm__ __volatile__ ("add %[cq], %[cq]\n\t" 571 "sub %[e1], %[z1]\n\t" 572 "sbb %[e2], %[z2]\n\t" 573 "adc $0, %[cq]\n\t" 574 : [z1] "=r" (rslt.int64[0]), [z2] "=r" (rslt.int64[1]), [cq] "=r" (carryQ) 575 : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]), 576 [e1] "r" (y.int64[0]), [e2] "r" (y.int64[1]), 577 "[cq]" (carryQ) 578 : "cc"); 579 return rslt.bitblock; 580 } 581 582 583 static inline BitBlock BitBlock_sub_ci(BitBlock strm1, BitBlock strm2, CarryQtype & carryQ, const int carryno) { 584 BitBlock_int64 rslt, x, y; 585 x.bitblock = strm1; 586 y.bitblock = strm2; 587 __asm__ __volatile__ ("add %[cq], %[cq]\n\t" 588 "sbb %[e1], %[z1]\n\t" 589 "sbb %[e2], %[z2]\n\t" 590 : [z1] "=r" (rslt.int64[0]), [z2] "=r" (rslt.int64[1]), [cq] "=r" (carryQ) 591 : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]), 592 [e1] "r" (y.int64[0]), [e2] "r" (y.int64[1]), 593 "[cq]" (carryQ) 594 : "cc"); 595 return rslt.bitblock; 596 } 597 598 599 static inline BitBlock BitBlock_sub(BitBlock strm1, BitBlock strm2) { 600 BitBlock_int64 rslt, x, y; 601 x.bitblock = strm1; 602 y.bitblock = strm2; 603 __asm__ __volatile__ ("sub %[e1], %[z1]\n\t" 604 "sbb %[e2], %[z2]\n\t" 605 : [z1] "=r" (rslt.int64[0]), [z2] "=r" (rslt.int64[1]) 606 : "[z1]" (x.int64[0]), "[z2]" (x.int64[1]), 607 [e1] "r" (y.int64[0]), [e2] "r" (y.int64[1]) 608 : "cc"); 609 return rslt.bitblock; 610 } 611 632 612 633 613 static inline BitBlock BitBlock_scanthru_ci_co(BitBlock markers0, BitBlock charclass, CarryQtype & cq, const int carryno) { 634 BitBlock markers1; 635 carryQ_adc128_ci_co(markers0, charclass, cq, markers1); 636 return simd_andc(markers1, charclass); 614 return simd_andc(BitBlock_add_ci_co(markers0, charclass, cq, carryno), charclass); 637 615 } 638 616 639 617 static inline BitBlock BitBlock_scanthru_co(BitBlock markers0, BitBlock charclass, CarryQtype & cq, const int carryno) { 640 BitBlock markers1; 641 carryQ_adc128_co(markers0, charclass, cq, markers1); 642 return simd_andc(markers1, charclass); 618 return simd_andc(BitBlock_add_co(markers0, charclass, cq, carryno), charclass); 619 } 620 621 static inline BitBlock BitBlock_scanthru_ci(BitBlock markers0, BitBlock charclass, CarryQtype & cq, const int carryno) { 622 return simd_andc(BitBlock_add_ci(markers0, charclass, cq, carryno), charclass); 623 } 624 625 static inline BitBlock BitBlock_scanthru(BitBlock markers0, BitBlock charclass) { 626 return simd_andc(BitBlock_add(markers0, charclass), charclass); 643 627 } 644 628
Note: See TracChangeset
for help on using the changeset viewer.