Changeset 5713 for icGREP/icgrepdevel/icgrep/pablo
 Timestamp:
 Oct 27, 2017, 1:21:46 PM (20 months ago)
 Location:
 icGREP/icgrepdevel/icgrep/pablo
 Files:

 2 edited
Legend:
 Unmodified
 Added
 Removed

icGREP/icgrepdevel/icgrep/pablo/carry_manager.cpp
r5712 r5713 618 618 Value * CarryManager::indexedAdvanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & b, const IndexedAdvance * const advance, Value * const strm, Value * const index_strm) { 619 619 const auto shiftAmount = advance>getAmount(); 620 Value * popcount_f = Intrinsic::getDeclaration(b>getModule(), Intrinsic::ctpop, b>getSizeTy()); 621 Value * PEXT_f = nullptr; 622 Value * PDEP_f = nullptr; 623 unsigned bitWidth = sizeof(size_t) * 8; 624 if (bitWidth == 64) { 625 PEXT_f = Intrinsic::getDeclaration(b>getModule(), Intrinsic::x86_bmi_pext_64); 626 PDEP_f = Intrinsic::getDeclaration(b>getModule(), Intrinsic::x86_bmi_pdep_64); 627 } 628 else if ((bitWidth == 32) && (shiftAmount < 32)) { 629 PEXT_f = Intrinsic::getDeclaration(b>getModule(), Intrinsic::x86_bmi_pext_32); 630 PDEP_f = Intrinsic::getDeclaration(b>getModule(), Intrinsic::x86_bmi_pdep_32); 631 } 632 else { 633 llvm::report_fatal_error("indexed_advance unsupported bit width"); 634 } 635 if (LLVM_LIKELY(shiftAmount < bitWidth)) { 620 if (LLVM_LIKELY(shiftAmount < LONG_ADVANCE_BREAKPOINT)) { 636 621 Value * const carryIn = getNextCarryIn(b); 637 Value * shiftVal = b>getSize(shiftAmount); 638 Value * carry = b>mvmd_extract(bitWidth, carryIn, 0); 639 Value * result = b>allZeroes(); 640 for (unsigned i = 0; i < b>getBitBlockWidth()/bitWidth; i++) { 641 Value * s = b>mvmd_extract(bitWidth, strm, i); 642 Value * ix = b>mvmd_extract(bitWidth, index_strm, i); 643 Value * ix_popcnt = b>CreateCall(popcount_f, {ix}); 644 Value * bits = b>CreateCall(PEXT_f, {s, ix}); 645 Value * adv = b>CreateOr(b>CreateShl(bits, shiftAmount), carry); 646 Value * popcount_small = b>CreateICmpULT(ix_popcnt, shiftVal); 647 Value * carry_if_popcount_small = 648 b>CreateOr(b>CreateShl(bits, b>CreateSub(shiftVal, ix_popcnt)), 649 b>CreateLShr(carry, ix_popcnt)); 650 Value * carry_if_popcount_large = b>CreateLShr(bits, b>CreateSub(ix_popcnt, shiftVal)); 651 carry = b>CreateSelect(popcount_small, 
carry_if_popcount_small, carry_if_popcount_large); 652 result = b>mvmd_insert(bitWidth, result, b>CreateCall(PDEP_f, {adv, ix}), i); 653 } 654 Value * carryOut = b>mvmd_insert(bitWidth, b>allZeroes(), carry, 0); 622 Value * carryOut, * result; 623 std::tie(carryOut, result) = b>bitblock_indexed_advance(strm, index_strm, carryIn, shiftAmount); 655 624 setNextCarryOut(b, carryOut); 656 625 return result; 657 626 } else if (shiftAmount <= b>getBitBlockWidth()) { 658 // A single bitblock still holds all the shifted bits. In this case, we know 659 // that the shift amount is always greater than the popcount of the individual 660 // elements that we deal with. This simplifies some of the logic. 661 Type * iBitBlock = b>getIntNTy(b>getBitBlockWidth()); 662 Value * carryInPtr = b>CreateGEP(mCurrentFrame, {b>getInt32(0), b>getInt32(mCurrentFrameIndex++), b>getInt32(0)}); 663 Value * carryIn = b>CreateBlockAlignedLoad(carryInPtr); 664 Value * shiftVal = b>getSize(shiftAmount); 665 Value * carry = b>CreateBitCast(carryIn, iBitBlock); 666 Value * result = b>allZeroes(); 667 for (unsigned i = 0; i < b>getBitBlockWidth()/bitWidth; i++) { 668 Value * s = b>mvmd_extract(bitWidth, strm, i); 669 Value * ix = b>mvmd_extract(bitWidth, index_strm, i); 670 Value * ix_popcnt = b>CreateCall(popcount_f, {ix}); 671 Value * bits = b>CreateCall(PEXT_f, {s, ix}); // All these bits are shifted out (appended to carry). 672 result = b>mvmd_insert(bitWidth, result, b>CreateCall(PDEP_f, {b>mvmd_extract(bitWidth, carry, 0), ix}), i); 673 carry = b>CreateLShr(carry, b>CreateZExt(ix_popcnt, iBitBlock)); // Remove the carry bits consumed, make room for new bits. 
674 carry = b>CreateOr(carry, b>CreateShl(b>CreateZExt(bits, iBitBlock), b>CreateZExt(b>CreateSub(shiftVal, ix_popcnt), iBitBlock))); 675 } 676 b>CreateBlockAlignedStore(b>CreateBitCast(carry, b>getBitBlockType()), carryInPtr); 627 Value * carryPtr = b>CreateGEP(mCurrentFrame, {b>getInt32(0), b>getInt32(mCurrentFrameIndex++), b>getInt32(0)}); 628 Value * carryIn = b>CreateBlockAlignedLoad(carryPtr); 629 Value * carryOut, * result; 630 std::tie(carryOut, result) = b>bitblock_indexed_advance(strm, index_strm, carryIn, shiftAmount); 631 b>CreateBlockAlignedStore(carryOut, carryPtr); 677 632 if ((mIfDepth > 0) && mCarryInfo>hasExplicitSummary()) { 678 633 addToCarryOutSummary(b, strm); 
icGREP/icgrepdevel/icgrep/pablo/carrypack_manager.cpp
r5712 r5713 631 631 Value * CarryManager::indexedAdvanceCarryInCarryOut(const std::unique_ptr<kernel::KernelBuilder> & b, const IndexedAdvance * const advance, Value * const strm, Value * const index_strm) { 632 632 const auto shiftAmount = advance>getAmount(); 633 Value * popcount_f = Intrinsic::getDeclaration(b>getModule(), Intrinsic::ctpop, b>getSizeTy()); 634 Value * PEXT_f = nullptr; 635 Value * PDEP_f = nullptr; 636 unsigned bitWidth = sizeof(size_t) * 8; 637 if (bitWidth == 64) { 638 PEXT_f = Intrinsic::getDeclaration(b>getModule(), Intrinsic::x86_bmi_pext_64); 639 PDEP_f = Intrinsic::getDeclaration(b>getModule(), Intrinsic::x86_bmi_pdep_64); 640 } 641 else if ((bitWidth == 32) && (shiftAmount < 32)) { 642 PEXT_f = Intrinsic::getDeclaration(b>getModule(), Intrinsic::x86_bmi_pext_32); 643 PDEP_f = Intrinsic::getDeclaration(b>getModule(), Intrinsic::x86_bmi_pdep_32); 644 } 645 else { 646 llvm::report_fatal_error("indexed_advance unsupported bit width"); 647 } 648 if (LLVM_LIKELY(shiftAmount < bitWidth)) { 633 if (LLVM_LIKELY(shiftAmount < mElementWidth)) { 649 634 Value * const carryIn = getNextCarryIn(b); 650 Value * shiftVal = b>getSize(shiftAmount); 651 Value * carry = b>mvmd_extract(bitWidth, carryIn, 0); 652 Value * result = b>allZeroes(); 653 for (unsigned i = 0; i < b>getBitBlockWidth()/bitWidth; i++) { 654 Value * s = b>mvmd_extract(bitWidth, strm, i); 655 Value * ix = b>mvmd_extract(bitWidth, index_strm, i); 656 Value * ix_popcnt = b>CreateCall(popcount_f, {ix}); 657 Value * bits = b>CreateCall(PEXT_f, {s, ix}); 658 Value * adv = b>CreateOr(b>CreateShl(bits, shiftAmount), carry); 659 Value * popcount_small = b>CreateICmpULT(ix_popcnt, shiftVal); 660 Value * carry_if_popcount_small = 661 b>CreateOr(b>CreateShl(bits, b>CreateSub(shiftVal, ix_popcnt)), 662 b>CreateLShr(carry, ix_popcnt)); 663 Value * carry_if_popcount_large = b>CreateLShr(bits, b>CreateSub(ix_popcnt, shiftVal)); 664 carry = b>CreateSelect(popcount_small, carry_if_popcount_small, 
carry_if_popcount_large); 665 result = b>mvmd_insert(bitWidth, result, b>CreateCall(PDEP_f, {adv, ix}), i); 666 } 667 Value * carryOut = b>mvmd_insert(bitWidth, b>allZeroes(), carry, 0); 635 Value * carryOut, * result; 636 std::tie(carryOut, result) = b>bitblock_indexed_advance(strm, index_strm, carryIn, shiftAmount); 668 637 setNextCarryOut(b, carryOut); 669 638 return result; 670 639 } else if (shiftAmount <= b>getBitBlockWidth()) { 671 // A single bitblock still holds all the shifted bits. In this case, we know 672 // that the shift amount is always greater than the popcount of the individual 673 // elements that we deal with. This simplifies some of the logic. 674 Type * iBitBlock = b>getIntNTy(b>getBitBlockWidth()); 675 Value * carryInPtr = b>CreateGEP(mCurrentFrame, {b>getInt32(0), b>getInt32(mCurrentFrameIndex++), b>getInt32(0)}); 676 Value * carryIn = b>CreateBlockAlignedLoad(carryInPtr); 677 Value * shiftVal = b>getSize(shiftAmount); 678 Value * carry = b>CreateBitCast(carryIn, iBitBlock); 679 Value * result = b>allZeroes(); 680 for (unsigned i = 0; i < b>getBitBlockWidth()/bitWidth; i++) { 681 Value * s = b>mvmd_extract(bitWidth, strm, i); 682 Value * ix = b>mvmd_extract(bitWidth, index_strm, i); 683 Value * ix_popcnt = b>CreateCall(popcount_f, {ix}); 684 Value * bits = b>CreateCall(PEXT_f, {s, ix}); // All these bits are shifted out (appended to carry). 685 result = b>mvmd_insert(bitWidth, result, b>CreateCall(PDEP_f, {b>mvmd_extract(bitWidth, carry, 0), ix}), i); 686 carry = b>CreateLShr(carry, b>CreateZExt(ix_popcnt, iBitBlock)); // Remove the carry bits consumed, make room for new bits. 
687 carry = b>CreateOr(carry, b>CreateShl(b>CreateZExt(bits, iBitBlock), b>CreateZExt(b>CreateSub(shiftVal, ix_popcnt), iBitBlock))); 688 } 689 b>CreateBlockAlignedStore(b>CreateBitCast(carry, b>getBitBlockType()), carryInPtr); 640 Value * carryPtr = b>CreateGEP(mCurrentFrame, {b>getInt32(0), b>getInt32(mCurrentFrameIndex++), b>getInt32(0)}); 641 Value * carryIn = b>CreateBlockAlignedLoad(carryPtr); 642 Value * carryOut, * result; 643 std::tie(carryOut, result) = b>bitblock_indexed_advance(strm, index_strm, carryIn, shiftAmount); 644 b>CreateBlockAlignedStore(carryOut, carryPtr); 690 645 if ((mIfDepth > 0) && mCarryInfo>hasExplicitSummary()) { 691 646 addToCarryOutSummary(b, strm); … … 697 652 } 698 653 } 699 700 654 701 655
Note: See TracChangeset for help on using the changeset viewer.