Changeset 5782 for icGREP/icgrep-devel/icgrep/kernels/kernel.cpp
- Timestamp:
- Dec 15, 2017, 12:44:01 PM (14 months ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
icGREP/icgrep-devel/icgrep/kernels/kernel.cpp
r5771 r5782 624 624 625 625 /** ------------------------------------------------------------------------------------------------------------- * 626 * @brief roundUp627 ** ------------------------------------------------------------------------------------------------------------- */628 unsigned roundUp(const ProcessingRate::RateValue & r) {629 if (LLVM_LIKELY(r.denominator() == 1)) {630 return r.numerator();631 } else {632 return (r.numerator() + r.denominator() - 1) / r.denominator();633 }634 }635 636 /** ------------------------------------------------------------------------------------------------------------- *637 626 * @brief getItemAlignment 638 627 ** ------------------------------------------------------------------------------------------------------------- */ 639 628 inline unsigned MultiBlockKernel::getItemAlignment(const Binding & binding) const { 640 629 const auto & rate = binding.getRate(); 641 if (rate.isFixed() ) {642 const auto &r = rate.getRate();643 constauto n = (r.numerator() * mStride);630 if (rate.isFixed() && binding.nonDeferred()) { 631 const auto r = rate.getRate(); 632 auto n = (r.numerator() * mStride); 644 633 if (LLVM_LIKELY(r.denominator() == 1)) { 645 634 return n; … … 675 664 } 676 665 666 using AttributeId = kernel::Attribute::KindId; 667 using RateValue = ProcessingRate::RateValue; 668 677 669 const auto inputSetCount = mStreamSetInputs.size(); 678 670 const auto outputSetCount = mStreamSetOutputs.size(); … … 682 674 AllocaInst * temporaryInputBuffer[inputSetCount]; 683 675 for (unsigned i = 0; i < inputSetCount; ++i) { 684 const auto& input = mStreamSetInputs[i];676 const Binding & input = mStreamSetInputs[i]; 685 677 const ProcessingRate & rate = input.getRate(); 686 678 if (isTransitivelyUnknownRate(rate)) { 687 679 report_fatal_error("MultiBlock kernels do not support unknown rate input streams or streams relative to an unknown rate input."); 688 } else if (rate.isFixed() && input.nonDeferred() &&!requiresBufferedFinalStride(input)) {680 } else if (rate.isFixed() && !requiresBufferedFinalStride(input)) { 689 681 temporaryInputBuffer[i] = nullptr; 690 682 } else { 691 683 Type * const ty = mStreamSetInputBuffers[i]->getStreamSetBlockType(); 692 const auto ub = getUpperBound(rate); 693 Constant * arraySize = b->getInt64(roundUp(ub)); 684 auto ub = getUpperBound(rate); 685 if (LLVM_UNLIKELY(input.hasLookahead())) { 686 ub += RateValue(input.getLookahead(), mStride); 687 } 688 Constant * const arraySize = b->getInt64(ceiling(ub)); 694 689 AllocaInst * const ptr = b->CreateAlignedAlloca(ty, blockAlignment, arraySize); 695 690 assert (ptr->isStaticAlloca()); … … 700 695 AllocaInst * temporaryOutputBuffer[outputSetCount]; 701 696 for (unsigned i = 0; i < outputSetCount; i++) { 702 const auto& output = mStreamSetOutputs[i];697 const Binding & output = mStreamSetOutputs[i]; 703 698 const ProcessingRate & rate = output.getRate(); 704 if (LLVM_UNLIKELY(isTransitivelyUnknownRate(rate) || (rate.isFixed() && output.nonDeferred() &&!requiresBufferedFinalStride(output)))) {699 if (LLVM_UNLIKELY(isTransitivelyUnknownRate(rate) || (rate.isFixed() && !requiresBufferedFinalStride(output)))) { 705 700 temporaryOutputBuffer[i] = nullptr; 706 701 } else { … … 710 705 } 711 706 Type * const ty = mStreamSetOutputBuffers[i]->getStreamSetBlockType(); 712 Constant * arraySize = b->getInt64(roundUp(ub));707 Constant * const arraySize = b->getInt64(ceiling(ub)); 713 708 AllocaInst * const ptr = b->CreateAlignedAlloca(ty, blockAlignment, arraySize); 714 709 assert (ptr->isStaticAlloca()); … … 742 737 // linearly available strides. 743 738 Value * numOfStrides = nullptr; 744 mInitialAvailableItemCount. resize(inputSetCount);739 mInitialAvailableItemCount.assign(mAvailableItemCount.begin(), mAvailableItemCount.end()); 745 740 mInitialProcessedItemCount.resize(inputSetCount); 746 741 mStreamSetInputBaseAddress.resize(inputSetCount); 747 742 Value * inputStrideSize[inputSetCount]; 748 743 for (unsigned i = 0; i < inputSetCount; i++) { 749 const auto& input = mStreamSetInputs[i];744 const Binding & input = mStreamSetInputs[i]; 750 745 const auto & name = input.getName(); 751 746 const ProcessingRate & rate = input.getRate(); 752 Value * const ic = b->getProcessedItemCount(name); 753 mInitialProcessedItemCount[i] = ic; 747 Value * processed = b->getProcessedItemCount(name); 748 //b->CallPrintInt(getName() + "_" + name + "_processed", processed); 749 750 mInitialProcessedItemCount[i] = processed; 751 Value * baseBuffer = b->getBlockAddress(name, b->CreateLShr(processed, LOG_2_BLOCK_WIDTH)); 752 754 753 if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) { 755 b->CreateAssert(b->CreateICmpUGE(mAvailableItemCount[i], ic), 756 "processed item count cannot exceed the available item count"); 757 } 758 assert (ic->getType() == mAvailableItemCount[i]->getType()); 759 Value * const unprocessed = b->CreateSub(mAvailableItemCount[i], ic); 760 Value * baseBuffer = b->getBlockAddress(name, b->CreateLShr(ic, LOG_2_BLOCK_WIDTH)); 761 mInitialAvailableItemCount[i] = mAvailableItemCount[i]; 762 mAvailableItemCount[i] = b->getLinearlyAccessibleItems(name, ic, unprocessed); 763 764 // Are our linearly accessible items sufficient for a stride? 754 b->CreateAssert(b->CreateICmpULT(processed, mAvailableItemCount[i]), "processed item count must be less than the available item count"); 755 } 756 757 Value * const unprocessed = b->CreateSub(mAvailableItemCount[i], processed); 758 //b->CallPrintInt(getName() + "_" + name + "_unprocessed", unprocessed); 759 760 Value * avail = b->getLinearlyAccessibleItems(name, processed, unprocessed); 761 //b->CallPrintInt(getName() + "_" + name + "_avail", avail); 762 763 764 // Ensure that everything between SâP/Sâ, and Sân*(P + L)/Sâ is linearly available, where S is 765 // the stride size, P is the current processed position, L is the lookahead amount and n â â€+. 766 767 Value * remaining = avail; 768 if (LLVM_UNLIKELY(input.hasLookahead())) { 769 Constant * const lookahead = b->getSize(input.getLookahead()); 770 remaining = b->CreateSelect(b->CreateICmpULT(lookahead, remaining), b->CreateSub(remaining, lookahead), ZERO); 771 //b->CallPrintInt(getName() + "_" + name + "_remaining", remaining); 772 } 773 765 774 inputStrideSize[i] = getStrideSize(b, rate); 766 Value * accessibleStrides = b->CreateUDiv(mAvailableItemCount[i], inputStrideSize[i]); 775 776 Value * accessibleStrides = b->CreateUDiv(remaining, inputStrideSize[i]); 777 778 //b->CallPrintInt(getName() + "_" + name + "_accessibleStrides", accessibleStrides); 779 767 780 AllocaInst * const tempBuffer = temporaryInputBuffer[i]; 768 781 if (tempBuffer) { … … 779 792 780 793 b->SetInsertPoint(copyFromBack); 781 Value * const temporaryAvailable = b->CreateUMin(unprocessed, inputStrideSize[i]); 794 Value * const temporarySize = b->CreateMul(tempBuffer->getArraySize(), b->getSize(mStride)); 795 Value * const temporaryAvailable = b->CreateUMin(unprocessed, temporarySize); 782 796 if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) { 783 b->CreateAssert(b->CreateICmpULE( mAvailableItemCount[i], temporaryAvailable),784 "linearly available cannot be greater than temporarily available");785 } 786 Value * const offset = b->CreateAnd( ic, BLOCK_WIDTH_MASK);797 b->CreateAssert(b->CreateICmpULE(avail, temporaryAvailable), 798 "linearly available item count cannot exceed the temporarily available item count"); 799 } 800 Value * const offset = b->CreateAnd(processed, BLOCK_WIDTH_MASK); 787 801 Value * const bufferSize = b->CreateMul(ConstantExpr::getSizeOf(tempBuffer->getAllocatedType()), tempBuffer->getArraySize()); 788 802 b->CreateMemZero(tempBuffer, bufferSize, blockAlignment); 789 803 const auto copyAlignment = getItemAlignment(mStreamSetInputs[i]); 790 b->CreateStreamCpy(name, tempBuffer, ZERO, baseBuffer, offset, mAvailableItemCount[i], copyAlignment);804 b->CreateStreamCpy(name, tempBuffer, ZERO, baseBuffer, offset, avail, copyAlignment); 791 805 Value * const temporaryStrides = b->CreateSelect(b->CreateICmpULT(unprocessed, inputStrideSize[i]), ZERO, ONE); 792 806 BasicBlock * const copyToBackEnd = b->GetInsertBlock(); 793 b->CreateCondBr(b->CreateICmpNE( mAvailableItemCount[i], temporaryAvailable), copyFromFront, resume);807 b->CreateCondBr(b->CreateICmpNE(temporaryAvailable, unprocessed), copyFromFront, resume); 794 808 795 809 b->SetInsertPoint(copyFromFront); 796 Value * const remaining = b->CreateSub(temporaryAvailable, mAvailableItemCount[i]);810 Value * const remaining = b->CreateSub(temporaryAvailable, avail); 797 811 Value * const baseAddress = b->getBaseAddress(name); 798 b->CreateStreamCpy(name, tempBuffer, mAvailableItemCount[i], baseAddress, ZERO, remaining, copyAlignment);812 b->CreateStreamCpy(name, tempBuffer, avail, baseAddress, ZERO, remaining, copyAlignment); 799 813 BasicBlock * const copyToFrontEnd = b->GetInsertBlock(); 800 814 b->CreateBr(resume); … … 808 822 809 823 PHINode * const phiAvailItemCount = b->CreatePHI(b->getSizeTy(), 3); 810 phiAvailItemCount->addIncoming( mAvailableItemCount[i], entry);824 phiAvailItemCount->addIncoming(avail, entry); 811 825 phiAvailItemCount->addIncoming(temporaryAvailable, copyToBackEnd); 812 826 phiAvailItemCount->addIncoming(temporaryAvailable, copyToFrontEnd); 813 mAvailableItemCount[i]= phiAvailItemCount;827 avail = phiAvailItemCount; 814 828 815 829 PHINode * const phiStrides = b->CreatePHI(b->getSizeTy(), 2); … … 819 833 accessibleStrides = phiStrides; 820 834 } 821 835 mAvailableItemCount[i] = avail; 822 836 mStreamSetInputBaseAddress[i] = baseBuffer; 823 837 numOfStrides = b->CreateUMin(numOfStrides, accessibleStrides); … … 833 847 const auto & name = output.getName(); 834 848 const ProcessingRate & rate = output.getRate(); 835 Value * const ic = b->getProducedItemCount(name); 836 Value * baseBuffer = b->getBlockAddress(name, b->CreateLShr(ic, LOG_2_BLOCK_WIDTH)); 849 Value * const produced = b->getProducedItemCount(name); 850 851 //b->CallPrintInt(getName() + "_" + name + "_produced", produced); 852 853 Value * baseBuffer = b->getBlockAddress(name, b->CreateLShr(produced, LOG_2_BLOCK_WIDTH)); 837 854 assert (baseBuffer->getType()->isPointerTy()); 838 linearlyWritable[i] = b->getLinearlyWritableItems(name, ic); 855 linearlyWritable[i] = b->getLinearlyWritableItems(name, produced); 856 857 //b->CallPrintInt(getName() + "_" + name + "_linearlyWritable", linearlyWritable[i]); 858 839 859 outputStrideSize[i] = getStrideSize(b, rate); 840 860 // Is the number of linearly writable items sufficient for a stride? … … 842 862 AllocaInst * const tempBuffer = temporaryOutputBuffer[i]; 843 863 Value * writableStrides = b->CreateUDiv(linearlyWritable[i], outputStrideSize[i]); 864 //b->CallPrintInt(getName() + "_" + name + "_writableStrides", writableStrides); 865 866 844 867 // Do we require a temporary buffer to write to? 845 868 if (tempBuffer) { 846 869 assert (tempBuffer->getType() == baseBuffer->getType()); 847 870 BasicBlock * const entry = b->GetInsertBlock(); 848 BasicBlock * const useTemporary = b->CreateBasicBlock(name + "UseTemporary");871 BasicBlock * const clearBuffer = b->CreateBasicBlock(name + "ClearTemporaryBuffer"); 849 872 BasicBlock * const resume = b->CreateBasicBlock(name + "Resume"); 850 873 Value * const requiresCopy = b->CreateICmpEQ(writableStrides, ZERO); 851 852 b->CreateUnlikelyCondBr(requiresCopy, useTemporary, resume); 853 854 // Clear the buffer after use since we may end up reusing it within the same stride 855 b->SetInsertPoint(useTemporary); 874 b->CreateUnlikelyCondBr(requiresCopy, clearBuffer, resume); 875 // Clear the output buffer prior to using it 876 b->SetInsertPoint(clearBuffer); 856 877 Value * const bufferSize = b->CreateMul(ConstantExpr::getSizeOf(tempBuffer->getAllocatedType()), tempBuffer->getArraySize()); 857 878 b->CreateMemZero(tempBuffer, bufferSize, blockAlignment); 858 879 b->CreateBr(resume); 859 880 // Select the appropriate buffer / stride # 860 881 b->SetInsertPoint(resume); 861 882 PHINode * const phiBuffer = b->CreatePHI(baseBuffer->getType(), 3); 862 883 phiBuffer->addIncoming(baseBuffer, entry); 863 phiBuffer->addIncoming(tempBuffer, useTemporary);884 phiBuffer->addIncoming(tempBuffer, clearBuffer); 864 885 baseBuffer = phiBuffer; 865 886 PHINode * const phiStrides = b->CreatePHI(b->getSizeTy(), 2); 866 887 phiStrides->addIncoming(writableStrides, entry); 867 phiStrides->addIncoming(ONE, useTemporary);888 phiStrides->addIncoming(ONE, clearBuffer); 868 889 writableStrides = phiStrides; 869 870 890 } 871 891 numOfStrides = b->CreateUMin(numOfStrides, writableStrides); 872 892 } 873 mInitialProducedItemCount[i] = ic;893 mInitialProducedItemCount[i] = produced; 874 894 mStreamSetOutputBaseAddress[i] = baseBuffer; 875 895 } … … 885 905 } 886 906 for (unsigned i = 0; i < inputSetCount; ++i) { 887 const ProcessingRate & rate = mStreamSetInputs[i].getRate(); 888 if (rate.isFixed() && mStreamSetInputs[i].nonDeferred()) { 907 const auto & input = mStreamSetInputs[i]; 908 const ProcessingRate & rate = input.getRate(); 909 if (rate.isFixed() && input.nonDeferred()) { 889 910 mAvailableItemCount[i] = b->CreateSelect(mIsFinal, mAvailableItemCount[i], b->CreateMul(numOfStrides, inputStrideSize[i])); 890 911 } … … 896 917 897 918 for (unsigned i = 0; i < inputSetCount; ++i) { 898 const ProcessingRate & rate = mStreamSetInputs[i].getRate(); 899 if (rate.isFixed() && mStreamSetInputs[i].nonDeferred()) { 919 const auto & input = mStreamSetInputs[i]; 920 const ProcessingRate & rate = input.getRate(); 921 if (rate.isFixed() && input.nonDeferred()) { 900 922 Value * const ic = b->CreateAdd(mInitialProcessedItemCount[i], mAvailableItemCount[i]); 901 b->setProcessedItemCount( mStreamSetInputs[i].getName(), ic);923 b->setProcessedItemCount(input.getName(), ic); 902 924 } 903 925 } 904 926 905 927 for (unsigned i = 0; i < outputSetCount; ++i) { 906 const ProcessingRate & rate = mStreamSetOutputs[i].getRate(); 928 const auto & output = mStreamSetOutputs[i]; 929 const ProcessingRate & rate = output.getRate(); 907 930 if (rate.isFixed()) { 908 assert ( mStreamSetOutputs[i].nonDeferred());931 assert (output.nonDeferred()); 909 932 Value * const produced = b->CreateMul(numOfStrides, outputStrideSize[i]); 910 933 Value * const ic = b->CreateAdd(mInitialProducedItemCount[i], produced); 911 b->setProducedItemCount( mStreamSetOutputs[i].getName(), ic);934 b->setProducedItemCount(output.getName(), ic); 912 935 } 913 936 } … … 990 1013 Value * hasMoreStrides = b->getTrue(); 991 1014 for (unsigned i = 0; i < inputSetCount; ++i) { 992 const auto & name = mStreamSetInputs[i].getName(); 1015 const Binding & input = mStreamSetInputs[i]; 1016 const auto & name = input.getName(); 993 1017 Value * const avail = mInitialAvailableItemCount[i]; 994 1018 Value * const processed = b->getProcessedItemCount(name); 995 1019 if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) { 996 b->CreateAssert(b->CreateICmpULE(processed, avail), name + ": processed data cannot exceed available data"); 997 } 998 Value * const remaining = b->CreateSub(avail, processed); 1020 b->CreateAssert(b->CreateICmpULE(processed, avail), getName() + "." + name + ": processed data exceeds available data"); 1021 } 1022 Value * remaining = b->CreateSub(avail, processed); 1023 if (LLVM_UNLIKELY(input.hasAttribute(AttributeId::LookAhead))) { 1024 Constant * const lookahead = b->getSize(input.findAttribute(AttributeId::LookAhead).amount()); 1025 remaining = b->CreateSelect(b->CreateICmpULT(lookahead, remaining), b->CreateSub(remaining, lookahead), ZERO); 1026 } 999 1027 Value * const remainingStrides = b->CreateUDiv(remaining, inputStrideSize[i]); 1000 1028 Value * const hasRemainingStrides = b->CreateICmpNE(remainingStrides, ZERO); … … 1013 1041 Value * const consumed = b->getConsumedItemCount(name); 1014 1042 if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) { 1015 b->CreateAssert(b->CreateICmpULE(consumed, produced), name + ": consumed data cannot exceedproduced data");1043 b->CreateAssert(b->CreateICmpULE(consumed, produced), getName() + "." + name + ": consumed data exceeds produced data"); 1016 1044 } 1017 1045 Value * const unconsumed = b->CreateSub(produced, consumed); 1018 1046 Value * const capacity = b->getCapacity(name); 1019 1047 if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) { 1020 b->CreateAssert(b->CreateICmpULE(unconsumed, capacity), name + ": unconsumed data cannot exceedcapacity");1048 b->CreateAssert(b->CreateICmpULE(unconsumed, capacity), getName() + "." + name + ": unconsumed data exceeds capacity"); 1021 1049 } 1022 1050 Value * const remaining = b->CreateSub(capacity, unconsumed); … … 1183 1211 for (const Attribute & attr : output.getAttributes()) { 1184 1212 if (attr.isAdd()) { 1185 produced = b->CreateAdd(produced, b->getSize(attr. getAmount()));1213 produced = b->CreateAdd(produced, b->getSize(attr.amount())); 1186 1214 } else if (attr.isRoundUpTo()) { 1187 produced = b->CreateRoundUp(produced, b->getSize(attr. getAmount()));1215 produced = b->CreateRoundUp(produced, b->getSize(attr.amount())); 1188 1216 } 1189 1217 }
Note: See TracChangeset
for help on using the changeset viewer.