Changeset 5771
- Timestamp:
- Dec 9, 2017, 5:05:16 PM (15 months ago)
- Location:
- icGREP/icgrep-devel/icgrep
- Files:
-
- 14 edited
Legend:
- Unmodified
- Added
- Removed
-
icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.cpp
r5761 r5771 93 93 DataLayout DL(b->getModule()); 94 94 IntegerType * const intPtrTy = cast<IntegerType>(DL.getIntPtrType(Ptr->getType())); 95 Value * sz = ConstantExpr::get Trunc(ConstantExpr::getSizeOf(Base->getAllocatedType()), intPtrTy);95 Value * sz = ConstantExpr::getBitCast(ConstantExpr::getSizeOf(Base->getAllocatedType()), intPtrTy); 96 96 if (dyn_cast_or_null<Constant>(Base->getArraySize()) && !cast<Constant>(Base->getArraySize())->isNullValue()) { 97 97 sz = b->CreateMul(sz, b->CreateZExtOrTrunc(Base->getArraySize(), intPtrTy)); … … 966 966 #endif 967 967 968 void CBuilder::__CreateAssert(Value * const assertion, const Twine failureMessage) {968 void CBuilder::__CreateAssert(Value * const assertion, const Twine & failureMessage) { 969 969 if (LLVM_UNLIKELY(isa<Constant>(assertion))) { 970 970 if (LLVM_UNLIKELY(cast<Constant>(assertion)->isNullValue())) { … … 1082 1082 IRBuilder<>::CreateCall(function, {assertion, GetString(failureMessage.toStringRef(tmp)), trace, depth}); 1083 1083 } else { // if assertions are not enabled, make it a compiler assumption. 1084 IRBuilder<>::CreateAssumption(assertion); 1084 1085 // INVESTIGATE: while interesting, this does not seem to produce faster code and only provides a trivial reduction 1086 // of compiled code size in LLVM 3.8 but nearly doubles compilation time. This may have been improved with later 1087 // versions of LLVM but it's likely that assumptions ought to be hand placed once they're prove to improve performance. 1088 1089 // IRBuilder<>::CreateAssumption(assertion); 1085 1090 } 1086 1091 } -
icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.h
r5761 r5771 236 236 virtual llvm::StoreInst * CreateAtomicStoreRelease(llvm::Value * val, llvm::Value * ptr); 237 237 238 void CreateAssert(llvm::Value * assertion, const llvm::Twine failureMessage) {238 void CreateAssert(llvm::Value * assertion, const llvm::Twine & failureMessage) { 239 239 if (LLVM_UNLIKELY(assertion->getType()->isVectorTy())) { 240 240 assertion = CreateBitCast(assertion, getIntNTy(assertion->getType()->getPrimitiveSizeInBits())); … … 243 243 } 244 244 245 void CreateAssertZero(llvm::Value * assertion, const llvm::Twine failureMessage) {245 void CreateAssertZero(llvm::Value * assertion, const llvm::Twine & failureMessage) { 246 246 if (LLVM_UNLIKELY(assertion->getType()->isVectorTy())) { 247 247 assertion = CreateBitCast(assertion, getIntNTy(assertion->getType()->getPrimitiveSizeInBits())); … … 355 355 bool hasAddressSanitizer() const; 356 356 357 void __CreateAssert(llvm::Value * assertion, const llvm::Twine failureMessage);357 void __CreateAssert(llvm::Value * assertion, const llvm::Twine & failureMessage); 358 358 359 359 llvm::Function * LinkFunction(llvm::StringRef name, llvm::FunctionType * type, void * functionPtr) const; -
icGREP/icgrep-devel/icgrep/base64.cpp
r5755 r5771 53 53 Function * const main = cast<Function>(mod->getOrInsertFunction("Main", voidTy, int32Ty, outputType, nullptr)); 54 54 main->setCallingConv(CallingConv::C); 55 Function::arg_iteratorargs = main->arg_begin();55 auto args = main->arg_begin(); 56 56 57 57 Value * const fileDescriptor = &*(args++); -
icGREP/icgrep-devel/icgrep/grep_engine.cpp
r5770 r5771 85 85 const unsigned n = filenames.size(); 86 86 mResultStrs.resize(n); 87 mFileStatus.resize(n); 88 for (unsigned i = 0; i < n; i++) { 89 mResultStrs[i] = make_unique<std::stringstream>(); 90 mFileStatus[i] = FileStatus::Pending; 91 } 87 mFileStatus.resize(n, FileStatus::Pending); 92 88 inputFiles = filenames; 93 89 } … … 221 217 friend class EmitMatchesEngine; 222 218 public: 223 EmitMatch(std::string linePrefix, std:: stringstream *strm) : mLinePrefix(linePrefix), mLineCount(0), mTerminated(true), mResultStr(strm) {}219 EmitMatch(std::string linePrefix, std::ostringstream & strm) : mLinePrefix(linePrefix), mLineCount(0), mTerminated(true), mResultStr(strm) {} 224 220 void accumulate_match(const size_t lineNum, char * line_start, char * line_end) override; 225 221 void finalize_match(char * buffer_end) override; … … 228 224 size_t mLineCount; 229 225 bool mTerminated; 230 std:: stringstream*mResultStr;226 std::ostringstream & mResultStr; 231 227 }; 232 228 … … 237 233 void EmitMatch::accumulate_match (const size_t lineNum, char * line_start, char * line_end) { 238 234 if (WithFilenameFlag) { 239 *mResultStr << mLinePrefix;235 mResultStr << mLinePrefix; 240 236 } 241 237 if (LineNumberFlag) { … … 243 239 // the line number so that lines are numbered from 1. 244 240 if (InitialTabFlag) { 245 *mResultStr << lineNum+1 << "\t:";241 mResultStr << lineNum+1 << "\t:"; 246 242 } 247 243 else { 248 *mResultStr << lineNum+1 << ":";244 mResultStr << lineNum+1 << ":"; 249 245 } 250 246 } 251 247 size_t bytes = line_end - line_start + 1; 252 mResultStr ->write(line_start, bytes);248 mResultStr.write(line_start, bytes); 253 249 mLineCount++; 254 250 unsigned last_byte = *line_end; … … 268 264 269 265 void EmitMatch::finalize_match(char * buffer_end) { 270 if (!mTerminated) *mResultStr << "\n";266 if (!mTerminated) mResultStr << "\n"; 271 267 } 272 268 … … 321 317 auto f = reinterpret_cast<GrepFunctionType>(mGrepDriver->getMain()); 322 318 323 int32_t fileDescriptor = openFile(fileName, mResultStrs[fileIdx] .get());319 int32_t fileDescriptor = openFile(fileName, mResultStrs[fileIdx]); 324 320 if (fileDescriptor == -1) return 0; 325 321 … … 331 327 uint64_t CountOnlyEngine::doGrep(const std::string & fileName, const uint32_t fileIdx) { 332 328 uint64_t grepResult = GrepEngine::doGrep(fileName, fileIdx); 333 if (WithFilenameFlag) *mResultStrs[fileIdx] << linePrefix(fileName);334 *mResultStrs[fileIdx] << grepResult << "\n";329 if (WithFilenameFlag) mResultStrs[fileIdx] << linePrefix(fileName); 330 mResultStrs[fileIdx] << grepResult << "\n"; 335 331 return grepResult; 336 332 } … … 348 344 uint64_t grepResult = GrepEngine::doGrep(fileName, fileIdx); 349 345 if (grepResult == mRequiredCount) { 350 *mResultStrs[fileIdx] << linePrefix(fileName);346 mResultStrs[fileIdx] << linePrefix(fileName); 351 347 } 352 348 return grepResult; … … 357 353 auto f = reinterpret_cast<GrepFunctionType>(mGrepDriver->getMain()); 358 354 359 int32_t fileDescriptor = openFile(fileName, mResultStrs[fileIdx] .get());355 int32_t fileDescriptor = openFile(fileName, mResultStrs[fileIdx]); 360 356 if (fileDescriptor == -1) return 0; 361 EmitMatch accum(linePrefix(fileName), mResultStrs[fileIdx] .get());357 EmitMatch accum(linePrefix(fileName), mResultStrs[fileIdx]); 362 358 f(fileDescriptor, reinterpret_cast<intptr_t>(&accum)); 363 359 close(fileDescriptor); … … 367 363 368 364 // Open a file and return its file desciptor. 369 int32_t GrepEngine::openFile(const std::string & fileName, std:: stringstream *msgstrm) {365 int32_t GrepEngine::openFile(const std::string & fileName, std::ostringstream & msgstrm) { 370 366 if (fileName == "-") { 371 367 return STDIN_FILENO; … … 377 373 if (!NoMessagesFlag) { 378 374 if (errno == EACCES) { 379 *msgstrm << "icgrep: " << fileName << ": Permission denied.\n";375 msgstrm << "icgrep: " << fileName << ": Permission denied.\n"; 380 376 } 381 377 else if (errno == ENOENT) { 382 *msgstrm << "icgrep: " << fileName << ": No such file.\n";378 msgstrm << "icgrep: " << fileName << ": No such file.\n"; 383 379 } 384 380 else { 385 *msgstrm << "icgrep: " << fileName << ": Failed.\n";381 msgstrm << "icgrep: " << fileName << ": Failed.\n"; 386 382 } 387 383 } … … 390 386 if (stat(fileName.c_str(), &sb) == 0 && S_ISDIR(sb.st_mode)) { 391 387 if (!NoMessagesFlag) { 392 *msgstrm << "icgrep: " << fileName << ": Is a directory.\n";388 msgstrm << "icgrep: " << fileName << ": Is a directory.\n"; 393 389 } 394 390 close(fileDescriptor); … … 433 429 void * GrepEngine::DoGrepThreadMethod() { 434 430 435 autofileIdx = mNextFileToGrep++;431 unsigned fileIdx = mNextFileToGrep++; 436 432 while (fileIdx < inputFiles.size()) { 437 const size_tgrepResult = doGrep(inputFiles[fileIdx], fileIdx);433 const auto grepResult = doGrep(inputFiles[fileIdx], fileIdx); 438 434 mFileStatus[fileIdx] = FileStatus::GrepComplete; 439 435 if (grepResult > 0) { … … 449 445 } 450 446 451 autoprintIdx = mNextFileToPrint++;447 unsigned printIdx = mNextFileToPrint++; 452 448 while (printIdx < inputFiles.size()) { 453 449 const bool readyToPrint = ((printIdx == 0) || (mFileStatus[printIdx - 1] == FileStatus::PrintComplete)) && (mFileStatus[printIdx] == FileStatus::GrepComplete); 454 450 if (readyToPrint) { 455 const auto output = mResultStrs[printIdx] ->str();451 const auto output = mResultStrs[printIdx].str(); 456 452 if (!output.empty()) { 457 mWriteMutex.lock(); 458 std::cout << output; 459 mWriteMutex.unlock(); 453 llvm::outs() << output; 460 454 } 461 455 mFileStatus[printIdx] = FileStatus::PrintComplete; 462 456 printIdx = mNextFileToPrint++; 463 457 } else { 464 mCacheMutex.lock();465 458 mGrepDriver->performIncrementalCacheCleanupStep(); 466 mCacheMutex.unlock();467 459 } 468 460 sched_yield(); -
icGREP/icgrep-devel/icgrep/grep_engine.h
r5761 r5771 9 9 #include <kernels/streamset.h> 10 10 #include <toolchain/grep_pipeline.h> 11 #include <string> // for string11 #include <string> 12 12 #include <vector> 13 13 #include <sstream> 14 #include <mutex>15 14 #include <atomic> 16 15 … … 40 39 virtual uint64_t doGrep(const std::string & fileName, const uint32_t fileIdx); 41 40 std::string linePrefix(std::string fileName); 42 int32_t openFile(const std::string & fileName, std:: stringstream *msgstrm);41 int32_t openFile(const std::string & fileName, std::ostringstream & msgstrm); 43 42 44 43 Driver * mGrepDriver; … … 47 46 std::atomic<unsigned> mNextFileToPrint; 48 47 std::vector<std::string> inputFiles; 49 std::vector<std:: unique_ptr<std::stringstream>> mResultStrs;48 std::vector<std::ostringstream> mResultStrs; 50 49 std::vector<FileStatus> mFileStatus; 51 std::mutex mWriteMutex; 52 std::mutex mCacheMutex; 50 bool grepMatchFound; 53 51 54 55 bool grepMatchFound;56 57 52 std::string mFileSuffix; 58 53 bool mMoveMatchesToEOL; -
icGREP/icgrep-devel/icgrep/kernels/kernel.cpp
r5761 r5771 680 680 // Define and allocate the temporary buffer area in the prolog. 681 681 const auto blockAlignment = b->getBitBlockWidth() / 8; 682 Value* temporaryInputBuffer[inputSetCount];682 AllocaInst * temporaryInputBuffer[inputSetCount]; 683 683 for (unsigned i = 0; i < inputSetCount; ++i) { 684 685 // TODO: if this is a fixed rate input stream and the pipeline guarantees it will not call the kernel unless 686 // there is sufficient input and all buffers will be sized sufficiently for the input, we ought to be able to 687 // avoid the temporary buffer checks. 688 689 const ProcessingRate & rate = mStreamSetInputs[i].getRate(); 690 Type * const ty = mStreamSetInputBuffers[i]->getStreamSetBlockType(); 691 const auto ub = getUpperBound(rate); 692 if (ub.numerator() == 0) { 684 const auto & input = mStreamSetInputs[i]; 685 const ProcessingRate & rate = input.getRate(); 686 if (isTransitivelyUnknownRate(rate)) { 693 687 report_fatal_error("MultiBlock kernels do not support unknown rate input streams or streams relative to an unknown rate input."); 694 } else { 695 temporaryInputBuffer[i] = b->CreateAlignedAlloca(ty, blockAlignment, b->getSize(roundUp(ub))); 696 Type * const sty = temporaryInputBuffer[i]->getType()->getPointerElementType(); 697 b->CreateStore(Constant::getNullValue(sty), temporaryInputBuffer[i]); 698 } 699 } 700 701 Value * temporaryOutputBuffer[outputSetCount]; 688 } else if (rate.isFixed() && input.nonDeferred() && !requiresBufferedFinalStride(input)) { 689 temporaryInputBuffer[i] = nullptr; 690 } else { 691 Type * const ty = mStreamSetInputBuffers[i]->getStreamSetBlockType(); 692 const auto ub = getUpperBound(rate); 693 Constant * arraySize = b->getInt64(roundUp(ub)); 694 AllocaInst * const ptr = b->CreateAlignedAlloca(ty, blockAlignment, arraySize); 695 assert (ptr->isStaticAlloca()); 696 temporaryInputBuffer[i] = ptr; 697 } 698 } 699 700 AllocaInst * temporaryOutputBuffer[outputSetCount]; 702 701 for (unsigned i = 0; i < outputSetCount; i++) { 703 const ProcessingRate & rate = mStreamSetOutputs[i].getRate();704 Type * const ty = mStreamSetOutputBuffers[i]->getStreamSetBlockType();705 if (LLVM_UNLIKELY(isTransitivelyUnknownRate(rate) )) {702 const auto & output = mStreamSetOutputs[i]; 703 const ProcessingRate & rate = output.getRate(); 704 if (LLVM_UNLIKELY(isTransitivelyUnknownRate(rate) || (rate.isFixed() && output.nonDeferred() && !requiresBufferedFinalStride(output)))) { 706 705 temporaryOutputBuffer[i] = nullptr; 707 706 } else { … … 710 709 ub += mStreamSetOutputBuffers[i]->overflowSize(); 711 710 } 712 temporaryOutputBuffer[i] = b->CreateAlignedAlloca(ty, blockAlignment, b->getSize(roundUp(ub))); 713 Type * const sty = temporaryOutputBuffer[i]->getType()->getPointerElementType(); 714 b->CreateStore(Constant::getNullValue(sty), temporaryOutputBuffer[i]); 711 Type * const ty = mStreamSetOutputBuffers[i]->getStreamSetBlockType(); 712 Constant * arraySize = b->getInt64(roundUp(ub)); 713 AllocaInst * const ptr = b->CreateAlignedAlloca(ty, blockAlignment, arraySize); 714 assert (ptr->isStaticAlloca()); 715 temporaryOutputBuffer[i] = ptr; 715 716 } 716 717 } … … 751 752 Value * const ic = b->getProcessedItemCount(name); 752 753 mInitialProcessedItemCount[i] = ic; 753 b->CreateAssert(b->CreateICmpUGE(mAvailableItemCount[i], ic), "processed item count cannot exceed the available item count"); 754 if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) { 755 b->CreateAssert(b->CreateICmpUGE(mAvailableItemCount[i], ic), 756 "processed item count cannot exceed the available item count"); 757 } 754 758 assert (ic->getType() == mAvailableItemCount[i]->getType()); 755 759 Value * const unprocessed = b->CreateSub(mAvailableItemCount[i], ic); 756 757 mStreamSetInputBaseAddress[i] = b->getBlockAddress(name, b->CreateLShr(ic, LOG_2_BLOCK_WIDTH)); 760 Value * baseBuffer = b->getBlockAddress(name, b->CreateLShr(ic, LOG_2_BLOCK_WIDTH)); 758 761 mInitialAvailableItemCount[i] = mAvailableItemCount[i]; 759 762 mAvailableItemCount[i] = b->getLinearlyAccessibleItems(name, ic, unprocessed); … … 761 764 // Are our linearly accessible items sufficient for a stride? 762 765 inputStrideSize[i] = getStrideSize(b, rate); 763 764 766 Value * accessibleStrides = b->CreateUDiv(mAvailableItemCount[i], inputStrideSize[i]); 765 if (!rate.isFixed() || (requiresBufferedFinalStride(input) && input.nonDeferred())) { 767 AllocaInst * const tempBuffer = temporaryInputBuffer[i]; 768 if (tempBuffer) { 766 769 767 770 // Since we trust that the pipeline won't call this kernel unless there is enough data to process a stride, whenever … … 777 780 b->SetInsertPoint(copyFromBack); 778 781 Value * const temporaryAvailable = b->CreateUMin(unprocessed, inputStrideSize[i]); 779 780 b->CreateAssert(b->CreateICmpULE(mAvailableItemCount[i], temporaryAvailable), "linearly available cannot be greater than temporarily available"); 781 Value * const tempBufferPtr = temporaryInputBuffer[i]; 782 if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) { 783 b->CreateAssert(b->CreateICmpULE(mAvailableItemCount[i], temporaryAvailable), 784 "linearly available cannot be greater than temporarily available"); 785 } 782 786 Value * const offset = b->CreateAnd(ic, BLOCK_WIDTH_MASK); 787 Value * const bufferSize = b->CreateMul(ConstantExpr::getSizeOf(tempBuffer->getAllocatedType()), tempBuffer->getArraySize()); 788 b->CreateMemZero(tempBuffer, bufferSize, blockAlignment); 783 789 const auto copyAlignment = getItemAlignment(mStreamSetInputs[i]); 784 b->CreateMemZero(tempBufferPtr, ConstantExpr::getSizeOf(tempBufferPtr->getType()), blockAlignment); 785 b->CreateStreamCpy(name, tempBufferPtr, ZERO, mStreamSetInputBaseAddress[i] , offset, mAvailableItemCount[i], copyAlignment); 790 b->CreateStreamCpy(name, tempBuffer, ZERO, baseBuffer, offset, mAvailableItemCount[i], copyAlignment); 786 791 Value * const temporaryStrides = b->CreateSelect(b->CreateICmpULT(unprocessed, inputStrideSize[i]), ZERO, ONE); 787 792 BasicBlock * const copyToBackEnd = b->GetInsertBlock(); … … 791 796 Value * const remaining = b->CreateSub(temporaryAvailable, mAvailableItemCount[i]); 792 797 Value * const baseAddress = b->getBaseAddress(name); 793 b->CreateStreamCpy(name, tempBuffer Ptr, mAvailableItemCount[i], baseAddress, ZERO, remaining, copyAlignment);798 b->CreateStreamCpy(name, tempBuffer, mAvailableItemCount[i], baseAddress, ZERO, remaining, copyAlignment); 794 799 BasicBlock * const copyToFrontEnd = b->GetInsertBlock(); 795 800 b->CreateBr(resume); 796 801 797 802 b->SetInsertPoint(resume); 798 PHINode * const bufferPtr = b->CreatePHI( mStreamSetInputBaseAddress[i]->getType(), 3);799 bufferPtr->addIncoming( mStreamSetInputBaseAddress[i], entry);800 bufferPtr->addIncoming(tempBuffer Ptr, copyToBackEnd);801 bufferPtr->addIncoming(tempBuffer Ptr, copyToFrontEnd);802 mStreamSetInputBaseAddress[i]= bufferPtr;803 PHINode * const bufferPtr = b->CreatePHI(baseBuffer->getType(), 3); 804 bufferPtr->addIncoming(baseBuffer , entry); 805 bufferPtr->addIncoming(tempBuffer, copyToBackEnd); 806 bufferPtr->addIncoming(tempBuffer, copyToFrontEnd); 807 baseBuffer = bufferPtr; 803 808 804 809 PHINode * const phiAvailItemCount = b->CreatePHI(b->getSizeTy(), 3); … … 808 813 mAvailableItemCount[i] = phiAvailItemCount; 809 814 810 PHINode * const phiNumOfStrides = b->CreatePHI(b->getSizeTy(), 2); 811 phiNumOfStrides->addIncoming(accessibleStrides, entry); 812 phiNumOfStrides->addIncoming(temporaryStrides, copyToBackEnd); 813 phiNumOfStrides->addIncoming(temporaryStrides, copyToFrontEnd); 814 accessibleStrides = phiNumOfStrides; 815 } 815 PHINode * const phiStrides = b->CreatePHI(b->getSizeTy(), 2); 816 phiStrides->addIncoming(accessibleStrides, entry); 817 phiStrides->addIncoming(temporaryStrides, copyToBackEnd); 818 phiStrides->addIncoming(temporaryStrides, copyToFrontEnd); 819 accessibleStrides = phiStrides; 820 } 821 822 mStreamSetInputBaseAddress[i] = baseBuffer; 816 823 numOfStrides = b->CreateUMin(numOfStrides, accessibleStrides); 817 824 } … … 819 826 // Now determine the linearly writeable strides 820 827 Value * linearlyWritable[outputSetCount]; 821 Value * baseOutputBuffer[outputSetCount];822 828 Value * outputStrideSize[outputSetCount]; 823 829 mInitialProducedItemCount.resize(outputSetCount); … … 828 834 const ProcessingRate & rate = output.getRate(); 829 835 Value * const ic = b->getProducedItemCount(name); 830 baseOutputBuffer[i] = b->getBlockAddress(name, b->CreateLShr(ic, LOG_2_BLOCK_WIDTH)); 831 assert (baseOutputBuffer[i]->getType()->isPointerTy()); 832 linearlyWritable[i] = b->getLinearlyWritableItems(name, ic); 836 Value * baseBuffer = b->getBlockAddress(name, b->CreateLShr(ic, LOG_2_BLOCK_WIDTH)); 837 assert (baseBuffer->getType()->isPointerTy()); 838 linearlyWritable[i] = b->getLinearlyWritableItems(name, ic); 839 outputStrideSize[i] = getStrideSize(b, rate); 840 // Is the number of linearly writable items sufficient for a stride? 841 if (outputStrideSize[i]) { 842 AllocaInst * const tempBuffer = temporaryOutputBuffer[i]; 843 Value * writableStrides = b->CreateUDiv(linearlyWritable[i], outputStrideSize[i]); 844 // Do we require a temporary buffer to write to? 845 if (tempBuffer) { 846 assert (tempBuffer->getType() == baseBuffer->getType()); 847 BasicBlock * const entry = b->GetInsertBlock(); 848 BasicBlock * const useTemporary = b->CreateBasicBlock(name + "UseTemporary"); 849 BasicBlock * const resume = b->CreateBasicBlock(name + "Resume"); 850 Value * const requiresCopy = b->CreateICmpEQ(writableStrides, ZERO); 851 852 b->CreateUnlikelyCondBr(requiresCopy, useTemporary, resume); 853 854 // Clear the buffer after use since we may end up reusing it within the same stride 855 b->SetInsertPoint(useTemporary); 856 Value * const bufferSize = b->CreateMul(ConstantExpr::getSizeOf(tempBuffer->getAllocatedType()), tempBuffer->getArraySize()); 857 b->CreateMemZero(tempBuffer, bufferSize, blockAlignment); 858 b->CreateBr(resume); 859 860 b->SetInsertPoint(resume); 861 PHINode * const phiBuffer = b->CreatePHI(baseBuffer->getType(), 3); 862 phiBuffer->addIncoming(baseBuffer, entry); 863 phiBuffer->addIncoming(tempBuffer, useTemporary); 864 baseBuffer = phiBuffer; 865 PHINode * const phiStrides = b->CreatePHI(b->getSizeTy(), 2); 866 phiStrides->addIncoming(writableStrides, entry); 867 phiStrides->addIncoming(ONE, useTemporary); 868 writableStrides = phiStrides; 869 870 } 871 numOfStrides = b->CreateUMin(numOfStrides, writableStrides); 872 } 833 873 mInitialProducedItemCount[i] = ic; 834 outputStrideSize[i] = nullptr; 835 if (temporaryOutputBuffer[i]) { 836 outputStrideSize[i] = getStrideSize(b, rate); 837 // Is the number of linearly writable items sufficient for a stride? 838 Value * writableStrides = b->CreateUDiv(linearlyWritable[i], outputStrideSize[i]); 839 if (!rate.isFixed() || requiresBufferedFinalStride(output)) { 840 Value * const requiresCopy = b->CreateICmpEQ(writableStrides, ZERO); 841 assert (temporaryOutputBuffer[i]->getType() == baseOutputBuffer[i]->getType()); 842 baseOutputBuffer[i] = b->CreateSelect(requiresCopy, temporaryOutputBuffer[i], baseOutputBuffer[i]); 843 writableStrides = b->CreateSelect(requiresCopy, ONE, writableStrides); 844 } 845 numOfStrides = b->CreateUMin(numOfStrides, writableStrides); 846 assert (temporaryOutputBuffer[i]->getType() == baseOutputBuffer[i]->getType()); 847 } 848 mStreamSetOutputBaseAddress[i] = baseOutputBuffer[i]; 874 mStreamSetOutputBaseAddress[i] = baseBuffer; 849 875 } 850 876 … … 854 880 if (LLVM_LIKELY(numOfStrides != nullptr)) { 855 881 mIsFinal = b->CreateAnd(mIsFinal, b->CreateICmpEQ(numOfStrides, ZERO)); 856 Value * const hasStride = b->CreateOr(b->CreateICmpNE(numOfStrides, ZERO), mIsFinal); 857 b->CreateAssert(hasStride, getName() + " has insufficient input data or output space for one stride"); 882 if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) { 883 Value * const hasStride = b->CreateOr(b->CreateICmpNE(numOfStrides, ZERO), mIsFinal); 884 b->CreateAssert(hasStride, getName() + " has insufficient input data or output space for one stride"); 885 } 858 886 for (unsigned i = 0; i < inputSetCount; ++i) { 859 887 const ProcessingRate & rate = mStreamSetInputs[i].getRate(); … … 908 936 // Copy back data to the actual output buffers. 909 937 for (unsigned i = 0; i < outputSetCount; i++) { 910 Value* const tempBuffer = temporaryOutputBuffer[i];938 AllocaInst * const tempBuffer = temporaryOutputBuffer[i]; 911 939 if (LLVM_UNLIKELY(tempBuffer == nullptr)) { 912 940 continue; 913 941 } 914 Value * const baseBuffer = baseOutputBuffer[i];942 Value * const baseBuffer = mStreamSetOutputBaseAddress[i]; 915 943 assert ("stack corruption likely" && (tempBuffer->getType() == baseBuffer->getType())); 916 944 const auto & name = mStreamSetOutputs[i].getName(); 917 945 BasicBlock * const copyToBack = b->CreateBasicBlock(name + "CopyToBack"); 918 946 BasicBlock * const copyToFront = b->CreateBasicBlock(name + "CopyToFront"); 919 BasicBlock * const clearBuffer = b->CreateBasicBlock(name + "ClearBuffer");920 947 BasicBlock * const resume = b->CreateBasicBlock(name + "ResumeCopyBack"); 921 948 // If we used a temporary buffer, copy it back to the original output buffer … … 930 957 b->CreateStreamCpy(name, baseBuffer, offset, tempBuffer, ZERO, toWrite, alignment); 931 958 // If we required a temporary output buffer, we will probably need to write to the beginning of the buffer as well. 932 b->CreateLikelyCondBr(b->CreateICmpULT(toWrite, newlyProduced), copyToFront, clearBuffer);959 b->CreateLikelyCondBr(b->CreateICmpULT(toWrite, newlyProduced), copyToFront, resume); 933 960 934 961 b->SetInsertPoint(copyToFront); … … 936 963 Value * const baseAddress = b->getBaseAddress(name); 937 964 b->CreateStreamCpy(name, baseAddress, ZERO, tempBuffer, toWrite, remaining, alignment); 938 b->CreateBr(clearBuffer);939 // Clear the buffer after use since we may end up reusing it within the same stride940 b->SetInsertPoint(clearBuffer);941 942 965 b->CreateBr(resume); 943 966 … … 970 993 Value * const avail = mInitialAvailableItemCount[i]; 971 994 Value * const processed = b->getProcessedItemCount(name); 972 b->CreateAssert(b->CreateICmpULE(processed, avail), name + ": processed data cannot exceed available data"); 995 if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) { 996 b->CreateAssert(b->CreateICmpULE(processed, avail), name + ": processed data cannot exceed available data"); 997 } 973 998 Value * const remaining = b->CreateSub(avail, processed); 974 999 Value * const remainingStrides = b->CreateUDiv(remaining, inputStrideSize[i]); … … 987 1012 if (LLVM_LIKELY(outputStrideSize[i] != nullptr)) { 988 1013 Value * const consumed = b->getConsumedItemCount(name); 989 b->CreateAssert(b->CreateICmpULE(consumed, produced), name + ": consumed data cannot exceed produced data"); 1014 if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) { 1015 b->CreateAssert(b->CreateICmpULE(consumed, produced), name + ": consumed data cannot exceed produced data"); 1016 } 990 1017 Value * const unconsumed = b->CreateSub(produced, consumed); 991 1018 Value * const capacity = b->getCapacity(name); 992 b->CreateAssert(b->CreateICmpULE(unconsumed, capacity), name + ": unconsumed data cannot exceed capacity"); 1019 if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) { 1020 b->CreateAssert(b->CreateICmpULE(unconsumed, capacity), name + ": unconsumed data cannot exceed capacity"); 1021 } 993 1022 Value * const remaining = b->CreateSub(capacity, unconsumed); 994 1023 Value * const remainingStrides = b->CreateUDiv(remaining, outputStrideSize[i]); … … 1181 1210 BasicBlock * const doFinalBlock = b->CreateBasicBlock(getName() + "_doFinalBlock"); 1182 1211 BasicBlock * const segmentDone = b->CreateBasicBlock(getName() + "_segmentDone"); 1183 b->CreateAssert(b->CreateXor(b->CreateIsNotNull(numOfBlocks), mIsFinal), 1184 "numOfStrides cannot be 0 unless this is the final stride and must be 0 if it is"); 1212 if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) { 1213 b->CreateAssert(b->CreateXor(b->CreateIsNotNull(numOfBlocks), mIsFinal), 1214 "numOfStrides cannot be 0 unless this is the final stride and must be 0 if it is"); 1215 } 1185 1216 const auto inputSetCount = mStreamSetInputs.size(); 1186 1217 Value * baseProcessedIndex[inputSetCount]; -
icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.cpp
r5755 r5771 2 2 #include <kernels/streamset.h> 3 3 #include <kernels/kernel_builder.h> 4 #include <toolchain/toolchain.h> 5 #include <llvm/Support/Compiler.h> 4 6 5 7 namespace llvm { class Value; } … … 131 133 b->CreateAlignedStore(merge0, b->CreateBitCast(b->CreateGEP(outputPtr, offset), bitBlockPtrTy), 1); 132 134 Value * const nextOffset1 = b->CreateZExt(b->CreateExtractElement(unitCounts, b->getInt32(2 * j)), i32Ty); 133 b->CreateAssert(b->CreateICmpULE(offset, nextOffset1), "deletion offset is not monotonically non-decreasing"); 134 135 if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) { 136 b->CreateAssert(b->CreateICmpULE(offset, nextOffset1), "deletion offset is not monotonically non-decreasing"); 137 } 135 138 Value * const merge1 = b->bitCast(b->esimd_mergeh(8, hi_bytes[j], lo_bytes[j])); 136 139 b->CreateAlignedStore(merge1, b->CreateBitCast(b->CreateGEP(outputPtr, nextOffset1), bitBlockPtrTy), 1); 137 140 Value * const nextOffset2 = b->CreateZExt(b->CreateExtractElement(unitCounts, b->getInt32(2 * j + 1)), i32Ty); 138 b->CreateAssert(b->CreateICmpULE(nextOffset1, nextOffset2), "deletion offset is not monotonically non-decreasing"); 139 141 if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) { 142 b->CreateAssert(b->CreateICmpULE(nextOffset1, nextOffset2), "deletion offset is not monotonically non-decreasing"); 143 } 140 144 offset = nextOffset2; 141 145 } -
icGREP/icgrep-devel/icgrep/kernels/source_kernel.cpp
r5761 r5771 232 232 Value * const bytesRead = b->CreateReadCall(fd, sourceBuffer, bytesToRead); 233 233 Value * const itemsRead = b->CreateUDiv(bytesRead, codeUnitBytes); 234 b->CreateAssert(b->CreateICmpULE(itemsRead, itemsToRead), "read more items than expected");235 234 Value * const itemsBuffered = b->CreateAdd(buffered, itemsRead); 236 235 b->setBufferedSize("sourceBuffer", itemsBuffered); -
icGREP/icgrep-devel/icgrep/pablo/carry_manager.cpp
r5733 r5771 429 429 b->SetInsertPoint(resume); 430 430 431 if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) { 432 b->CreateAssertZero(b->CreateOr(finalBorrow, finalCarry), 433 "CarryManager: loop post-condition violated: final borrow and carry must be zero!"); 434 } 431 b->CreateAssertZero(b->CreateOr(finalBorrow, finalCarry), 432 "CarryManager: loop post-condition violated: final borrow and carry must be zero!"); 435 433 436 434 assert (!mLoopIndicies.empty()); -
icGREP/icgrep-devel/icgrep/pablo/carrypack_manager.cpp
r5733 r5771 452 452 iBuilder->SetInsertPoint(resume); 453 453 454 if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) { 455 iBuilder->CreateAssertZero(iBuilder->CreateOr(finalBorrow, finalCarry), 456 "CarryPackManager: loop post-condition violated: final borrow and carry must be zero!"); 457 } 454 iBuilder->CreateAssertZero(iBuilder->CreateOr(finalBorrow, finalCarry), 455 "CarryPackManager: loop post-condition violated: final borrow and carry must be zero!"); 458 456 459 457 assert (!mLoopIndicies.empty()); -
icGREP/icgrep-devel/icgrep/toolchain/object_cache.cpp
r5761 r5771 119 119 if (kernel->hasSignature()) { 120 120 const MDString * const sig = getSignature(M.get()); 121 assert ("signature is missing from kernel file: possible module naming conflict ?" && sig);121 assert ("signature is missing from kernel file: possible module naming conflict or change in the LLVM metadata storage policy?" && sig); 122 122 if (LLVM_UNLIKELY(sig == nullptr || !sig->getString().equals(kernel->makeSignature(idb)))) { 123 123 goto invalid; … … 200 200 201 201 void ParabixObjectCache::performIncrementalCacheCleanupStep() { 202 if (mCacheCleanupIterator != fs::directory_iterator()) { 203 const auto e = mCacheCleanupIterator->path(); 204 mCacheCleanupIterator++; 202 mCleanupMutex.lock(); 203 if (LLVM_UNLIKELY(mCleanupIterator == fs::directory_iterator())) { 204 mCleanupMutex.unlock(); 205 } else { 206 const auto e = mCleanupIterator->path(); 207 mCleanupIterator++; 208 mCleanupMutex.unlock(); 209 205 210 // Simple clean-up policy: files that haven't been touched by the 206 211 // driver in MaxCacheEntryHours are deleted. … … 232 237 sys::fs::create_directories(mCachePath); 233 238 } 234 fs::directory_iterator it(p); 235 mCacheCleanupIterator = it; 239 mCleanupIterator = fs::directory_iterator(p); 236 240 } 237 241 -
icGREP/icgrep-devel/icgrep/toolchain/object_cache.h
r5761 r5771 15 15 #include <vector> 16 16 #include <string> 17 #include <mutex> 17 18 18 19 namespace llvm { class Module; } … … 48 49 void performIncrementalCacheCleanupStep(); 49 50 private: 51 std::mutex mCleanupMutex; 52 boost::filesystem::directory_iterator mCleanupIterator; 50 53 ModuleCache mCachedObject; 51 54 const Path mCachePath; 52 boost::filesystem::directory_iterator mCacheCleanupIterator;53 55 }; 54 56 -
icGREP/icgrep-devel/icgrep/toolchain/toolchain.cpp
r5759 r5771 123 123 TargetOptions Options; 124 124 125 const cl::OptionCategory * codegen_flags() {125 const cl::OptionCategory * LLVM_READONLY codegen_flags() { 126 126 return &CodeGenOptions; 127 127 } 128 128 129 bool DebugOptionIsSet(const DebugFlags flag) {129 bool LLVM_READONLY DebugOptionIsSet(const DebugFlags flag) { 130 130 return DebugOptions.isSet(flag); 131 131 } -
icGREP/icgrep-devel/icgrep/toolchain/toolchain.h
r5757 r5771 38 38 namespace codegen { 39 39 40 const llvm::cl::OptionCategory * codegen_flags();40 const llvm::cl::OptionCategory * LLVM_READONLY codegen_flags(); 41 41 42 42 // Command Parameters … … 52 52 }; 53 53 54 bool DebugOptionIsSet(const DebugFlags flag);54 bool LLVM_READONLY DebugOptionIsSet(const DebugFlags flag); 55 55 56 56 extern bool PipelineParallel;
Note: See TracChangeset
for help on using the changeset viewer.