Changeset 5771 for icGREP/icgrep-devel


Ignore:
Timestamp:
Dec 9, 2017, 5:05:16 PM (19 months ago)
Author:
nmedfort
Message:

Minor changes and hopefully a fix for bug exposed by base64 test

Location:
icGREP/icgrep-devel/icgrep
Files:
14 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.cpp

    r5761 r5771  
    9393    DataLayout DL(b->getModule());
    9494    IntegerType * const intPtrTy = cast<IntegerType>(DL.getIntPtrType(Ptr->getType()));
    95     Value * sz = ConstantExpr::getTrunc(ConstantExpr::getSizeOf(Base->getAllocatedType()), intPtrTy);
     95    Value * sz = ConstantExpr::getBitCast(ConstantExpr::getSizeOf(Base->getAllocatedType()), intPtrTy);
    9696    if (dyn_cast_or_null<Constant>(Base->getArraySize()) && !cast<Constant>(Base->getArraySize())->isNullValue()) {
    9797        sz = b->CreateMul(sz, b->CreateZExtOrTrunc(Base->getArraySize(), intPtrTy));
     
    966966#endif
    967967
    968 void CBuilder::__CreateAssert(Value * const assertion, const Twine failureMessage) {
     968void CBuilder::__CreateAssert(Value * const assertion, const Twine & failureMessage) {
    969969    if (LLVM_UNLIKELY(isa<Constant>(assertion))) {
    970970        if (LLVM_UNLIKELY(cast<Constant>(assertion)->isNullValue())) {
     
    10821082        IRBuilder<>::CreateCall(function, {assertion, GetString(failureMessage.toStringRef(tmp)), trace, depth});
    10831083    } else { // if assertions are not enabled, make it a compiler assumption.
    1084         IRBuilder<>::CreateAssumption(assertion);
     1084
     1085        // INVESTIGATE: while interesting, this does not seem to produce faster code and only provides a trivial reduction
     1086        // of compiled code size in LLVM 3.8 but nearly doubles compilation time. This may have been improved with later
     1087        // versions of LLVM but it's likely that assumptions ought to be hand placed once they're proven to improve performance.
     1088
     1089        // IRBuilder<>::CreateAssumption(assertion);
    10851090    }
    10861091}
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.h

    r5761 r5771  
    236236    virtual llvm::StoreInst *  CreateAtomicStoreRelease(llvm::Value * val, llvm::Value * ptr);
    237237
    238     void CreateAssert(llvm::Value * assertion, const llvm::Twine failureMessage) {
     238    void CreateAssert(llvm::Value * assertion, const llvm::Twine & failureMessage) {
    239239        if (LLVM_UNLIKELY(assertion->getType()->isVectorTy())) {
    240240            assertion = CreateBitCast(assertion, getIntNTy(assertion->getType()->getPrimitiveSizeInBits()));
     
    243243    }
    244244
    245     void CreateAssertZero(llvm::Value * assertion, const llvm::Twine failureMessage) {
     245    void CreateAssertZero(llvm::Value * assertion, const llvm::Twine & failureMessage) {
    246246        if (LLVM_UNLIKELY(assertion->getType()->isVectorTy())) {
    247247            assertion = CreateBitCast(assertion, getIntNTy(assertion->getType()->getPrimitiveSizeInBits()));
     
    355355    bool hasAddressSanitizer() const;
    356356
    357     void __CreateAssert(llvm::Value * assertion, const llvm::Twine failureMessage);
     357    void __CreateAssert(llvm::Value * assertion, const llvm::Twine & failureMessage);
    358358
    359359    llvm::Function * LinkFunction(llvm::StringRef name, llvm::FunctionType * type, void * functionPtr) const;
  • icGREP/icgrep-devel/icgrep/base64.cpp

    r5755 r5771  
    5353    Function * const main = cast<Function>(mod->getOrInsertFunction("Main", voidTy, int32Ty, outputType, nullptr));
    5454    main->setCallingConv(CallingConv::C);
    55     Function::arg_iterator args = main->arg_begin();
     55    auto args = main->arg_begin();
    5656   
    5757    Value * const fileDescriptor = &*(args++);
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5770 r5771  
    8585    const unsigned n = filenames.size();
    8686    mResultStrs.resize(n);
    87     mFileStatus.resize(n);
    88     for (unsigned i = 0; i < n; i++) {
    89         mResultStrs[i] = make_unique<std::stringstream>();
    90         mFileStatus[i] = FileStatus::Pending;
    91     }
     87    mFileStatus.resize(n, FileStatus::Pending);
    9288    inputFiles = filenames;
    9389}
     
    221217    friend class EmitMatchesEngine;
    222218public:
    223     EmitMatch(std::string linePrefix, std::stringstream * strm) : mLinePrefix(linePrefix), mLineCount(0), mTerminated(true), mResultStr(strm) {}
     219    EmitMatch(std::string linePrefix, std::ostringstream & strm) : mLinePrefix(linePrefix), mLineCount(0), mTerminated(true), mResultStr(strm) {}
    224220    void accumulate_match(const size_t lineNum, char * line_start, char * line_end) override;
    225221    void finalize_match(char * buffer_end) override;
     
    228224    size_t mLineCount;
    229225    bool mTerminated;
    230     std::stringstream* mResultStr;
     226    std::ostringstream & mResultStr;
    231227};
    232228
     
    237233void EmitMatch::accumulate_match (const size_t lineNum, char * line_start, char * line_end) {
    238234    if (WithFilenameFlag) {
    239         *mResultStr << mLinePrefix;
     235        mResultStr << mLinePrefix;
    240236    }
    241237    if (LineNumberFlag) {
     
    243239        // the line number so that lines are numbered from 1.
    244240        if (InitialTabFlag) {
    245             *mResultStr << lineNum+1 << "\t:";
     241            mResultStr << lineNum+1 << "\t:";
    246242        }
    247243        else {
    248             *mResultStr << lineNum+1 << ":";
     244            mResultStr << lineNum+1 << ":";
    249245        }
    250246    }
    251247    size_t bytes = line_end - line_start + 1;
    252     mResultStr->write(line_start, bytes);
     248    mResultStr.write(line_start, bytes);
    253249    mLineCount++;
    254250    unsigned last_byte = *line_end;
     
    268264
    269265void EmitMatch::finalize_match(char * buffer_end) {
    270     if (!mTerminated) *mResultStr << "\n";
     266    if (!mTerminated) mResultStr << "\n";
    271267}
    272268
     
    321317    auto f = reinterpret_cast<GrepFunctionType>(mGrepDriver->getMain());
    322318
    323     int32_t fileDescriptor = openFile(fileName, mResultStrs[fileIdx].get());
     319    int32_t fileDescriptor = openFile(fileName, mResultStrs[fileIdx]);
    324320    if (fileDescriptor == -1) return 0;
    325321
     
    331327uint64_t CountOnlyEngine::doGrep(const std::string & fileName, const uint32_t fileIdx) {
    332328    uint64_t grepResult = GrepEngine::doGrep(fileName, fileIdx);
    333     if (WithFilenameFlag) *mResultStrs[fileIdx] << linePrefix(fileName);
    334     *mResultStrs[fileIdx] << grepResult << "\n";
     329    if (WithFilenameFlag) mResultStrs[fileIdx] << linePrefix(fileName);
     330    mResultStrs[fileIdx] << grepResult << "\n";
    335331    return grepResult;
    336332}
     
    348344    uint64_t grepResult = GrepEngine::doGrep(fileName, fileIdx);
    349345    if (grepResult == mRequiredCount) {
    350        *mResultStrs[fileIdx] << linePrefix(fileName);
     346       mResultStrs[fileIdx] << linePrefix(fileName);
    351347    }
    352348    return grepResult;
     
    357353    auto f = reinterpret_cast<GrepFunctionType>(mGrepDriver->getMain());
    358354
    359     int32_t fileDescriptor = openFile(fileName, mResultStrs[fileIdx].get());
     355    int32_t fileDescriptor = openFile(fileName, mResultStrs[fileIdx]);
    360356    if (fileDescriptor == -1) return 0;
    361     EmitMatch accum(linePrefix(fileName), mResultStrs[fileIdx].get());
     357    EmitMatch accum(linePrefix(fileName), mResultStrs[fileIdx]);
    362358    f(fileDescriptor, reinterpret_cast<intptr_t>(&accum));
    363359    close(fileDescriptor);
     
    367363
    368364// Open a file and return its file descriptor.
    369 int32_t GrepEngine::openFile(const std::string & fileName, std::stringstream * msgstrm) {
     365int32_t GrepEngine::openFile(const std::string & fileName, std::ostringstream & msgstrm) {
    370366    if (fileName == "-") {
    371367        return STDIN_FILENO;
     
    377373            if (!NoMessagesFlag) {
    378374                if (errno == EACCES) {
    379                     *msgstrm << "icgrep: " << fileName << ": Permission denied.\n";
     375                    msgstrm << "icgrep: " << fileName << ": Permission denied.\n";
    380376                }
    381377                else if (errno == ENOENT) {
    382                     *msgstrm << "icgrep: " << fileName << ": No such file.\n";
     378                    msgstrm << "icgrep: " << fileName << ": No such file.\n";
    383379                }
    384380                else {
    385                     *msgstrm << "icgrep: " << fileName << ": Failed.\n";
     381                    msgstrm << "icgrep: " << fileName << ": Failed.\n";
    386382                }
    387383            }
     
    390386        if (stat(fileName.c_str(), &sb) == 0 && S_ISDIR(sb.st_mode)) {
    391387            if (!NoMessagesFlag) {
    392                 *msgstrm << "icgrep: " << fileName << ": Is a directory.\n";
     388                msgstrm << "icgrep: " << fileName << ": Is a directory.\n";
    393389            }
    394390            close(fileDescriptor);
     
    433429void * GrepEngine::DoGrepThreadMethod() {
    434430
    435     auto fileIdx = mNextFileToGrep++;
     431    unsigned fileIdx = mNextFileToGrep++;
    436432    while (fileIdx < inputFiles.size()) {
    437         const size_t grepResult = doGrep(inputFiles[fileIdx], fileIdx);
     433        const auto grepResult = doGrep(inputFiles[fileIdx], fileIdx);
    438434        mFileStatus[fileIdx] = FileStatus::GrepComplete;
    439435        if (grepResult > 0) {
     
    449445    }
    450446
    451     auto printIdx = mNextFileToPrint++;
     447    unsigned printIdx = mNextFileToPrint++;
    452448    while (printIdx < inputFiles.size()) {
    453449        const bool readyToPrint = ((printIdx == 0) || (mFileStatus[printIdx - 1] == FileStatus::PrintComplete)) && (mFileStatus[printIdx] == FileStatus::GrepComplete);
    454450        if (readyToPrint) {
    455             const auto output = mResultStrs[printIdx]->str();
     451            const auto output = mResultStrs[printIdx].str();
    456452            if (!output.empty()) {
    457                 mWriteMutex.lock();
    458                 std::cout << output;
    459                 mWriteMutex.unlock();
     453                llvm::outs() << output;
    460454            }
    461455            mFileStatus[printIdx] = FileStatus::PrintComplete;
    462456            printIdx = mNextFileToPrint++;
    463457        } else {
    464             mCacheMutex.lock();
    465458            mGrepDriver->performIncrementalCacheCleanupStep();
    466             mCacheMutex.unlock();
    467459        }
    468460        sched_yield();
  • icGREP/icgrep-devel/icgrep/grep_engine.h

    r5761 r5771  
    99#include <kernels/streamset.h>
    1010#include <toolchain/grep_pipeline.h>
    11 #include <string>       // for string
     11#include <string>
    1212#include <vector>
    1313#include <sstream>
    14 #include <mutex>
    1514#include <atomic>
    1615
     
    4039    virtual uint64_t doGrep(const std::string & fileName, const uint32_t fileIdx);
    4140    std::string linePrefix(std::string fileName);
    42     int32_t openFile(const std::string & fileName, std::stringstream * msgstrm);
     41    int32_t openFile(const std::string & fileName, std::ostringstream & msgstrm);
    4342
    4443    Driver * mGrepDriver;
     
    4746    std::atomic<unsigned> mNextFileToPrint;
    4847    std::vector<std::string> inputFiles;
    49     std::vector<std::unique_ptr<std::stringstream>> mResultStrs;
     48    std::vector<std::ostringstream> mResultStrs;
    5049    std::vector<FileStatus> mFileStatus;
    51     std::mutex mWriteMutex;
    52     std::mutex mCacheMutex;
     50    bool grepMatchFound;
    5351
    54 
    55     bool grepMatchFound;
    56    
    5752    std::string mFileSuffix;
    5853    bool mMoveMatchesToEOL;
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5761 r5771  
    680680    // Define and allocate the temporary buffer area in the prolog.
    681681    const auto blockAlignment = b->getBitBlockWidth() / 8;
    682     Value * temporaryInputBuffer[inputSetCount];
     682    AllocaInst * temporaryInputBuffer[inputSetCount];
    683683    for (unsigned i = 0; i < inputSetCount; ++i) {
    684 
    685         // TODO: if this is a fixed rate input stream and the pipeline guarantees it will not call the kernel unless
    686         // there is sufficient input and all buffers will be sized sufficiently for the input, we ought to be able to
    687         // avoid the temporary buffer checks.
    688 
    689         const ProcessingRate & rate = mStreamSetInputs[i].getRate();
    690         Type * const ty = mStreamSetInputBuffers[i]->getStreamSetBlockType();
    691         const auto ub = getUpperBound(rate);
    692         if (ub.numerator() == 0) {
     684        const auto & input = mStreamSetInputs[i];
     685        const ProcessingRate & rate = input.getRate();
     686        if (isTransitivelyUnknownRate(rate)) {
    693687            report_fatal_error("MultiBlock kernels do not support unknown rate input streams or streams relative to an unknown rate input.");
    694         } else {           
    695             temporaryInputBuffer[i] = b->CreateAlignedAlloca(ty, blockAlignment, b->getSize(roundUp(ub)));
    696             Type * const sty = temporaryInputBuffer[i]->getType()->getPointerElementType();
    697             b->CreateStore(Constant::getNullValue(sty), temporaryInputBuffer[i]);
    698         }       
    699     }
    700 
    701     Value * temporaryOutputBuffer[outputSetCount];
     688        } else if (rate.isFixed() && input.nonDeferred() && !requiresBufferedFinalStride(input)) {
     689            temporaryInputBuffer[i] = nullptr;
     690        } else {
     691            Type * const ty = mStreamSetInputBuffers[i]->getStreamSetBlockType();
     692            const auto ub = getUpperBound(rate);
     693            Constant * arraySize = b->getInt64(roundUp(ub));
     694            AllocaInst * const ptr = b->CreateAlignedAlloca(ty, blockAlignment, arraySize);
     695            assert (ptr->isStaticAlloca());
     696            temporaryInputBuffer[i] = ptr;
     697        }
     698    }
     699
     700    AllocaInst * temporaryOutputBuffer[outputSetCount];
    702701    for (unsigned i = 0; i < outputSetCount; i++) {
    703         const ProcessingRate & rate = mStreamSetOutputs[i].getRate();
    704         Type * const ty = mStreamSetOutputBuffers[i]->getStreamSetBlockType();
    705         if (LLVM_UNLIKELY(isTransitivelyUnknownRate(rate))) {
     702        const auto & output = mStreamSetOutputs[i];
     703        const ProcessingRate & rate = output.getRate();
     704        if (LLVM_UNLIKELY(isTransitivelyUnknownRate(rate) || (rate.isFixed() && output.nonDeferred() && !requiresBufferedFinalStride(output)))) {
    706705            temporaryOutputBuffer[i] = nullptr;
    707706        } else {           
     
    710709                ub += mStreamSetOutputBuffers[i]->overflowSize();
    711710            }
    712             temporaryOutputBuffer[i] = b->CreateAlignedAlloca(ty, blockAlignment, b->getSize(roundUp(ub)));
    713             Type * const sty = temporaryOutputBuffer[i]->getType()->getPointerElementType();
    714             b->CreateStore(Constant::getNullValue(sty), temporaryOutputBuffer[i]);
     711            Type * const ty = mStreamSetOutputBuffers[i]->getStreamSetBlockType();
     712            Constant * arraySize = b->getInt64(roundUp(ub));
     713            AllocaInst * const ptr = b->CreateAlignedAlloca(ty, blockAlignment, arraySize);
     714            assert (ptr->isStaticAlloca());
     715            temporaryOutputBuffer[i] = ptr;
    715716        }
    716717    }
     
    751752        Value * const ic = b->getProcessedItemCount(name);
    752753        mInitialProcessedItemCount[i] = ic;
    753         b->CreateAssert(b->CreateICmpUGE(mAvailableItemCount[i], ic), "processed item count cannot exceed the available item count");
     754        if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
     755            b->CreateAssert(b->CreateICmpUGE(mAvailableItemCount[i], ic),
     756                            "processed item count cannot exceed the available item count");
     757        }
    754758        assert (ic->getType() == mAvailableItemCount[i]->getType());
    755759        Value * const unprocessed = b->CreateSub(mAvailableItemCount[i], ic);
    756 
    757         mStreamSetInputBaseAddress[i]  = b->getBlockAddress(name, b->CreateLShr(ic, LOG_2_BLOCK_WIDTH));
     760        Value * baseBuffer  = b->getBlockAddress(name, b->CreateLShr(ic, LOG_2_BLOCK_WIDTH));
    758761        mInitialAvailableItemCount[i] = mAvailableItemCount[i];
    759762        mAvailableItemCount[i] = b->getLinearlyAccessibleItems(name, ic, unprocessed);
     
    761764        // Are our linearly accessible items sufficient for a stride?
    762765        inputStrideSize[i] = getStrideSize(b, rate);
    763 
    764766        Value * accessibleStrides = b->CreateUDiv(mAvailableItemCount[i], inputStrideSize[i]);
    765         if (!rate.isFixed() || (requiresBufferedFinalStride(input) && input.nonDeferred())) {
     767        AllocaInst * const tempBuffer = temporaryInputBuffer[i];
     768        if (tempBuffer) {
    766769
    767770            // Since we trust that the pipeline won't call this kernel unless there is enough data to process a stride, whenever
     
    777780            b->SetInsertPoint(copyFromBack);
    778781            Value * const temporaryAvailable = b->CreateUMin(unprocessed, inputStrideSize[i]);
    779 
    780             b->CreateAssert(b->CreateICmpULE(mAvailableItemCount[i], temporaryAvailable), "linearly available cannot be greater than temporarily available");
    781             Value * const tempBufferPtr = temporaryInputBuffer[i];
     782            if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
     783                b->CreateAssert(b->CreateICmpULE(mAvailableItemCount[i], temporaryAvailable),
     784                                "linearly available cannot be greater than temporarily available");
     785            }
    782786            Value * const offset = b->CreateAnd(ic, BLOCK_WIDTH_MASK);
     787            Value * const bufferSize = b->CreateMul(ConstantExpr::getSizeOf(tempBuffer->getAllocatedType()), tempBuffer->getArraySize());
     788            b->CreateMemZero(tempBuffer, bufferSize, blockAlignment);
    783789            const auto copyAlignment = getItemAlignment(mStreamSetInputs[i]);
    784             b->CreateMemZero(tempBufferPtr, ConstantExpr::getSizeOf(tempBufferPtr->getType()), blockAlignment);
    785             b->CreateStreamCpy(name, tempBufferPtr, ZERO, mStreamSetInputBaseAddress[i] , offset, mAvailableItemCount[i], copyAlignment);
     790            b->CreateStreamCpy(name, tempBuffer, ZERO, baseBuffer, offset, mAvailableItemCount[i], copyAlignment);
    786791            Value * const temporaryStrides = b->CreateSelect(b->CreateICmpULT(unprocessed, inputStrideSize[i]), ZERO, ONE);
    787792            BasicBlock * const copyToBackEnd = b->GetInsertBlock();
     
    791796            Value * const remaining = b->CreateSub(temporaryAvailable, mAvailableItemCount[i]);
    792797            Value * const baseAddress = b->getBaseAddress(name);
    793             b->CreateStreamCpy(name, tempBufferPtr, mAvailableItemCount[i], baseAddress, ZERO, remaining, copyAlignment);
     798            b->CreateStreamCpy(name, tempBuffer, mAvailableItemCount[i], baseAddress, ZERO, remaining, copyAlignment);
    794799            BasicBlock * const copyToFrontEnd = b->GetInsertBlock();
    795800            b->CreateBr(resume);
    796801
    797802            b->SetInsertPoint(resume);
    798             PHINode * const bufferPtr = b->CreatePHI(mStreamSetInputBaseAddress[i] ->getType(), 3);
    799             bufferPtr->addIncoming(mStreamSetInputBaseAddress[i] , entry);
    800             bufferPtr->addIncoming(tempBufferPtr, copyToBackEnd);
    801             bufferPtr->addIncoming(tempBufferPtr, copyToFrontEnd);
    802             mStreamSetInputBaseAddress[i] = bufferPtr;
     803            PHINode * const bufferPtr = b->CreatePHI(baseBuffer->getType(), 3);
     804            bufferPtr->addIncoming(baseBuffer , entry);
     805            bufferPtr->addIncoming(tempBuffer, copyToBackEnd);
     806            bufferPtr->addIncoming(tempBuffer, copyToFrontEnd);
     807            baseBuffer = bufferPtr;
    803808
    804809            PHINode * const phiAvailItemCount = b->CreatePHI(b->getSizeTy(), 3);
     
    808813            mAvailableItemCount[i] = phiAvailItemCount;
    809814
    810             PHINode * const phiNumOfStrides = b->CreatePHI(b->getSizeTy(), 2);
    811             phiNumOfStrides->addIncoming(accessibleStrides, entry);
    812             phiNumOfStrides->addIncoming(temporaryStrides, copyToBackEnd);
    813             phiNumOfStrides->addIncoming(temporaryStrides, copyToFrontEnd);
    814             accessibleStrides = phiNumOfStrides;
    815         }
     815            PHINode * const phiStrides = b->CreatePHI(b->getSizeTy(), 2);
     816            phiStrides->addIncoming(accessibleStrides, entry);
     817            phiStrides->addIncoming(temporaryStrides, copyToBackEnd);
     818            phiStrides->addIncoming(temporaryStrides, copyToFrontEnd);
     819            accessibleStrides = phiStrides;
     820        }
     821
     822        mStreamSetInputBaseAddress[i] = baseBuffer;
    816823        numOfStrides = b->CreateUMin(numOfStrides, accessibleStrides);
    817824    }
     
    819826    // Now determine the linearly writeable strides
    820827    Value * linearlyWritable[outputSetCount];
    821     Value * baseOutputBuffer[outputSetCount];
    822828    Value * outputStrideSize[outputSetCount];
    823829    mInitialProducedItemCount.resize(outputSetCount);
     
    828834        const ProcessingRate & rate = output.getRate();
    829835        Value * const ic = b->getProducedItemCount(name);
    830         baseOutputBuffer[i] = b->getBlockAddress(name, b->CreateLShr(ic, LOG_2_BLOCK_WIDTH));
    831         assert (baseOutputBuffer[i]->getType()->isPointerTy());
    832         linearlyWritable[i] = b->getLinearlyWritableItems(name, ic);
     836        Value * baseBuffer = b->getBlockAddress(name, b->CreateLShr(ic, LOG_2_BLOCK_WIDTH));
     837        assert (baseBuffer->getType()->isPointerTy());
     838        linearlyWritable[i] = b->getLinearlyWritableItems(name, ic);       
     839        outputStrideSize[i] = getStrideSize(b, rate);
     840        // Is the number of linearly writable items sufficient for a stride?
     841        if (outputStrideSize[i]) {
     842            AllocaInst * const tempBuffer = temporaryOutputBuffer[i];
     843            Value * writableStrides = b->CreateUDiv(linearlyWritable[i], outputStrideSize[i]);
     844            // Do we require a temporary buffer to write to?
     845            if (tempBuffer) {
     846                assert (tempBuffer->getType() == baseBuffer->getType());
     847                BasicBlock * const entry = b->GetInsertBlock();
     848                BasicBlock * const useTemporary = b->CreateBasicBlock(name + "UseTemporary");
     849                BasicBlock * const resume = b->CreateBasicBlock(name + "Resume");
     850                Value * const requiresCopy = b->CreateICmpEQ(writableStrides, ZERO);
     851
     852                b->CreateUnlikelyCondBr(requiresCopy, useTemporary, resume);
     853
     854                // Clear the buffer after use since we may end up reusing it within the same stride
     855                b->SetInsertPoint(useTemporary);
     856                Value * const bufferSize = b->CreateMul(ConstantExpr::getSizeOf(tempBuffer->getAllocatedType()), tempBuffer->getArraySize());
     857                b->CreateMemZero(tempBuffer, bufferSize, blockAlignment);
     858                b->CreateBr(resume);
     859
     860                b->SetInsertPoint(resume);
     861                PHINode * const phiBuffer = b->CreatePHI(baseBuffer->getType(), 3);
     862                phiBuffer->addIncoming(baseBuffer, entry);
     863                phiBuffer->addIncoming(tempBuffer, useTemporary);
     864                baseBuffer = phiBuffer;
     865                PHINode * const phiStrides = b->CreatePHI(b->getSizeTy(), 2);
     866                phiStrides->addIncoming(writableStrides, entry);
     867                phiStrides->addIncoming(ONE, useTemporary);
     868                writableStrides = phiStrides;
     869
     870            }
     871            numOfStrides = b->CreateUMin(numOfStrides, writableStrides);
     872        }
    833873        mInitialProducedItemCount[i] = ic;
    834         outputStrideSize[i] = nullptr;
    835         if (temporaryOutputBuffer[i]) {
    836             outputStrideSize[i] = getStrideSize(b, rate);
    837             // Is the number of linearly writable items sufficient for a stride?
    838             Value * writableStrides = b->CreateUDiv(linearlyWritable[i], outputStrideSize[i]);
    839             if (!rate.isFixed() || requiresBufferedFinalStride(output)) {
    840                 Value * const requiresCopy = b->CreateICmpEQ(writableStrides, ZERO);
    841                 assert (temporaryOutputBuffer[i]->getType() == baseOutputBuffer[i]->getType());
    842                 baseOutputBuffer[i] = b->CreateSelect(requiresCopy, temporaryOutputBuffer[i], baseOutputBuffer[i]);
    843                 writableStrides = b->CreateSelect(requiresCopy, ONE, writableStrides);
    844             }
    845             numOfStrides = b->CreateUMin(numOfStrides, writableStrides);
    846             assert (temporaryOutputBuffer[i]->getType() == baseOutputBuffer[i]->getType());
    847         }
    848         mStreamSetOutputBaseAddress[i] = baseOutputBuffer[i];
     874        mStreamSetOutputBaseAddress[i] = baseBuffer;
    849875    }
    850876
     
    854880    if (LLVM_LIKELY(numOfStrides != nullptr)) {
    855881        mIsFinal = b->CreateAnd(mIsFinal, b->CreateICmpEQ(numOfStrides, ZERO));
    856         Value * const hasStride = b->CreateOr(b->CreateICmpNE(numOfStrides, ZERO), mIsFinal);
    857         b->CreateAssert(hasStride, getName() + " has insufficient input data or output space for one stride");
     882        if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
     883            Value * const hasStride = b->CreateOr(b->CreateICmpNE(numOfStrides, ZERO), mIsFinal);
     884            b->CreateAssert(hasStride, getName() + " has insufficient input data or output space for one stride");
     885        }
    858886        for (unsigned i = 0; i < inputSetCount; ++i) {
    859887            const ProcessingRate & rate = mStreamSetInputs[i].getRate();
     
    908936    // Copy back data to the actual output buffers.
    909937    for (unsigned i = 0; i < outputSetCount; i++) {
    910         Value * const tempBuffer = temporaryOutputBuffer[i];
     938        AllocaInst * const tempBuffer = temporaryOutputBuffer[i];
    911939        if (LLVM_UNLIKELY(tempBuffer == nullptr)) {
    912940            continue;
    913941        }
    914         Value * const baseBuffer = baseOutputBuffer[i];
     942        Value * const baseBuffer = mStreamSetOutputBaseAddress[i];
    915943        assert ("stack corruption likely" && (tempBuffer->getType() == baseBuffer->getType()));
    916944        const auto & name = mStreamSetOutputs[i].getName();
    917945        BasicBlock * const copyToBack = b->CreateBasicBlock(name + "CopyToBack");
    918946        BasicBlock * const copyToFront = b->CreateBasicBlock(name + "CopyToFront");
    919         BasicBlock * const clearBuffer = b->CreateBasicBlock(name + "ClearBuffer");
    920947        BasicBlock * const resume = b->CreateBasicBlock(name + "ResumeCopyBack");
    921948        // If we used a temporary buffer, copy it back to the original output buffer
     
    930957        b->CreateStreamCpy(name, baseBuffer, offset, tempBuffer, ZERO, toWrite, alignment);
    931958        // If we required a temporary output buffer, we will probably need to write to the beginning of the buffer as well.
    932         b->CreateLikelyCondBr(b->CreateICmpULT(toWrite, newlyProduced), copyToFront, clearBuffer);
     959        b->CreateLikelyCondBr(b->CreateICmpULT(toWrite, newlyProduced), copyToFront, resume);
    933960
    934961        b->SetInsertPoint(copyToFront);
     
    936963        Value * const baseAddress = b->getBaseAddress(name);
    937964        b->CreateStreamCpy(name, baseAddress, ZERO, tempBuffer, toWrite, remaining, alignment);
    938         b->CreateBr(clearBuffer);
    939         // Clear the buffer after use since we may end up reusing it within the same stride
    940         b->SetInsertPoint(clearBuffer);
    941 
    942965        b->CreateBr(resume);
    943966
     
    970993        Value * const avail = mInitialAvailableItemCount[i];
    971994        Value * const processed = b->getProcessedItemCount(name);
    972         b->CreateAssert(b->CreateICmpULE(processed, avail), name + ": processed data cannot exceed available data");
     995        if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
     996            b->CreateAssert(b->CreateICmpULE(processed, avail), name + ": processed data cannot exceed available data");
     997        }
    973998        Value * const remaining = b->CreateSub(avail, processed);
    974999        Value * const remainingStrides = b->CreateUDiv(remaining, inputStrideSize[i]);
     
    9871012        if (LLVM_LIKELY(outputStrideSize[i] != nullptr)) {
    9881013            Value * const consumed = b->getConsumedItemCount(name);
    989             b->CreateAssert(b->CreateICmpULE(consumed, produced), name + ": consumed data cannot exceed produced data");
     1014            if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
     1015                b->CreateAssert(b->CreateICmpULE(consumed, produced), name + ": consumed data cannot exceed produced data");
     1016            }
    9901017            Value * const unconsumed = b->CreateSub(produced, consumed);
    9911018            Value * const capacity = b->getCapacity(name);
    992             b->CreateAssert(b->CreateICmpULE(unconsumed, capacity), name + ": unconsumed data cannot exceed capacity");
     1019            if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
     1020                b->CreateAssert(b->CreateICmpULE(unconsumed, capacity), name + ": unconsumed data cannot exceed capacity");
     1021            }
    9931022            Value * const remaining = b->CreateSub(capacity, unconsumed);
    9941023            Value * const remainingStrides = b->CreateUDiv(remaining, outputStrideSize[i]);
     
    11811210    BasicBlock * const doFinalBlock = b->CreateBasicBlock(getName() + "_doFinalBlock");
    11821211    BasicBlock * const segmentDone = b->CreateBasicBlock(getName() + "_segmentDone");
    1183     b->CreateAssert(b->CreateXor(b->CreateIsNotNull(numOfBlocks), mIsFinal),
    1184                     "numOfStrides cannot be 0 unless this is the final stride and must be 0 if it is");
     1212    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
     1213        b->CreateAssert(b->CreateXor(b->CreateIsNotNull(numOfBlocks), mIsFinal),
     1214                        "numOfStrides cannot be 0 unless this is the final stride and must be 0 if it is");
     1215    }
    11851216    const auto inputSetCount = mStreamSetInputs.size();
    11861217    Value * baseProcessedIndex[inputSetCount];
  • icGREP/icgrep-devel/icgrep/kernels/p2s_kernel.cpp

    r5755 r5771  
    22#include <kernels/streamset.h>
    33#include <kernels/kernel_builder.h>
     4#include <toolchain/toolchain.h>
     5#include <llvm/Support/Compiler.h>
    46
    57namespace llvm { class Value; }
     
    131133        b->CreateAlignedStore(merge0, b->CreateBitCast(b->CreateGEP(outputPtr, offset), bitBlockPtrTy), 1);
    132134        Value * const nextOffset1 = b->CreateZExt(b->CreateExtractElement(unitCounts, b->getInt32(2 * j)), i32Ty);
    133         b->CreateAssert(b->CreateICmpULE(offset, nextOffset1), "deletion offset is not monotonically non-decreasing");
    134 
     135        if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
     136            b->CreateAssert(b->CreateICmpULE(offset, nextOffset1), "deletion offset is not monotonically non-decreasing");
     137        }
    135138        Value * const merge1 = b->bitCast(b->esimd_mergeh(8, hi_bytes[j], lo_bytes[j]));
    136139        b->CreateAlignedStore(merge1, b->CreateBitCast(b->CreateGEP(outputPtr, nextOffset1), bitBlockPtrTy), 1);
    137140        Value * const nextOffset2 = b->CreateZExt(b->CreateExtractElement(unitCounts, b->getInt32(2 * j + 1)), i32Ty);
    138         b->CreateAssert(b->CreateICmpULE(nextOffset1, nextOffset2), "deletion offset is not monotonically non-decreasing");
    139 
     141        if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
     142            b->CreateAssert(b->CreateICmpULE(nextOffset1, nextOffset2), "deletion offset is not monotonically non-decreasing");
     143        }
    140144        offset = nextOffset2;
    141145    }
  • icGREP/icgrep-devel/icgrep/kernels/source_kernel.cpp

    r5761 r5771  
    232232    Value * const bytesRead = b->CreateReadCall(fd, sourceBuffer, bytesToRead);
    233233    Value * const itemsRead = b->CreateUDiv(bytesRead, codeUnitBytes);
    234     b->CreateAssert(b->CreateICmpULE(itemsRead, itemsToRead), "read more items than expected");
    235234    Value * const itemsBuffered = b->CreateAdd(buffered, itemsRead);
    236235    b->setBufferedSize("sourceBuffer", itemsBuffered);
  • icGREP/icgrep-devel/icgrep/pablo/carry_manager.cpp

    r5733 r5771  
    429429        b->SetInsertPoint(resume);
    430430
    431         if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
    432             b->CreateAssertZero(b->CreateOr(finalBorrow, finalCarry),
    433                                        "CarryManager: loop post-condition violated: final borrow and carry must be zero!");
    434         }
     431        b->CreateAssertZero(b->CreateOr(finalBorrow, finalCarry),
     432                                   "CarryManager: loop post-condition violated: final borrow and carry must be zero!");
    435433
    436434        assert (!mLoopIndicies.empty());
  • icGREP/icgrep-devel/icgrep/pablo/carrypack_manager.cpp

    r5733 r5771  
    452452        iBuilder->SetInsertPoint(resume);
    453453
    454         if (codegen::DebugOptionIsSet(codegen::EnableAsserts)) {
    455             iBuilder->CreateAssertZero(iBuilder->CreateOr(finalBorrow, finalCarry),
    456                                        "CarryPackManager: loop post-condition violated: final borrow and carry must be zero!");
    457         }
     454        iBuilder->CreateAssertZero(iBuilder->CreateOr(finalBorrow, finalCarry),
     455                                   "CarryPackManager: loop post-condition violated: final borrow and carry must be zero!");
    458456
    459457        assert (!mLoopIndicies.empty());
  • icGREP/icgrep-devel/icgrep/toolchain/object_cache.cpp

    r5761 r5771  
    119119                if (kernel->hasSignature()) {
    120120                    const MDString * const sig = getSignature(M.get());
    121                     assert ("signature is missing from kernel file: possible module naming conflict?" && sig);
     121                    assert ("signature is missing from kernel file: possible module naming conflict or change in the LLVM metadata storage policy?" && sig);
    122122                    if (LLVM_UNLIKELY(sig == nullptr || !sig->getString().equals(kernel->makeSignature(idb)))) {
    123123                        goto invalid;
     
    200200
    201201void ParabixObjectCache::performIncrementalCacheCleanupStep() {
    202     if (mCacheCleanupIterator != fs::directory_iterator()) {
    203         const auto e = mCacheCleanupIterator->path();
    204         mCacheCleanupIterator++;
     202    mCleanupMutex.lock();
     203    if (LLVM_UNLIKELY(mCleanupIterator == fs::directory_iterator())) {
     204        mCleanupMutex.unlock();
     205    } else {
     206        const auto e = mCleanupIterator->path();
     207        mCleanupIterator++;
     208        mCleanupMutex.unlock();
     209
    205210        // Simple clean-up policy: files that haven't been touched by the
    206211        // driver in MaxCacheEntryHours are deleted.
     
    232237        sys::fs::create_directories(mCachePath);
    233238    }
    234     fs::directory_iterator it(p);
    235     mCacheCleanupIterator = it;
     239    mCleanupIterator = fs::directory_iterator(p);
    236240}
    237241
  • icGREP/icgrep-devel/icgrep/toolchain/object_cache.h

    r5761 r5771  
    1515#include <vector>
    1616#include <string>
     17#include <mutex>
    1718
    1819namespace llvm { class Module; }
     
    4849    void performIncrementalCacheCleanupStep();
    4950private:
     51    std::mutex mCleanupMutex;
     52    boost::filesystem::directory_iterator mCleanupIterator;
    5053    ModuleCache mCachedObject;
    5154    const Path mCachePath;
    52     boost::filesystem::directory_iterator mCacheCleanupIterator;
    5355};
    5456
  • icGREP/icgrep-devel/icgrep/toolchain/toolchain.cpp

    r5759 r5771  
    123123TargetOptions Options;
    124124
    125 const cl::OptionCategory * codegen_flags() {
     125const cl::OptionCategory * LLVM_READONLY codegen_flags() {
    126126    return &CodeGenOptions;
    127127}
    128128
    129 bool DebugOptionIsSet(const DebugFlags flag) {
     129bool LLVM_READONLY DebugOptionIsSet(const DebugFlags flag) {
    130130    return DebugOptions.isSet(flag);
    131131}
  • icGREP/icgrep-devel/icgrep/toolchain/toolchain.h

    r5757 r5771  
    3838namespace codegen {
    3939
    40 const llvm::cl::OptionCategory * codegen_flags();
     40const llvm::cl::OptionCategory * LLVM_READONLY codegen_flags();
    4141
    4242// Command Parameters
     
    5252};
    5353
    54 bool DebugOptionIsSet(const DebugFlags flag);
     54bool LLVM_READONLY DebugOptionIsSet(const DebugFlags flag);
    5555
    5656extern bool PipelineParallel;
Note: See TracChangeset for help on using the changeset viewer.