Changeset 5998


Ignore:
Timestamp:
Apr 28, 2018, 3:54:43 PM (11 months ago)
Author:
nmedfort
Message:

Added temporary buffer functionality to the pipeline for single stream source buffers. Fixed memory leak from UCD::UnicodeBreakRE()

Location:
icGREP/icgrep-devel/icgrep
Files:
18 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.cpp

    r5988 r5998  
    167167}
    168168
    169 Value * CBuilder::CreateRoundUp(Value * const number, Value * const divisor, const Twine &Name) {
     169Value * CBuilder::CreateRoundUp(Value * const number, Value * const divisor, const Twine & Name) {
     170    if (isa<ConstantInt>(divisor)) {
     171        const auto d = cast<ConstantInt>(divisor)->getZExtValue();
     172        if (is_power_2(d)) {
     173            Constant * const ONE = ConstantInt::get(divisor->getType(), 1);
     174            Constant * const toAdd = ConstantExpr::getSub(cast<ConstantInt>(divisor), ONE);
     175            return CreateAnd(CreateAdd(number, toAdd), ConstantExpr::getNeg(cast<ConstantInt>(divisor)));
     176        }
     177    }
    170178    return CreateMul(CreateCeilUDiv(number, divisor), divisor, Name);
    171179}
  • icGREP/icgrep-devel/icgrep/UCD/resolve_properties.cpp

    r5938 r5998  
    3333void UnicodePropertyExpressionError(std::string errmsg) {
    3434    llvm::report_fatal_error(errmsg);
    35 }
    36    
    37    
    38 RE * UnicodeBreakRE() {
    39     return makeAlt({makeCC(0x0A, 0x0C), makeCC(0x85), makeCC(0x2028,0x2029), makeSeq({makeCC(0x0D), makeNegativeLookAheadAssertion(makeCC(0x0A))})});
    4035}
    4136
  • icGREP/icgrep-devel/icgrep/UCD/resolve_properties.h

    r5880 r5998  
    1515LLVM_ATTRIBUTE_NORETURN void UnicodePropertyExpressionError(std::string errmsg);
    1616
    17 re::RE * UnicodeBreakRE();
    1817bool resolvePropertyDefinition(re::Name * const property);
    1918std::string resolvePropertyFunction(re::Name * const property);
  • icGREP/icgrep-devel/icgrep/grep/grep_engine.cpp

    r5992 r5998  
    4040#include <re/re_multiplex.h>
    4141#include <re/grapheme_clusters.h>
     42#include <re/re_utility.h>
    4243#include <re/printer_re.h>
    4344#include <toolchain/toolchain.h>
     
    159160    mEngineThread(pthread_self()) {}
    160161
    161 GrepEngine::~GrepEngine() {
    162     //delete mGrepDriver;
    163 }
    164 
    165162QuietModeEngine::QuietModeEngine() : GrepEngine() {
    166163    mEngineKind = EngineKind::QuietMode;
     
    214211    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
    215212        re::Name * anchorName = re::makeName("UTF8_LB", re::Name::Type::Unicode);
    216         anchorName->setDefinition(UCD::UnicodeBreakRE());
     213        anchorName->setDefinition(re::makeUnicodeBreak());
    217214        anchorRE = anchorName;
    218215    }
     
    778775    mGrepDriver(make_unique<ParabixDriver>("InternalEngine")) {}
    779776   
    780 InternalSearchEngine::~InternalSearchEngine() {
    781 }
    782 
    783777void InternalSearchEngine::grepCodeGen(re::RE * matchingRE, re::RE * excludedRE, MatchAccumulator * accum) {
    784778    auto & idb = mGrepDriver->getBuilder();
     
    894888}
    895889
    896 }
     890GrepEngine::~GrepEngine() { }
     891
     892InternalSearchEngine::~InternalSearchEngine() { }
     893
     894}
  • icGREP/icgrep-devel/icgrep/grep/grep_engine.h

    r5994 r5998  
    5555public:
    5656
     57    enum class EngineKind {QuietMode, MatchOnly, CountOnly, EmitMatches};
     58
    5759    GrepEngine();
    58     virtual ~GrepEngine();
     60
     61    virtual ~GrepEngine() = 0;
    5962   
    6063    void setPreferMMap() {mPreferMMap = true;}
     
    8689    int32_t openFile(const std::string & fileName, std::ostringstream & msgstrm);
    8790
    88     enum class EngineKind {QuietMode, MatchOnly, CountOnly, EmitMatches};
     91    std::string linePrefix(std::string fileName);
     92
     93protected:
     94
    8995    EngineKind mEngineKind;
    90    
    91     std::string linePrefix(std::string fileName);
    92 
    9396    bool mSuppressFileMessages;
    9497    argv::BinaryFilesMode mBinaryFilesMode;
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5985 r5998  
    2828class Kernel : public KernelInterface {
    2929    friend class KernelBuilder;
     30    friend class PipelineGenerator;
    3031public:
    3132   
  • icGREP/icgrep-devel/icgrep/kernels/kernel_builder.cpp

    r5993 r5998  
    296296    Constant * const overflowSize = ConstantExpr::getSizeOf(buf->getType());
    297297    CreateMemCpy(target, source, overflowSize, getBitBlockWidth() / 8);
     298}
     299
     300/** ------------------------------------------------------------------------------------------------------------- *
     301 * @brief AcquireTemporaryBuffer
     302 ** ------------------------------------------------------------------------------------------------------------- */
     303std::pair<Value *, Value *> KernelBuilder::AcquireTemporaryBuffer(const std::string & name, Value * offset, Value * itemsToCopy) {
     304    const StreamSetBuffer * const buf = mKernel->getAnyStreamSetBuffer(name);
     305    const auto itemWidth = getItemWidth(buf->getBaseType());
     306    const Binding & binding = mKernel->getBinding(name);
     307    if (LLVM_UNLIKELY(!binding.getRate().isFixed())) {
     308        Constant * const BIT_BLOCK_WIDTH = ConstantInt::get(offset->getType(), getBitBlockWidth());
     309        Value * const alignedOffset = CreateAnd(offset, CreateNeg(BIT_BLOCK_WIDTH));
     310        itemsToCopy = CreateAdd(itemsToCopy, CreateSub(offset, alignedOffset));
     311        offset = alignedOffset;
     312    }
     313    Value * bytesToCopy = itemsToCopy;
     314    if (itemWidth < 8) {
     315        bytesToCopy = CreateCeilUDiv(itemsToCopy, getSize(8 / itemWidth));
     316    } else if (itemWidth > 8) {
     317        bytesToCopy = CreateMul(itemsToCopy, getSize(itemWidth / 8));
     318    }
     319    Constant * const baseSize = ConstantExpr::getTrunc(ConstantExpr::getSizeOf(buf->getStreamSetBlockType()), getSizeTy());
     320    Constant * const itemsConsumedPerIteration = getSize(std::max(ceiling(mKernel->getUpperBound(binding.getRate())), 1U));
     321    Constant * const paddedSize =  ConstantExpr::getMul(baseSize, itemsConsumedPerIteration);
     322
     323    // one is added to bytes to copy to ensure that the stream is "zero-extended" by one block to properly handle any
     324    // final block processing.o
     325    Value * const size = CreateRoundUp(CreateAdd(bytesToCopy, getSize(1)), paddedSize);
     326    Value * const handle = getStreamHandle(name);
     327    Value * const base = buf->getBaseAddress(this, handle);
     328    Value * const buffer = CreateAlignedMalloc(size, getCacheAlignment());
     329    // TODO: handle split copy? currently no SourceBuffers could support it and I'm not sure how useful it'd be to do so.
     330    Value * const from = buf->getRawItemPointer(this, handle, offset);
     331    CreateMemCpy(buffer, from, bytesToCopy, 1);
     332    CreateMemZero(CreateGEP(buffer, bytesToCopy), CreateSub(size, bytesToCopy), 1);
     333    // get the difference between our base and from position then compute an offsetted temporary buffer address
     334    Value * const diff = CreatePtrDiff(CreatePointerCast(base, from->getType()), from);
     335    Value * const offsettedBuffer = CreatePointerCast(CreateGEP(buffer, diff), base->getType());
     336    buf->setBaseAddress(this, handle, offsettedBuffer);
     337    Value * const tempBuffer = CreatePointerCast(buffer, base->getType());
     338    return std::make_pair(base, tempBuffer);
    298339}
    299340
  • icGREP/icgrep-devel/icgrep/kernels/kernel_builder.h

    r5985 r5998  
    153153    void CreateCopyToOverflow(const std::string & name);
    154154
     155    std::pair<llvm::Value *, llvm::Value *> AcquireTemporaryBuffer(const std::string & name, llvm::Value * const offset, llvm::Value * const itemsToCopy);
     156
    155157    void setBaseAddress(const std::string & name, llvm::Value * addr);
    156158
  • icGREP/icgrep-devel/icgrep/kernels/multiblock_kernel.cpp

    r5985 r5998  
    492492        #endif
    493493        if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    494             Value * const processed = b->getProcessedItemCount(input.getName());
     494            Value * const processed = b->getNonDeferredProcessedItemCount(input);
    495495            Value * const newlyProcessed = b->CreateSub(processed, mInitialProcessedItemCount[i]);
    496496            Value * const withinCapacity = b->CreateICmpULE(newlyProcessed, mAccessibleInputItems[i]);
  • icGREP/icgrep-devel/icgrep/kernels/radix64.cpp

    r5985 r5998  
    3939// of bytes to the actual output stream.
    4040
    41 //void expand3_4Kernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> &b, Value * const numOfStrides) {
    42 
    43 //    BasicBlock * expand2_3entry = b->GetInsertBlock();
    44 //    BasicBlock * expand_3_4_loop = b->CreateBasicBlock("expand_3_4_loop");
    45 //    BasicBlock * expand3_4_exit = b->CreateBasicBlock("expand3_4_exit");
    46 
    47 //    // Determine the require shufflevector constants.
    48 //    const unsigned PACK_SIZE = b->getBitBlockWidth()/8;
    49 
    50 //    ConstantInt * const ZERO = b->getSize(0);
    51 //    ConstantInt * const ONE = b->getSize(1);
    52 //    ConstantInt * const THREE = b->getSize(3);
    53 //    ConstantInt * const FOUR = b->getSize(4);
    54 //    ConstantInt * const SEVEN = b->getSize(7);
    55 
    56 //    // Construct a list of indexes in  the form
    57 //    // 0, 1, 2, 2, 3, 4, 5, 5, 6, 7, 8, 8, ...
    58 //    unsigned sourceByteIndex = 0;
    59 //    unsigned expand3_4_index[PACK_SIZE];
    60 //    for (unsigned i = 0; i < PACK_SIZE; i++) {
    61 //        expand3_4_index[i] = sourceByteIndex;
    62 //        if (i % 4 != 2) sourceByteIndex++;
    63 //    }
    64 //    unsigned const expand3_4_offset[4] = {PACK_SIZE, 3*PACK_SIZE/4, PACK_SIZE/2, PACK_SIZE/4};
    65 //    Value * expand_3_4_shuffle[4];
    66 //    for (unsigned j = 0; j < 4; j++) {
    67 //        std::vector<Constant *> Idxs;
    68 //        for (unsigned i = 0; i < PACK_SIZE; i++) {
    69 //            Idxs.push_back(ConstantInt::get(b->getInt32Ty(), expand3_4_offset[j] + expand3_4_index[i]));
    70 //        }
    71 //        expand_3_4_shuffle[j] = ConstantVector::get(Idxs);
    72 //    }
    73 
    74 
    75 
    76 //    Constant * triplePackSize = b->getSize(3 * PACK_SIZE); // 3 packs per loop.
    77 //    UndefValue * undefPack = UndefValue::get(b->fwVectorType(8));
    78 
    79 //    Value * const numOfBlocks = b->CreateMul(numOfStrides, b->getSize(8));
    80 
    81 //    Value * itemsToDo = mAvailableItemCount[0];
    82 
    83 //    // The main loop processes 3 packs of data at a time.
    84 //    b->CreateBr(expand_3_4_loop);
    85 
    86 //    b->SetInsertPoint(expand_3_4_loop);
    87 //    PHINode * loopItemsRemain = b->CreatePHI(b->getSizeTy(), 2);
    88 //    PHINode * strideOffset = b->CreatePHI(b->getSizeTy(), 2);
    89 //    loopItemsRemain->addIncoming(itemsToDo, expand2_3entry);
    90 //    strideOffset->addIncoming(ZERO, expand2_3entry);
    91 
    92 //    Value * const baseInputOffset = b->CreateMul(strideOffset, THREE);
    93 //    Value * const baseOutputOffset = b->CreateMul(strideOffset, FOUR);
    94 //    Value * carryOver = undefPack;
    95 //    for (unsigned i = 0; i < 3; ++i) {
    96 //        ConstantInt * const index = b->getSize(i);
    97 //        Value * const inputOffset = b->CreateAdd(baseInputOffset, index);
    98 //        Value * const inputPackIndex = b->CreateAnd(inputOffset, SEVEN);
    99 //        Value * const inputBlockOffset = b->CreateLShr(inputOffset, THREE);
    100 //        Value * const input = b->fwCast(8, b->loadInputStreamPack("sourceStream", ZERO, inputPackIndex, inputBlockOffset));
    101 //        Value * const expanded = b->CreateShuffleVector(carryOver, input, expand_3_4_shuffle[i]);
    102 //        Value * const outputOffset = b->CreateAdd(baseOutputOffset, index);
    103 //        Value * const outputPackIndex = b->CreateAnd(outputOffset, SEVEN);
    104 //        Value * const outputBlockOffset = b->CreateLShr(outputOffset, THREE);
    105 //        b->storeOutputStreamPack("expand34Stream", ZERO, outputPackIndex, outputBlockOffset, b->bitCast(expanded));
    106 //        carryOver = input;
    107 //    }
    108 //    Value * expanded = b->CreateShuffleVector(carryOver, undefPack, expand_3_4_shuffle[3]);
    109 //    Value * outputOffset = b->CreateAdd(baseOutputOffset, THREE);
    110 //    Value * const outputPackIndex = b->CreateAnd(outputOffset, SEVEN);
    111 //    Value * const outputBlockOffset = b->CreateLShr(outputOffset, THREE);
    112 //    b->storeOutputStreamPack("expand34Stream", ZERO, outputPackIndex, outputBlockOffset, b->bitCast(expanded));
    113 
    114 //    Value * remainingItems = b->CreateSub(loopItemsRemain, triplePackSize);
    115 
    116 //    loopItemsRemain->addIncoming(remainingItems, expand_3_4_loop);
    117 //    Value * const nextStrideOffset = b->CreateAdd(strideOffset, ONE);
    118 //    strideOffset->addIncoming(nextStrideOffset, expand_3_4_loop);
    119 
    120 //    //Value * continueLoop = b->CreateICmpSGT(remainingItems, ZERO);
    121 //    Value * continueLoop = b->CreateICmpULT(nextStrideOffset, numOfBlocks);
    122 //    b->CreateCondBr(continueLoop, expand_3_4_loop, expand3_4_exit);
    123 
    124 //    b->SetInsertPoint(expand3_4_exit);
    125 
    126 //}
    127 
    12841void expand3_4Kernel::generateMultiBlockLogic(const std::unique_ptr<KernelBuilder> &b, Value * const numOfStrides) {
    12942
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r5985 r5998  
    8787}
    8888
    89 void StreamSetBuffer::setBaseAddress(IDISA::IDISA_Builder * const /* b */, Value * /* handle */, Value * /* addr */) const {
     89void StreamSetBuffer::setBaseAddress(IDISA::IDISA_Builder * const /* b */, Value * /* handle */, Value * /* addr */) const {
    9090    report_fatal_error("setBaseAddress is not supported by this buffer type");
    9191}
     
    535535}
    536536
    537 //Value * DynamicBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value * availItems, bool reverse) const {
    538 //    Value * const bufferSize = getBufferedSize(b, handle);
    539 //    assert (bufferSize->getType() == fromPosition->getType());
    540 //    Value * itemsFromBase = b->CreateURem(fromPosition, bufferSize);
    541 //    if (reverse) {
    542 //        Value * bufAvail = b->CreateSelect(b->CreateIsNull(itemsFromBase), bufferSize, itemsFromBase);
    543 //        return b->CreateSelect(b->CreateICmpULT(bufAvail, availItems), bufAvail, availItems);
    544 //    } else {
    545 //        Constant * const overflow = ConstantInt::get(bufBlocks->getType(), mOverflowBlocks * b->getBitBlockWidth() - 1);
    546 //        Value * const linearSpace = b->CreateAdd(bufferSize, overflow);
    547 //        Value * remaining = b->CreateSub(linearSpace, itemsFromBase);
    548 //        return b->CreateSelect(b->CreateICmpULT(availItems, remaining), availItems, remaining);
    549 //    }
    550 //}
    551 
    552 //Value * DynamicBuffer::getLinearlyWritableItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value * consumed, bool reverse) const {
    553 //    Value * const bufferSize = getBufferedSize(b, handle);
    554 //    assert (bufferSize->getType() == fromPosition->getType());
    555 //    Value * bufRem = b->CreateURem(fromPosition, bufferSize);
    556 //    if (reverse) {
    557 //        return b->CreateSelect(b->CreateIsNull(bufRem), bufferSize, bufRem);
    558 //    }
    559 //    Constant * const overflow = ConstantInt::get(bufBlocks->getType(), mOverflowBlocks * b->getBitBlockWidth() - 1);
    560 //    Value * const linearSpace = b->CreateAdd(bufferSize, overflow);
    561 //    return b->CreateSub(linearSpace, bufRem);
    562 //}
    563 
    564537Value * DynamicBuffer::getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, Value * const handle, Value * fromPosition, Value * availItems, bool reverse) const {
    565538    Value * const bufferSize = getBufferedSize(b, handle);
     
    594567    return b->CreateSub(limit, fromPosition);
    595568}
    596 
    597 
    598569
    599570Value * DynamicBuffer::getBufferedSize(IDISA::IDISA_Builder * const b, Value * const handle) const {
  • icGREP/icgrep-devel/icgrep/kernels/streamset.h

    r5985 r5998  
    7878        return mStreamSetBufferPtr;
    7979    }
    80    
     80
     81    bool supportsCopyBack() const {
     82        return mOverflowBlocks != 0;
     83    }
     84
     85    virtual bool isUnbounded() const {
     86        return false;
     87    }
     88
     89    size_t overflowSize() const {
     90        return mOverflowBlocks;
     91    }
     92
     93    virtual ~StreamSetBuffer() = 0;
     94
     95    kernel::Kernel * getProducer() const {
     96        return mProducer;
     97    }
     98
     99    const std::vector<kernel::Kernel *> & getConsumers() const {
     100        return mConsumers;
     101    }
     102
     103    virtual void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & kb);
     104
     105    virtual void releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & kb) const;
     106
     107    llvm::PointerType * getStreamSetPointerType() const {
     108        return getStreamSetBlockType()->getPointerTo(mAddressSpace);
     109    }
     110
     111protected:
     112
    81113    virtual llvm::Type * getStreamSetBlockType() const;
    82    
    83     virtual void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & kb);
    84 
    85     virtual void releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & kb) const;
    86114
    87115    virtual llvm::Value * getStreamBlockPtr(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * streamIndex, llvm::Value * blockIndex, const bool readOnly) const;
     
    93121    virtual llvm::Value * getRawItemPointer(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * absolutePosition) const;
    94122
    95     virtual void setBaseAddress(IDISA::IDISA_Builder * const b, llvm::Value * addr, llvm::Value *) const;
    96 
    97     virtual void setBufferedSize(IDISA::IDISA_Builder * const b, llvm::Value * size, llvm::Value *) const;
     123    virtual void setBaseAddress(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * addr) const;
     124
     125    virtual void setBufferedSize(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * size) const;
    98126   
    99127    virtual llvm::Value * getBufferedSize(IDISA::IDISA_Builder * const b, llvm::Value * handle) const;
     
    111139   
    112140    virtual void doubleCapacity(IDISA::IDISA_Builder * const b, llvm::Value * handle) const;
    113 
    114     bool supportsCopyBack() const {
    115         return mOverflowBlocks != 0;
    116     }
    117 
    118     virtual bool isUnbounded() const {
    119         return false;
    120     }
    121 
    122     size_t overflowSize() const {
    123         return mOverflowBlocks;
    124     }
    125    
    126     virtual ~StreamSetBuffer() = 0;
    127 
    128     kernel::Kernel * getProducer() const {
    129         return mProducer;
    130     }
    131 
    132     const std::vector<kernel::Kernel *> & getConsumers() const {
    133         return mConsumers;
    134     }
    135 
    136 protected:
    137141
    138142    StreamSetBuffer(BufferKind k, llvm::Type * baseType, llvm::Type * resolvedType, unsigned BufferBlocks, unsigned OverflowBlocks, unsigned AddressSpace);
     
    170174
    171175class SourceBuffer final : public StreamSetBuffer {
     176    friend class kernel::KernelBuilder;
    172177public:
    173178    static inline bool classof(const StreamSetBuffer * b) {
     
    181186    }
    182187
     188protected:
     189
    183190    void setBaseAddress(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * addr) const override;
    184191
     
    201208    void releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & kb) const override;
    202209
    203 protected:
    204    
    205210    enum Field {BaseAddress, BufferedSize, Capacity};
    206211
     
    210215
    211216class ExternalBuffer final : public StreamSetBuffer {
     217    friend class kernel::KernelBuilder;
    212218public:
    213219    static inline bool classof(const StreamSetBuffer * b) {
     
    221227    }
    222228
     229    void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) override;
     230
     231    void releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & kb) const override;
     232
     233protected:
     234
    223235    llvm::Value * getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, llvm::Value * avail, bool reverse = false) const override;
    224236   
    225237    llvm::Value * getLinearlyWritableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, llvm::Value * consumed, bool reverse = false) const override;
    226238
    227     void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) override;
    228 
    229     void releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & kb) const override;
    230 
    231239    llvm::Value * getBufferedSize(IDISA::IDISA_Builder * const b, llvm::Value * handle) const override;
    232240
     
    235243
    236244class CircularBuffer : public StreamSetBuffer {
     245    friend class kernel::KernelBuilder;
    237246public:
    238247    static inline bool classof(const StreamSetBuffer * b) {
     
    242251    CircularBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, llvm::Type * type, size_t bufferBlocks, unsigned AddressSpace = 0);
    243252
     253protected:
     254
    244255    llvm::Value * getRawItemPointer(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * absolutePosition) const final;
    245 
    246 protected:
    247256
    248257    CircularBuffer(const BufferKind kind, const std::unique_ptr<kernel::KernelBuilder> & b, llvm::Type * type, size_t bufferBlocks, size_t overflowBlocks, unsigned AddressSpace);
     
    259268//
    260269class CircularCopybackBuffer final : public CircularBuffer {
     270    friend class kernel::KernelBuilder;
    261271public:
    262272    static inline bool classof(const StreamSetBuffer * b) {return b->getBufferKind() == BufferKind::CircularCopybackBuffer;}
     
    270280//
    271281class ExpandableBuffer final : public StreamSetBuffer {
     282    friend class kernel::KernelBuilder;
    272283public:
    273284    static inline bool classof(const StreamSetBuffer * b) {return b->getBufferKind() == BufferKind::ExpandableBuffer;}
     
    275286    ExpandableBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, llvm::Type * type, size_t bufferBlocks, unsigned AddressSpace = 0);
    276287
     288    void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) override;
     289
     290    void releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & kb) const override;
     291
     292protected:
     293
    277294    llvm::Value * getStreamBlockPtr(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * streamIndex, llvm::Value * blockIndex, const bool readOnly) const override;
    278295
     
    282299   
    283300    llvm::Value * getStreamSetCount(IDISA::IDISA_Builder * const b, llvm::Value * handle) const override;
    284 
    285     void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) override;
    286 
    287     void releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & kb) const override;
    288 
    289 protected:
    290301
    291302    llvm::Value * getBaseAddress(IDISA::IDISA_Builder * const b, llvm::Value * handle) const override;
     
    303314// Dynamically allocated circular buffers: TODO: add copyback, swizzle support, dynamic allocation, producer, consumer, length
    304315class DynamicBuffer final : public StreamSetBuffer {
     316
     317    friend class kernel::KernelBuilder;
     318
    305319    /* Dynamic data fields stored in the buffer struct */
    306320    enum Field {BaseAddress, PriorBaseAddress, AllocatedCapacity, WorkingBlocks, Length, ProducedPosition, ConsumedPosition, FieldCount};
     
    321335   
    322336    DynamicBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, llvm::Type * type, size_t initialCapacity, size_t overflowBlocks = 0, unsigned swizzleFactor = 1, unsigned addrSpace = 0);
    323    
     337
     338    void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) override;
     339
     340    void releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const override;
     341   
     342protected:
     343
    324344    llvm::Value * getLinearlyAccessibleItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, llvm::Value * avail, bool reverse = false) const override;
    325345   
    326346    llvm::Value * getLinearlyWritableItems(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * fromPosition, llvm::Value * consumed, bool reverse = false) const override;
    327    
    328     void allocateBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) override;
    329 
    330     void releaseBuffer(const std::unique_ptr<kernel::KernelBuilder> & b) const override;
    331347
    332348    llvm::Value * getRawItemPointer(IDISA::IDISA_Builder * const b, llvm::Value * handle, llvm::Value * absolutePosition) const override;
     
    336352    void doubleCapacity(IDISA::IDISA_Builder * const b, llvm::Value * handle)  const final;
    337353
    338 protected:
    339354    llvm::Value * getBaseAddress(IDISA::IDISA_Builder * const b, llvm::Value * handle) const override;
    340355   
  • icGREP/icgrep-devel/icgrep/lz4/LZ4GrepGenerator.cpp

    r5987 r5998  
    6363#include <re/re_name_resolve.h>
    6464#include <re/re_name_gather.h>
    65 //#include <re/re_collect_unicodesets.h>
    6665#include <re/re_multiplex.h>
     66#include <re/re_utility.h>
    6767#include <re/grapheme_clusters.h>
    6868#include <re/printer_re.h>
     
    111111    if (mGrepRecordBreak == GrepRecordBreakKind::Unicode) {
    112112        re::Name * anchorName = re::makeName("UTF8_LB", re::Name::Type::Unicode);
    113         anchorName->setDefinition(UCD::UnicodeBreakRE());
     113        anchorName->setDefinition(re::makeUnicodeBreak());
    114114        anchorRE = anchorName;
    115115    }
  • icGREP/icgrep-devel/icgrep/re/re_utility.cpp

    r5267 r5998  
    5858}
    5959
     60RE * makeUnicodeBreak() {
     61    return makeAlt({makeCC(0x0A, 0x0C), makeCC(0x85), makeCC(0x2028,0x2029), makeSeq({makeCC(0x0D), makeNegativeLookAheadAssertion(makeCC(0x0A))})});
     62}
    6063   
    6164}
  • icGREP/icgrep-devel/icgrep/re/re_utility.h

    r5267 r5998  
    2222Name * makeWhitespaceSet();
    2323Name * makeWordSet();
     24RE * makeUnicodeBreak();
    2425
    2526}
  • icGREP/icgrep-devel/icgrep/toolchain/driver.cpp

    r5755 r5998  
    2323    }
    2424}
     25
     26Driver::~Driver() {}
  • icGREP/icgrep-devel/icgrep/toolchain/driver.h

    r5856 r5998  
    1717public:
    1818    Driver(std::string && moduleName);
    19 
    20     virtual ~Driver() = default;
    2119
    2220    const std::unique_ptr<kernel::KernelBuilder> & getBuilder() {
     
    5856    virtual void performIncrementalCacheCleanupStep() = 0;
    5957
     58    virtual ~Driver() = 0;
     59
    6060protected:
    6161
  • icGREP/icgrep-devel/icgrep/toolchain/pipeline.cpp

    r5996 r5998  
    8484    ChannelGraph pruneGraph(ChannelGraph && G, VertexList && V) const;
    8585
    86     void checkIfAllInputKernelsAreTerminated(const std::unique_ptr<KernelBuilder> & b, const unsigned index);
     86    bool isPotentiallyUnsafeInputBuffer(const StreamSetBuffer * const buffer);
     87
     88    void allocateTemporaryBufferPointerArray(const std::unique_ptr<KernelBuilder> & b, const Kernel * const kernel);
     89
     90    void checkIfAllInputKernelsHaveFinished(const std::unique_ptr<KernelBuilder> & b, const unsigned index);
    8791
    8892    void checkAvailableInputData(const std::unique_ptr<KernelBuilder> & b, const unsigned index);
     
    9498    Value * callKernel(const std::unique_ptr<KernelBuilder> & b, const unsigned index);
    9599
    96     void applyOutputBufferExpansions(const std::unique_ptr<KernelBuilder> & b, const unsigned index);
     100    void applyOutputBufferExpansions(const std::unique_ptr<KernelBuilder> & b, const Kernel *kernel);
     101
     102    void runKernel(const std::unique_ptr<KernelBuilder> & b, const Kernel * const kernel);
     103
     104    void allocateTemporaryBuffers(const std::unique_ptr<KernelBuilder> & b, const Kernel * kernel, Value * const requiresTemporaryBuffers);
     105
     106    void freeTemporaryBuffers(const std::unique_ptr<KernelBuilder> & b, const Kernel * kernel, Value * const requiresTemporaryBuffers);
    97107
    98108    Value * getFullyProcessedItemCount(const std::unique_ptr<KernelBuilder> & b, const Binding & binding, Value * const final) const;
     
    112122    ChannelGraph                        inputGraph;
    113123    ChannelGraph                        outputGraph;
     124
     125    std::vector<Value *>                temporaryBufferPtrs;
    114126
    115127    BasicBlock *                        kernelFinished;
     
    709721    b->SetInsertPoint(kernelCode);
    710722
    711     checkIfAllInputKernelsAreTerminated(b, index);
    712 
    713     checkAvailableInputData(b, index);
    714 
    715     checkAvailableOutputSpace(b, index);
    716 
    717     applyOutputBufferExpansions(b, index);
    718 
    719723    Value * const finalStride = callKernel(b, index);
    720724
     
    765769
    766770
     771    // If this kernel is the last consumer of an input buffer, update the consumed count for that buffer.
     772
    767773    // TODO: if all consumers process the data at a fixed rate, we can just set the consumed item count
    768774    // by the strideNo rather than tracking it.
    769775
    770 
    771     // If this kernel is the last consumer of a input buffer, update the consumed count for that buffer.
    772     // NOTE: unless we can prove that this kernel cannot terminate before any prior consumer, we cannot
    773     // put this code into the kernelFinished block.
     776    // TODO: a kernel could take the same stream set for multiple arguments.
     777
     778    // TODO: if we can prove that this kernel cannot terminate before any prior consumer, this code
     779    // could be executed in kernelFinished block.
    774780    for (unsigned i = 0; i < inputs.size(); ++i) {
    775781        const StreamSetBuffer * const buffer = kernel->getStreamSetInputBuffer(i);
     
    782788            if (output.getRate().isRelative()) continue;
    783789            b->setKernel(producer);
    784             if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    785                 Value * const alreadyConsumed = b->getConsumedItemCount(output.getName());
    786                 b->CreateAssert(b->CreateICmpULE(alreadyConsumed, consumedItemCountPhi[i]),
    787                                 producer->getName() + ": " + output.getName() + " consumed item count is not monotonically non-decreasing!");
    788             }
    789790            b->setConsumedItemCount(output.getName(), consumedItemCountPhi[i]);
    790791            b->setKernel(kernel);
     
    796797
    797798/** ------------------------------------------------------------------------------------------------------------- *
    798  * @brief checkAvailableInputData
    799  ** ------------------------------------------------------------------------------------------------------------- */
    800 void PipelineGenerator::checkIfAllInputKernelsAreTerminated(const std::unique_ptr<KernelBuilder> & b, const unsigned index) {
     799 * @brief checkIfAllInputKernelsHaveFinished
     800 ** ------------------------------------------------------------------------------------------------------------- */
     801void PipelineGenerator::checkIfAllInputKernelsHaveFinished(const std::unique_ptr<KernelBuilder> & b, const unsigned index) {
    801802    const auto n = in_degree(index, dependencyGraph);
    802803    if (LLVM_UNLIKELY(n == 0)) {
     
    807808            const auto u = source(e, dependencyGraph);
    808809            Value * const finished = dependencyGraph[u];
    809             //b->CallPrintInt("* " + kernels[u]->getName() + "_hasFinished", finished);
    810810            noMore = b->CreateAnd(noMore, finished);
    811811        }
     
    818818void PipelineGenerator::checkAvailableInputData(const std::unique_ptr<KernelBuilder> & b, const unsigned index) {
    819819    const Kernel * const kernel = kernels[index];
    820     b->setKernel(kernel);
     820    b->setKernel(kernel);   
    821821    for (auto e : make_iterator_range(in_edges(index, inputGraph))) {
    822822        const Channel & c = inputGraph[e];
     
    834834        Value * const unprocessed = b->CreateSub(produced, processed);
    835835        Value * const hasEnough = b->CreateICmpUGE(unprocessed, requiredInput);
    836         Value * const check = b->CreateOr(hasEnough, noMore);
    837836        terminated->addIncoming(b->getFalse(), b->GetInsertBlock());
    838837        if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::EnableAsserts))) {
    839838            madeProgress->addIncoming(anyProgress, b->GetInsertBlock());
    840839        }
    841         BasicBlock * const hasSufficientInput = b->CreateBasicBlock(kernel->getName() + "_" + input.getName() + "_hasSufficientInput");
     840        const auto prefix = kernel->getName() + "_" + input.getName();
     841        BasicBlock * const hasSufficientInput = b->CreateBasicBlock(prefix + "_hasSufficientInput");
     842        Value * const check = b->CreateOr(hasEnough, noMore);
    842843        b->CreateLikelyCondBr(check, hasSufficientInput, kernelFinished);
    843844        b->SetInsertPoint(hasSufficientInput);
     
    868869            madeProgress->addIncoming(anyProgress, b->GetInsertBlock());
    869870        }
    870         BasicBlock * const hasOutputSpace = b->CreateBasicBlock(kernel->getName() + "_" + name + "_hasOutputSpace");
     871        const auto prefix = kernel->getName() + "_" + name;
     872        BasicBlock * const hasOutputSpace = b->CreateBasicBlock(prefix + "_hasOutputSpace");
    871873        b->CreateLikelyCondBr(check, hasOutputSpace, kernelFinished);
    872874        b->SetInsertPoint(hasOutputSpace);
     
    907909    b->setKernel(kernel);
    908910
     911    checkIfAllInputKernelsHaveFinished(b, index);
     912
     913    checkAvailableInputData(b, index);
     914
     915    checkAvailableOutputSpace(b, index);
     916
     917    applyOutputBufferExpansions(b, kernel);
     918
    909919    #ifndef DISABLE_COPY_TO_OVERFLOW
    910920    // Store how many items we produced by this kernel in the prior iteration. We'll use this to determine when
     
    924934    #endif
    925935
    926     const auto & inputs = kernel->getStreamInputs();
    927     const auto n = inputs.size();
    928     std::vector<Value *> arguments(n + 2);
    929 
    930     Value * isFinal = noMore;
    931     for (unsigned i = 0; i < n; ++i) {
    932         const Binding & input = inputs[i];
    933         const StreamSetBuffer * const buffer = kernel->getStreamSetInputBuffer(i);
    934 
    935         const auto p = producedItemCount.find(buffer);
    936         assert (p != producedItemCount.end());
    937         Value * const produced = p->second;
    938 
    939         const ProcessingRate & rate = input.getRate();
    940         if (rate.isPopCount()) {
    941             arguments[i + 2] = produced;
    942         } else {
    943             const unsigned strideSize = ceiling(kernel->getUpperBound(rate) * kernel->getStride());
    944             Value * const processed = b->getNonDeferredProcessedItemCount(input);
    945             Value * const limit = b->CreateAdd(processed, b->getSize(strideSize * codegen::SegmentSize));
    946             Value * const partial = b->CreateICmpULT(produced, limit);
    947             arguments[i + 2] = b->CreateSelect(partial, produced, limit);
    948             isFinal = b->CreateAnd(isFinal, partial);
    949         }
    950     }
    951 
    952     // TODO: pass in a strideNo for fixed rate streams to allow the kernel to calculate the current avail,
    953     // processed, and produced counts
    954 
    955     arguments[0] = kernel->getInstance();
    956     arguments[1] = isFinal;
    957 
    958     b->createDoSegmentCall(arguments);
     936    allocateTemporaryBufferPointerArray(b, kernel);
     937
     938    runKernel(b, kernel);
    959939
    960940    #ifndef DISABLE_COPY_TO_OVERFLOW
     
    986966
    987967/** ------------------------------------------------------------------------------------------------------------- *
     968 * @brief runKernel
     969 ** ------------------------------------------------------------------------------------------------------------- */
     970void PipelineGenerator::runKernel(const std::unique_ptr<KernelBuilder> & b, const Kernel * const kernel) {
     971
     972    const auto & inputs = kernel->getStreamInputs();
     973    const auto n = inputs.size();
     974    std::vector<Value *> arguments(n + 2);
     975
     976    Value * isFinal = noMore;
     977
     978    Value * requiresTemporaryBuffers = nullptr;
     979
     980    for (unsigned i = 0; i < n; ++i) {
     981        const Binding & input = inputs[i];
     982        const StreamSetBuffer * const buffer = kernel->getStreamSetInputBuffer(i);
     983
     984        const auto p = producedItemCount.find(buffer);
     985        assert (p != producedItemCount.end());
     986        Value * const produced = p->second;
     987
     988        const ProcessingRate & rate = input.getRate();
     989        if (rate.isPopCount()) {
     990            arguments[i + 2] = produced;
     991        } else {
     992            const unsigned strideSize = ceiling(kernel->getUpperBound(rate) * kernel->getStride());
     993            Value * const processed = b->getNonDeferredProcessedItemCount(input);
     994            Value * const limit = b->CreateAdd(processed, b->getSize(strideSize * codegen::SegmentSize));
     995            Value * const hasPartial = b->CreateICmpULT(produced, limit);
     996            arguments[i + 2] = b->CreateSelect(hasPartial, produced, limit);
     997            isFinal = b->CreateAnd(isFinal, hasPartial);
     998            if (!temporaryBufferPtrs.empty() && temporaryBufferPtrs[i]) {
     999                if (requiresTemporaryBuffers) {
     1000                    requiresTemporaryBuffers = b->CreateOr(requiresTemporaryBuffers, hasPartial);
     1001                } else {
     1002                    requiresTemporaryBuffers = hasPartial;
     1003                }
     1004            }
     1005        }
     1006    }
     1007
     1008    // TODO: pass in a strideNo for fixed rate streams to allow the kernel to calculate the current avail,
     1009    // processed, and produced counts
     1010
     1011    arguments[0] = kernel->getInstance();
     1012    arguments[1] = isFinal;
     1013
     1014    if (requiresTemporaryBuffers) {
     1015        allocateTemporaryBuffers(b, kernel, requiresTemporaryBuffers);
     1016    }
     1017
     1018    b->createDoSegmentCall(arguments);
     1019
     1020    if (requiresTemporaryBuffers) {
     1021        freeTemporaryBuffers(b, kernel, requiresTemporaryBuffers);
     1022    }
     1023}
     1024
     1025
     1026/** ------------------------------------------------------------------------------------------------------------- *
    9881027 * @brief applyOutputBufferExpansions
    9891028 ** ------------------------------------------------------------------------------------------------------------- */
    990 void PipelineGenerator::applyOutputBufferExpansions(const std::unique_ptr<KernelBuilder> & b, const unsigned index) {
    991     const Kernel * const kernel = kernels[index];
     1029void PipelineGenerator::applyOutputBufferExpansions(const std::unique_ptr<KernelBuilder> & b, const Kernel * kernel) {
    9921030    const auto & outputs = kernel->getStreamSetOutputBuffers();
    9931031    for (unsigned i = 0; i < outputs.size(); i++) {
     
    10251063}
    10261064
     1065/** ------------------------------------------------------------------------------------------------------------- *
     1066 * @brief allocateTemporaryBufferPointerArray
     1067 ** ------------------------------------------------------------------------------------------------------------- */
     1068void PipelineGenerator::allocateTemporaryBufferPointerArray(const std::unique_ptr<KernelBuilder> & b, const Kernel * const kernel) {
     1069    // TODO: whenever two kernels are using the same "unsafe" buffer, they'll both create and destroy their own
     1070    // temporary copies of it. This could be optimized to have it done at production and deleted after the last
     1071    // consuming kernel utilizes it.
     1072    temporaryBufferPtrs.clear();
     1073
     1074    const auto & inputs = kernel->getStreamInputs();
     1075    for (unsigned i = 0; i < inputs.size(); ++i) {
     1076        const StreamSetBuffer * const buffer = kernel->getStreamSetInputBuffer(i);
     1077        if (LLVM_UNLIKELY(isPotentiallyUnsafeInputBuffer(buffer))) {
     1078            if (temporaryBufferPtrs.empty()) {
     1079                temporaryBufferPtrs.resize(inputs.size(), nullptr);
     1080            }
     1081            assert (temporaryBufferPtrs[i] == nullptr);
     1082            PointerType * const ptrTy = buffer->getStreamSetPointerType();
     1083            StructType * const structTy = StructType::create(b->getContext(), {ptrTy, ptrTy});
     1084            AllocaInst * const tempBuffer = b->CreateAlloca(structTy);
     1085            b->CreateStore(Constant::getNullValue(structTy), tempBuffer);
     1086            temporaryBufferPtrs[i] = tempBuffer;
     1087        }
     1088    }
     1089
     1090}
     1091
     1092/** ------------------------------------------------------------------------------------------------------------- *
     1093 * @brief isPotentiallyUnsafeInputBuffer
     1094 *
     1095 * We cannot trust that the final block of any single stream source or external buffer can be safely read past its
     1096 * final item since kernels may attempt to load aligned blocks of data, leading to potentially-intermittent
     1097 * segmentation faults, depending on whether the access crosses a page boundary.
     1098 ** ------------------------------------------------------------------------------------------------------------- */
     1099inline bool PipelineGenerator::isPotentiallyUnsafeInputBuffer(const StreamSetBuffer * const buffer) {
     1100    return isa<SourceBuffer>(buffer) && buffer->getNumOfStreams() == 1;
     1101}
     1102
     1103/** ------------------------------------------------------------------------------------------------------------- *
     1104 * @brief allocateTemporaryBuffers
     1105 ** ------------------------------------------------------------------------------------------------------------- */
     1106void PipelineGenerator::allocateTemporaryBuffers(const std::unique_ptr<KernelBuilder> & b, const Kernel * kernel, Value * const requiresTemporaryBuffers) {
     1107    ConstantInt * const ZERO = b->getInt32(0);
     1108    ConstantInt * const ONE = b->getInt32(1);
     1109    BasicBlock * const allocateBuffers = b->CreateBasicBlock();
     1110    BasicBlock * const runKernel = b->CreateBasicBlock();
     1111    b->CreateUnlikelyCondBr(requiresTemporaryBuffers, allocateBuffers, runKernel);
     1112
     1113    b->SetInsertPoint(allocateBuffers);
     1114    for (unsigned i = 0; i < temporaryBufferPtrs.size(); ++i) {
     1115        if (temporaryBufferPtrs[i]) {
     1116            const Binding & input = kernel->getStreamInput(i);
     1117            const auto p = producedItemCount.find(kernel->getStreamSetInputBuffer(i));
     1118            assert (p != producedItemCount.end());
     1119            Value * const produced = p->second;
     1120            Value * const processed = b->getProcessedItemCount(input.getName());
     1121            Value * const unprocessed = b->CreateSub(produced, processed);
     1122            const auto temp = b->AcquireTemporaryBuffer(input.getName(), processed, unprocessed);
     1123            b->CreateStore(temp.first, b->CreateGEP(temporaryBufferPtrs[i], { ZERO, ZERO }));
     1124            b->CreateStore(temp.second, b->CreateGEP(temporaryBufferPtrs[i], { ZERO, ONE }));
     1125        }
     1126    }
     1127    b->CreateBr(runKernel);
     1128
     1129    b->SetInsertPoint(runKernel);
     1130}
     1131
     1132/** ------------------------------------------------------------------------------------------------------------- *
     1133 * @brief freeTemporaryBuffers
     1134 ** ------------------------------------------------------------------------------------------------------------- */
     1135void PipelineGenerator::freeTemporaryBuffers(const std::unique_ptr<KernelBuilder> & b, const Kernel * kernel, Value * const requiresTemporaryBuffers) {
     1136    ConstantInt * const ZERO = b->getInt32(0);
     1137    ConstantInt * const ONE = b->getInt32(1);
     1138
     1139    BasicBlock * const freeBuffers = b->CreateBasicBlock();
     1140    BasicBlock * const finishedKernel = b->CreateBasicBlock();
     1141    b->CreateUnlikelyCondBr(requiresTemporaryBuffers, freeBuffers, finishedKernel);
     1142    b->SetInsertPoint(freeBuffers);
     1143    for (unsigned i = 0; i < temporaryBufferPtrs.size(); ++i) {
     1144        if (temporaryBufferPtrs[i]) {
     1145            Value * const originalBuffer = b->CreateLoad(b->CreateGEP(temporaryBufferPtrs[i], { ZERO, ZERO }));
     1146            const Binding & input = kernel->getStreamInput(i);
     1147            b->setBaseAddress(input.getName(), originalBuffer);
     1148            Value * const temporaryBuffer = b->CreateLoad(b->CreateGEP(temporaryBufferPtrs[i], { ZERO, ONE }));
     1149            b->CreateFree(temporaryBuffer);
     1150        }
     1151    }
     1152    b->CreateBr(finishedKernel);
     1153
     1154    b->SetInsertPoint(finishedKernel);
     1155}
    10271156
    10281157/** ------------------------------------------------------------------------------------------------------------- *
     
    10621191    }
    10631192    return final;
    1064 
    10651193}
    10661194
Note: See TracChangeset for help on using the changeset viewer.