Changeset 5454


Ignore:
Timestamp:
May 16, 2017, 4:13:53 PM (2 years ago)
Author:
nmedfort
Message:

Bug fix check-in for DumpTrace, compilation of DoBlock / DoFinalBlock functions. Pablo CodeMotionPass optimized and enabled by default.

Location:
icGREP/icgrep-devel/icgrep
Files:
1 added
35 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.cpp

    r5440 r5454  
    427427    len = CreateZExtOrTrunc(len, sizeTy);
    428428    if (codegen::EnableAsserts) {
     429        DataLayout DL(getModule());
     430        IntegerType * const intPtrTy = getIntPtrTy(DL);
    429431        CreateAssert(len, "CreateMUnmap: length cannot be 0");
    430         Value * const addrValue = CreatePtrToInt(addr, sizeTy);
    431         Value * const pageOffset = CreateURem(addrValue, getSize(getpagesize()));
    432         CreateAssert(CreateICmpEQ(pageOffset, getSize(0)), "CreateMUnmap: addr must be a multiple of the page size");
    433         Value * const boundCheck = CreateICmpULT(addrValue, CreateSub(ConstantInt::getAllOnesValue(sizeTy), len));
     432        Value * const addrValue = CreatePtrToInt(addr, intPtrTy);
     433        Value * const pageOffset = CreateURem(addrValue, ConstantInt::get(intPtrTy, getpagesize()));
     434        CreateAssert(CreateICmpEQ(pageOffset, ConstantInt::getNullValue(intPtrTy)), "CreateMUnmap: addr must be a multiple of the page size");
     435        Value * const boundCheck = CreateICmpULT(addrValue, CreateSub(ConstantInt::getAllOnesValue(intPtrTy), CreateZExtOrTrunc(len, intPtrTy)));
    434436        CreateAssert(boundCheck, "CreateMUnmap: addresses in [addr, addr+len) are outside the valid address space range");
    435437    }
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.cpp

    r5440 r5454  
    349349    const auto alignment = mBitBlockWidth / 8;
    350350    if (codegen::EnableAsserts) {
    351         Value * alignmentOffset = CreateURem(CreatePtrToInt(ptr, getSizeTy()), getSize(alignment));
    352         Value * alignmentCheck = CreateICmpEQ(alignmentOffset, getSize(0));
     351        DataLayout DL(getModule());
     352        IntegerType * const intPtrTy = getIntPtrTy(DL);
     353        Value * alignmentOffset = CreateURem(CreatePtrToInt(ptr, intPtrTy), ConstantInt::get(intPtrTy, alignment));
     354        Value * alignmentCheck = CreateICmpEQ(alignmentOffset, ConstantInt::getNullValue(intPtrTy));
    353355        CreateAssert(alignmentCheck, "CreateBlockAlignedLoad: pointer is unaligned");
    354356    }
     
    359361    const auto alignment = mBitBlockWidth / 8;
    360362    if (codegen::EnableAsserts) {
    361         Value * alignmentOffset = CreateURem(CreatePtrToInt(ptr, getSizeTy()), getSize(alignment));
    362         Value * alignmentCheck = CreateICmpEQ(alignmentOffset, getSize(0));
     363        DataLayout DL(getModule());
     364        IntegerType * const intPtrTy = getIntPtrTy(DL);
     365        Value * alignmentOffset = CreateURem(CreatePtrToInt(ptr, intPtrTy), ConstantInt::get(intPtrTy, alignment));
     366        Value * alignmentCheck = CreateICmpEQ(alignmentOffset, ConstantInt::getNullValue(intPtrTy));
    363367        CreateAssert(alignmentCheck, "CreateBlockAlignedStore: pointer is not aligned");
    364368    }
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5450 r5454  
    277277    for(unsigned i = 0; i < n; ++i){
    278278        StreamSetBuffer * MatchResults = pxDriver.addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
    279         kernel::Kernel * icgrepK = pxDriver.addKernelInstance(make_unique<kernel::ICgrepKernelBuilder>(idb, REs[i]));
     279        kernel::Kernel * icgrepK = pxDriver.addKernelInstance(make_unique<kernel::ICGrepKernel>(idb, REs[i]));
    280280        pxDriver.makeKernelCall(icgrepK, {BasisBits, LineBreakStream}, {MatchResults});
    281281        MatchResultsBufs[i] = MatchResults;
  • icGREP/icgrep-devel/icgrep/icgrep-devel.config

    r4804 r5454  
     1
  • icGREP/icgrep-devel/icgrep/icgrep-devel.files

    r5435 r5454  
    248248wc.cpp
    249249CMakeLists.txt
     250toolchain/workqueue.h
  • icGREP/icgrep-devel/icgrep/kernels/grep_kernel.cpp

    r5446 r5454  
    2929}
    3030
    31 ICgrepKernelBuilder::ICgrepKernelBuilder (const std::unique_ptr<kernel::KernelBuilder> & iBuilder, RE * const re)
    32 : PabloKernel(iBuilder, "",
    33               {Binding{iBuilder->getStreamSetTy(8), "basis"}, Binding{iBuilder->getStreamSetTy(1, 1), "linebreak"}},
    34               {Binding{iBuilder->getStreamSetTy(1, 1), "matches"}},
    35               {},
    36               {})
    37 , mRE(re)
    38 , mSignature(Printer_RE::PrintRE(re)) {
    39     setName("ic" + sha1sum(mSignature));
     31RegularExpressionOptimizer::RegularExpressionOptimizer(re::RE * const re_ast)
     32: mRE(regular_expression_passes(re_ast))
     33, mSignature(Printer_RE::PrintRE(mRE)) {
     34
    4035}
    4136
    42 std::string ICgrepKernelBuilder::makeSignature(const std::unique_ptr<kernel::KernelBuilder> &) {
     37ICGrepKernel::ICGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, RE * const re)
     38: RegularExpressionOptimizer(re)
     39, PabloKernel(iBuilder,
     40              "ic" + sha1sum(mSignature),
     41              {Binding{iBuilder->getStreamSetTy(8), "basis"}, Binding{iBuilder->getStreamSetTy(1, 1), "linebreak"}},
     42              {Binding{iBuilder->getStreamSetTy(1, 1), "matches"}}) {
     43
     44}
     45
     46std::string ICGrepKernel::makeSignature(const std::unique_ptr<kernel::KernelBuilder> &) {
    4347    return mSignature;
    4448}
    4549
    46 void ICgrepKernelBuilder::generatePabloMethod() {
    47     re2pablo_compiler(this, regular_expression_passes(mRE));
     50void ICGrepKernel::generatePabloMethod() {
     51    re2pablo_compiler(this, mRE);
    4852}
    4953
  • icGREP/icgrep-devel/icgrep/kernels/grep_kernel.h

    r5440 r5454  
    1212namespace kernel {
    1313
    14 class ICgrepKernelBuilder: public pablo::PabloKernel {
     14struct RegularExpressionOptimizer {
     15    RegularExpressionOptimizer(re::RE * re_ast);
     16protected:
     17    re::RE * const  mRE;
     18    std::string     mSignature;
     19};
     20
     21class ICGrepKernel : public RegularExpressionOptimizer, public pablo::PabloKernel {
    1522public:
    16     ICgrepKernelBuilder(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, re::RE * const re_ast);   
     23    ICGrepKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder, re::RE * const re_ast);
    1724    std::string makeSignature(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) override;
    1825    bool isCachable() const override { return true; }
    1926protected:
    2027    void generatePabloMethod() override;
    21 private:
    22     re::RE * const  mRE;
    23     std::string     mSignature;
    2428};
    2529
  • icGREP/icgrep-devel/icgrep/kernels/interface.h

    r5440 r5454  
    8686     */
    8787   
    88     const std::string & getName() const { return mKernelName; }
    89 
    90     void setName(std::string newName) { mKernelName = newName; }
     88    const std::string & getName() const {
     89        return mKernelName;
     90    }
    9191       
    9292    virtual bool isCachable() const = 0;
     
    192192    llvm::StructType *                      mKernelStateType;
    193193    unsigned                                mLookAheadPositions;
    194     std::string                             mKernelName;
     194    const std::string                       mKernelName;
    195195    std::vector<llvm::Value *>              mInitialArguments;
    196196    std::vector<Binding>                    mStreamSetInputs;
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5453 r5454  
    140140
    141141    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    142         if ((mStreamSetInputBuffers[i]->getBufferBlocks() > 1) && (mStreamSetInputBuffers[i]->getBufferBlocks() < requiredBlocks)) {
     142        if ((mStreamSetInputBuffers[i]->getBufferBlocks() != 0) && (mStreamSetInputBuffers[i]->getBufferBlocks() < requiredBlocks)) {
    143143            report_fatal_error(getName() + ": " + mStreamSetInputs[i].name + " requires buffer size " + std::to_string(requiredBlocks));
    144144        }
     
    466466    Function * const cp = mCurrentMethod;
    467467    auto ip = idb->saveIP();
     468    std::vector<Value *> availableItemCount(0);
    468469
    469470    /// Check if the do block method is called and create the function if necessary   
    470471    if (!idb->supportsIndirectBr()) {
    471         FunctionType * const type = FunctionType::get(idb->getVoidTy(), {self->getType()}, false);
     472
     473        std::vector<Type *> params;
     474        params.reserve(1 + mAvailableItemCount.size());
     475        params.push_back(self->getType());
     476        for (Value * avail : mAvailableItemCount) {
     477            params.push_back(avail->getType());
     478        }
     479
     480        FunctionType * const type = FunctionType::get(idb->getVoidTy(), params, false);
    472481        mCurrentMethod = Function::Create(type, GlobalValue::InternalLinkage, getName() + DO_BLOCK_SUFFIX, idb->getModule());
    473482        mCurrentMethod->setCallingConv(CallingConv::C);
     
    477486        args->setName("self");
    478487        setInstance(&*args);
    479         idb->SetInsertPoint(idb->CreateBasicBlock("entry"));
     488        availableItemCount.reserve(mAvailableItemCount.size());
     489        while (++args != mCurrentMethod->arg_end()) {
     490            availableItemCount.push_back(&*args);
     491        }
     492        assert (availableItemCount.size() == mAvailableItemCount.size());
     493        mAvailableItemCount.swap(availableItemCount);
     494        idb->SetInsertPoint(BasicBlock::Create(idb->getContext(), "entry", mCurrentMethod));
    480495    }
    481496
     
    527542    }
    528543
    529 
    530     /// Call the do block method if necessary then restore the current function state to the do segement method
    531544    if (!idb->supportsIndirectBr()) {
     545        // Restore the DoSegment function state then call the DoBlock method
    532546        idb->CreateRetVoid();
    533547        mDoBlockMethod = mCurrentMethod;
    534548        idb->restoreIP(ip);
    535         idb->CreateCall(mCurrentMethod, self);
    536549        setInstance(self);
    537550        mCurrentMethod = cp;
     551        mAvailableItemCount.swap(availableItemCount);
     552        CreateDoBlockMethodCall(idb);
    538553    }
    539554
     
    546561    Value * const remainingItemCount = remainingItems;
    547562    auto ip = idb->saveIP();
     563    std::vector<Value *> availableItemCount(0);
    548564
    549565    if (!idb->supportsIndirectBr()) {
    550         FunctionType * const type = FunctionType::get(idb->getVoidTy(), {self->getType(), idb->getSizeTy()}, false);
     566        std::vector<Type *> params;
     567        params.reserve(2 + mAvailableItemCount.size());
     568        params.push_back(self->getType());
     569        params.push_back(idb->getSizeTy());
     570        for (Value * avail : mAvailableItemCount) {
     571            params.push_back(avail->getType());
     572        }
     573        FunctionType * const type = FunctionType::get(idb->getVoidTy(), params, false);
    551574        mCurrentMethod = Function::Create(type, GlobalValue::InternalLinkage, getName() + FINAL_BLOCK_SUFFIX, idb->getModule());
    552575        mCurrentMethod->setCallingConv(CallingConv::C);
     
    558581        remainingItems = &*(++args);
    559582        remainingItems->setName("remainingItems");
    560         idb->SetInsertPoint(idb->CreateBasicBlock("entry"));
     583        availableItemCount.reserve(mAvailableItemCount.size());
     584        while (++args != mCurrentMethod->arg_end()) {
     585            availableItemCount.push_back(&*args);
     586        }
     587        assert (availableItemCount.size() == mAvailableItemCount.size());
     588        mAvailableItemCount.swap(availableItemCount);
     589        idb->SetInsertPoint(BasicBlock::Create(idb->getContext(), "entry", mCurrentMethod));
    561590    }
    562591
     
    568597        idb->CreateRetVoid();
    569598        idb->restoreIP(ip);
    570         idb->CreateCall(mCurrentMethod, {self, remainingItemCount});
     599        setInstance(self);
     600        mAvailableItemCount.swap(availableItemCount);
     601        // Restore the DoSegment function state then call the DoFinal method
     602        std::vector<Value *> args;
     603        args.reserve(2 + mAvailableItemCount.size());
     604        args.push_back(self);
     605        args.push_back(remainingItemCount);
     606        for (Value * avail : mAvailableItemCount) {
     607            args.push_back(avail);
     608        }
     609        idb->CreateCall(mCurrentMethod, args);
    571610        mCurrentMethod = cp;
    572         setInstance(self);
    573611    }
    574612
     
    589627        idb->SetInsertPoint(bb);
    590628    } else {
    591         idb->CreateCall(mDoBlockMethod, getInstance());
     629        std::vector<Value *> args;
     630        args.reserve(1 + mAvailableItemCount.size());
     631        args.push_back(getInstance());
     632        for (Value * avail : mAvailableItemCount) {
     633            args.push_back(avail);
     634        }
     635        idb->CreateCall(mDoBlockMethod, args);
    592636    }
    593637}
     
    602646
    603647    DataLayout DL(kb->getModule());
    604     IntegerType * const intAddressTy = DL.getIntPtrType(kb->getContext());
     648    IntegerType * const intAddrTy = DL.getIntPtrType(kb->getContext());
    605649
    606650    std::vector<Type *> multiBlockParmTypes;
     
    930974        mStreamSetInputBuffers[i]->createBlockAlignedCopy(kb.get(), nextBufPtr, kb->getStreamSetBufferPtr(mStreamSetInputs[i].name), copyItems2);
    931975        Value * itemAddress = kb->getRawInputPointer(mStreamSetInputs[i].name, kb->getInt32(0), processedItemCount[i]);
    932         itemAddress = kb->CreatePtrToInt(itemAddress, intAddressTy);
     976        itemAddress = kb->CreatePtrToInt(itemAddress, intAddrTy);
    933977        Value * baseAddress = inputBlockPtr[i];
    934         baseAddress = kb->CreatePtrToInt(baseAddress, intAddressTy);
    935         Value * tempAddress = kb->CreateAdd(kb->CreatePtrToInt(tempBufPtr, intAddressTy), kb->CreateSub(itemAddress, baseAddress));
     978        baseAddress = kb->CreatePtrToInt(baseAddress, intAddrTy);
     979        Value * tempAddress = kb->CreateAdd(kb->CreatePtrToInt(tempBufPtr, intAddrTy), kb->CreateSub(itemAddress, baseAddress));
    936980        tempArgs.push_back(kb->CreateIntToPtr(tempAddress, mStreamSetInputBuffers[i]->getPointerType()));
    937981    }
     
    944988        blockBasePos.push_back(kb->CreateAnd(producedItemCount[i], blockBaseMask));
    945989        mStreamSetOutputBuffers[i]->createBlockAlignedCopy(kb.get(), tempBufPtr, outputBlockPtr[i], kb->CreateSub(producedItemCount[i], blockBasePos[i]));
    946         Value * itemAddress = kb->CreatePtrToInt(kb->getRawOutputPointer(mStreamSetOutputs[i].name, kb->getInt32(0), producedItemCount[i]), intAddressTy);
     990        Value * itemAddress = kb->CreatePtrToInt(kb->getRawOutputPointer(mStreamSetOutputs[i].name, kb->getInt32(0), producedItemCount[i]), intAddrTy);
    947991        Value * outputPtr = kb->getOutputStreamBlockPtr(mStreamSetOutputs[i].name, kb->getInt32(0));
    948         Value * baseAddress = kb->CreatePtrToInt(outputPtr, intAddressTy);
    949         Value * tempAddress = kb->CreateAdd(kb->CreatePtrToInt(tempBufPtr, intAddressTy), kb->CreateSub(itemAddress, baseAddress));
     992        Value * baseAddress = kb->CreatePtrToInt(outputPtr, intAddrTy);
     993        Value * tempAddress = kb->CreateAdd(kb->CreatePtrToInt(tempBufPtr, intAddrTy), kb->CreateSub(itemAddress, baseAddress));
    950994        tempArgs.push_back(kb->CreateIntToPtr(tempAddress, mStreamSetOutputBuffers[i]->getPointerType()));
    951995    }
     
    9941038}
    9951039
     1040static inline std::string annotateKernelNameWithDebugFlags(std::string && name) {
     1041    if (codegen::EnableAsserts) {
     1042        name += "_EA";
     1043    }
     1044    return name;
     1045}
     1046
    9961047// CONSTRUCTOR
    9971048Kernel::Kernel(std::string && kernelName,
    998                              std::vector<Binding> && stream_inputs,
    999                              std::vector<Binding> && stream_outputs,
    1000                              std::vector<Binding> && scalar_parameters,
    1001                              std::vector<Binding> && scalar_outputs,
    1002                              std::vector<Binding> && internal_scalars)
    1003 : KernelInterface(std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars))
     1049               std::vector<Binding> && stream_inputs,
     1050               std::vector<Binding> && stream_outputs,
     1051               std::vector<Binding> && scalar_parameters,
     1052               std::vector<Binding> && scalar_outputs,
     1053               std::vector<Binding> && internal_scalars)
     1054: KernelInterface(std::move(annotateKernelNameWithDebugFlags(std::move(kernelName)))
     1055                  , std::move(stream_inputs), std::move(stream_outputs)
     1056                  , std::move(scalar_parameters), std::move(scalar_outputs)
     1057                  , std::move(internal_scalars))
    10041058, mCurrentMethod(nullptr)
    10051059, mNoTerminateAttribute(false)
     
    10311085// CONSTRUCTOR
    10321086MultiBlockKernel::MultiBlockKernel(std::string && kernelName,
    1033                                      std::vector<Binding> && stream_inputs,
    1034                                      std::vector<Binding> && stream_outputs,
    1035                                      std::vector<Binding> && scalar_parameters,
    1036                                      std::vector<Binding> && scalar_outputs,
    1037                                              std::vector<Binding> && internal_scalars)
     1087                                   std::vector<Binding> && stream_inputs,
     1088                                   std::vector<Binding> && stream_outputs,
     1089                                   std::vector<Binding> && scalar_parameters,
     1090                                   std::vector<Binding> && scalar_outputs,
     1091                                   std::vector<Binding> && internal_scalars)
    10381092: Kernel(std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars)) {
    10391093   
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5446 r5454  
    8888
    8989    llvm::Module * setModule(const std::unique_ptr<KernelBuilder> & idb, llvm::Module * const module);
    90 
    91     void createKernelStub(const std::unique_ptr<KernelBuilder> & idb, const StreamSetBuffers & inputs, const StreamSetBuffers & outputs, llvm::Module * const kernelModule);
    9290
    9391    llvm::Module * getModule() const {
  • icGREP/icgrep-devel/icgrep/kernels/kernel_builder.h

    r5446 r5454  
    115115
    116116    KernelBuilder(llvm::LLVMContext & C, unsigned registerWidth, unsigned blockWidth, unsigned stride)
    117     : IDISA::IDISA_Builder(C, registerWidth, blockWidth, stride) {
     117    : IDISA::IDISA_Builder(C, registerWidth, blockWidth, stride)
     118    , mKernel(nullptr) {
    118119
    119120    }
  • icGREP/icgrep-devel/icgrep/kernels/source_kernel.cpp

    r5440 r5454  
    8181    // instruct the OS that it can safely drop any fully consumed pages
    8282    Value * consumed = iBuilder->getConsumedItemCount("sourceBuffer");
    83     Type * const consumedTy = consumed->getType();
     83    IntegerType * const consumedTy = cast<IntegerType>(consumed->getType());
    8484    Type * const voidPtrTy = iBuilder->getVoidPtrTy();
     85
     86    DataLayout DL(iBuilder->getModule());
     87    IntegerType * const intAddrTy = iBuilder->getIntPtrTy(DL);
    8588
    8689    // multiply the consumed count by the code unit size then mask off any partial pages
    8790    if (mCodeUnitWidth > 8) {
    88         consumed = iBuilder->CreateMul(consumed, iBuilder->getSize(mCodeUnitWidth / 8));
     91        consumed = iBuilder->CreateMul(consumed, ConstantInt::get(consumedTy, mCodeUnitWidth / 8));
    8992    }
    9093    const auto pageSize = getpagesize();
     
    9497        consumed = iBuilder->CreateSub(consumed, iBuilder->CreateURem(consumed, ConstantInt::get(consumedTy, pageSize)));
    9598    }
     99
    96100    Value * sourceBuffer = iBuilder->getBaseAddress("sourceBuffer");
    97     sourceBuffer = iBuilder->CreatePtrToInt(sourceBuffer, consumedTy);
     101    sourceBuffer = iBuilder->CreatePtrToInt(sourceBuffer, intAddrTy);
     102    if (LLVM_UNLIKELY(intAddrTy->getBitWidth() > consumedTy->getBitWidth())) {
     103        consumed = iBuilder->CreateZExt(consumed, intAddrTy);
     104    } else if (LLVM_UNLIKELY(intAddrTy->getBitWidth() < consumedTy->getBitWidth())) {
     105        sourceBuffer = iBuilder->CreateZExt(sourceBuffer, consumedTy);
     106    }
    98107    Value * consumedBuffer = iBuilder->CreateAdd(sourceBuffer, consumed);
    99 
    100 
    101 
    102 
    103108    Value * readableBuffer = iBuilder->getScalarField("readableBuffer");
    104     readableBuffer = iBuilder->CreatePtrToInt(readableBuffer, consumedTy);
     109    readableBuffer = iBuilder->CreatePtrToInt(readableBuffer, consumedBuffer->getType());
    105110    Value * unnecessaryBytes = iBuilder->CreateSub(consumedBuffer, readableBuffer);
    106111
    107 
    108 
    109112    // avoid calling madvise unless an actual page table change could occur
    110     Value * hasPagesToDrop = iBuilder->CreateICmpEQ(unnecessaryBytes, ConstantInt::getNullValue(unnecessaryBytes->getType()));
     113    Value * hasPagesToDrop = iBuilder->CreateICmpEQ(unnecessaryBytes, ConstantInt::getNullValue(intAddrTy));
    111114    iBuilder->CreateLikelyCondBr(hasPagesToDrop, processSegment, dropPages);
    112115
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r5452 r5454  
    263263
    264264void SwizzledCopybackBuffer::createBlockAlignedCopy(IDISA::IDISA_Builder * const iBuilder, Value * targetBlockPtr, Value * sourceBlockPtr, Value * itemsToCopy) const {
    265     Type * size_ty = iBuilder->getSizeTy();
    266     Type * i8ptr = iBuilder->getInt8PtrTy();
     265    Type * int8PtrTy = iBuilder->getInt8PtrTy();
     266    DataLayout DL(iBuilder->getModule());
     267    IntegerType * const intAddrTy = iBuilder->getIntPtrTy(DL);
     268
    267269    Constant * blockSize = iBuilder->getSize(iBuilder->getBitBlockWidth());
    268270    Function * f = iBuilder->GetInsertBlock()->getParent();
     
    270272    BasicBlock * partialBlockCopy = BasicBlock::Create(iBuilder->getContext(), "partialBlockCopy", f, 0);
    271273    BasicBlock * copyDone = BasicBlock::Create(iBuilder->getContext(), "copyDone", f, 0);
    272     unsigned numStreams = getType()->getArrayNumElements();
    273     unsigned swizzleFactor = iBuilder->getBitBlockWidth()/mFieldWidth;
    274     auto elemTy = getType()->getArrayElementType();
    275     unsigned fieldWidth = isa<ArrayType>(elemTy) ? elemTy->getArrayNumElements() : 1;
     274    const unsigned numStreams = getType()->getArrayNumElements();
     275    const unsigned swizzleFactor = iBuilder->getBitBlockWidth()/mFieldWidth;
     276    const auto elemTy = getType()->getArrayElementType();
     277    const unsigned fieldWidth = isa<ArrayType>(elemTy) ? elemTy->getArrayNumElements() : 1;
    276278    Value * blocksToCopy = iBuilder->CreateUDiv(itemsToCopy, blockSize);
    277279    Value * partialItems = iBuilder->CreateURem(itemsToCopy, blockSize);
     
    279281    Value * partialBlockSourcePtr = iBuilder->CreateGEP(sourceBlockPtr, blocksToCopy);
    280282    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(blocksToCopy, iBuilder->getSize(0)), wholeBlockCopy, partialBlockCopy);
     283
    281284    iBuilder->SetInsertPoint(wholeBlockCopy);
    282     unsigned alignment = iBuilder->getBitBlockWidth() / 8;
    283     Value * copyLength = iBuilder->CreateSub(iBuilder->CreatePtrToInt(partialBlockTargetPtr, size_ty), iBuilder->CreatePtrToInt(targetBlockPtr, size_ty));
    284     iBuilder->CreateMemMove(iBuilder->CreateBitCast(targetBlockPtr, i8ptr), iBuilder->CreateBitCast(sourceBlockPtr, i8ptr), copyLength, alignment);
     285    const unsigned alignment = iBuilder->getBitBlockWidth() / 8;
     286    Value * copyLength = iBuilder->CreateSub(iBuilder->CreatePtrToInt(partialBlockTargetPtr, intAddrTy), iBuilder->CreatePtrToInt(targetBlockPtr, intAddrTy));
     287    iBuilder->CreateMemMove(iBuilder->CreatePointerCast(targetBlockPtr, int8PtrTy), iBuilder->CreatePointerCast(sourceBlockPtr, int8PtrTy), copyLength, alignment);
    285288    iBuilder->CreateCondBr(iBuilder->CreateICmpUGT(partialItems, iBuilder->getSize(0)), partialBlockCopy, copyDone);
    286289    iBuilder->SetInsertPoint(partialBlockCopy);
     
    290293        Value * strmTargetPtr = iBuilder->CreateGEP(partialBlockTargetPtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
    291294        Value * strmSourcePtr = iBuilder->CreateGEP(partialBlockSourcePtr, {iBuilder->getInt32(0), iBuilder->getInt32(strm)});
    292         iBuilder->CreateMemMove(iBuilder->CreateBitCast(strmTargetPtr, i8ptr), iBuilder->CreateBitCast(strmSourcePtr, i8ptr), copyBytes, alignment);
     295        iBuilder->CreateMemMove(iBuilder->CreatePointerCast(strmTargetPtr, int8PtrTy), iBuilder->CreatePointerCast(strmSourcePtr, int8PtrTy), copyBytes, alignment);
    293296    }
    294297    iBuilder->CreateBr(copyDone);
     298
    295299    iBuilder->SetInsertPoint(copyDone);
    296300}
     
    475479
    476480SourceBuffer::SourceBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, unsigned MemoryAddressSpace, unsigned StructAddressSpace)
    477 : StreamSetBuffer(BufferKind::SourceBuffer, type, StructType::get(resolveStreamSetType(b, type)->getPointerTo(MemoryAddressSpace), b->getSizeTy(), nullptr), 1, StructAddressSpace) {
     481: StreamSetBuffer(BufferKind::SourceBuffer, type, StructType::get(resolveStreamSetType(b, type)->getPointerTo(MemoryAddressSpace), b->getSizeTy(), nullptr), 0, StructAddressSpace) {
    478482    mUniqueID = "B";
    479483    if (MemoryAddressSpace != 0 || StructAddressSpace != 0) {
     
    483487
    484488ExternalBuffer::ExternalBuffer(const std::unique_ptr<kernel::KernelBuilder> & b, Type * type, llvm::Value * addr, unsigned AddressSpace)
    485 : StreamSetBuffer(BufferKind::ExternalBuffer, type, resolveStreamSetType(b, type), 1, AddressSpace) {
     489: StreamSetBuffer(BufferKind::ExternalBuffer, type, resolveStreamSetType(b, type), 0, AddressSpace) {
    486490    mUniqueID = "E";
    487491    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
  • icGREP/icgrep-devel/icgrep/pablo/arithmetic.h

    r5283 r5454  
    4848
    4949#define CREATE_OPERATOR_TYPE(Name) \
    50 class Name : public Operator { \
     50class Name final : public Operator { \
    5151    friend class PabloBlock; \
    5252public: \
  • icGREP/icgrep-devel/icgrep/pablo/boolean.h

    r5267 r5454  
    66namespace pablo {
    77
    8 class And : public Variadic {
     8class And final : public Variadic {
    99    friend class PabloBlock;
    1010public:
     
    3434};
    3535
    36 class Or : public Variadic {
     36class Or final : public Variadic {
    3737    friend class PabloBlock;
    3838public:
     
    6262};
    6363
    64 class Xor : public Variadic {
     64class Xor final : public Variadic {
    6565    friend class PabloBlock;
    6666public:
     
    8989};
    9090
    91 class Not : public Statement {
     91class Not final : public Statement {
    9292    friend class PabloBlock;
    9393public:
     
    111111};
    112112
    113 class Sel : public Statement {
     113class Sel final : public Statement {
    114114    friend class PabloBlock;
    115115public:
  • icGREP/icgrep-devel/icgrep/pablo/branch.h

    r5371 r5454  
    5151};
    5252
    53 class If : public Branch {
     53class If final : public Branch {
    5454    friend class PabloBlock;
    5555    friend class Statement;
     
    6666};
    6767
    68 class While : public Branch {
     68class While final : public Branch {
    6969    friend class PabloBlock;
    7070    friend class Statement;
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/booleanreassociationpass.cpp

    r5240 r5454  
    720720    cliques.erase(cliques.begin(), end);
    721721
    722     return std::move(cliques);
     722    return cliques;
    723723}
    724724
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/codemotionpass.cpp

    r5283 r5454  
    66#include <pablo/pe_var.h>
    77#include <boost/container/flat_set.hpp>
    8 #include <vector>
    98#ifndef NDEBUG
    109#include <pablo/analysis/pabloverifier.hpp>
    1110#endif
    1211
    13 using namespace boost;
    14 using namespace boost::container;
    1512using namespace llvm;
    1613
    1714namespace pablo {
    18 
    19 /** ------------------------------------------------------------------------------------------------------------- *
    20  * @brief optimize
    21  ** ------------------------------------------------------------------------------------------------------------- */
    22 bool CodeMotionPass::optimize(PabloKernel * kernel) {
    23     CodeMotionPass::movement(kernel->getEntryBlock());
    24     #ifndef NDEBUG
    25     PabloVerifier::verify(kernel, "post-code-motion");
    26     #endif
    27     return true;
    28 }
    29 
    30 /** ------------------------------------------------------------------------------------------------------------- *
    31  * @brief movement
    32  ** ------------------------------------------------------------------------------------------------------------- */
    33 void CodeMotionPass::movement(PabloBlock * const block) {
    34     sink(block);
    35     for (Statement * stmt : *block) {
    36         if (isa<If>(stmt)) {
    37             movement(cast<If>(stmt)->getBody());
    38         } else if (isa<While>(stmt)) {
    39             movement(cast<While>(stmt)->getBody());
    40             // TODO: if we analyzed the probability of this loop being executed once, twice, or many times, we could
    41             // determine whether hoisting will helpful or harmful to the expected run time.
    42             hoistLoopInvariants(cast<While>(stmt));
    43         }
    44     }
    45 }
    46 
    47 /** ------------------------------------------------------------------------------------------------------------- *
    48  * @brief depthOf
    49  ** ------------------------------------------------------------------------------------------------------------- */
    50 inline static int depthOf(PabloBlock * scope) {
    51     int depth = 0;
    52     while (scope) {
    53         ++depth;
    54         scope = scope->getPredecessor();
    55     }
    56     return depth;
    57 }
    58 
    59 /** ------------------------------------------------------------------------------------------------------------- *
    60  * @brief findLCA
    61  ** ------------------------------------------------------------------------------------------------------------- */
    62 inline PabloBlock * getLCA(PabloBlock * scope1, PabloBlock * scope2) {
    63     int depth1 = depthOf(scope1);
    64     int depth2 = depthOf(scope2);
    65     // If one of these scopes is nested deeper than the other, scan upwards through
    66     // the scope tree until both scopes are at the same depth.
    67     while (depth1 > depth2) {
    68         scope1 = scope1->getPredecessor();
    69         --depth1;
    70     }
    71     while (depth1 < depth2) {
    72         scope2 = scope2->getPredecessor();
    73         --depth2;
    74     }
    75     // Then iteratively step backwards until we find a matching set of scopes; this
    76     // must be the LCA of our original scopes.
    77     while (scope1 != scope2) {
    78         assert (scope1 && scope2);
    79         scope1 = scope1->getPredecessor();
    80         scope2 = scope2->getPredecessor();
    81     }
    82     return scope1;
    83 }
    8415
    8516/** ------------------------------------------------------------------------------------------------------------- *
     
    9425        }
    9526    }
    96     inline bool count(T const item) const {
     27    inline bool contains(T const item) const {
    9728        const auto i = std::lower_bound(std::vector<T>::begin(), std::vector<T>::end(), item);
    9829        return (i != std::vector<T>::end() && *i == item);
     
    10435using UserSet = SetQueue<Statement *>;
    10536
    106 /** ------------------------------------------------------------------------------------------------------------- *
    107  * @brief getScopesOfAllUsers
    108  ** ------------------------------------------------------------------------------------------------------------- */
    109 inline void getScopesOfAllUsers(PabloAST * expr, ScopeSet & scopes) {
    110     for (PabloAST * use : expr->users()) {
    111         if (LLVM_LIKELY(isa<Statement>(use))) {
    112             scopes.insert(cast<Statement>(use)->getParent());
    113         } else if (LLVM_UNLIKELY(isa<PabloKernel>(use))) {
    114             scopes.insert(cast<PabloKernel>(use)->getEntryBlock());
    115         }
    116     }
    117 }
    118 
    119 /** ------------------------------------------------------------------------------------------------------------- *
    120  * @brief getInScopeDominatorsOfAllUsers
    121  ** ------------------------------------------------------------------------------------------------------------- */
    122 inline void getInScopeDominatorsOfAllUsers(PabloAST * expr, UserSet & users, PabloBlock * const block) {
    123     for (PabloAST * use : expr->users()) {
    124         if (LLVM_LIKELY(isa<Statement>(use))) {
    125             Statement * user = cast<Statement>(use);
    126             PabloBlock * parent = user->getParent();
    127             while (parent != block) {
    128                 assert (parent);
    129                 user = parent->getBranch();
    130                 parent = parent->getPredecessor();
    131             }
    132             users.insert(user);
    133         }
    134     }
    135 }
    136 
    137 /** ------------------------------------------------------------------------------------------------------------- *
    138  * @brief sinkIfAcceptableTarget
    139  *
    140  * Scan through this statement's users to see whether they're all in a nested scope. If not, check whether the
    141  * statement can be moved past a branch statement within the same scope.
    142  ** ------------------------------------------------------------------------------------------------------------- */
    143 inline void sinkIfAcceptableTarget(Statement * const stmt, PabloBlock * const block, ScopeSet & scopes, UserSet & users) {
    144     assert (scopes.empty());
    145     if (LLVM_UNLIKELY(isa<Branch>(stmt))) {
    146         for (Var * def : cast<Branch>(stmt)->getEscaped()) {
    147             getScopesOfAllUsers(def, scopes);
    148         }
    149     } else {
    150         getScopesOfAllUsers(isa<Assign>(stmt) ? cast<Assign>(stmt)->getVariable() : stmt, scopes);
    151     }   
    152     if (LLVM_UNLIKELY(scopes.empty())) {
    153         assert (!isa<Assign>(stmt));
    154         // should not occur unless we have a branch with no escaped vars or a statement
    155         // that has no users. In either event, the statement itself should be removed.
    156         stmt->eraseFromParent(true);
    157         return;
    158     }
    159     while (scopes.size() > 1) {
    160         PabloBlock * scope1 = scopes.back(); scopes.pop_back();
    161         PabloBlock * scope2 = scopes.back(); scopes.pop_back();
    162         scopes.insert(getLCA(scope1, scope2));
    163     }
    164     PabloBlock * const scope = scopes.back(); scopes.clear();
    165     if (LLVM_LIKELY(scope == block)) {
    166         assert (users.empty());
     37using LoopVariants = boost::container::flat_set<const PabloAST *>;
     38
     39struct CodeMotionPassContainer {
     40
     41    /** ------------------------------------------------------------------------------------------------------------- *
     42     * @brief depthOf
     43     ** ------------------------------------------------------------------------------------------------------------- */
     44    static int depthOf(PabloBlock * scope) {
     45        int depth = 0;
     46        while (scope) {
     47            ++depth;
     48            scope = scope->getPredecessor();
     49        }
     50        return depth;
     51    }
     52
     53    /** ------------------------------------------------------------------------------------------------------------- *
     54     * @brief getLCA
     55     ** ------------------------------------------------------------------------------------------------------------- */
     56    PabloBlock * getLCA(PabloBlock * scope1, PabloBlock * scope2) {
     57        int depth1 = depthOf(scope1);
     58        int depth2 = depthOf(scope2);
     59        // If one of these scopes is nested deeper than the other, scan upwards through
     60        // the scope tree until both scopes are at the same depth.
     61        while (depth1 > depth2) {
     62            scope1 = scope1->getPredecessor();
     63            --depth1;
     64        }
     65        while (depth1 < depth2) {
     66            scope2 = scope2->getPredecessor();
     67            --depth2;
     68        }
     69        // Then iteratively step backwards until we find a matching set of scopes; this
     70        // must be the LCA of our original scopes.
     71        while (scope1 != scope2) {
     72            assert (scope1 && scope2);
     73            scope1 = scope1->getPredecessor();
     74            scope2 = scope2->getPredecessor();
     75        }
     76        return scope1;
     77    }
     78
     79    /** ------------------------------------------------------------------------------------------------------------- *
     80     * @brief getScopesOfAllUsers
     81     ** ------------------------------------------------------------------------------------------------------------- */
     82    void getScopesOfAllUsers(PabloAST * expr) {
     83        for (PabloAST * use : expr->users()) {
     84            if (LLVM_LIKELY(isa<Statement>(use))) {
     85                mScopes.insert(cast<Statement>(use)->getParent());
     86            } else if (LLVM_UNLIKELY(isa<PabloKernel>(use))) {
     87                mScopes.insert(cast<PabloKernel>(use)->getEntryBlock());
     88            }
     89        }
     90    }
     91
     92    /** ------------------------------------------------------------------------------------------------------------- *
     93     * @brief getInScopeDominatorsOfAllUsers
     94     ** ------------------------------------------------------------------------------------------------------------- */
     95    void getInScopeDominatorsOfAllUsers(PabloAST * expr, PabloBlock * const block) {
     96        for (PabloAST * use : expr->users()) {
     97            if (LLVM_LIKELY(isa<Statement>(use))) {
     98                Statement * user = cast<Statement>(use);
     99                PabloBlock * parent = user->getParent();
     100                while (parent != block) {
     101                    assert (parent);
     102                    user = parent->getBranch();
     103                    parent = parent->getPredecessor();
     104                }
     105                mUsers.insert(user);
     106            }
     107        }
     108    }
     109
     110    /** ------------------------------------------------------------------------------------------------------------- *
     111     * @brief sinkIfAcceptableTarget
     112     *
     113     * Scan through this statement's users to see whether they're all in a nested scope. If not, check whether the
     114     * statement can be moved past a branch statement within the same scope.
     115     ** ------------------------------------------------------------------------------------------------------------- */
     116    void sinkIfAcceptableTarget(Statement * const stmt, PabloBlock * const block) {
     117        assert (mScopes.empty() && mUsers.empty());
    167118        if (LLVM_UNLIKELY(isa<Branch>(stmt))) {
    168119            for (Var * def : cast<Branch>(stmt)->getEscaped()) {
    169                 getInScopeDominatorsOfAllUsers(def, users, block);
     120                getScopesOfAllUsers(def);
    170121            }
    171122        } else {
    172             getInScopeDominatorsOfAllUsers(isa<Assign>(stmt) ? cast<Assign>(stmt)->getVariable() : stmt, users, block);
    173         }
    174         Branch * branch = nullptr;
    175         Statement * temp = stmt;
    176         for (;;) {
    177             temp = temp->getNextNode();
    178             if (temp == nullptr || users.count(temp)) {
    179                 if (branch) {
    180                     // we can move the statement past a branch within its current scope
    181                     stmt->insertAfter(branch);
    182                 }
    183                 break;
    184             }
    185             if (isa<Branch>(temp)) {
    186                 branch = cast<Branch>(temp);
    187             }
    188         }
    189         users.clear();
    190     } else { // test whether the LCA scope is nested within this scope.
    191         PabloBlock * temp = scope;
    192         for (;;) {
    193             temp = temp->getPredecessor();
    194             if (temp == nullptr) {
    195                 break;
    196             } else if (temp == block) {
    197                 // we can move the statement into a nested scope
    198                 stmt->insertBefore(scope->front());
    199                 break;
    200             }
    201         }
    202     }
     123            getScopesOfAllUsers(isa<Assign>(stmt) ? cast<Assign>(stmt)->getVariable() : stmt);
     124        }
     125        if (LLVM_UNLIKELY(mScopes.empty())) {
     126            assert (!isa<Assign>(stmt));
     127            // should not occur unless we have a branch with no escaped vars or a statement
     128            // that has no users. In either event, the statement itself should be removed.
     129            stmt->eraseFromParent(true);
     130            return;
     131        }
     132        while (mScopes.size() > 1) {
     133            PabloBlock * scope1 = mScopes.back(); mScopes.pop_back();
     134            PabloBlock * scope2 = mScopes.back(); mScopes.pop_back();
     135            mScopes.insert(getLCA(scope1, scope2));
     136        }
     137        PabloBlock * const scope = mScopes.back(); mScopes.clear();
     138        if (LLVM_LIKELY(scope == block)) {
     139            assert (mUsers.empty());
     140            if (LLVM_UNLIKELY(isa<Branch>(stmt))) {
     141                for (Var * def : cast<Branch>(stmt)->getEscaped()) {
     142                    getInScopeDominatorsOfAllUsers(def, block);
     143                }
     144            } else {
     145                getInScopeDominatorsOfAllUsers(isa<Assign>(stmt) ? cast<Assign>(stmt)->getVariable() : stmt, block);
     146            }
     147            Branch * branch = nullptr;
     148            Statement * temp = stmt;
     149            for (;;) {
     150                temp = temp->getNextNode();
     151                if (temp == nullptr || mUsers.contains(temp)) {
     152                    if (branch) {
     153                        // we can move the statement past a branch within its current scope
     154                        stmt->insertAfter(branch);
     155                    }
     156                    break;
     157                }
     158                if (isa<Branch>(temp)) {
     159                    branch = cast<Branch>(temp);
     160                }
     161            }
     162            mUsers.clear();
     163        } else { // test whether the LCA scope is nested within this scope.
     164            PabloBlock * temp = scope;
     165            for (;;) {
     166                temp = temp->getPredecessor();
     167                if (temp == nullptr) {
     168                    break;
     169                } else if (temp == block) {
     170                    // we can move the statement into a nested scope
     171                    stmt->insertBefore(scope->front());
     172                    break;
     173                }
     174            }
     175        }
     176    }
     177
     178    /** ------------------------------------------------------------------------------------------------------------- *
     179     * @brief doCodeSinking
     180     ** ------------------------------------------------------------------------------------------------------------- */
     181    void doCodeSinking(PabloBlock * const block) {
     182        Statement * stmt = block->back(); // note: reverse AST traversal
     183        while (stmt) {
     184            Statement * const prevNode = stmt->getPrevNode();
     185            sinkIfAcceptableTarget(stmt, block);
     186            stmt = prevNode;
     187        }
     188    }
     189
     190    /** ------------------------------------------------------------------------------------------------------------- *
     191     * @brief hoistLoopInvariants
     192     ** ------------------------------------------------------------------------------------------------------------- */
     193    void hoistLoopInvariants(Branch * const loop) {
     194        assert (mLoopVariants.empty());
     195        for (Var * variant : loop->getEscaped()) {
     196            mLoopVariants.insert(variant);
     197        }
     198        Statement * outerNode = loop->getPrevNode();
     199        Statement * stmt = loop->getBody()->front();
     200        while (stmt) {
     201            if (isa<Branch>(stmt)) {
     202                for (Var * var : cast<Branch>(stmt)->getEscaped()) {
     203                    mLoopVariants.insert(var);
     204                }
     205            } else {
     206                bool invariant = true;
     207                for (unsigned i = 0; i != stmt->getNumOperands(); ++i) {
     208                    if (mLoopVariants.count(stmt->getOperand(i)) != 0) {
     209                        invariant = false;
     210                        break;
     211                    }
     212                }
     213                if (LLVM_UNLIKELY(invariant)) {
     214                    Statement * next = stmt->getNextNode();
     215                    stmt->insertAfter(outerNode);
     216                    outerNode = stmt;
     217                    stmt = next;
     218                } else {
     219                    mLoopVariants.insert(stmt);
     220                    stmt = stmt->getNextNode();
     221                }
     222            }
     223        }
     224        mLoopVariants.clear();
     225    }
     226
     227    /** ------------------------------------------------------------------------------------------------------------- *
     228     * @brief doCodeMovement
     229     ** ------------------------------------------------------------------------------------------------------------- */
     230    void doCodeMovement(PabloBlock * const block) {
     231        doCodeSinking(block); // sink this block's statements before recursing into its nested bodies
     232        for (Statement * stmt : *block) {
     233            if (LLVM_UNLIKELY(isa<Branch>(stmt))) {
     234                doCodeMovement(cast<Branch>(stmt)->getBody());
     235                if (isa<While>(stmt)) {
     236                    // TODO: if we analyzed the probability of this loop being executed once, twice, or many times, we could
     237                    // determine whether hoisting will be helpful or harmful to the expected run time.
     238                    hoistLoopInvariants(cast<While>(stmt)); // runs after the loop body itself has been processed
     239                }
     240            }
     241        }
     242    }
     243
     244private:
     245    ScopeSet        mScopes;
     246    UserSet         mUsers;
     247    LoopVariants    mLoopVariants;
     248};
     249
     250/** ------------------------------------------------------------------------------------------------------------- *
     251 * @brief optimize
     252 ** ------------------------------------------------------------------------------------------------------------- */
     253bool CodeMotionPass::optimize(PabloKernel * kernel) {
     254    CodeMotionPassContainer C; // holds the scratch sets (mScopes, mUsers, mLoopVariants) for this run
     255    C.doCodeMovement(kernel->getEntryBlock()); // process the kernel starting at its entry block
     256    #ifndef NDEBUG
     257    PabloVerifier::verify(kernel, "post-code-motion"); // only compiled when NDEBUG is not defined
     258    #endif
     259    return true; // unconditionally reports true
    203260}
    204261
    205 /** ------------------------------------------------------------------------------------------------------------- *
    206  * @brief sink
    207  ** ------------------------------------------------------------------------------------------------------------- */
    208 inline void CodeMotionPass::sink(PabloBlock * const block) {
    209     ScopeSet scopes;
    210     UserSet users;
    211     Statement * stmt = block->back(); // note: reverse AST traversal
    212     while (stmt) {
    213         Statement * prevNode = stmt->getPrevNode();
    214         sinkIfAcceptableTarget(stmt, block, scopes, users);
    215         stmt = prevNode;
    216     }
     262
    217263}
    218 
    219 /** ------------------------------------------------------------------------------------------------------------- *
    220  * @brief hoistLoopInvariants
    221  ** ------------------------------------------------------------------------------------------------------------- */
    222 void CodeMotionPass::hoistLoopInvariants(While * loop) {
    223     flat_set<const PabloAST *> loopVariants;
    224     for (Var * variant : loop->getEscaped()) {
    225         loopVariants.insert(variant);
    226     }
    227     Statement * outerNode = loop->getPrevNode();
    228     Statement * stmt = loop->getBody()->front();
    229     while (stmt) {
    230         if (isa<Branch>(stmt)) {
    231             for (Var * var : cast<Branch>(stmt)->getEscaped()) {
    232                 loopVariants.insert(var);
    233             }
    234         } else {
    235             bool invariant = true;
    236             for (unsigned i = 0; i != stmt->getNumOperands(); ++i) {
    237                 if (loopVariants.count(stmt->getOperand(i)) != 0) {
    238                     invariant = false;
    239                     break;
    240                 }
    241             }
    242             if (LLVM_UNLIKELY(invariant)) {
    243                 Statement * next = stmt->getNextNode();
    244                 stmt->insertAfter(outerNode);
    245                 outerNode = stmt;
    246                 stmt = next;
    247             } else {
    248                 loopVariants.insert(stmt);
    249                 stmt = stmt->getNextNode();
    250             }
    251         }
    252     }
    253 }
    254 
    255 }
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/codemotionpass.h

    r5270 r5454  
    33
    44namespace pablo {
    5 
    65class PabloKernel;
    7 class PabloBlock;
    8 class Statement;
    9 class While;
    10 class Variadic;
    11 
    126class CodeMotionPass {
    137public:
    14     static bool optimize(PabloKernel * kernel);
    15 protected:
    16     static void movement(PabloBlock * const block);
    17     static void sink(PabloBlock * const block);
    18     static void hoistLoopInvariants(While * loop);
     8    static bool optimize(PabloKernel * const kernel);
    199};
    20 
    2110}
    2211
  • icGREP/icgrep-devel/icgrep/pablo/optimizers/pablo_simplifier.cpp

    r5366 r5454  
    545545void Simplifier::deadCodeElimination(PabloBlock * const block) {
    546546
    547    flat_map<PabloAST *, Assign *> unread;
     547    flat_map<PabloAST *, Assign *> unread;
    548548
    549549    Statement * stmt = block->front();
  • icGREP/icgrep-devel/icgrep/pablo/pablo_kernel.cpp

    r5446 r5454  
    1313#include <kernels/kernel_builder.h>
    1414#include <llvm/IR/Module.h>
    15 // #include "llvm/Support/Debug.h"
    1615
    1716using namespace pablo;
     
    121120
    122121void PabloKernel::prepareKernel(const std::unique_ptr<kernel::KernelBuilder> & iBuilder) {
    123     if (DebugOptionIsSet(DumpTrace)) {
    124         setName(getName() + "_DumpTrace");
    125     }
    126122    mSizeTy = iBuilder->getSizeTy();
    127123    mStreamTy = iBuilder->getStreamTy();
     
    162158}
    163159
     160static inline std::string annotateKernelNameWithDebugFlags(std::string && name) {
     161    if (DebugOptionIsSet(DumpTrace)) {
     162        name += "_DumpTrace";
     163    }
     164    return name;
     165}
     166
    164167PabloKernel::PabloKernel(const std::unique_ptr<KernelBuilder> & b,
    165                          std::string kernelName,
     168                         std::string && kernelName,
    166169                         std::vector<Binding> stream_inputs,
    167170                         std::vector<Binding> stream_outputs,
    168171                         std::vector<Binding> scalar_parameters,
    169172                         std::vector<Binding> scalar_outputs)
    170 : BlockOrientedKernel(std::move(kernelName),
     173: BlockOrientedKernel(std::move(annotateKernelNameWithDebugFlags(std::move(kernelName))),
    171174                      std::move(stream_inputs), std::move(stream_outputs),
    172175                      std::move(scalar_parameters), std::move(scalar_outputs),
  • icGREP/icgrep-devel/icgrep/pablo/pablo_kernel.h

    r5446 r5454  
    125125protected:
    126126
    127     PabloKernel(const std::unique_ptr<kernel::KernelBuilder> & builder, std::string kernelName,
     127    PabloKernel(const std::unique_ptr<kernel::KernelBuilder> & builder,
     128                std::string && kernelName,
    128129                std::vector<Binding> stream_inputs = {},
    129130                std::vector<Binding> stream_outputs = {},
  • icGREP/icgrep-devel/icgrep/pablo/pablo_toolchain.cpp

    r5436 r5454  
    5252static cl::bits<PabloCompilationFlags>
    5353    PabloOptimizationsOptions(cl::values(clEnumVal(DisableSimplification, "Disable Pablo Simplification pass (not recommended)"),
    54                                          clEnumVal(EnableCodeMotion, "Moves statements into the innermost legal If-scope and moves invariants out of While-loops."),
     54                                         clEnumVal(DisableCodeMotion, "Moves statements into the innermost legal If-scope and moves invariants out of While-loops."),
    5555#ifdef ENABLE_MULTIPLEXING
    5656                                         clEnumVal(EnableMultiplexing, "combine Advances whose inputs are mutual exclusive into the fewest number of advances possible (expensive)."),
     
    195195    const timestamp_t optimization_start = read_cycle_counter();
    196196#endif
    197     if (!PabloOptimizationsOptions.isSet(DisableSimplification)) {
     197    if (LLVM_LIKELY(!PabloOptimizationsOptions.isSet(DisableSimplification))) {
    198198        READ_CYCLE_COUNTER(simplification_start);
    199199        Simplifier::optimize(kernel);
     
    230230    }
    231231#endif
    232     if (PabloOptimizationsOptions.isSet(EnableCodeMotion)) {
     232    if (LLVM_LIKELY(!PabloOptimizationsOptions.isSet(DisableCodeMotion))) {
    233233        READ_CYCLE_COUNTER(sinking_start);
    234234        CodeMotionPass::optimize(kernel);
  • icGREP/icgrep-devel/icgrep/pablo/pablo_toolchain.h

    r5295 r5454  
    1818
    1919enum PabloCompilationFlags {
    20     DisableSimplification, EnableCodeMotion,
     20    DisableSimplification, DisableCodeMotion,
    2121    EnableMultiplexing, EnableLowering, EnablePreDistribution, EnablePostDistribution, EnablePrePassScheduling
    2222};
  • icGREP/icgrep-devel/icgrep/pablo/pe_advance.h

    r5267 r5454  
    1313namespace pablo {
    1414
    15 class Advance : public Statement {
     15class Advance final : public Statement {
    1616    friend class PabloBlock;
    1717public:
  • icGREP/icgrep-devel/icgrep/pablo/pe_constant.h

    r5230 r5454  
    66namespace pablo {
    77
    8 class Constant : public PabloAST {
     8class Constant final : public PabloAST {
    99    friend class PabloBlock;
    1010public:
  • icGREP/icgrep-devel/icgrep/pablo/pe_count.h

    r5267 r5454  
    1212namespace pablo {
    1313
    14 class Count : public Statement {
     14class Count final : public Statement {
    1515    friend class PabloBlock;
    1616public:
  • icGREP/icgrep-devel/icgrep/pablo/pe_infile.h

    r5230 r5454  
    1212namespace pablo {
    1313
    14 class InFile : public Statement {
     14class InFile final : public Statement {
    1515    friend class PabloBlock;
    1616public:
     
    3333};
    3434
    35 class AtEOF : public Statement {
     35class AtEOF final : public Statement {
    3636    friend class PabloBlock;
    3737public:
  • icGREP/icgrep-devel/icgrep/pablo/pe_lookahead.h

    r5267 r5454  
    1313namespace pablo {
    1414
    15 class Lookahead : public Statement {
     15class Lookahead final : public Statement {
    1616    friend class PabloBlock;
    1717public:
  • icGREP/icgrep-devel/icgrep/pablo/pe_matchstar.h

    r5230 r5454  
    1212namespace pablo {
    1313
    14 class MatchStar : public Statement {
     14class MatchStar final : public Statement {
    1515    friend class PabloBlock;
    1616public:
  • icGREP/icgrep-devel/icgrep/pablo/pe_ones.h

    r5283 r5454  
    1212namespace pablo {
    1313
    14 class Ones : public PabloAST {
     14class Ones final : public PabloAST {
    1515    friend class PabloBlock;
    1616    friend class PabloKernel;
  • icGREP/icgrep-devel/icgrep/pablo/pe_zeroes.h

    r5283 r5454  
    1212namespace pablo {
    1313
    14 class Zeroes : public PabloAST {
     14class Zeroes final : public PabloAST {
    1515    friend class PabloBlock;
    1616    friend class PabloKernel;
  • icGREP/icgrep-devel/icgrep/pablo/ps_assign.h

    r5230 r5454  
    1212namespace pablo {
    1313
    14 class Assign : public Statement {
     14class Assign final : public Statement {
    1515    friend class PabloBlock;
    1616public:
  • icGREP/icgrep-devel/icgrep/toolchain/toolchain.cpp

    r5446 r5454  
    2828#include <kernels/kernel.h>
    2929#include <sys/stat.h>
    30 #include <thread>
    31 #include <boost/lockfree/queue.hpp>
    3230#include <llvm/IR/Verifier.h>
     31//#include <toolchain/workqueue.h>
     32
    3333
    3434using namespace llvm;
     
    4949
    5050static cl::bits<DebugFlags>
    51 DebugOptions(cl::values(clEnumVal(ShowIR, "Print generated LLVM IR."),
     51DebugOptions(cl::values(clEnumVal(ShowUnoptimizedIR, "Print generated LLVM IR."),
     52                        clEnumVal(ShowIR, "Print optimized LLVM IR."),
    5253                        clEnumVal(VerifyIR, "Run the IR verification pass."),
    5354#ifndef USE_LLVM_3_6
     
    148149    std::string errMessage;
    149150    EngineBuilder builder{std::unique_ptr<Module>(mMainModule)};
     151    builder.setUseOrcMCJITReplacement(true);
    150152    builder.setErrorStr(&errMessage);
    151153    TargetOptions opts = InitTargetOptionsFromCodeGenFlags();
    152154    opts.MCOptions.AsmVerbose = codegen::AsmVerbose;
    153155    builder.setTargetOptions(opts);
    154     builder.setVerifyModules(IN_DEBUG_MODE || codegen::DebugOptionIsSet(codegen::VerifyIR));
     156    builder.setVerifyModules(false);
    155157    CodeGenOpt::Level optLevel = CodeGenOpt::Level::None;
    156158    switch (codegen::OptLevel) {
     
    174176            mCache = new ParabixObjectCache(codegen::ObjectCacheDir);
    175177        }
    176         assert (mCache);
    177178        mEngine->setObjectCache(mCache);
    178179    }
     
    261262void ParabixDriver::linkAndFinalize() {
    262263
    263 //    using WorkQueue = boost::lockfree::queue<Kernel *>;
    264 
    265264    legacy::PassManager PM;
     265    std::unique_ptr<raw_fd_ostream> IROutputStream(nullptr);
     266    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowUnoptimizedIR))) {
     267        if (codegen::IROutputFilename.empty()) {
     268            IROutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
     269        } else {
     270            std::error_code error;
     271            IROutputStream.reset(new raw_fd_ostream(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None));
     272        }
     273        PM.add(createPrintModulePass(*IROutputStream));
     274    }
     275
    266276    if (IN_DEBUG_MODE || LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::VerifyIR))) {
    267277        PM.add(createVerifierPass());
     
    272282    PM.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
    273283    PM.add(createCFGSimplificationPass());
    274 
    275 //    unsigned threadCount = std::thread::hardware_concurrency();
    276 
    277     std::unique_ptr<raw_fd_ostream> IROutputStream(nullptr);
    278284    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
    279 //        threadCount = 1; // If we're dumping IR, disable seperate compilation
    280         if (codegen::IROutputFilename.empty()) {
    281             IROutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
    282         } else {
    283             std::error_code error;
    284             IROutputStream.reset(new raw_fd_ostream(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None));
     285        if (LLVM_LIKELY(IROutputStream == nullptr)) {
     286            if (codegen::IROutputFilename.empty()) {
     287                IROutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
     288            } else {
     289                std::error_code error;
     290                IROutputStream.reset(new raw_fd_ostream(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None));
     291            }
    285292        }
    286293        PM.add(createPrintModulePass(*IROutputStream));
     
    290297    std::unique_ptr<raw_fd_ostream> ASMOutputStream(nullptr);
    291298    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowASM))) {
    292 //        threadCount = 1; // If we're dumping ASM, disable seperate compilation
    293299        if (codegen::ASMOutputFilename.empty()) {
    294300            ASMOutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
     
    303309    #endif
    304310
     311    Module * module = nullptr;
     312
    305313    try {
    306314
    307 //    if (threadCount > 1) {
    308 
    309 //        WorkQueue Q(mPipeline.size());
    310 //        for (Kernel * kernel : mPipeline) {
    311 //            Q.unsynchronized_push(kernel); assert (kernel);
    312 //        }
    313 
    314 //        std::thread compilation_thread[threadCount - 1];
    315 //        for (unsigned i = 0; i < (threadCount - 1); ++i) {
    316 //            compilation_thread[i] = std::thread([&]{
    317 
    318 //                llvm::LLVMContext C;
    319 //                std::unique_ptr<KernelBuilder> kb(IDISA::GetIDISA_Builder(C, mMainModule->getTargetTriple()));
    320 //                kb->setDriver(this);
    321 
    322 //                Kernel * kernel = nullptr;
    323 //                while (Q.pop(kernel)) {
    324 //                    kb->setKernel(kernel);
    325 //                    Module * module = kernel->getModule();
    326 //                    bool uncachedObject = true;
    327 //                    if (mCache && mCache->loadCachedObjectFile(kb, kernel)) {
    328 //                        uncachedObject = false;
    329 //                    }
    330 //                    if (uncachedObject) {
    331 //                        module->setTargetTriple(mMainModule->getTargetTriple());
    332 //                        kernel->generateKernel(kb);
    333 //                        // PM.run(*module);
    334 //                        mEngine->generateCodeForModule(module);
    335 //                    }
    336 //                    // mEngine->addModule(std::unique_ptr<Module>(module));
    337 //                }
    338 //            });
    339 //        }
    340 
    341 //        // PM.run(*mMainModule);
    342 
    343 //        Kernel * kernel = nullptr;
    344 //        while (Q.pop(kernel)) {
    345 //            iBuilder->setKernel(kernel);
    346 //            Module * module = kernel->getModule();
    347 //            bool uncachedObject = true;
    348 //            if (mCache && mCache->loadCachedObjectFile(iBuilder, kernel)) {
    349 //                uncachedObject = false;
    350 //            }
    351 //            if (uncachedObject) {
    352 //                module->setTargetTriple(mMainModule->getTargetTriple());
    353 //                kernel->generateKernel(iBuilder);
    354 //                // PM.run(*module);
    355 //            }
    356 //            mEngine->addModule(std::unique_ptr<Module>(module));
    357 //            mEngine->generateCodeForModule(module);
    358 //        }
    359 
    360 //        for (unsigned i = 0; i < (threadCount - 1); ++i) {
    361 //            compilation_thread[i].join();
    362 //        }
    363 
    364 //        iBuilder->setKernel(nullptr);
    365 
    366 //    } else { // single threaded
    367 
    368315        for (Kernel * const kernel : mPipeline) {
    369 
    370316            iBuilder->setKernel(kernel);
    371             Module * module = kernel->getModule();
     317            module = kernel->getModule();
    372318            bool uncachedObject = true;
    373319            if (mCache && mCache->loadCachedObjectFile(iBuilder, kernel)) {
     
    384330
    385331        iBuilder->setKernel(nullptr);
     332        module = mMainModule;
    386333        PM.run(*mMainModule);
    387334
    388 //    }
    389 
    390     mEngine->finalizeObject();
     335        mEngine->finalizeObject();
    391336
    392337    } catch (const std::exception & e) {
     
    396341}
    397342
     343
     344//void ParabixDriver::linkAndFinalize() {
     345
     346//    legacy::PassManager PM;
     347//    if (IN_DEBUG_MODE || LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::VerifyIR))) {
     348//        PM.add(createVerifierPass());
     349//    }
     350//    PM.add(createPromoteMemoryToRegisterPass()); //Force the use of mem2reg to promote stack variables.
     351//    PM.add(createReassociatePass());             //Reassociate expressions.
     352//    PM.add(createGVNPass());                     //Eliminate common subexpressions.
     353//    PM.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
     354//    PM.add(createCFGSimplificationPass());
     355
     356//    unsigned threadCount = 2; //std::thread::hardware_concurrency();
     357
     358//    std::unique_ptr<raw_fd_ostream> IROutputStream(nullptr);
     359//    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
     360//        threadCount = 1; // If we're dumping IR, disable separate compilation
     361//        if (codegen::IROutputFilename.empty()) {
     362//            IROutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
     363//        } else {
     364//            std::error_code error;
     365//            IROutputStream.reset(new raw_fd_ostream(codegen::IROutputFilename, error, sys::fs::OpenFlags::F_None));
     366//        }
     367//        PM.add(createPrintModulePass(*IROutputStream));
     368//    }
     369
     370//    #ifndef USE_LLVM_3_6
     371//    std::unique_ptr<raw_fd_ostream> ASMOutputStream(nullptr);
     372//    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowASM))) {
     373//        threadCount = 1; // If we're dumping ASM, disable separate compilation
     374//        if (codegen::ASMOutputFilename.empty()) {
     375//            ASMOutputStream.reset(new raw_fd_ostream(STDERR_FILENO, false, false));
     376//        } else {
     377//            std::error_code error;
     378//            ASMOutputStream.reset(new raw_fd_ostream(codegen::ASMOutputFilename, error, sys::fs::OpenFlags::F_None));
     379//        }
     380//        if (LLVM_UNLIKELY(mTarget->addPassesToEmitFile(PM, *ASMOutputStream, TargetMachine::CGFT_AssemblyFile))) {
     381//            report_fatal_error("LLVM error: could not add emit assembly pass");
     382//        }
     383//    }
     384//    #endif
     385
     386//    Module * module = mMainModule;
     387//    WorkQueue<Module *> Q(mPipeline.size());
     388//    std::thread compilation_thread[threadCount - 1];
     389
     390//    try {
     391
     392//        for (unsigned i = 0; i < (threadCount - 1); ++i) {
     393//            compilation_thread[i] = std::thread([this, &Q]{
     394
     395//                InitializeNativeTarget();
     396
     397//                Module * module = nullptr;
     398//                while (Q.pop(module)) {
     399//                    mEngine->addModule(std::unique_ptr<Module>(module));
     400//                    mEngine->generateCodeForModule(module);
     401//                }
     402//            });
     403//        }
     404
     405//        module = mMainModule;
     406//        iBuilder->setKernel(nullptr);
     407//        PM.run(*mMainModule);
     408//        Q.push(mMainModule);
     409
     410//        for (Kernel * const kernel : mPipeline) {
     411//            iBuilder->setKernel(kernel);
     412//            module = kernel->getModule();
     413//            bool uncachedObject = true;
     414//            if (mCache && mCache->loadCachedObjectFile(iBuilder, kernel)) {
     415//                uncachedObject = false;
     416//            }
     417//            if (uncachedObject) {
     418//                module->setTargetTriple(mMainModule->getTargetTriple());
     419//                kernel->generateKernel(iBuilder);
     420//                PM.run(*module);
     421//            }
     422//            Q.push(module);
     423//        }
     424
     425//        for (;;) {
     426//            if (Q.empty()) {
     427//                break;
     428//            } else if (Q.try_pop(module)) {
     429//                mEngine->addModule(std::unique_ptr<Module>(module));
     430//                mEngine->generateCodeForModule(module);
     431//            }
     432//        }
     433
     434//        Q.notify_all();
     435//        for (unsigned i = 0; i < (threadCount - 1); ++i) {
     436//            compilation_thread[i].join();
     437//        }
     438
     439//        assert (Q.empty());
     440
     441//        mEngine->finalizeObject();
     442
     443//    } catch (const std::exception & e) {
     444//        module->dump();
     445//        report_fatal_error(e.what());
     446//    }
     447
     448//}
     449
    398450const std::unique_ptr<KernelBuilder> & ParabixDriver::getBuilder() {
    399451    return iBuilder;
  • icGREP/icgrep-devel/icgrep/toolchain/toolchain.h

    r5446 r5454  
    2929// Command Parameters
    3030enum DebugFlags {
     31    ShowUnoptimizedIR,
    3132    ShowIR,
    3233    VerifyIR,
     
    9495    llvm::Function * LinkFunction(llvm::Module * mod, llvm::StringRef name, llvm::FunctionType * type, void * functionPtr) const;
    9596
    96     void compileSingleThread();
    97 
    98     void compileMultiThread(const unsigned threadCount);
    99 
    100 
    10197private:
    10298    std::unique_ptr<llvm::LLVMContext>                      mContext;
Note: See TracChangeset for help on using the changeset viewer.