Changeset 5398 for icGREP/icgrep-devel


Ignore:
Timestamp:
Apr 7, 2017, 4:59:04 PM (2 years ago)
Author:
nmedfort
Message:

Continued work on processing stdin input. Partial integration of ParabixDriver methods into icgrep and editd. Object cache does not currently work for recursive REs.

Location:
icGREP/icgrep-devel/icgrep
Files:
29 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.cpp

    r5397 r5398  
    8989        IRBuilder<> builder(entry);
    9090        std::vector<Value *> args;
    91         args.push_back(CreateGlobalStringPtr(out.c_str()));
     91        args.push_back(GetString(out.c_str()));
    9292        Value * const name = &*(arg++);
    9393        name->setName("name");
     
    108108    }
    109109    assert (num->getType()->isIntegerTy());
    110     CreateCall(printRegister, {CreateGlobalStringPtr(name.c_str()), num});
     110    CreateCall(printRegister, {GetString(name.c_str()), num});
    111111}
    112112
     
    207207    }
    208208    Value * ptr = CreateCall(fMMap, {addr, size, prot, flags, fd, offset});
    209     CreateAssert(CheckMMapSuccess(ptr), "CreateMMap: mmap failed to allocate memory");
     209    if (codegen::EnableAsserts) {
     210        CreateAssert(CheckMMapSuccess(ptr), "CreateMMap: mmap failed to allocate memory");
     211    }
    210212    return ptr;
    211213}
     
    232234    newSize = CreateZExtOrTrunc(newSize, sizeTy);
    233235    ConstantInt * const flags = ConstantInt::get(intTy, mayMove ? MREMAP_MAYMOVE : 0);
    234     Value * ptr = CreateCall(fMRemap, {addr, oldSize, newSize, flags});   
    235     CreateAssert(CheckMMapSuccess(ptr), "CreateMRemap: mremap failed to allocate memory");
     236    Value * ptr = CreateCall(fMRemap, {addr, oldSize, newSize, flags});
     237    if (codegen::EnableAsserts) {
     238        CreateAssert(CheckMMapSuccess(ptr), "CreateMRemap: mremap failed to allocate memory");
     239    }
    236240    return ptr;
    237241}
     
    249253    }
    250254    addr = CreatePointerCast(addr, voidPtrTy);
    251     CreateAssert(addr, "CreateMUnmap: addr cannot be null");
    252255    size = CreateZExtOrTrunc(size, sizeTy);
    253256    return CreateCall(fMUnmap, {addr, size});
     
    451454            ConstantInt * _11 = getSize(11);
    452455            Value * bytes = CreatePointerCast(CreateMalloc(len), getInt8PtrTy());
    453             CreateMemCpy(bytes, CreateGlobalStringPtr("Assertion `"), _11, 1);
     456            CreateMemCpy(bytes, GetString("Assertion `"), _11, 1);
    454457            CreateMemCpy(CreateGEP(bytes, _11), msg, sz, 1);
    455             CreateMemCpy(CreateGEP(bytes, CreateAdd(sz, _11)), CreateGlobalStringPtr("' failed.\n"), getSize(10), 1);
     458            CreateMemCpy(CreateGEP(bytes, CreateAdd(sz, _11)), GetString("' failed.\n"), getSize(10), 1);
    456459            CreateWriteCall(getInt32(2), bytes, len);
     460
     461
    457462            CreateExit(-1);
    458463            CreateBr(success); // necessary to satisfy the LLVM verifier. this is not actually executed.
     
    461466            restoreIP(ip);
    462467        }
    463         CreateCall(function, {CreateICmpEQ(assertion, Constant::getNullValue(assertion->getType())), CreateGlobalStringPtr(failureMessage), getSize(failureMessage.size())});
     468        CreateCall(function, {CreateICmpEQ(assertion, Constant::getNullValue(assertion->getType())), GetString(failureMessage), getSize(failureMessage.size())});
    464469    }
    465470}
     
    484489}
    485490
     491inline static unsigned ceil_log2(const unsigned v) {
     492    assert ("log2(0) is undefined!" && v != 0);
     493    return 32 - __builtin_clz(v - 1);
     494}
     495
     496Value * CBuilder::CreatePopcount(Value * bits) {
     497    Value * ctpopFunc = Intrinsic::getDeclaration(mMod, Intrinsic::ctpop, bits->getType());
     498    return CreateCall(ctpopFunc, bits);
     499}
     500
     501Value * CBuilder::CreateCountForwardZeroes(Value * value) {
     502    Value * cttzFunc = Intrinsic::getDeclaration(mMod, Intrinsic::cttz, value->getType());
     503    return CreateCall(cttzFunc, {value, ConstantInt::getFalse(getContext())});
     504}
     505
     506Value * CBuilder::CreateCountReverseZeroes(Value * value) {
     507    Value * ctlzFunc = Intrinsic::getDeclaration(mMod, Intrinsic::ctlz, value->getType());
     508    return CreateCall(ctlzFunc, {value, ConstantInt::getFalse(getContext())});
     509}
     510
    486511Value * CBuilder::CreateCeilLog2(Value * value) {
    487512    IntegerType * ty = cast<IntegerType>(value->getType());
    488513    CreateAssert(value, "CreateCeilLog2: value cannot be zero");
    489     Value * m = CreateCall(Intrinsic::getDeclaration(mMod, Intrinsic::ctlz, ty), {value, ConstantInt::getFalse(getContext())});
    490     Value * isPowOf2 = CreateICmpEQ(CreateAnd(value, CreateSub(value, ConstantInt::get(ty, 1))), ConstantInt::getNullValue(ty));
    491     m = CreateSub(ConstantInt::get(m->getType(), ty->getBitWidth() - 1), m);
    492     return CreateSelect(isPowOf2, m, CreateAdd(m, ConstantInt::get(m->getType(), 1)));
     514    Value * m = CreateCountForwardZeroes(CreateSub(value, ConstantInt::get(ty, 1)));
     515    return CreateSub(ConstantInt::get(m->getType(), ty->getBitWidth() - 1), m);
     516}
     517
     518Value * CBuilder::GetString(StringRef Str) {
     519    Value * ptr = mMod->getGlobalVariable(Str, true);
     520    if (ptr == nullptr) {
     521        ptr = CreateGlobalString(Str, Str);
     522    }
     523    Value * zero = getInt32(0);
     524    return CreateInBoundsGEP(ptr, { zero, zero });
    493525}
    494526
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.h

    r5389 r5398  
    105105    void CallPrintInt(const std::string & name, llvm::Value * const value);
    106106   
     107    llvm::Value * GetString(llvm::StringRef Str);
     108
    107109    inline llvm::IntegerType * getSizeTy() const {
    108110        return mSizeType;
     
    123125
    124126    virtual llvm::StoreInst *  CreateAtomicStoreRelease(llvm::Value * val, llvm::Value * ptr);
    125    
     127
    126128    void CreateAssert(llvm::Value * assertion, llvm::StringRef failureMessage);
    127129
     
    138140    }
    139141
     142    llvm::Value * CreatePopcount(llvm::Value * bits);
     143
     144    llvm::Value * CreateCountForwardZeroes(llvm::Value * value);
     145
     146    llvm::Value * CreateCountReverseZeroes(llvm::Value * value);
     147
    140148    llvm::Value * CreateCeilLog2(llvm::Value * value);
    141149
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.cpp

    r5379 r5398  
    1313#include <llvm/Support/raw_ostream.h>
    1414#include <llvm/IR/TypeBuilder.h>
     15#include <toolchain.h>
    1516
    1617using namespace llvm;
     
    5556        IRBuilder<> builder(entry);
    5657        std::vector<Value *> args;
    57         args.push_back(CreateGlobalStringPtr(out.str().c_str()));
     58        args.push_back(GetString(out.str().c_str()));
    5859        Value * const name = &*(arg++);
    5960        name->setName("name");
     
    7172        printRegister = function;
    7273    }
    73     CreateCall(printRegister, {CreateGlobalStringPtr(name.c_str()), CreateBitCast(value, mBitBlockType)});
     74    CreateCall(printRegister, {GetString(name.c_str()), CreateBitCast(value, mBitBlockType)});
    7475}
    7576
     
    355356}
    356357
     358LoadInst * IDISA_Builder::CreateBlockAlignedLoad(Value * const ptr) {
     359    const auto alignment = mBitBlockWidth / 8;
     360    if (codegen::EnableAsserts) {
     361        Value * alignmentOffset = CreateURem(CreatePtrToInt(ptr, getSizeTy()), getSize(alignment));
     362        Value * alignmentCheck = CreateICmpEQ(alignmentOffset, getSize(0));
     363        CreateAssert(alignmentCheck, "CreateBlockAlignedLoad: pointer is unaligned");
     364    }
     365    return CreateAlignedLoad(ptr, alignment);
     366}
     367
     368void IDISA_Builder::CreateBlockAlignedStore(Value * const value, Value * const ptr) {
     369    const auto alignment = mBitBlockWidth / 8;
     370    if (codegen::EnableAsserts) {
     371        Value * alignmentOffset = CreateURem(CreatePtrToInt(ptr, getSizeTy()), getSize(alignment));
     372        Value * alignmentCheck = CreateICmpEQ(alignmentOffset, getSize(0));
     373        CreateAssert(alignmentCheck, "CreateBlockAlignedStore: pointer is not aligned");
     374    }
     375    CreateAlignedStore(value, ptr, alignment);
     376}
     377
    357378IDISA_Builder::IDISA_Builder(Module * m, unsigned archBitWidth, unsigned bitBlockWidth, unsigned stride, const bool SupportsIndirectBr, unsigned CacheAlignment)
    358379: CBuilder(m, archBitWidth, SupportsIndirectBr, CacheAlignment)
  • icGREP/icgrep-devel/icgrep/IR_Gen/idisa_builder.h

    r5377 r5398  
    133133    unsigned            mBitBlockWidth;
    134134    unsigned            mStride;
     135
    135136    llvm::VectorType *  mBitBlockType;
    136137    llvm::Constant *    mZeroInitializer;
     
    138139    llvm::Constant *    mPrintRegisterFunction;
    139140};
    140 
    141 inline llvm::LoadInst * IDISA_Builder::CreateBlockAlignedLoad(llvm::Value * const ptr) {
    142     return CreateAlignedLoad(ptr, mBitBlockWidth / 8);
    143 }
    144141
    145142inline llvm::LoadInst * IDISA_Builder::CreateBlockAlignedLoad(llvm::Value * const ptr, llvm::Value * const index) {
     
    149146inline llvm::LoadInst * IDISA_Builder::CreateBlockAlignedLoad(llvm::Value * const ptr, std::initializer_list<llvm::Value *> indices) {
    150147    return CreateBlockAlignedLoad(CreateGEP(ptr, indices));
    151 }
    152 
    153 inline void IDISA_Builder::CreateBlockAlignedStore(llvm::Value * const value, llvm::Value * const ptr) {
    154     CreateAlignedStore(value, ptr, mBitBlockWidth / 8);
    155148}
    156149
     
    163156}
    164157   
    165 
    166    
    167158}
    168159#endif // IDISA_BUILDER_H
  • icGREP/icgrep-devel/icgrep/IR_Gen/tracegen.h

    r5272 r5398  
    5151unsigned TraceTool::newTraceVar(std::string traceName) {
    5252    std::string formatString = traceName + " = %" PRIx64 "\n";
    53     mTraceFormatString.push_back(iBuilder->CreateGlobalStringPtr(formatString.c_str()));
     53    mTraceFormatString.push_back(iBuilder->GetString(formatString.c_str()));
    5454    return mTraceVarCount++;
    5555}
  • icGREP/icgrep-devel/icgrep/editd/editd.cpp

    r5377 r5398  
    280280}
    281281
    282 Function * preprocessPipeline(Module * m, IDISA::IDISA_Builder * iBuilder) {
     282//
     283// Handler for errors reported through llvm::report_fatal_error.  Report
     284// and signal error code 2 (grep convention).
     285//
     286static void error_handler(void *UserData, const std::string &Message, bool GenCrashDiag) {
     287    throw std::runtime_error(Message);
     288}
     289
     290
     291void preprocessPipeline(ParabixDriver & pxDriver) {
     292
     293    llvm::install_fatal_error_handler(&error_handler);
     294
     295    IDISA::IDISA_Builder * iBuilder = pxDriver.getIDISA_Builder();
     296    Module * m = iBuilder->getModule();
    283297    Type * mBitBlockType = iBuilder->getBitBlockType();
    284298   
     
    298312    Value * const outputStream = &*(args++);
    299313    outputStream->setName("output");
    300    
     314    iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main));
     315
    301316    ExternalFileBuffer ByteStream(iBuilder, iBuilder->getStreamSetTy(1, 8));
    302317    SingleBlockBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8));
     
    304319
    305320    MMapSourceKernel mmapK(iBuilder);
    306     std::unique_ptr<Module> mmapM = mmapK.createKernelModule({}, {&ByteStream});
    307321    mmapK.setInitialArguments({fileSize});
    308    
    309     S2PKernel  s2pk(iBuilder);
    310     std::unique_ptr<Module> s2pM = s2pk.createKernelModule({&ByteStream}, {&BasisBits});
     322    pxDriver.addKernelCall(mmapK, {}, {&ByteStream});
     323   
     324    S2PKernel s2pk(iBuilder);
     325    pxDriver.addKernelCall(s2pk, {&ByteStream}, {&BasisBits});
    311326
    312327    PabloKernel ccck(iBuilder, "ccc",
     
    315330
    316331    buildPreprocessKernel(ccck, iBuilder);
    317    
    318     std::unique_ptr<Module> cccM = ccck.createKernelModule({&BasisBits}, {&CCResults});
    319    
    320     mmapK.addKernelDeclarations(m);
    321     s2pk.addKernelDeclarations(m);
    322     ccck.addKernelDeclarations(m);
    323    
    324     iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
    325 
     332    pxDriver.addKernelCall(ccck, {&BasisBits}, {&CCResults});
     333         
    326334    ByteStream.setStreamSetBuffer(inputStream);
     335
    327336    BasisBits.allocateBuffer();
     337
    328338    CCResults.setStreamSetBuffer(outputStream);
    329339   
    330     generatePipelineLoop(iBuilder, {&mmapK, &s2pk, &ccck});
    331        
     340    pxDriver.generatePipelineIR();
     341
    332342    iBuilder->CreateRetVoid();
    333    
    334     Linker L(*m);
    335     L.linkInModule(std::move(mmapM));
    336     L.linkInModule(std::move(s2pM));
    337     L.linkInModule(std::move(cccM));
    338    
    339     return main;
     343
     344    errs() << "JitCompileMain()\n";
     345
     346    pxDriver.JITcompileMain();
     347
     348    errs() << "linkAndFinalize()\n";
     349
     350    pxDriver.linkAndFinalize();
     351
     352    errs() << "done\n";
    340353}
    341354
     
    343356typedef void (*preprocessFunctionType)(char * byte_data, size_t filesize, char * output_data);
    344357
    345 preprocessFunctionType preprocessCodeGen() {
    346                            
     358preprocessFunctionType preprocessCodeGen() {                           
    347359    LLVMContext TheContext;
    348360    Module * M = new Module("preprocess", TheContext);
    349361    IDISA::IDISA_Builder * idb = IDISA::GetIDISA_Builder(M);
    350 
    351     llvm::Function * main_IR = preprocessPipeline(M, idb);
    352 
    353     ExecutionEngine * preprocessEngine = JIT_to_ExecutionEngine(M);
    354    
    355     preprocessEngine->finalizeObject();
    356 
     362    ParabixDriver pxDriver(idb);
     363    preprocessPipeline(pxDriver);
     364    auto f = reinterpret_cast<preprocessFunctionType>(pxDriver.getPointerToMain());
    357365    delete idb;
    358     return reinterpret_cast<preprocessFunctionType>(preprocessEngine->getPointerToFunction(main_IR));
     366    return f;
    359367}
    360368
     
    740748    return 0;
    741749}
    742 
    743 
    744 
    745 
    746 
    747 
    748 
    749 
    750 
    751 
    752 
    753 
    754 
    755 
    756 
  • icGREP/icgrep-devel/icgrep/editd/editdscan_kernel.cpp

    r5317 r5398  
    1212
    1313namespace kernel {
    14 
    15 Value * generateCountForwardZeroes(IDISA::IDISA_Builder * iBuilder, Value * bits) {
    16     Value * cttzFunc = Intrinsic::getDeclaration(iBuilder->getModule(), Intrinsic::cttz, bits->getType());
    17     return iBuilder->CreateCall(cttzFunc, std::vector<Value *>({bits, ConstantInt::get(iBuilder->getInt1Ty(), 0)}));
    18 }
    1914
    2015void editdScanKernel::generateDoBlockMethod() {
     
    7974
    8075    iBuilder->SetInsertPoint(matchesLoopBlock);
    81     Value * match_pos = iBuilder->CreateAdd(generateCountForwardZeroes(iBuilder, matches_phi), basePos);
     76    Value * match_pos = iBuilder->CreateAdd(iBuilder->CreateCountForwardZeroes(matches_phi), basePos);
    8277    Value * matches_new = iBuilder->CreateAnd(matches_phi, iBuilder->CreateSub(matches_phi, ConstantInt::get(T, 1)));
    8378    matches_phi->addIncoming(matches_new, matchesLoopBlock);
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5393 r5398  
    161161}
    162162
     163#ifdef CUDA_ENABLED
    163164Function * generateGPUKernel(Module * m, IDISA::IDISA_Builder * iBuilder, bool CountOnly){
    164165    Type * const int64ty = iBuilder->getInt64Ty();
     
    265266    return mainCPUFn;
    266267}
    267 
    268 void GrepEngine::multiGrepCodeGen(std::string moduleName, std::vector<re::RE *> REs, bool CountOnly, bool UTF_16, GrepType grepType) {
    269 
    270     isUTF_16 = UTF_16;
    271     Module * M = new Module(moduleName + ":icgrep", getGlobalContext());; 
    272     IDISA::IDISA_Builder * iBuilder = IDISA::GetIDISA_Builder(M);;
    273 
    274     const unsigned segmentSize = codegen::SegmentSize;
    275     const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
    276     const unsigned encodingBits = UTF_16 ? 16 : 8;
    277 
    278     mGrepType = grepType;
    279 
    280     Type * const sizeTy = iBuilder->getSizeTy();
    281     Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(iBuilder->getBitBlockType(), encodingBits), 1), 0);
    282     Type * const resultTy = CountOnly ? sizeTy : iBuilder->getVoidTy();
    283 
    284     Function * mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, sizeTy, sizeTy, nullptr));
    285     mainFn->setCallingConv(CallingConv::C);
    286     iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
    287     Function::arg_iterator args = mainFn->arg_begin();
    288    
    289     Value * inputStream = &*(args++);
    290     inputStream->setName("input");
    291     Value * fileSize = &*(args++);
    292     fileSize->setName("fileSize");
    293     Value * fileIdx = &*(args++);
    294     fileIdx->setName("fileIdx");
    295 
    296     StreamSetBuffer * byteStream = nullptr;
    297     kernel::KernelBuilder * sourceK = nullptr;
    298 //    if (usingStdIn) {
    299 //        byteStream = new ExtensibleBuffer(iBuilder, iBuilder->getStreamSetTy(1, 8));
    300 //        cast<ExtensibleBuffer>(byteStream)->allocateBuffer();
    301 //        sourceK = new kernel::StdInKernel(iBuilder, segmentSize);
    302 //        sourceK->generateKernel({}, {byteStream});
    303 //    } else {
    304         byteStream = new ExternalFileBuffer(iBuilder, iBuilder->getStreamSetTy(1, 8));
    305         cast<ExternalFileBuffer>(byteStream)->setStreamSetBuffer(inputStream);
    306         sourceK = new kernel::MMapSourceKernel(iBuilder, segmentSize);
    307         sourceK->generateKernel({}, {byteStream});
    308         sourceK->setInitialArguments({fileSize});
    309 //    }
    310 
    311     CircularBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8), segmentSize * bufferSegments);
    312     BasisBits.allocateBuffer();
    313 
    314     kernel::S2PKernel  s2pk(iBuilder);
    315     s2pk.generateKernel({byteStream}, {&BasisBits});
    316    
    317     std::vector<pablo::PabloKernel *> icgrepKs;
    318     std::vector<StreamSetBuffer *> MatchResultsBufs;
    319 
    320     for(unsigned i=0; i<REs.size(); i++){   
    321         pablo::PabloKernel * icgrepK = new pablo::PabloKernel(iBuilder, "icgrep"+std::to_string(i), {Binding{iBuilder->getStreamSetTy(8), "basis"}, Binding{iBuilder->getStreamSetTy(1, 1), "linebreak"}});
    322         re::re2pablo_compiler(icgrepK, re::regular_expression_passes(REs[i]), false);
    323         pablo_function_passes(icgrepK);
    324         icgrepKs.push_back(icgrepK);
    325         CircularBuffer * MatchResults = new CircularBuffer(iBuilder, iBuilder->getStreamSetTy(2, 1), segmentSize * bufferSegments);       
    326         MatchResults->allocateBuffer();
    327         MatchResultsBufs.push_back(MatchResults);
    328     }   
    329 
    330     std::vector<kernel::KernelBuilder *> KernelList;
    331     KernelList.push_back(sourceK);
    332     KernelList.push_back(&s2pk);
    333 
    334     CircularBuffer mergedResults(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments);
    335     mergedResults.allocateBuffer();
    336 
    337     kernel::StreamsMerge streamsMergeK(iBuilder, 1, REs.size());
    338     streamsMergeK.generateKernel(MatchResultsBufs, {&mergedResults});
    339 
    340     kernel::LineBreakKernelBuilder linebreakK(iBuilder, "lb", encodingBits);
    341     CircularBuffer LineBreakStream(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments);
    342     LineBreakStream.allocateBuffer();
    343     linebreakK.generateKernel({&BasisBits}, {&LineBreakStream});
    344    
    345     KernelList.push_back(&linebreakK);
    346     for(unsigned i=0; i<REs.size(); i++){
    347         icgrepKs[i]->generateKernel({&BasisBits, &LineBreakStream}, {MatchResultsBufs[i]});
    348         KernelList.push_back(icgrepKs[i]);
    349     }
    350     KernelList.push_back(&streamsMergeK);
    351 
    352     if (CountOnly) {
    353         kernel::MatchCount matchCountK(iBuilder);
    354         matchCountK.generateKernel({&mergedResults}, {}); 
    355 
    356         KernelList.push_back(&matchCountK); 
    357 
    358         generatePipeline(iBuilder, KernelList);
    359         iBuilder->CreateRet(matchCountK.getScalarField(matchCountK.getInstance(), "matchedLineCount"));
    360 
    361     } else {
    362         kernel::ScanMatchKernel scanMatchK(iBuilder, mGrepType, encodingBits);
    363         scanMatchK.generateKernel({byteStream, &mergedResults, &LineBreakStream}, {});
    364         scanMatchK.setInitialArguments({fileIdx});
    365 
    366         KernelList.push_back(&scanMatchK);
    367 
    368         generatePipeline(iBuilder, KernelList);
    369        
    370         iBuilder->CreateRetVoid();
    371     }
    372    
    373     mEngine = JIT_to_ExecutionEngine(M);
    374     ApplyObjectCache(mEngine);
    375     icgrep_Linking(M, mEngine);
    376 
    377 #ifndef NDEBUG
    378     verifyModule(*M, &dbgs());
    379268#endif
    380 
    381     mEngine->finalizeObject();
    382     delete iBuilder;
    383     delete sourceK;
    384     delete byteStream;
    385 
    386     if (CountOnly) {
    387         mGrepFunction_CountOnly = reinterpret_cast<GrepFunctionType_CountOnly>(mEngine->getPointerToFunction(mainFn));
    388     } else {
    389         mGrepFunction = reinterpret_cast<GrepFunctionType>(mEngine->getPointerToFunction(mainFn));
    390     }
    391 
    392 }
    393 
    394 void GrepEngine::grepCodeGen(std::string moduleName, re::RE * re_ast, bool CountOnly, bool UTF_16, GrepType grepType, const bool usingStdIn) {
    395     isUTF_16 = UTF_16;
    396     int addrSpace = 0;
    397     bool CPU_Only = true;
    398     Module * M = nullptr; 
    399     IDISA::IDISA_Builder * iBuilder = nullptr;
    400 
    401 #ifdef CUDA_ENABLED
    402     setNVPTXOption();
    403     if (codegen::NVPTX) {
    404         Module * gpuM = new Module(moduleName+":gpu", getGlobalContext());
    405         IDISA::IDISA_Builder * GPUBuilder = IDISA::GetIDISA_GPU_Builder(gpuM);
    406         M = gpuM;
    407         iBuilder = GPUBuilder;
    408         M->setDataLayout("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64");
    409         M->setTargetTriple("nvptx64-nvidia-cuda");
    410         addrSpace = 1;
    411         CPU_Only = false;
    412         codegen::BlockSize = 64;
    413     }
    414 #endif
    415 
    416     Module * cpuM = new Module(moduleName + ":cpu", getGlobalContext());
    417     IDISA::IDISA_Builder * CPUBuilder = IDISA::GetIDISA_Builder(cpuM);
    418 
    419     if (CPU_Only) {
    420         M = cpuM;
    421         iBuilder = CPUBuilder;
    422     }
    423 
    424     // segment size made available for each call to the mmap source kernel
    425     const unsigned segmentSize = codegen::SegmentSize;
    426     const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
    427     const unsigned encodingBits = UTF_16 ? 16 : 8;
    428 
    429     mGrepType = grepType;
    430 
    431     Type * const size_ty = iBuilder->getSizeTy();
    432     Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(iBuilder->getBitBlockType(), encodingBits), 1), addrSpace);
    433     Type * const resultTy = CountOnly ? size_ty : iBuilder->getVoidTy();
    434 
    435     Function * mainFn = nullptr;
    436     Value * inputStream = nullptr;
    437     Value * fileSize = nullptr;
    438     Value * fileIdx = nullptr;
    439 
    440 #ifdef CUDA_ENABLED   
    441     Value * outputStream = nullptr;
    442     Type * const outputType = PointerType::get(ArrayType::get(iBuilder->getBitBlockType(), 1), addrSpace);
    443     if (codegen::NVPTX){
    444         if (CountOnly){
    445             mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, size_ty, nullptr));
    446             mainFn->setCallingConv(CallingConv::C);
    447             iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
    448             Function::arg_iterator args = mainFn->arg_begin();
    449            
    450             inputStream = &*(args++);
    451             inputStream->setName("input");
    452             fileSize = &*(args++);
    453             fileSize->setName("fileSize");
    454         } else {
    455             mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, size_ty, outputType, nullptr));
    456             mainFn->setCallingConv(CallingConv::C);
    457             iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
    458             Function::arg_iterator args = mainFn->arg_begin();
    459            
    460             inputStream = &*(args++);
    461             inputStream->setName("input");
    462             fileSize = &*(args++);
    463             fileSize->setName("fileSize");
    464             outputStream = &*(args++);
    465             outputStream->setName("output");
    466         }
    467     }
    468 #endif
    469     if (CPU_Only) {
    470         mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, size_ty, size_ty, nullptr));
    471         mainFn->setCallingConv(CallingConv::C);
    472         iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
    473         Function::arg_iterator args = mainFn->arg_begin();
    474        
    475         inputStream = &*(args++);
    476         inputStream->setName("input");
    477         fileSize = &*(args++);
    478         fileSize->setName("fileSize");
    479         fileIdx = &*(args++);
    480         fileIdx->setName("fileIdx");
    481 
    482     } 
    483 
    484     StreamSetBuffer * byteStream = nullptr;
    485     kernel::KernelBuilder * sourceK = nullptr;
    486     if (usingStdIn) {
    487         // TODO: use fstat(STDIN_FILENO) to see if we can mmap the stdin safely and avoid the calls to read
    488 
    489         byteStream = new ExtensibleBuffer(iBuilder, iBuilder->getStreamSetTy(1, 8), segmentSize * bufferSegments);
    490         cast<ExtensibleBuffer>(byteStream)->allocateBuffer();
    491         sourceK = new kernel::StdInKernel(iBuilder, segmentSize);
    492         sourceK->generateKernel({}, {byteStream});
    493     } else {
    494         byteStream = new ExternalFileBuffer(iBuilder, iBuilder->getStreamSetTy(1, 8));
    495         cast<ExternalFileBuffer>(byteStream)->setStreamSetBuffer(inputStream);
    496         sourceK = new kernel::MMapSourceKernel(iBuilder, segmentSize);
    497         sourceK->generateKernel({}, {byteStream});
    498         sourceK->setInitialArguments({fileSize});
    499     }
    500    
    501     CircularBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8), segmentSize * bufferSegments);
    502 
    503     kernel::S2PKernel  s2pk(iBuilder);
    504     s2pk.generateKernel({byteStream}, {&BasisBits});
    505    
    506     kernel::LineBreakKernelBuilder linebreakK(iBuilder, "lb", encodingBits);
    507     CircularBuffer LineBreakStream(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments);
    508 
    509     linebreakK.generateKernel({&BasisBits}, {&LineBreakStream});
    510     LineBreakStream.allocateBuffer();
    511 
    512     pablo::PabloKernel icgrepK(iBuilder, "icgrep", {Binding{iBuilder->getStreamSetTy(8), "basis"}, Binding{iBuilder->getStreamSetTy(1, 1), "linebreak"}});
    513     re::re2pablo_compiler(&icgrepK, re::regular_expression_passes(re_ast), CountOnly);
    514     pablo_function_passes(&icgrepK);
    515 
    516 
    517     BasisBits.allocateBuffer();
    518 
    519     if (CountOnly) {
    520         icgrepK.generateKernel({&BasisBits, &LineBreakStream}, {});
    521         generatePipeline(iBuilder, {sourceK, &s2pk, &linebreakK, &icgrepK});
    522         iBuilder->CreateRet(icgrepK.createGetAccumulatorCall(icgrepK.getInstance(), "matchedLineCount"));
    523     } else {
    524 #ifdef CUDA_ENABLED
    525         if (codegen::NVPTX){
    526             ExternalFileBuffer MatchResults(iBuilder, iBuilder->getStreamSetTy(1, 1), addrSpace);
    527             MatchResults.setStreamSetBuffer(outputStream);
    528 
    529             icgrepK.generateKernel({&BasisBits, &LineBreakStream},  {&MatchResults});
    530             generatePipelineLoop(iBuilder, {sourceK, &s2pk, &linebreakK, &icgrepK});
    531 
    532         }
    533 #endif
    534         if (CPU_Only) {
    535             CircularBuffer MatchResults(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments);
    536             MatchResults.allocateBuffer();
    537 
    538             icgrepK.generateKernel({&BasisBits, &LineBreakStream}, {&MatchResults});
    539 
    540             kernel::ScanMatchKernel scanMatchK(iBuilder, mGrepType, encodingBits);
    541             scanMatchK.generateKernel({byteStream, &MatchResults, &LineBreakStream}, {});
    542             scanMatchK.setInitialArguments({fileIdx});
    543            
    544             generatePipeline(iBuilder, {sourceK, &s2pk, &linebreakK, &icgrepK, &scanMatchK});
    545         }
    546         iBuilder->CreateRetVoid();
    547     }
    548 
    549 #ifdef CUDA_ENABLED
    550     Function * mainCPUFn = nullptr;
    551     if(codegen::NVPTX){
    552         Function * kernelFunction = generateGPUKernel(M, iBuilder, CountOnly);
    553         MDNode * Node = MDNode::get(M->getContext(),
    554                                     {llvm::ValueAsMetadata::get(kernelFunction),
    555                                      MDString::get(M->getContext(), "kernel"),
    556                                      ConstantAsMetadata::get(ConstantInt::get(iBuilder->getInt32Ty(), 1))});
    557         NamedMDNode *NMD = M->getOrInsertNamedMetadata("nvvm.annotations");
    558         NMD->addOperand(Node);
    559    
    560         Compile2PTX(M, IRFilename, PTXFilename);
    561         mainCPUFn = generateCPUKernel(cpuM, CPUBuilder, mGrepType);
    562         if (CountOnly) return;
    563     }
    564 #endif
    565 
    566 
    567     mEngine = JIT_to_ExecutionEngine(cpuM);
    568     ApplyObjectCache(mEngine);
    569     icgrep_Linking(cpuM, mEngine);
    570 
    571 #ifndef NDEBUG
    572     verifyModule(*M, &dbgs());
    573 #endif
    574 
    575     mEngine->finalizeObject();
    576     delete iBuilder;
    577     delete sourceK;
    578     delete byteStream;
    579    
    580     if (CountOnly) {
    581         mGrepFunction_CountOnly = reinterpret_cast<GrepFunctionType_CountOnly>(mEngine->getPointerToFunction(mainFn));
    582     } else {
    583 #ifdef CUDA_ENABLED
    584         if(codegen::NVPTX){
    585             mGrepFunction_CPU = reinterpret_cast<GrepFunctionType_CPU>(mEngine->getPointerToFunction(mainCPUFn));
    586         }
    587 #endif
    588         if (CPU_Only) {
    589             mGrepFunction = reinterpret_cast<GrepFunctionType>(mEngine->getPointerToFunction(mainFn));
    590         }
    591     }
    592 
    593 }
    594 
    595 re::CC * GrepEngine::grepCodepoints() {
    596     parsedCodePointSet = re::makeCC();
    597     char * mFileBuffer = getUnicodeNameDataPtr();
    598     size_t mFileSize = getUnicodeNameDataSize();
    599     mGrepFunction(mFileBuffer, mFileSize, 0);
    600     return parsedCodePointSet;
    601 }
    602 
    603 const std::vector<std::string> & GrepEngine::grepPropertyValues(const std::string& propertyName) {
    604     enum { MaxSupportedVectorWidthInBytes = 32 };
    605     AlignedAllocator<char, MaxSupportedVectorWidthInBytes> alloc;
    606     parsedPropertyValues.clear();
    607     const std::string & str = UCD::getPropertyValueGrepString(propertyName);
    608     const auto n = str.length();
    609     // NOTE: MaxSupportedVectorWidthInBytes of trailing 0s are needed to prevent the grep function from
    610     // erroneously matching garbage data when loading the final partial block.
    611     char * aligned = alloc.allocate(n + MaxSupportedVectorWidthInBytes, 0);
    612     std::memcpy(aligned, str.data(), n);
    613     std::memset(aligned + n, 0, MaxSupportedVectorWidthInBytes);
    614     mGrepFunction(aligned, n, 0);
    615     alloc.deallocate(aligned, 0);
    616     return parsedPropertyValues;
    617 }
    618269
    619270static int * total_count;
     
    632283        total_count[i] = 0;
    633284    }
    634    
     285
    635286}
    636287
    637288template<typename CodeUnit>
    638 void wrapped_report_match(const size_t lineNum, size_t line_start, size_t line_end, const CodeUnit * const buffer, const size_t filesize, const int fileIdx) {
     289void wrapped_report_match(const size_t lineNum, size_t line_start, size_t line_end, const CodeUnit * const buffer, const size_t filesize, const size_t fileIdx) {
    639290    assert (buffer);
    640291    assert (line_start <= line_end);
    641292    assert (line_end <= filesize);
    642293
    643 #ifdef CUDA_ENABLED
    644 if (codegen::NVPTX){
    645     while(line_start>startPoints[blockNo]) blockNo++;
    646     line_start -= accumBytes[blockNo-1];
    647     line_end -= accumBytes[blockNo-1];
    648 }
    649 #endif
     294    #ifdef CUDA_ENABLED
     295    if (codegen::NVPTX){
     296        while(line_start>startPoints[blockNo]) blockNo++;
     297        line_start -= accumBytes[blockNo-1];
     298        line_end -= accumBytes[blockNo-1];
     299    }
     300    #endif
    650301
    651302    if (ShowFileNames) {
     
    695346
    696347void PrintResult(bool CountOnly, std::vector<size_t> & total_CountOnly){
    697    
    698     if(CountOnly){
     348    if (CountOnly) {
    699349        if (!ShowFileNames) {
    700350            for (unsigned i = 0; i < inputFiles.size(); ++i){
    701351                std::cout << total_CountOnly[i] << std::endl;
    702352            }
    703         }
    704         else {
     353        } else {
    705354            for (unsigned i = 0; i < inputFiles.size(); ++i){
    706355                std::cout << inputFiles[i] << ':' << total_CountOnly[i] << std::endl;
    707356            };
    708357        }
    709         return;
    710     }
    711    
    712     for (unsigned i = 0; i < inputFiles.size(); ++i){
    713         std::cout << resultStrs[i].str();
     358    } else {
     359        for (unsigned i = 0; i < inputFiles.size(); ++i){
     360            std::cout << resultStrs[i].str();
     361        }
    714362    }
    715363}
     
    737385    assert (line_start <= line_end);
    738386    parsedPropertyValues.emplace_back(buffer + line_start, buffer + line_end);
     387}
     388
     389void GrepEngine::grepCodeGen(std::string moduleName, re::RE * re_ast, bool CountOnly, bool UTF_16, GrepType grepType, const bool usingStdIn) {
     390    isUTF_16 = UTF_16;
     391    int addrSpace = 0;
     392    bool CPU_Only = true;
     393    Module * M = nullptr;
     394    IDISA::IDISA_Builder * iBuilder = nullptr;
     395
     396    #ifdef CUDA_ENABLED
     397    setNVPTXOption();
     398    if (codegen::NVPTX) {
     399        Module * gpuM = new Module(moduleName+":gpu", getGlobalContext());
     400        IDISA::IDISA_Builder * GPUBuilder = IDISA::GetIDISA_GPU_Builder(gpuM);
     401        M = gpuM;
     402        iBuilder = GPUBuilder;
     403        M->setDataLayout("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64");
     404        M->setTargetTriple("nvptx64-nvidia-cuda");
     405        addrSpace = 1;
     406        CPU_Only = false;
     407        codegen::BlockSize = 64;
     408    }
     409    #endif
     410
     411    Module * cpuM = new Module(moduleName + ":cpu", getGlobalContext());
     412    IDISA::IDISA_Builder * CPUBuilder = IDISA::GetIDISA_Builder(cpuM);
     413    if (CPU_Only) {
     414        M = cpuM;
     415        iBuilder = CPUBuilder;
     416    }
     417    ParabixDriver pxDriver(iBuilder);
     418
     419    // segment size made availabe for each call to the mmap source kernel
     420    const unsigned segmentSize = codegen::SegmentSize;
     421    const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
     422    const unsigned encodingBits = UTF_16 ? 16 : 8;
     423
     424    Type * const size_ty = iBuilder->getSizeTy();
     425    Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(iBuilder->getBitBlockType(), encodingBits), 1), addrSpace);
     426    Type * const resultTy = CountOnly ? size_ty : iBuilder->getVoidTy();
     427
     428    Function * mainFn = nullptr;
     429    Value * inputStream = nullptr;
     430    Value * fileSize = nullptr;
     431    Value * fileIdx = nullptr;
     432
     433    #ifdef CUDA_ENABLED
     434    Value * outputStream = nullptr;
     435    Type * const outputType = PointerType::get(ArrayType::get(iBuilder->getBitBlockType(), 1), addrSpace);
     436    if (codegen::NVPTX){
     437        if (CountOnly){
     438            mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, size_ty, nullptr));
     439            mainFn->setCallingConv(CallingConv::C);
     440            iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
     441            Function::arg_iterator args = mainFn->arg_begin();
     442
     443            inputStream = &*(args++);
     444            inputStream->setName("input");
     445            fileSize = &*(args++);
     446            fileSize->setName("fileSize");
     447        } else {
     448            mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, size_ty, outputType, nullptr));
     449            mainFn->setCallingConv(CallingConv::C);
     450            iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
     451            Function::arg_iterator args = mainFn->arg_begin();
     452
     453            inputStream = &*(args++);
     454            inputStream->setName("input");
     455            fileSize = &*(args++);
     456            fileSize->setName("fileSize");
     457            outputStream = &*(args++);
     458            outputStream->setName("output");
     459        }
     460    }
     461    #endif
     462
     463    if (CPU_Only) {
     464        mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, size_ty, size_ty, nullptr));
     465        mainFn->setCallingConv(CallingConv::C);
     466        iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
     467        Function::arg_iterator args = mainFn->arg_begin();
     468
     469        inputStream = &*(args++);
     470        inputStream->setName("input");
     471        fileSize = &*(args++);
     472        fileSize->setName("fileSize");
     473        fileIdx = &*(args++);
     474        fileIdx->setName("fileIdx");
     475
     476    }
     477
     478    StreamSetBuffer * byteStream = nullptr;
     479    kernel::KernelBuilder * sourceK = nullptr;
     480    if (usingStdIn) {
     481        // TODO: use fstat(STDIN_FILENO) to see if we can mmap the stdin safely and avoid the calls to read
     482        byteStream = new ExtensibleBuffer(iBuilder, iBuilder->getStreamSetTy(1, 8), segmentSize);
     483        sourceK = new kernel::StdInKernel(iBuilder, segmentSize);
     484    } else {
     485        byteStream = new SourceFileBuffer(iBuilder, iBuilder->getStreamSetTy(1, 8));
     486        sourceK = new kernel::FileSourceKernel(iBuilder, inputStream->getType(), segmentSize);
     487        sourceK->setInitialArguments({inputStream, fileSize});
     488    }
     489    byteStream->allocateBuffer();
     490    pxDriver.addKernelCall(*sourceK, {}, {byteStream});
     491
     492    CircularBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8), segmentSize * bufferSegments);
     493    BasisBits.allocateBuffer();
     494
     495    kernel::S2PKernel s2pk(iBuilder);
     496    pxDriver.addKernelCall(s2pk, {byteStream}, {&BasisBits});
     497
     498    kernel::LineBreakKernelBuilder linebreakK(iBuilder, "lb", encodingBits);
     499    CircularBuffer LineBreakStream(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments);
     500    LineBreakStream.allocateBuffer();
     501
     502    pxDriver.addKernelCall(linebreakK, {&BasisBits}, {&LineBreakStream});
     503
     504    pablo::PabloKernel icgrepK(iBuilder, "icgrep", {Binding{iBuilder->getStreamSetTy(8), "basis"}, Binding{iBuilder->getStreamSetTy(1, 1), "linebreak"}});
     505    re::re2pablo_compiler(&icgrepK, re::regular_expression_passes(re_ast), CountOnly);
     506    pablo_function_passes(&icgrepK);
     507
     508    if (CountOnly) {
     509
     510        pxDriver.addKernelCall(icgrepK, {&BasisBits, &LineBreakStream}, {});
     511
     512        pxDriver.generatePipelineIR();
     513
     514        iBuilder->CreateRet(icgrepK.createGetAccumulatorCall(icgrepK.getInstance(), "matchedLineCount"));
     515
     516        pxDriver.JITcompileMain();
     517        pxDriver.linkAndFinalize();
     518
     519    } else {
     520
     521        #ifdef CUDA_ENABLED
     522        if (codegen::NVPTX){
     523            ExternalFileBuffer MatchResults(iBuilder, iBuilder->getStreamSetTy(1, 1), addrSpace);
     524            MatchResults.setStreamSetBuffer(outputStream);
     525
     526            pxDriver.addKernelCall(icgrepK, {&BasisBits, &LineBreakStream}, {&MatchResults});
     527
     528            pxDriver.generatePipelineIR();
     529
     530            iBuilder->CreateRetVoid();
     531
     532            pxDriver.JITcompileMain();
     533            pxDriver.linkAndFinalize();
     534        }
     535        #endif
     536
     537        if (CPU_Only) {
     538
     539            CircularBuffer MatchResults(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments);
     540            MatchResults.allocateBuffer();
     541
     542            pxDriver.addKernelCall(icgrepK, {&BasisBits, &LineBreakStream}, {&MatchResults});
     543
     544            kernel::ScanMatchKernel scanMatchK(iBuilder, grepType, encodingBits);
     545            scanMatchK.setInitialArguments({fileIdx});
     546
     547            pxDriver.addKernelCall(scanMatchK, {&MatchResults, &LineBreakStream, byteStream}, {});
     548
     549            switch (grepType) {
     550                case GrepType::Normal:
     551                    if (UTF_16) {
     552                        pxDriver.addExternalLink(scanMatchK, "matcher", &wrapped_report_match<uint16_t>);
     553                    } else {
     554                        pxDriver.addExternalLink(scanMatchK, "matcher", &wrapped_report_match<uint8_t>);
     555                    }
     556                    break;
     557                case GrepType::NameExpression:
     558                    pxDriver.addExternalLink(scanMatchK, "matcher", &insert_codepoints);
     559                    break;
     560                case GrepType::PropertyValue:
     561                    pxDriver.addExternalLink(scanMatchK, "matcher", &insert_property_values);
     562                    break;
     563            }
     564
     565            pxDriver.generatePipelineIR();
     566
     567            iBuilder->CreateRetVoid();
     568
     569            pxDriver.JITcompileMain();
     570            pxDriver.linkAndFinalize();
     571        }
     572    }
     573
     574    #ifdef CUDA_ENABLED
     575    Function * mainCPUFn = nullptr;
     576    if(codegen::NVPTX){
     577        Function * kernelFunction = generateGPUKernel(M, iBuilder, CountOnly);
     578        MDNode * Node = MDNode::get(M->getContext(),
     579                                    {llvm::ValueAsMetadata::get(kernelFunction),
     580                                     MDString::get(M->getContext(), "kernel"),
     581                                     ConstantAsMetadata::get(ConstantInt::get(iBuilder->getInt32Ty(), 1))});
     582        NamedMDNode *NMD = M->getOrInsertNamedMetadata("nvvm.annotations");
     583        NMD->addOperand(Node);
     584
     585        Compile2PTX(M, IRFilename, PTXFilename);
     586        mainCPUFn = generateCPUKernel(cpuM, CPUBuilder, mGrepType);
     587        if (CountOnly) return;
     588    }
     589    #endif
     590
     591    delete iBuilder;
     592    delete sourceK;
     593    delete byteStream;
     594
     595    if (CountOnly) {
     596        mGrepFunction_CountOnly = reinterpret_cast<GrepFunctionType_CountOnly>(pxDriver.getPointerToMain());
     597    } else {
     598        #ifdef CUDA_ENABLED
     599        if(codegen::NVPTX){
     600            mGrepFunction_CPU = reinterpret_cast<GrepFunctionType_CPU>(pxDriver.getPointerToMain());
     601        }
     602        #endif
     603        if (CPU_Only) {
     604            mGrepFunction = reinterpret_cast<GrepFunctionType>(pxDriver.getPointerToMain());
     605        }
     606    }
     607}
     608
     609
     610void GrepEngine::multiGrepCodeGen(std::string moduleName, std::vector<re::RE *> REs, bool CountOnly, bool UTF_16, GrepType grepType, const bool usingStdIn) {
     611
     612    isUTF_16 = UTF_16;
     613    Module * M = new Module(moduleName + ":icgrep", getGlobalContext());;
     614    IDISA::IDISA_Builder * iBuilder = IDISA::GetIDISA_Builder(M);;
     615
     616    const unsigned segmentSize = codegen::SegmentSize;
     617    const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
     618    const unsigned encodingBits = UTF_16 ? 16 : 8;
     619
     620    Type * const sizeTy = iBuilder->getSizeTy();
     621    Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(iBuilder->getBitBlockType(), encodingBits), 1), 0);
     622    Type * const resultTy = CountOnly ? sizeTy : iBuilder->getVoidTy();
     623
     624    Function * mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, sizeTy, sizeTy, nullptr));
     625    mainFn->setCallingConv(CallingConv::C);
     626    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
     627    Function::arg_iterator args = mainFn->arg_begin();
     628
     629    Value * inputStream = &*(args++);
     630    inputStream->setName("input");
     631    Value * fileSize = &*(args++);
     632    fileSize->setName("fileSize");
     633    Value * fileIdx = &*(args++);
     634    fileIdx->setName("fileIdx");
     635
     636    StreamSetBuffer * byteStream = nullptr;
     637    kernel::KernelBuilder * sourceK = nullptr;
     638    if (usingStdIn) {
     639        byteStream = new ExtensibleBuffer(iBuilder, iBuilder->getStreamSetTy(1, 8), segmentSize);
     640        cast<ExtensibleBuffer>(byteStream)->allocateBuffer();
     641        sourceK = new kernel::StdInKernel(iBuilder, segmentSize);
     642    } else {
     643        byteStream = new ExternalFileBuffer(iBuilder, iBuilder->getStreamSetTy(1, 8));
     644        cast<ExternalFileBuffer>(byteStream)->setStreamSetBuffer(inputStream);
     645        sourceK = new kernel::MMapSourceKernel(iBuilder, segmentSize);
     646        sourceK->setInitialArguments({fileSize});
     647    }
     648    sourceK->generateKernel({}, {byteStream});
     649
     650    CircularBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8), segmentSize * bufferSegments);
     651    BasisBits.allocateBuffer();
     652
     653    kernel::S2PKernel  s2pk(iBuilder);
     654    s2pk.generateKernel({byteStream}, {&BasisBits});
     655
     656    std::vector<pablo::PabloKernel *> icgrepKs;
     657    std::vector<StreamSetBuffer *> MatchResultsBufs;
     658
     659    for(unsigned i=0; i<REs.size(); i++){
     660        pablo::PabloKernel * icgrepK = new pablo::PabloKernel(iBuilder, "icgrep"+std::to_string(i), {Binding{iBuilder->getStreamSetTy(8), "basis"}, Binding{iBuilder->getStreamSetTy(1, 1), "linebreak"}});
     661        re::re2pablo_compiler(icgrepK, re::regular_expression_passes(REs[i]), false);
     662        pablo_function_passes(icgrepK);
     663        icgrepKs.push_back(icgrepK);
     664        CircularBuffer * MatchResults = new CircularBuffer(iBuilder, iBuilder->getStreamSetTy(2, 1), segmentSize * bufferSegments);
     665        MatchResults->allocateBuffer();
     666        MatchResultsBufs.push_back(MatchResults);
     667    }
     668
     669    std::vector<kernel::KernelBuilder *> KernelList;
     670    KernelList.push_back(sourceK);
     671    KernelList.push_back(&s2pk);
     672
     673    CircularBuffer mergedResults(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments);
     674    mergedResults.allocateBuffer();
     675
     676    kernel::StreamsMerge streamsMergeK(iBuilder, 1, REs.size());
     677    streamsMergeK.generateKernel(MatchResultsBufs, {&mergedResults});
     678
     679    kernel::LineBreakKernelBuilder linebreakK(iBuilder, "lb", encodingBits);
     680    CircularBuffer LineBreakStream(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments);
     681    LineBreakStream.allocateBuffer();
     682    linebreakK.generateKernel({&BasisBits}, {&LineBreakStream});
     683
     684    KernelList.push_back(&linebreakK);
     685    for(unsigned i=0; i<REs.size(); i++){
     686        icgrepKs[i]->generateKernel({&BasisBits, &LineBreakStream}, {MatchResultsBufs[i]});
     687        KernelList.push_back(icgrepKs[i]);
     688    }
     689    KernelList.push_back(&streamsMergeK);
     690
     691    if (CountOnly) {
     692        kernel::MatchCount matchCountK(iBuilder);
     693        matchCountK.generateKernel({&mergedResults}, {});
     694
     695        KernelList.push_back(&matchCountK);
     696
     697        generatePipeline(iBuilder, KernelList);
     698        iBuilder->CreateRet(matchCountK.getScalarField(matchCountK.getInstance(), "matchedLineCount"));
     699
     700    } else {
     701        kernel::ScanMatchKernel scanMatchK(iBuilder, grepType, encodingBits);
     702        scanMatchK.generateKernel({byteStream, &mergedResults, &LineBreakStream}, {});
     703        scanMatchK.setInitialArguments({fileIdx});
     704
     705        KernelList.push_back(&scanMatchK);
     706
     707        generatePipeline(iBuilder, KernelList);
     708
     709        iBuilder->CreateRetVoid();
     710    }
     711
     712    mEngine = JIT_to_ExecutionEngine(M);
     713    ApplyObjectCache(mEngine);
     714    icgrep_Linking(M, mEngine);
     715
     716    mEngine->finalizeObject();
     717    delete iBuilder;
     718    delete sourceK;
     719    delete byteStream;
     720
     721    if (CountOnly) {
     722        mGrepFunction_CountOnly = reinterpret_cast<GrepFunctionType_CountOnly>(mEngine->getPointerToFunction(mainFn));
     723    } else {
     724        mGrepFunction = reinterpret_cast<GrepFunctionType>(mEngine->getPointerToFunction(mainFn));
     725    }
     726
     727}
     728
     // Runs the compiled grep function over the Unicode name data and returns parsedCodePointSet.
     // parsedCodePointSet is reset to a fresh re::makeCC() before the run.
     // NOTE(review): presumably the set is filled by the insert_codepoints matcher callback while
     // mGrepFunction executes, and mGrepFunction must already have been compiled - confirm.
     729re::CC * GrepEngine::grepCodepoints() {
     730    parsedCodePointSet = re::makeCC();
     731    char * mFileBuffer = getUnicodeNameDataPtr();
     732    size_t mFileSize = getUnicodeNameDataSize();
            // The last argument is the file index; 0 is passed here.
     733    mGrepFunction(mFileBuffer, mFileSize, 0);
     734    return parsedCodePointSet;
     735}
     736
     // Runs the compiled grep function over the property-value string for propertyName and
     // returns a reference to parsedPropertyValues, which is cleared before the run.
     // The string is first copied into a 32-byte-aligned buffer padded with trailing zeros.
     // NOTE(review): presumably parsedPropertyValues is filled by the insert_property_values
     // matcher callback while mGrepFunction executes - confirm.
     737const std::vector<std::string> & GrepEngine::grepPropertyValues(const std::string& propertyName) {
     738    enum { MaxSupportedVectorWidthInBytes = 32 };
     739    AlignedAllocator<char, MaxSupportedVectorWidthInBytes> alloc;
     740    parsedPropertyValues.clear();
     741    const std::string & str = UCD::getPropertyValueGrepString(propertyName);
     742    const auto n = str.length();
     743    // NOTE: MaxSupportedVectorWidthInBytes of trailing 0s are needed to prevent the grep function from
     744    // erroneously matching garbage data when loading the final partial block.
     745    char * aligned = alloc.allocate(n + MaxSupportedVectorWidthInBytes, 0);
     746    std::memcpy(aligned, str.data(), n);
     747    std::memset(aligned + n, 0, MaxSupportedVectorWidthInBytes);
            // Only the n data bytes are reported as the input size; the zero padding is not counted.
     748    mGrepFunction(aligned, n, 0);
     749    alloc.deallocate(aligned, 0);
     750    return parsedPropertyValues;
    739751}
    740752
     
    766778, mGrepFunction_CountOnly(nullptr)
    767779, mGrepFunction_CPU(nullptr)
    768 , mGrepType(GrepType::Normal)
    769780, mEngine(nullptr) {
    770781
  • icGREP/icgrep-devel/icgrep/grep_engine.h

    r5377 r5398  
    2424 
    2525    void grepCodeGen(std::string moduleName, re::RE * re_ast, bool CountOnly, bool UTF_16 = false, GrepType grepType = GrepType::Normal, const bool usingStdIn = false);
    26     void multiGrepCodeGen(std::string moduleName, std::vector<re::RE *> REs, bool CountOnly, bool UTF_16 = false, GrepType grepType = GrepType::Normal);
     26
     27    void multiGrepCodeGen(std::string moduleName, std::vector<re::RE *> REs, bool CountOnly, bool UTF_16 = false, GrepType grepType = GrepType::Normal, const bool usingStdIn = false);
    2728     
    2829    void doGrep(const std::string & fileName, const int fileIdx, bool CountOnly, std::vector<size_t> & total_CountOnly);
     
    3940    GrepFunctionType_CountOnly mGrepFunction_CountOnly;
    4041    GrepFunctionType_CPU mGrepFunction_CPU;
    41 
    42     GrepType mGrepType;
    4342    llvm::ExecutionEngine * mEngine;
    4443};
  • icGREP/icgrep-devel/icgrep/kernels/alignedprint.cpp

    r5362 r5398  
    127127        Value * output = iBuilder->CreateAlloca(iBuilder->getInt8Ty(), streamLength);
    128128
    129         Value * outputName = iBuilder->CreateGlobalStringPtr(name.c_str());
     129        Value * outputName = iBuilder->GetString(name.c_str());
    130130        ConstantInt * const length = iBuilder->getInt32(name.length());
    131131        iBuilder->CreateMemCpy(output, outputName, length, 1);
  • icGREP/icgrep-devel/icgrep/kernels/interface.cpp

    r5390 r5398  
    4444Value * ProcessingRate::CreateRatioCalculation(IDISA::IDISA_Builder * b, Value * principalInputItems, Value * doFinal) const {
    4545    if (mKind == ProcessingRate::ProcessingRateKind::Fixed || mKind == ProcessingRate::ProcessingRateKind::Max) {
    46         if (ratio_numerator == 1) {
     46        if (mRatioNumerator == 1) {
    4747            return principalInputItems;
    4848        }
    4949        Type * const T = principalInputItems->getType();
    50         Constant * const numerator = ConstantInt::get(T, ratio_numerator);
    51         Constant * const denominator = ConstantInt::get(T, ratio_denominator);
    52         Constant * const denominatorLess1 = ConstantInt::get(T, ratio_denominator - 1);
     50        Constant * const numerator = ConstantInt::get(T, mRatioNumerator);
     51        Constant * const denominator = ConstantInt::get(T, mRatioDenominator);
     52        Constant * const denominatorLess1 = ConstantInt::get(T, mRatioDenominator - 1);
    5353        Value * strmItems = b->CreateMul(principalInputItems, numerator);
    5454        return b->CreateUDiv(b->CreateAdd(denominatorLess1, strmItems), denominator);
     
    5656    if (mKind == ProcessingRate::ProcessingRateKind::RoundUp) {
    5757        Type * const T = principalInputItems->getType();
    58         Constant * const denominator = ConstantInt::get(T, ratio_denominator);
    59         Constant * const denominatorLess1 = ConstantInt::get(T, ratio_denominator - 1);
     58        Constant * const denominator = ConstantInt::get(T, mRatioDenominator);
     59        Constant * const denominatorLess1 = ConstantInt::get(T, mRatioDenominator - 1);
    6060        return b->CreateMul(b->CreateUDiv(b->CreateAdd(principalInputItems, denominatorLess1), denominator), denominator);
    6161    }
     
    9595
    9696    // Create the doSegment function prototype.
    97     std::vector<Type *> doSegmentParameters = {selfType, iBuilder->getInt1Ty()};
     97    std::vector<Type *> params = {selfType, iBuilder->getInt1Ty()};
     98    // const auto count = mStreamSetInputs.size() + mStreamSetOutputs.size();
    9899    for (unsigned i = 0; i < mStreamSetInputs.size(); ++i) {
    99         doSegmentParameters.push_back(iBuilder->getSizeTy());
     100        params.push_back(iBuilder->getSizeTy());
    100101    }
    101     FunctionType * doSegmentType = FunctionType::get(iBuilder->getVoidTy(), doSegmentParameters, false);
     102
     103    FunctionType * doSegmentType = FunctionType::get(iBuilder->getVoidTy(), params, false);
    102104    Function * doSegment = Function::Create(doSegmentType, GlobalValue::ExternalLinkage, getName() + DO_SEGMENT_SUFFIX, client);
    103105    doSegment->setCallingConv(CallingConv::C);
     
    107109    args->setName("self");
    108110    (++args)->setName("doFinal");
    109     for (auto ss : mStreamSetInputs) {
    110         (++args)->setName(ss.name + "_availableItems");
     111    for (const Binding & input : mStreamSetInputs) {
     112        (++args)->setName(input.name + "_availableItems");
    111113    }
     114//    for (const Binding & output : mStreamSetOutputs) {
     115//        (++args)->setName(output.name + "_consumedItems");
     116//    }
    112117
    113118    // Create the accumulator get function prototypes
  • icGREP/icgrep-devel/icgrep/kernels/interface.h

    r5391 r5398  
    4343    ProcessingRateKind getKind() const {return mKind;}
    4444    bool isExact() const {return (mKind == Fixed)||(mKind == RoundUp)||(mKind == Add1) ;}
     45    bool isUnknown() const { return !isExact(); }
    4546    llvm::Value * CreateRatioCalculation(IDISA::IDISA_Builder * b, llvm::Value * principalInputItems, llvm::Value * doFinal = nullptr) const;
    4647    friend ProcessingRate FixedRatio(unsigned strmItemsPer, unsigned perPrincipalInputItems, std::string referenceStreamSet);
     
    5051    friend ProcessingRate UnknownRate();
    5152    std::string referenceStreamSet() const { return mReferenceStreamSet;}
    52    
    5353protected:
    5454    ProcessingRate(ProcessingRateKind k, unsigned numerator, unsigned denominator, std::string referenceStreamSet)
    55     : mKind(k), ratio_numerator(numerator), ratio_denominator(denominator), mReferenceStreamSet(referenceStreamSet) {}
     55    : mKind(k), mRatioNumerator(numerator), mRatioDenominator(denominator), mReferenceStreamSet(referenceStreamSet) {}
     56private:
    5657    ProcessingRateKind mKind;
    57     uint16_t ratio_numerator;
    58     uint16_t ratio_denominator;
     58    uint16_t mRatioNumerator;
     59    uint16_t mRatioDenominator;
    5960    std::string mReferenceStreamSet;
    60     bool isVariableRate();
    6161};
    6262
     
    120120    virtual void setProducedItemCount(llvm::Value * instance, const std::string & name, llvm::Value * value) const = 0;
    121121
    122     virtual llvm::Value * getConsumedItemCount(llvm::Value * instance, const std::string & name) const = 0;
    123 
    124     virtual void setConsumedItemCount(llvm::Value * instance, const std::string & name, llvm::Value * value) const = 0;
    125 
    126122    virtual llvm::Value * getProcessedItemCount(llvm::Value * instance, const std::string & name) const = 0;
    127123
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5395 r5398  
    6262void KernelBuilder::prepareStreamSetNameMap() {
    6363    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    64         mStreamSetNameMap.emplace(mStreamSetInputs[i].name, i);
     64        mStreamMap.emplace(mStreamSetInputs[i].name, std::make_pair(Port::Input, i));
    6565    }
    6666    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    67         mStreamSetNameMap.emplace(mStreamSetOutputs[i].name, i);
     67        mStreamMap.emplace(mStreamSetOutputs[i].name, std::make_pair(Port::Output, i));
    6868    }
    6969}
     
    9595        if ((i == 0) || !mStreamSetInputs[i].rate.isExact()) {
    9696            addScalar(iBuilder->getSizeTy(), mStreamSetInputs[i].name + PROCESSED_ITEM_COUNT_SUFFIX);
    97         }
    98        
     97        }       
    9998    }
    10099    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
     
    110109        addScalar(binding.type, binding.name);
    111110    }
    112     if (mStreamSetNameMap.empty()) {
     111    if (mStreamMap.empty()) {
    113112        prepareStreamSetNameMap();
    114113    }
     
    278277Value * KernelBuilder::getProducedItemCount(Value * instance, const std::string & name, Value * doFinal) const {
    279278    assert ("instance cannot be null!" && instance);
    280     unsigned ssIdx = getStreamSetIndex(name);
     279    Port port; unsigned ssIdx;
     280    std::tie(port, ssIdx) = getStreamPort(name);
     281    assert (port == Port::Output);
    281282    if (mStreamSetOutputs[ssIdx].rate.isExact()) {
    282283        std::string refSet = mStreamSetOutputs[ssIdx].rate.referenceStreamSet();
     
    289290            }
    290291        } else {
    291             unsigned pfIndex = getStreamSetIndex(refSet);
    292             if (mStreamSetInputs.size() > pfIndex && mStreamSetInputs[pfIndex].name == refSet) {
     292            Port port; unsigned pfIndex;
     293            std::tie(port, pfIndex) = getStreamPort(refSet);
     294            if (port == Port::Input) {
    293295               principalField = refSet + PROCESSED_ITEM_COUNT_SUFFIX;
    294296            } else {
     
    302304}
    303305
    304 llvm::Value * KernelBuilder::getConsumedItemCount(llvm::Value * instance, const std::string & name) const {
    305     assert ("instance cannot be null!" && instance);
    306     return getScalarField(instance, name + CONSUMED_ITEM_COUNT_SUFFIX);
    307 }
    308 
    309306Value * KernelBuilder::getProcessedItemCount(Value * instance, const std::string & name) const {
    310307    assert ("instance cannot be null!" && instance);
    311     unsigned ssIdx = getStreamSetIndex(name);
     308    Port port; unsigned ssIdx;
     309    std::tie(port, ssIdx) = getStreamPort(name);
     310    assert (port == Port::Input);
    312311    if (mStreamSetInputs[ssIdx].rate.isExact()) {
    313312        std::string refSet = mStreamSetInputs[ssIdx].rate.referenceStreamSet();
     
    326325}
    327326
    328 void KernelBuilder::setConsumedItemCount(llvm::Value * instance, const std::string & name, llvm::Value * value) const {
    329     assert ("instance cannot be null!" && instance);
    330     setScalarField(instance, name + CONSUMED_ITEM_COUNT_SUFFIX, value);
    331 }
    332 
    333327void KernelBuilder::setProcessedItemCount(Value * instance, const std::string & name, Value * value) const {
    334328    assert ("instance cannot be null!" && instance);
    335329    setScalarField(instance, name + PROCESSED_ITEM_COUNT_SUFFIX, value);
    336 }
    337 
    338 void KernelBuilder::reserveBytes(llvm::Value * instance, const std::string & name, llvm::Value * value) const {
    339     assert ("instance cannot be null!" && instance);
    340     Value * itemCount = getProducedItemCount(instance, name);
    341     const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
    342     buf->reserveBytes(getStreamSetBufferPtr(name), iBuilder->CreateAdd(itemCount, value));
    343330}
    344331
     
    440427}
    441428
    442 unsigned KernelBuilder::getStreamSetIndex(const std::string & name) const {
    443     const auto f = mStreamSetNameMap.find(name);
    444     if (LLVM_UNLIKELY(f == mStreamSetNameMap.end())) {
     429void KernelBuilder::setBaseAddress(const std::string & name, llvm::Value * addr) const {
     430    unsigned index; Port port;
     431    std::tie(port, index) = getStreamPort(name);
     432    const StreamSetBuffer * buf = nullptr;
     433    if (port == Port::Input) {
     434        assert (index < mStreamSetInputBuffers.size());
     435        buf = mStreamSetInputBuffers[index];
     436    } else {
     437        assert (index < mStreamSetOutputBuffers.size());
     438        buf = mStreamSetOutputBuffers[index];
     439    }
     440    return buf->setBaseAddress(getStreamSetBufferPtr(name), addr);
     441}
     442
     443Value * KernelBuilder::getBufferedSize(const std::string & name) const {
     444    unsigned index; Port port;
     445    std::tie(port, index) = getStreamPort(name);
     446    const StreamSetBuffer * buf = nullptr;
     447    if (port == Port::Input) {
     448        assert (index < mStreamSetInputBuffers.size());
     449        buf = mStreamSetInputBuffers[index];
     450    } else {
     451        assert (index < mStreamSetOutputBuffers.size());
     452        buf = mStreamSetOutputBuffers[index];
     453    }
     454    return buf->getBufferedSize(getStreamSetBufferPtr(name));
     455}
     456
     457void KernelBuilder::setBufferedSize(const std::string & name, Value * size) const {
     458    unsigned index; Port port;
     459    std::tie(port, index) = getStreamPort(name);
     460    const StreamSetBuffer * buf = nullptr;
     461    if (port == Port::Input) {
     462        assert (index < mStreamSetInputBuffers.size());
     463        buf = mStreamSetInputBuffers[index];
     464    } else {
     465        assert (index < mStreamSetOutputBuffers.size());
     466        buf = mStreamSetOutputBuffers[index];
     467    }
     468    buf->setBufferedSize(getStreamSetBufferPtr(name), size);
     469}
     470
     471void KernelBuilder::reserveBytes(const std::string & name, llvm::Value * value) const {
     472    Value * itemCount = getProducedItemCount(name);
     473    const StreamSetBuffer * const buf = getOutputStreamSetBuffer(name);
     474    buf->reserveBytes(getStreamSetBufferPtr(name), iBuilder->CreateAdd(itemCount, value));
     475}
     476
     477KernelBuilder::StreamPort KernelBuilder::getStreamPort(const std::string & name) const {
     478    const auto f = mStreamMap.find(name);
     479    if (LLVM_UNLIKELY(f == mStreamMap.end())) {
    445480        report_fatal_error(getName() + " does not contain stream set: " + name);
    446481    }
     
    630665    if (!useIndirectBr()) {
    631666        FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {mSelf->getType()}, false);
    632         mCurrentMethod = Function::Create(type, GlobalValue::ExternalLinkage, getName() + DO_BLOCK_SUFFIX, iBuilder->getModule());
     667        mCurrentMethod = Function::Create(type, GlobalValue::InternalLinkage, getName() + DO_BLOCK_SUFFIX, iBuilder->getModule());
    633668        mCurrentMethod->setCallingConv(CallingConv::C);
    634669        mCurrentMethod->setDoesNotThrow();
     
    709744    if (!useIndirectBr()) {
    710745        FunctionType * const type = FunctionType::get(iBuilder->getVoidTy(), {mSelf->getType(), iBuilder->getSizeTy()}, false);
    711         mCurrentMethod = Function::Create(type, GlobalValue::ExternalLinkage, getName() + FINAL_BLOCK_SUFFIX, iBuilder->getModule());
     746        mCurrentMethod = Function::Create(type, GlobalValue::InternalLinkage, getName() + FINAL_BLOCK_SUFFIX, iBuilder->getModule());
    712747        mCurrentMethod->setCallingConv(CallingConv::C);
    713748        mCurrentMethod->setDoesNotThrow();
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5395 r5398  
    2525   
    2626class KernelBuilder : public KernelInterface {
    27     using NameMap = boost::container::flat_map<std::string, unsigned>;
     27protected:
     28    using KernelMap = boost::container::flat_map<std::string, unsigned>;
     29    enum class Port { Input, Output };
     30    using StreamPort = std::pair<Port, unsigned>;
     31    using StreamMap = boost::container::flat_map<std::string, StreamPort>;
    2832public:
    2933   
     
    8084    void setProducedItemCount(llvm::Value * instance, const std::string & name, llvm::Value * value) const final;
    8185
    82     llvm::Value * getConsumedItemCount(llvm::Value * instance, const std::string & name) const final;
    83 
    84     void setConsumedItemCount(llvm::Value * instance, const std::string & name, llvm::Value * value) const final;
    85 
    8686    llvm::Value * getProcessedItemCount(llvm::Value * instance, const std::string & name) const final;
    8787
    8888    void setProcessedItemCount(llvm::Value * instance, const std::string & name, llvm::Value * value) const final;
    89 
    90     virtual void reserveBytes(llvm::Value * instance, const std::string & name, llvm::Value * requested) const;
    9189
    9290    bool hasNoTerminateAttribute() { return mNoTerminateAttribute;}
     
    236234    llvm::Value * getRawOutputPointer(const std::string & name, llvm::Value * streamIndex, llvm::Value * absolutePosition) const;
    237235
    238     void reserveBytes(const std::string & name, llvm::Value * requested) const {
    239         reserveBytes(getSelf(), name, requested);
    240     }
     236    void setBaseAddress(const std::string & name, llvm::Value * addr) const;
     237
     238    llvm::Value * getBufferedSize(const std::string & name) const;
     239
     240    void setBufferedSize(const std::string & name, llvm::Value * size) const;
     241
     242    void reserveBytes(const std::string & name, llvm::Value * requested) const;
    241243
    242244    llvm::Value * getScalarFieldPtr(const std::string & name) const {
     
    256258    }
    257259
    258     inline llvm::Value * getConsumedItemCount(const std::string & name) const {
    259         return getConsumedItemCount(getSelf(), name);
    260     }
    261 
    262     inline void setConsumedItemCount(const std::string & name, llvm::Value * value) const {
    263         setConsumedItemCount(getSelf(), name, value);
    264     }
    265 
    266260    inline llvm::Value * getProcessedItemCount(const std::string & name) const {
    267261        return getProcessedItemCount(getSelf(), name);
     
    294288    llvm::Value * getScalarFieldPtr(llvm::Value * instance, llvm::Value * index) const;
    295289
    296     unsigned getStreamSetIndex(const std::string & name) const;
     290    StreamPort getStreamPort(const std::string & name) const;
    297291
    298292    const parabix::StreamSetBuffer * getInputStreamSetBuffer(const std::string & name) const {
    299         const auto index = getStreamSetIndex(name);
    300         assert (index < mStreamSetInputBuffers.size());
    301         return mStreamSetInputBuffers[index];
     293        const auto port = getStreamPort(name);
     294        assert (port.first == Port::Input);
     295        assert (port.second < mStreamSetInputBuffers.size());
     296        return mStreamSetInputBuffers[port.second];
    302297    }
    303298
    304299    const parabix::StreamSetBuffer * getOutputStreamSetBuffer(const std::string & name) const {
    305         const auto index = getStreamSetIndex(name);
    306         assert (index < mStreamSetOutputBuffers.size());
    307         return mStreamSetOutputBuffers[index];
     300        const auto port = getStreamPort(name);
     301        assert (port.first == Port::Output);
     302        assert (port.second < mStreamSetOutputBuffers.size());
     303        return mStreamSetOutputBuffers[port.second];
    308304    }
    309305
     
    322318
    323319    std::vector<llvm::Type *>                       mKernelFields;
    324     NameMap                                         mKernelMap;
    325     NameMap                                         mStreamSetNameMap;
     320    KernelMap                                       mKernelMap;
     321    StreamMap                                       mStreamMap;
    326322    std::vector<const parabix::StreamSetBuffer *>   mStreamSetInputBuffers;
    327323    std::vector<const parabix::StreamSetBuffer *>   mStreamSetOutputBuffers;
  • icGREP/icgrep-devel/icgrep/kernels/linebreak_kernel.h

    r5358 r5398  
    1616public:
    1717    LineBreakKernelBuilder(IDISA::IDISA_Builder * iBuilder, std::string linebreak, unsigned basisBitsCount);
     18    bool moduleIDisSignature() override {return true;}
    1819};
    1920
  • icGREP/icgrep-devel/icgrep/kernels/mmap_kernel.cpp

    r5391 r5398  
    4343}
    4444
    45 void MMapSourceKernel::generateInitMethod() {
    46 //    Value * fileSize = getScalarField("fileSize");
    47 //    fileSize = iBuilder->CreateUDiv(fileSize, iBuilder->getSize(mCodeUnitWidth / 8));
    48 //    setProducedItemCount("sourceBuffer", fileSize);
    49 }
    50 
    5145MMapSourceKernel::MMapSourceKernel(IDISA::IDISA_Builder * iBuilder, unsigned blocksPerSegment, unsigned codeUnitWidth)
    5246: SegmentOrientedKernel(iBuilder, "Parabix:mmap_source",
  • icGREP/icgrep-devel/icgrep/kernels/mmap_kernel.h

    r5392 r5398  
    2020    bool moduleIDisSignature() override {return true;}
    2121private:
    22     void generateInitMethod() override;
    2322    void generateDoSegmentMethod(llvm::Value * doFinal, const std::vector<llvm::Value *> & producerPos) override;
    2423private:
  • icGREP/icgrep-devel/icgrep/kernels/pipeline.cpp

    r5390 r5398  
    162162        }
    163163        iBuilder->SetInsertPoint(segmentLoopBody[k]);
    164         std::vector<Value *> doSegmentArgs = {instancePtrs[k], doFinal};
     164        std::vector<Value *> args = {instancePtrs[k], doFinal};
    165165        for (unsigned j = 0; j < K->getStreamInputs().size(); j++) {
    166166            unsigned producerKernel, outputIndex;
    167167            std::tie(producerKernel, outputIndex) = producerTable[k][j];
    168             doSegmentArgs.push_back(ProducerPos[producerKernel][outputIndex]);
    169         }
    170         K->createDoSegmentCall(doSegmentArgs);
     168            args.push_back(ProducerPos[producerKernel][outputIndex]);
     169        }
     170        K->createDoSegmentCall(args);
    171171         if (! (K->hasNoTerminateAttribute())) {
    172172            Value * terminated = K->getTerminationSignal(instancePtrs[k]);
    173173            doFinal = iBuilder->CreateOr(doFinal, terminated);
    174174        }
    175        std::vector<Value *> produced;
     175        std::vector<Value *> produced;
    176176        for (unsigned i = 0; i < K->getStreamOutputs().size(); i++) {
    177177            produced.push_back(K->getProducedItemCount(instancePtrs[k], K->getStreamOutputs()[i].name, doFinal));
     
    438438    const auto ip = iBuilder->saveIP();
    439439    for (unsigned i = 0; i < threadNum; i++) {
    440         thread_functions.push_back(generateParallelPipelineThreadFunction("thread"+std::to_string(i), iBuilder, kernels, sharedStructType, producerTable, consumerTable, i));
     440        thread_functions.push_back(generateParallelPipelineThreadFunction("thread" + std::to_string(i), iBuilder, kernels, sharedStructType, producerTable, consumerTable, i));
    441441    }
    442442    iBuilder->restoreIP(ip);
     
    470470    const ProducerTable producer = createProducerTable(kernels);
    471471
    472 //    const ConsumerTable consumer = createConsumerTable(kernels);
     472 //   const ConsumerTable consumer = createConsumerTable(kernels);
    473473   
    474474    // ProducerPos[k][i] will hold the producedItemCount of the i^th output stream
     
    484484    Value * terminated = ConstantInt::getFalse(iBuilder->getContext());
    485485    for (unsigned k = 0; k < kernels.size(); k++) {
    486         KernelBuilder * const K = kernels[k];
    487         Value * const instance = K->getInstance();
     486        KernelBuilder * const kernel = kernels[k];
     487        Value * const instance = kernel->getInstance();
    488488        std::vector<Value *> args = {instance, terminated};
    489         for (unsigned j = 0; j < K->getStreamInputs().size(); j++) {
     489        for (unsigned j = 0; j < kernel->getStreamInputs().size(); j++) {
    490490            unsigned producerKernel, outputIndex;
    491491            std::tie(producerKernel, outputIndex) = producer[k][j];
    492492            args.push_back(ProducerPos[producerKernel][outputIndex]);
    493493        }
    494         K->createDoSegmentCall(args);
    495         if (!K->hasNoTerminateAttribute()) {
    496             terminated = iBuilder->CreateOr(terminated, K->getTerminationSignal(instance));
     494        kernel->createDoSegmentCall(args);
     495        if (!kernel->hasNoTerminateAttribute()) {
     496            terminated = iBuilder->CreateOr(terminated, kernel->getTerminationSignal(instance));
    497497        }
    498498        std::vector<Value *> produced;
    499         const auto & streamOutputs = K->getStreamOutputs();
     499        const auto & streamOutputs = kernel->getStreamOutputs();
    500500        for (unsigned i = 0; i < streamOutputs.size(); i++) {
    501             produced.push_back(K->getProducedItemCount(instance, streamOutputs[i].name, terminated));
     501            produced.push_back(kernel->getProducedItemCount(instance, streamOutputs[i].name, terminated));
    502502        }
    503503        ProducerPos.push_back(produced);
    504         Value * segNo = K->acquireLogicalSegmentNo(instance);
    505         K->releaseLogicalSegmentNo(instance, iBuilder->CreateAdd(segNo, iBuilder->getSize(1)));
     504        Value * segNo = kernel->acquireLogicalSegmentNo(instance);
     505        kernel->releaseLogicalSegmentNo(instance, iBuilder->CreateAdd(segNo, iBuilder->getSize(1)));
    506506    }
    507507
  • icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.cpp

    r5391 r5398  
    1111using namespace llvm;
    1212
     13inline static unsigned floor_log2(const unsigned v) {
     14    assert ("log2(0) is undefined!" && v != 0);
     15    return 31 - __builtin_clz(v);
     16}
     17
    1318namespace kernel {
    1419
    15 Value * generateForwardZeroesMask(IDISA::IDISA_Builder * iBuilder, Value * bits) {
    16     Value * bits_minus1 = iBuilder->CreateSub(bits, ConstantInt::get(bits->getType(), 1));
    17     return iBuilder->CreateAnd(bits_minus1, iBuilder->CreateNot(bits));
    18 }
    19 
    20 Value * generatePopcount(IDISA::IDISA_Builder * iBuilder, Value * bits) {
    21     Value * ctpopFunc = Intrinsic::getDeclaration(iBuilder->getModule(), Intrinsic::ctpop, bits->getType());
    22     return iBuilder->CreateCall(ctpopFunc, std::vector<Value *>({bits}));
    23 }
    24 
    25 Value * generateCountForwardZeroes(IDISA::IDISA_Builder * iBuilder, Value * bits) {
    26     Value * cttzFunc = Intrinsic::getDeclaration(iBuilder->getModule(), Intrinsic::cttz, bits->getType());
    27     return iBuilder->CreateCall(cttzFunc, std::vector<Value *>({bits, ConstantInt::get(iBuilder->getInt1Ty(), 0)}));
    28 }
    29 
    30 Value * generateCountReverseZeroes(IDISA::IDISA_Builder * iBuilder, Value * bits) {
    31     Value * ctlzFunc = Intrinsic::getDeclaration(iBuilder->getModule(), Intrinsic::ctlz, bits->getType());
    32     return iBuilder->CreateCall(ctlzFunc, std::vector<Value *>({bits, ConstantInt::get(iBuilder->getInt1Ty(), 0)}));
    33 }
    34 
    35 Value * generateResetLowestBit(IDISA::IDISA_Builder * iBuilder, Value * bits) {
    36     Value * bits_minus1 = iBuilder->CreateSub(bits, ConstantInt::get(bits->getType(), 1));
    37     return iBuilder->CreateAnd(bits_minus1, bits);
     20inline std::string getGrepTypeId(const GrepType grepType) {
     21    switch (grepType) {
     22        case GrepType::Normal:
     23            return "N";
     24        case GrepType::NameExpression:
     25            return "E";
     26        case GrepType::PropertyValue:
     27            return "P";
     28        default:
     29            llvm_unreachable("unknown grep type!");
     30    }
    3831}
    3932
     
    5649    VectorType * const scanwordVectorType =  VectorType::get(sizeTy, fieldCount);
    5750    Value * const blockNo = getScalarField("BlockNo");
    58     Value * const scanwordPos = iBuilder->CreateMul(blockNo, ConstantInt::get(blockNo->getType(), iBuilder->getBitBlockWidth()));
    59     Value * const lastRecordStart = getScalarField("LineStart");
     51    Value * const scanwordPos = iBuilder->CreateShl(blockNo, floor_log2(iBuilder->getBitBlockWidth()));
     52    Value * const lastRecordStart = getProcessedItemCount("InputStream");
    6053    Value * const lastRecordNum = getScalarField("LineNum");
    6154    Value * const inputStream = iBuilder->CreatePointerCast(getRawInputPointer("InputStream", iBuilder->getInt32(0), iBuilder->getInt32(0)), codeUnitTy);
    62 
    63     Value * const fileSize = iBuilder->CreateAdd(getProcessedItemCount("InputStream"), getScalarField("PendingBytes"));
    64 
    65     Constant * matchProcessor = nullptr;
    66     Value * fileIdx = nullptr;
    67     switch (mGrepType) {
    68         case GrepType::Normal:
    69             fileIdx = getScalarField("FileIdx");
    70             matchProcessor = m->getOrInsertFunction("wrapped_report_match" + std::to_string(mCodeUnitWidth), iBuilder->getVoidTy(), sizeTy, sizeTy, sizeTy, codeUnitTy, sizeTy, sizeTy, nullptr);
    71             break;
    72         case GrepType::NameExpression:
    73             matchProcessor = m->getOrInsertFunction("insert_codepoints", iBuilder->getVoidTy(), sizeTy, sizeTy, sizeTy, codeUnitTy, nullptr);
    74             break;
    75         case GrepType::PropertyValue:
    76             matchProcessor = m->getOrInsertFunction("insert_property_values", iBuilder->getVoidTy(), sizeTy, sizeTy, sizeTy, codeUnitTy, nullptr);
    77             break;
    78         default: llvm_unreachable("unknown grep type");
    79     }
    8055
    8156    Value * const matches = iBuilder->CreateBitCast(loadInputStreamBlock("matchResult", iBuilder->getInt32(0)), scanwordVectorType);
     
    125100            // The loop body is entered if we have more matches to process.
    126101            iBuilder->SetInsertPoint(processMatchesEntry);
    127             Value * prior_breaks = iBuilder->CreateAnd(generateForwardZeroesMask(iBuilder, phiMatchWord), phiRecordBreaks);
     102            Value * prior_breaks = iBuilder->CreateAnd(makeForwardZeroesMask(phiMatchWord), phiRecordBreaks);
    128103            // Within the loop we have a conditional block that is executed if there are any prior record breaks.
    129104            Value * prior_breaks_cond = iBuilder->CreateICmpNE(prior_breaks, ConstantInt::getNullValue(sizeTy));
     
    132107                // PRIOR_BREAKS_BLOCK
    133108                // If there are prior breaks, we count them and compute the record start position.
    134                 iBuilder->SetInsertPoint(prior_breaks_block);
    135                 Value * matchedRecordNum = iBuilder->CreateAdd(generatePopcount(iBuilder, prior_breaks), phiRecordNum);
    136                 Value * reverseDistance = generateCountReverseZeroes(iBuilder, prior_breaks);
     109                iBuilder->SetInsertPoint(prior_breaks_block);               
     110                Value * matchedRecordNum = iBuilder->CreateAdd(iBuilder->CreatePopcount(prior_breaks), phiRecordNum);
     111                Value * reverseDistance = iBuilder->CreateCountReverseZeroes(prior_breaks);
    137112                Value * width = ConstantInt::get(sizeTy, sizeTy->getBitWidth());
    138113                Value * priorRecordStart = iBuilder->CreateAdd(phiScanwordPos, iBuilder->CreateSub(width, reverseDistance));
     
    153128            phiRecordStart->addIncoming(matchRecordStart, loop_final_block);
    154129
    155             Value * matchRecordEnd = iBuilder->CreateAdd(phiScanwordPos, generateCountForwardZeroes(iBuilder, phiMatchWord));
     130            Value * matchRecordEnd = iBuilder->CreateAdd(phiScanwordPos, iBuilder->CreateCountForwardZeroes(phiMatchWord));
     131            Function * const matcher = m->getFunction("matcher");
     132            assert (matcher);
    156133            switch (mGrepType) {
    157134                case GrepType::Normal:
    158                     iBuilder->CreateCall(matchProcessor, {matchRecordNum, matchRecordStart, matchRecordEnd, inputStream, fileSize, fileIdx});
     135                    iBuilder->CreateCall(matcher, {matchRecordNum, matchRecordStart, matchRecordEnd, inputStream, getBufferedSize("InputStream"), getScalarField("FileIdx")});
    159136                    break;
    160137                case GrepType::NameExpression:
    161138                case GrepType::PropertyValue:
    162                     iBuilder->CreateCall(matchProcessor, {matchRecordNum, matchRecordStart, matchRecordEnd, inputStream});
     139                    iBuilder->CreateCall(matcher, {matchRecordNum, matchRecordStart, matchRecordEnd, inputStream});
    163140                    break;
    164141                default: break;
    165142            }
    166143
    167             Value * remaining_matches = generateResetLowestBit(iBuilder, phiMatchWord);
     144            Value * remaining_matches = resetLowestBit(phiMatchWord);
    168145            phiMatchWord->addIncoming(remaining_matches, loop_final_block);
    169146
     
    181158            // REMAINING_BREAKS_BLOCK: process remaining record breaks after all matches are processed
    182159            iBuilder->SetInsertPoint(remaining_breaks_block);
    183             Value * break_count = generatePopcount(iBuilder, phiRecordBreaks);
     160            Value * break_count = iBuilder->CreatePopcount(phiRecordBreaks);
    184161            Value * final_record_num = iBuilder->CreateAdd(phiRecordNum, break_count);
    185             Value * reverseZeroes = generateCountReverseZeroes(iBuilder, phiRecordBreaks);
     162            Value * reverseZeroes = iBuilder->CreateCountReverseZeroes(phiRecordBreaks);
    186163            Value * pendingLineStart = iBuilder->CreateAdd(phiScanwordPos, iBuilder->CreateSub(width, reverseZeroes));
    187164            iBuilder->CreateBr(return_block);
     
    208185    iBuilder->SetInsertPoint(scanWordExit);
    209186    setScalarField("BlockNo", iBuilder->CreateAdd(blockNo, ConstantInt::get(blockNo->getType(), 1)));
    210     setScalarField("LineStart", phiFinalRecordStart);
    211187    setScalarField("LineNum", phiFinalRecordNum);
    212 }
    213 
    214 void ScanMatchKernel::generateInitMethod() {
    215     setScalarField("PendingBytes", iBuilder->getSize(iBuilder->getBitBlockWidth() + 2));
    216 }
    217 
    218 void ScanMatchKernel::generateFinalBlockMethod(llvm::Value * remainingItems) {
    219     setScalarField("PendingBytes", remainingItems);
    220     CreateDoBlockMethodCall();
     188    setProcessedItemCount("InputStream", phiFinalRecordStart);
     189}
     190
     191inline Value * ScanMatchKernel::makeForwardZeroesMask(Value * const value) const {
     192    return iBuilder->CreateAnd(iBuilder->CreateSub(value, ConstantInt::get(value->getType(), 1)), iBuilder->CreateNot(value));
     193}
     194
     195inline Value * ScanMatchKernel::resetLowestBit(Value * const value) const {
     196    return iBuilder->CreateAnd(iBuilder->CreateSub(value, ConstantInt::get(value->getType(), 1)), value);
    221197}
    222198
    223199ScanMatchKernel::ScanMatchKernel(IDISA::IDISA_Builder * iBuilder, GrepType grepType, const unsigned codeUnitWidth)
    224 : BlockOrientedKernel(iBuilder, "Parabix:scanMatch" + std::to_string(codeUnitWidth),
    225     {Binding{iBuilder->getStreamSetTy(1, 8), "InputStream"}, Binding{iBuilder->getStreamSetTy(1, 1), "matchResult"}, Binding{iBuilder->getStreamSetTy(1, 1), "lineBreak"}},
     200: BlockOrientedKernel(iBuilder, "Parabix:scanMatch" + getGrepTypeId(grepType) + std::to_string(codeUnitWidth),
     201    {Binding{iBuilder->getStreamSetTy(1, 1), "matchResult"}, Binding{iBuilder->getStreamSetTy(1, 1), "lineBreak"}, Binding{iBuilder->getStreamSetTy(1, 8), "InputStream", UnknownRate()}},
    226202    {},
    227203    {Binding{iBuilder->getSizeTy(), "FileIdx"}},
    228204    {},
    229     {Binding{iBuilder->getSizeTy(), "BlockNo"}, Binding{iBuilder->getSizeTy(), "LineStart"}, Binding{iBuilder->getSizeTy(), "LineNum"}, Binding{iBuilder->getSizeTy(), "PendingBytes"}})
     205    {Binding{iBuilder->getSizeTy(), "BlockNo"}, Binding{iBuilder->getSizeTy(), "LineNum"}})
    230206, mGrepType(grepType)
    231207, mCodeUnitWidth(codeUnitWidth) {
    232 }
    233 
    234 }
     208
     209}
     210
     211}
  • icGREP/icgrep-devel/icgrep/kernels/scanmatchgen.h

    r5392 r5398  
    1919    bool moduleIDisSignature() override {return true;}
    2020protected:
    21     void generateInitMethod() override;
    2221    void generateDoBlockMethod() override;
    23     void generateFinalBlockMethod(llvm::Value * remainingItems) override;
    2422private:
    25     GrepType        mGrepType;
    26     const unsigned  mCodeUnitWidth;
     23    llvm::Value * makeForwardZeroesMask(llvm::Value * const value) const;
     24    llvm::Value * resetLowestBit(llvm::Value * const value) const;
     25private:
     26    const GrepType      mGrepType;
     27    const unsigned      mCodeUnitWidth;
    2728};
    2829}
  • icGREP/icgrep-devel/icgrep/kernels/stdin_kernel.cpp

    r5386 r5398  
    77#include <kernels/kernel.h>
    88#include <IR_Gen/idisa_builder.h>
     9#include <llvm/Support/raw_ostream.h>
    910
    1011using namespace llvm;
     
    2425
    2526    ConstantInt * const segmentSize = iBuilder->getSize(mSegmentBlocks * iBuilder->getBitBlockWidth());
    26     Value * bufferedSize = getScalarField("BufferedSize");
    27     Value * const itemsAlreadyRead = getProducedItemCount("codeUnitBuffer");
     27    Value * bufferedSize = getBufferedSize("InputStream");
     28    Value * const itemsAlreadyRead = getProducedItemCount("InputStream");
    2829    Value * const bytesAlreadyRead = iBuilder->CreateMul(itemsAlreadyRead, iBuilder->getSize(mCodeUnitWidth / 8));
    2930    Value * unreadSize = iBuilder->CreateSub(bufferedSize, bytesAlreadyRead);
     31
    3032    Value * const exaustedBuffer = iBuilder->CreateICmpULT(unreadSize, segmentSize);
    3133    iBuilder->CreateUnlikelyCondBr(exaustedBuffer, readBlock, stdInExit);
     
    3537    const auto PageAlignedSegmentSize = round_up_to_nearest((mSegmentBlocks + 1) * iBuilder->getBitBlockWidth() * (mCodeUnitWidth / 8), getpagesize());
    3638    ConstantInt * const bytesToRead = iBuilder->getSize(PageAlignedSegmentSize);
    37     reserveBytes("codeUnitBuffer", bytesToRead);
     39    reserveBytes("InputStream", bytesToRead);
    3840    BasicBlock * const readExit = iBuilder->GetInsertBlock();
    3941
    40     Value * const ptr = getRawOutputPointer("codeUnitBuffer", iBuilder->getInt32(0), bufferedSize);
     42    Value * const ptr = getRawOutputPointer("InputStream", iBuilder->getInt32(0), bufferedSize);
    4143    Value * const bytePtr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt8PtrTy());
    4244    Value * const bytesRead = iBuilder->CreateReadCall(iBuilder->getInt32(STDIN_FILENO), bytePtr, bytesToRead);
     
    4446    unreadSize = iBuilder->CreateAdd(unreadSize, bytesRead);
    4547    bufferedSize = iBuilder->CreateAdd(bufferedSize, bytesRead);
    46     setScalarField("BufferedSize", bufferedSize);
     48    setBufferedSize("InputStream", bufferedSize);
    4749    iBuilder->CreateUnlikelyCondBr(iBuilder->CreateICmpULT(unreadSize, segmentSize), setTermination, stdInExit);
    4850
     
    6264    Value * const itemsRead = iBuilder->CreateAdd(itemsAlreadyRead, produced);
    6365
    64     setProducedItemCount("codeUnitBuffer", itemsRead);
     66    setProducedItemCount("InputStream", itemsRead);
    6567}
    6668
    6769StdInKernel::StdInKernel(IDISA::IDISA_Builder * iBuilder, unsigned blocksPerSegment, unsigned codeUnitWidth)
    68 : SegmentOrientedKernel(iBuilder, "stdin_source", {}, {Binding{iBuilder->getStreamSetTy(1, codeUnitWidth), "codeUnitBuffer"}}, {}, {}, {Binding{iBuilder->getSizeTy(), "BufferedSize"}})
     70: SegmentOrientedKernel(iBuilder, "stdin_source", {}, {Binding{iBuilder->getStreamSetTy(1, codeUnitWidth), "InputStream"}}, {}, {}, {})
    6971, mSegmentBlocks(blocksPerSegment)
    7072, mCodeUnitWidth(codeUnitWidth) {
     
    7274}
    7375
    74 void FileSource::generateInitMethod() {
    75     BasicBlock * setTerminationOnFailure = CreateBasicBlock("setTerminationOnFailure");
    76     BasicBlock * fileSourceInitExit = CreateBasicBlock("fileSourceInitExit");
    77     Value * handle = iBuilder->CreateFOpenCall(getScalarField("fileName"), iBuilder->CreateGlobalStringPtr("r"));
    78     setScalarField("IOstreamPtr", handle);
    79     Value * failure = iBuilder->CreateICmpEQ(iBuilder->CreatePtrToInt(handle, iBuilder->getSizeTy()), iBuilder->getSize(0));
    80     iBuilder->CreateCondBr(failure, setTerminationOnFailure, fileSourceInitExit);
    81     iBuilder->SetInsertPoint(setTerminationOnFailure);
     76void FileSourceKernel::generateDoSegmentMethod(Value *doFinal, const std::vector<Value *> &producerPos) {
     77
     78    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
     79    BasicBlock * setTermination = CreateBasicBlock("setTermination");
     80    BasicBlock * mmapSourceExit = CreateBasicBlock("mmapSourceExit");
     81    ConstantInt * segmentItems = iBuilder->getSize(mSegmentBlocks * iBuilder->getBitBlockWidth());
     82    Value * fileItems = getScalarField("fileSize");
     83    if (mCodeUnitWidth > 8) {
     84        fileItems = iBuilder->CreateUDiv(fileItems, iBuilder->getSize(mCodeUnitWidth / 8));
     85    }
     86    Value * produced = getProducedItemCount("sourceBuffer");
     87    produced = iBuilder->CreateAdd(produced, segmentItems);
     88    Value * lessThanFullSegment = iBuilder->CreateICmpULT(fileItems, produced);
     89    iBuilder->CreateCondBr(lessThanFullSegment, setTermination, mmapSourceExit);
     90    iBuilder->SetInsertPoint(setTermination);
    8291    setTerminationSignal();
    83     iBuilder->CreateBr(fileSourceInitExit);
    84     iBuilder->SetInsertPoint(fileSourceInitExit);
     92    iBuilder->CreateBr(mmapSourceExit);
     93
     94    iBuilder->SetInsertPoint(mmapSourceExit);
     95
     96    PHINode * itemsRead = iBuilder->CreatePHI(produced->getType(), 2);
     97    itemsRead->addIncoming(produced, entryBlock);
     98    itemsRead->addIncoming(fileItems, setTermination);
     99    setProducedItemCount("sourceBuffer", itemsRead);
    85100}
    86    
    87 void FileSource::generateDoSegmentMethod(Value * /* doFinal */, const std::vector<Value *> & /* producerPos */) {
    88101
    89     BasicBlock * closeFile = CreateBasicBlock("closeFile");
    90     BasicBlock * fileSourceExit = CreateBasicBlock("fileSourceExit");
    91     Constant * itemBytes = iBuilder->getSize(mCodeUnitWidth/8);
    92    
    93     Value * produced = getProducedItemCount("codeUnitBuffer");
    94     Value * bytePtr = getOutputStreamBlockPtr("codeUnitBuffer", iBuilder->getInt32(0));
    95     bytePtr = iBuilder->CreatePointerCast(bytePtr, iBuilder->getInt8PtrTy());
     102void FileSourceKernel::generateInitMethod() {
     103    setBaseAddress("sourceBuffer", getScalarField("fileSource"));
     104    setBufferedSize("sourceBuffer", getScalarField("fileSize"));
     105}
    96106
    97     Value * IOstreamPtr = getScalarField("IOstreamPtr");
    98     Value * itemsToDo = iBuilder->getSize(mSegmentBlocks * iBuilder->getBitBlockWidth());
    99     Value * nRead = iBuilder->CreateFReadCall(bytePtr, itemsToDo, itemBytes, IOstreamPtr);
    100     produced = iBuilder->CreateAdd(produced, nRead);
    101     setProducedItemCount("codeUnitBuffer", produced);
    102     Value * lessThanFullSegment = iBuilder->CreateICmpULT(nRead, itemsToDo);
    103     iBuilder->CreateCondBr(lessThanFullSegment, closeFile, fileSourceExit);
    104 
    105     iBuilder->SetInsertPoint(closeFile);
    106     iBuilder->CreateFCloseCall(IOstreamPtr);
    107     setTerminationSignal();
    108     iBuilder->CreateBr(fileSourceExit);
    109    
    110     iBuilder->SetInsertPoint(fileSourceExit);
    111    
    112 }
    113    
    114 FileSource::FileSource(IDISA::IDISA_Builder * iBuilder, unsigned blocksPerSegment, unsigned codeUnitWidth)
    115 : SegmentOrientedKernel(iBuilder, "filesink", {Binding{iBuilder->getStreamSetTy(1, codeUnitWidth), "codeUnitBuffer"}}, {},
    116                 {Binding{iBuilder->getInt8PtrTy(), "fileName"}}, {}, {Binding{iBuilder->getFILEptrTy(), "IOstreamPtr"}})
     107FileSourceKernel::FileSourceKernel(IDISA::IDISA_Builder * iBuilder, Type * fileSourceTy, unsigned blocksPerSegment, unsigned codeUnitWidth)
     108: SegmentOrientedKernel(iBuilder, "Parabix:file_source",
     109    {},
     110    {Binding{iBuilder->getStreamSetTy(1, codeUnitWidth), "sourceBuffer"}},
     111    {Binding{fileSourceTy, "fileSource"}, Binding{iBuilder->getSizeTy(), "fileSize"}}, {}, {})
    117112, mSegmentBlocks(blocksPerSegment)
    118113, mCodeUnitWidth(codeUnitWidth) {
     114
    119115}
    120116
     117
     118
    121119}
  • icGREP/icgrep-devel/icgrep/kernels/stdin_kernel.h

    r5377 r5398  
    1717public:
    1818    StdInKernel(IDISA::IDISA_Builder * iBuilder, unsigned blocksPerSegment = 1, unsigned codeUnitWidth = 8);
     19    bool moduleIDisSignature() override { return true; }
    1920protected:
    2021    void generateDoSegmentMethod(llvm::Value * doFinal, const std::vector<llvm::Value *> & producerPos) override;
     
    2324    unsigned mCodeUnitWidth;
    2425};
    25    
    2626
    27 class FileSource final : public SegmentOrientedKernel {
     27class FileSourceKernel final : public SegmentOrientedKernel {
    2828public:
    29     FileSource(IDISA::IDISA_Builder * iBuilder, unsigned blocksPerSegment = 1, unsigned codeUnitWidth = 8);
     29    FileSourceKernel(IDISA::IDISA_Builder * iBuilder, llvm::Type * fileSourceTy, unsigned blocksPerSegment = 1, unsigned codeUnitWidth = 8);
     30    bool moduleIDisSignature() override { return true; }
    3031protected:
    3132    void generateInitMethod() override;
     
    3435    unsigned mSegmentBlocks;
    3536    unsigned mCodeUnitWidth;
    36  
    37 };
     37  };
    3838
    3939}
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r5397 r5398  
    4444}
    4545
     46void StreamSetBuffer::setBaseAddress(Value * /* self */, Value * /* addr */) const {
     47    report_fatal_error("setBaseAddress is not supported by this buffer type");
     48}
     49
     50Value * StreamSetBuffer::getBufferedSize(Value * /* self */) const {
     51    report_fatal_error("getBufferedSize is not supported by this buffer type");
     52}
     53
     54void StreamSetBuffer::setBufferedSize(Value * /* self */, llvm::Value * /* size */) const {
     55    report_fatal_error("setBufferedSize is not supported by this buffer type");
     56}
     57
    4658inline bool StreamSetBuffer::isCapacityGuaranteed(const Value * const index, const size_t capacity) const {
    4759    if (LLVM_UNLIKELY(isa<ConstantInt>(index))) {
     
    8395Value * StreamSetBuffer::getRawItemPointer(Value * self, Value * streamIndex, Value * absolutePosition) const {
    8496    Value * ptr = getBaseAddress(self);
    85     if (isa<ConstantInt>(streamIndex) && cast<ConstantInt>(streamIndex)->isZero()) {
     97    if (!isa<ConstantInt>(streamIndex) || !cast<ConstantInt>(streamIndex)->isZero()) {
    8698        ptr = iBuilder->CreateGEP(ptr, {iBuilder->getInt32(0), streamIndex});
    8799    }
     
    148160
    149161Value * ExternalFileBuffer::getLinearlyAccessibleItems(Value * self, Value *) const {
     162    report_fatal_error("External buffers: getLinearlyAccessibleItems is not supported.");
     163}
     164
     165// Source File Buffer
     166Value * SourceFileBuffer::getBufferedSize(Value * self) const {
     167    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
     168    return iBuilder->CreateLoad(ptr);
     169}
     170
     171void SourceFileBuffer::setBufferedSize(Value * self, llvm::Value * size) const {
     172    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
     173    iBuilder->CreateStore(size, ptr);
     174}
     175
     176void SourceFileBuffer::setBaseAddress(Value * self, Value * addr) const {
     177    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
     178    iBuilder->CreateStore(addr, ptr);
     179}
     180
     181Value * SourceFileBuffer::getBaseAddress(Value * const self) const {
     182    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
     183    return iBuilder->CreateLoad(ptr);
     184}
     185
     186Value * SourceFileBuffer::getStreamSetBlockPtr(Value * self, Value * blockIndex) const {
     187    return iBuilder->CreateGEP(self, blockIndex);
     188}
     189
     190Value * SourceFileBuffer::getLinearlyAccessibleItems(Value * self, Value *) const {
    150191    report_fatal_error("External buffers: getLinearlyAccessibleItems is not supported.");
    151192}
     
    170211    addr = iBuilder->CreatePointerCast(addr, addrPtr->getType()->getPointerElementType());
    171212    iBuilder->CreateStore(addr, addrPtr);
     213    Value * const bufferSizePtr = iBuilder->CreateGEP(instance, {iBuilder->getInt32(0), iBuilder->getInt32(2)});
     214    iBuilder->CreateStore(ConstantInt::getNullValue(bufferSizePtr->getType()->getPointerElementType()), bufferSizePtr);
    172215    mStreamSetBufferPtr = instance;
    173216}
     
    205248    iBuilder->CreateBr(resume);
    206249    iBuilder->SetInsertPoint(resume);
     250}
     251
     252Value * ExtensibleBuffer::getBufferedSize(Value * self) const {
     253    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(2)});
     254    return iBuilder->CreateLoad(ptr);
     255}
     256
     257void ExtensibleBuffer::setBufferedSize(Value * self, llvm::Value * size) const {
     258    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(2)});
     259    iBuilder->CreateStore(size, ptr);
    207260}
    208261
     
    485538}
    486539
     540SourceFileBuffer::SourceFileBuffer(IDISA::IDISA_Builder * b, Type * type, unsigned AddressSpace)
     541: StreamSetBuffer(BufferKind::SourceFileBuffer, b, type, StructType::get(resolveStreamSetType(b, type)->getPointerTo(), b->getSizeTy(), nullptr), 0, AddressSpace) {
     542
     543}
     544
    487545ExtensibleBuffer::ExtensibleBuffer(IDISA::IDISA_Builder * b, Type * type, size_t bufferBlocks, unsigned AddressSpace)
    488 : StreamSetBuffer(BufferKind::ExtensibleBuffer, b, type, StructType::get(b->getSizeTy(), resolveStreamSetType(b, type)->getPointerTo(), nullptr), bufferBlocks, AddressSpace) {
     546: StreamSetBuffer(BufferKind::ExtensibleBuffer, b, type, StructType::get(b->getSizeTy(), resolveStreamSetType(b, type)->getPointerTo(), b->getSizeTy(), nullptr), bufferBlocks, AddressSpace) {
    489547    mUniqueID = "XT" + std::to_string(bufferBlocks);
    490548    if (AddressSpace > 0) mUniqueID += "@" + std::to_string(AddressSpace);
  • icGREP/icgrep-devel/icgrep/kernels/streamset.h

    r5395 r5398  
    1919public:
    2020
    21     enum class BufferKind : unsigned {BlockBuffer, ExternalFileBuffer, CircularBuffer, CircularCopybackBuffer, SwizzledCopybackBuffer, ExpandableBuffer, ExtensibleBuffer};
     21    enum class BufferKind : unsigned {
     22        BlockBuffer
     23        , ExternalFileBuffer
     24        , SourceFileBuffer
     25        , CircularBuffer
     26        , CircularCopybackBuffer
     27        , SwizzledCopybackBuffer
     28        , ExpandableBuffer
     29        , ExtensibleBuffer
     30    };
    2231
    2332    BufferKind getBufferKind() const {
     
    6170    llvm::Value * getRawItemPointer(llvm::Value * self, llvm::Value * streamIndex, llvm::Value * absolutePosition) const;
    6271
     72
     73    virtual void setBaseAddress(llvm::Value * self, llvm::Value * addr) const;
     74
     75    virtual void setBufferedSize(llvm::Value * self, llvm::Value * size) const;
     76
     77    virtual llvm::Value * getBufferedSize(llvm::Value * self) const;
     78
    6379    // The number of items that can be linearly accessed from a given logical stream position.
    6480    virtual llvm::Value * getLinearlyAccessibleItems(llvm::Value * self, llvm::Value * fromPosition) const;
     81
    6582    virtual llvm::Value * getLinearlyAccessibleBlocks(llvm::Value * self, llvm::Value * fromBlock) const;
    6683
     
    102119
    103120protected:
     121
    104122    llvm::Value * getStreamSetBlockPtr(llvm::Value * self, llvm::Value * blockNo) const override;
    105123};
     
    110128        return b->getBufferKind() == BufferKind::ExternalFileBuffer;
    111129    }
    112    
     130
    113131    ExternalFileBuffer(IDISA::IDISA_Builder * b, llvm::Type * type, unsigned AddressSpace = 0);
    114132
     
    119137
    120138    llvm::Value * getLinearlyAccessibleItems(llvm::Value * self, llvm::Value * fromPosition) const override;
    121    
    122 protected:
     139
     140protected:
     141    llvm::Value * getStreamSetBlockPtr(llvm::Value * self, llvm::Value * blockNo) const override;
     142};
     143
     144class SourceFileBuffer final : public StreamSetBuffer {
     145public:
     146    static inline bool classof(const StreamSetBuffer * b) {
     147        return b->getBufferKind() == BufferKind::SourceFileBuffer;
     148    }
     149
     150    SourceFileBuffer(IDISA::IDISA_Builder * b, llvm::Type * type, unsigned AddressSpace = 0);
     151
     152    void setBaseAddress(llvm::Value * self, llvm::Value * addr) const override;
     153
     154    void setBufferedSize(llvm::Value * self, llvm::Value * size) const override;
     155
     156    llvm::Value * getBufferedSize(llvm::Value * self) const override;
     157
     158    llvm::Value * getLinearlyAccessibleItems(llvm::Value * self, llvm::Value * fromPosition) const override;
     159
     160protected:
     161
     162    llvm::Value * getBaseAddress(llvm::Value * self) const override;
     163
    123164    llvm::Value * getStreamSetBlockPtr(llvm::Value * self, llvm::Value * blockNo) const override;
    124165};
     
    137178
    138179    void reserveBytes(llvm::Value * self, llvm::Value * required) const override;
     180
     181    void setBufferedSize(llvm::Value * self, llvm::Value * size) const override;
     182
     183    llvm::Value * getBufferedSize(llvm::Value * self) const override;
    139184
    140185    void releaseBuffer(llvm::Value * self) const override;
  • icGREP/icgrep-devel/icgrep/pablo/pablo_kernel.cpp

    r5392 r5398  
    2828}
    2929
    30 Var * PabloKernel::getInputStreamVar(const std::string & inputSetName) {
    31     return mInputs[getStreamSetIndex(inputSetName)];
     30Var * PabloKernel::getInputStreamVar(const std::string & name) {
     31    Port port; unsigned index;
     32    std::tie(port, index) = getStreamPort(name);
     33    assert (port == Port::Input);
     34    return mInputs[index];
    3235}
    3336
    34 Var * PabloKernel::getOutputStreamVar(const std::string & outputSetName) {
    35     return mOutputs[getStreamSetIndex(outputSetName)];
     37Var * PabloKernel::getOutputStreamVar(const std::string & name) {
     38    Port port; unsigned index;
     39    std::tie(port, index) = getStreamPort(name);
     40    assert (port == Port::Output);
     41    return mOutputs[index];
    3642}
    3743
    38 Var * PabloKernel::getOutputScalarVar(const std::string & outputName) {
    39     const auto f = mScalarOutputNameMap.find(outputName);
     44Var * PabloKernel::getOutputScalarVar(const std::string & name) {
     45    const auto f = mScalarOutputNameMap.find(name);
    4046    if (LLVM_UNLIKELY(f == mScalarOutputNameMap.end())) {
    41         report_fatal_error("Kernel does not contain scalar: " + outputName);
     47        report_fatal_error("Kernel does not contain scalar: " + name);
    4248    }
    4349    return f->second;
     
    5056    mVariables.push_back(param);
    5157    if (isStreamType(type)) {
    52         mStreamSetNameMap.emplace(name, mStreamSetInputs.size());
     58        mStreamMap.emplace(name, std::make_pair(Port::Input, mStreamSetInputs.size()));
    5359        mStreamSetInputs.emplace_back(type, name);       
    5460    } else {
     
    6672    mVariables.push_back(result);
    6773    if (isStreamType(type)) {
    68         mStreamSetNameMap.emplace(name, mStreamSetOutputs.size());
     74        mStreamMap.emplace(name, std::make_pair(Port::Output, mStreamSetOutputs.size()));
    6975        mStreamSetOutputs.emplace_back(type, name);
    7076    } else {
  • icGREP/icgrep-devel/icgrep/pablo/pablo_kernel.h

    r5371 r5398  
    6868    }
    6969   
    70     Var * getInputStreamVar(const std::string & inputSetName);
     70    Var * getInputStreamVar(const std::string & name);
    7171
    7272    Var * getInput(const unsigned index) {
     
    8484    }
    8585   
    86     Var * getOutputStreamVar(const std::string & inputSetName);
     86    Var * getOutputStreamVar(const std::string & name);
    8787   
    88     Var * getOutputScalarVar(const std::string & outputName);
     88    Var * getOutputScalarVar(const std::string & name);
    8989
    9090    Var * getOutput(const unsigned index) {
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r5371 r5398  
    198198}
    199199
     200//void RE_Compiler::finalizeMatchResult(MarkerType match_result, bool InvertMatches) {
     201//    PabloAST * match_follow = mPB.createMatchStar(markerVar(match_result), mAny);
     202//    if (InvertMatches) {
     203//        match_follow = mPB.createNot(match_follow);
     204//    }
     205//    PabloAST * matches = mPB.createAnd(match_follow, mLineBreak, "matches");
     206//    if (mCountOnly) {
     207//        Var * const output = mKernel->getOutputScalarVar("matchedLineCount");
     208//        PabloBuilder nestedCount = PabloBuilder::Create(mPB);
     209//        mPB.createIf(matches, nestedCount);
     210//        nestedCount.createAssign(output, nestedCount.createCount(matches));
     211//    } else {
     212//        Var * const output = mKernel->getOutputStreamVar("output");
     213//        mPB.createAssign(mPB.createExtract(output, mPB.getInteger(0)), matches);
     214//    }
     215//}
     216
    200217void RE_Compiler::finalizeMatchResult(MarkerType match_result, bool InvertMatches) {
    201218    PabloAST * match_follow = mPB.createMatchStar(markerVar(match_result), mAny);
     
    212229        Var * const output = mKernel->addOutput("output", mKernel->getStreamSetTy(1));
    213230        mPB.createAssign(mPB.createExtract(output, mPB.getInteger(0)), matches);
    214         //mPB.createAssign(mPB.createExtract(output, mPB.getInteger(1)), mLineBreak);
    215231    }
    216232}
  • icGREP/icgrep-devel/icgrep/toolchain.cpp

    r5394 r5398  
    2222#include <llvm/Transforms/Scalar.h>
    2323#include <llvm/Transforms/Utils/Local.h>
     24#include <llvm/IR/Module.h>
    2425#include <object_cache.h>
    2526#include <kernels/pipeline.h>
     
    3031#endif
    3132 
     33
     34
    3235using namespace llvm;
    3336
     
    8689   
    8790static cl::opt<bool> segmentPipelineParallel("enable-segment-pipeline-parallel", cl::desc("Enable multithreading with segment pipeline parallelism."), cl::cat(CodeGenOptions));
    88    
     91
    8992
    9093   
     
    273276}
    274277
    275 
    276 ParabixDriver::ParabixDriver(IDISA::IDISA_Builder * iBuilder) : iBuilder(iBuilder) {
    277     mMainModule = iBuilder->getModule();
    278     if (codegen::EnableObjectCache) {
    279         if (codegen::ObjectCacheDir.empty()) {
    280             mCache = llvm::make_unique<ParabixObjectCache>();
    281         }
    282         else {
    283             mCache = llvm::make_unique<ParabixObjectCache>(codegen::ObjectCacheDir);
    284         }
    285     }
    286 }
    287 
    288 void ParabixDriver::JITcompileMain () {
    289 
    290     // Use the pass manager to optimize the function.
    291     #ifndef NDEBUG
    292     try {
    293     #endif
    294     legacy::PassManager PM;
    295     #ifndef NDEBUG
    296     PM.add(createVerifierPass());
    297     #endif
    298     PM.add(createReassociatePass());             //Reassociate expressions.
    299     PM.add(createGVNPass());                     //Eliminate common subexpressions.
    300     PM.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
    301     PM.add(createCFGSimplificationPass());   
    302     PM.run(*mMainModule);
    303     #ifndef NDEBUG
    304     } catch (...) { mMainModule->dump(); throw; }
    305     #endif
     278ParabixDriver::ParabixDriver(IDISA::IDISA_Builder * iBuilder)
     279: iBuilder(iBuilder)
     280, mMainModule(iBuilder->getModule())
     281, mTarget(nullptr)
     282, mEngine(nullptr)
     283{
    306284    InitializeNativeTarget();
    307285    InitializeNativeTargetAsmPrinter();
     
    333311    setAllFeatures(builder);
    334312
     313    mEngine = builder.create();
     314    if (mEngine == nullptr) {
     315        throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
     316    }
     317    mTarget = builder.selectTarget();
     318    if (codegen::EnableObjectCache) {
     319        if (codegen::ObjectCacheDir.empty()) {
     320            mCache = llvm::make_unique<ParabixObjectCache>();
     321        } else {
     322            mCache = llvm::make_unique<ParabixObjectCache>(codegen::ObjectCacheDir);
     323        }
     324        if (mCache) {
     325            mEngine->setObjectCache(mCache.get());
     326        }
     327    }
     328}
     329
     330void ParabixDriver::JITcompileMain () {
     331    // Use the pass manager to optimize the function.
     332    #ifndef NDEBUG
     333    try {
     334    #endif
     335    legacy::PassManager PM;
     336    #ifndef NDEBUG
     337    PM.add(createVerifierPass());
     338    #endif
     339    PM.add(createReassociatePass());             //Reassociate expressions.
     340    PM.add(createGVNPass());                     //Eliminate common subexpressions.
     341    PM.add(createInstructionCombiningPass());    //Simple peephole optimizations and bit-twiddling.
     342    PM.add(createCFGSimplificationPass());   
     343    PM.run(*mMainModule);
     344    #ifndef NDEBUG
     345    } catch (...) { mMainModule->dump(); throw; }
     346    #endif
     347
    335348    if (LLVM_UNLIKELY(codegen::DebugOptionIsSet(codegen::ShowIR))) {
    336349        if (codegen::IROutputFilename.empty()) {
     
    342355        }
    343356    }
    344 #if LLVM_VERSION_MINOR > 6
     357    #if LLVM_VERSION_MINOR > 6
    345358    if (codegen::DebugOptionIsSet(codegen::ShowASM)) {
    346         WriteAssembly(builder.selectTarget(), mMainModule);
    347     }
    348 #endif
    349     ExecutionEngine * engine = builder.create();
    350     if (engine == nullptr) {
    351         throw std::runtime_error("Could not create ExecutionEngine: " + errMessage);
    352     }
    353     if (mCache) {
    354         engine->setObjectCache(mCache.get());
    355     }
    356     mEngine = engine;
     359        WriteAssembly(mTarget, mMainModule);
     360    }
     361    #endif
    357362}
    358363
    359364void ParabixDriver::addKernelCall(kernel::KernelBuilder & kb, const std::vector<parabix::StreamSetBuffer *> & inputs, const std::vector<parabix::StreamSetBuffer *> & outputs) {
     365    assert (mModuleMap.count(&kb) == 0);
    360366    mKernelList.push_back(&kb);
     367    Module * saveM = iBuilder->getModule();
     368    mModuleMap.emplace(&kb, std::move(kb.createKernelStub()));
     369    assert (iBuilder->getModule() == saveM);
    361370    kb.setCallParameters(inputs, outputs);
    362371}
    363372
    364 
    365373void ParabixDriver::generatePipelineIR() {
    366     for (auto kb : mKernelList) {
     374    for (kernel::KernelBuilder * kb : mKernelList) {
    367375        kb->addKernelDeclarations(mMainModule);
    368376    }
     
    377385}
    378386
     387void ParabixDriver::addExternalLink(kernel::KernelBuilder & kb, llvm::StringRef name, FunctionType *type, void * functionPtr) const {
     388    const auto f = mModuleMap.find(&kb);
     389    assert ("addKernelCall(kb, ...) must be called before addExternalLink(kb, ...)" && f != mModuleMap.end());
     390    llvm::Module * const m = f->second.get();
     391    mEngine->addGlobalMapping(cast<Function>(m->getOrInsertFunction(name, type)), functionPtr);
     392}
     393
    379394void ParabixDriver::linkAndFinalize() {
    380     for (auto kb : mKernelList) {
     395    for (kernel::KernelBuilder * kb : mKernelList) {
    381396        Module * saveM = iBuilder->getModule();
    382         std::unique_ptr<Module> km = kb->createKernelStub();
     397        const auto f = mModuleMap.find(kb);
     398        if (LLVM_UNLIKELY(f == mModuleMap.end())) {
     399            report_fatal_error("linkAndFinalize was called twice!");
     400        }
     401        std::unique_ptr<Module> km(std::move(f->second));
    383402        std::string moduleID = km->getModuleIdentifier();
    384403        std::string signature;
    385404        if (kb->moduleIDisSignature()) {
    386405            signature = moduleID;
    387         }
    388         else {
     406        } else {
    389407            kb->generateKernelSignature(signature);
    390408        }
     
    397415    }
    398416    mEngine->finalizeObject();
     417    mModuleMap.clear();
    399418}
    400419
  • icGREP/icgrep-devel/icgrep/toolchain.h

    r5391 r5398  
    99#include <string>
    1010#include <IR_Gen/idisa_builder.h>
     11#include <llvm/IR/TypeBuilder.h>
     12#include <boost/container/flat_map.hpp>
    1113#include <object_cache.h>
    1214
    1315namespace llvm { class ExecutionEngine; }
    1416namespace llvm { class Module; }
     17namespace llvm { class TargetMachine; }
    1518namespace llvm { namespace cl { class OptionCategory; } }
    1619namespace IDISA { class IDISA_Builder; }
     
    6265
    6366class ParabixDriver {
     67
     68    using ModuleMap = boost::container::flat_map<kernel::KernelBuilder *, std::unique_ptr<llvm::Module>>;
     69
    6470public:
    6571    ParabixDriver(IDISA::IDISA_Builder * iBuilder);
     
    6874   
    6975    void JITcompileMain ();
    70    
     76
    7177    void addKernelCall(kernel::KernelBuilder & kb, const std::vector<parabix::StreamSetBuffer *> & inputs, const std::vector<parabix::StreamSetBuffer *> & outputs);
    7278   
    7379    void generatePipelineIR();
    7480   
     81    template <typename ExternalFunctionType>
     82    void addExternalLink(kernel::KernelBuilder & kb, llvm::StringRef name, ExternalFunctionType * functionPtr) const;
     83
     84    void addExternalLink(kernel::KernelBuilder & kb, llvm::StringRef name, llvm::FunctionType * type, void * functionPtr) const;
     85
    7586    void linkAndFinalize();
    7687   
    7788    void * getPointerToMain();
    7889
    79    
    8090private:
    81     llvm::Module * mMainModule;
    82     IDISA::IDISA_Builder * iBuilder;
    83     std::unique_ptr<ParabixObjectCache> mCache;
    84     //std::unique_ptr<llvm::ExecutionEngine> mEngine;
    85     llvm::ExecutionEngine * mEngine;
    86     std::vector<kernel::KernelBuilder *> mKernelList;
     91    IDISA::IDISA_Builder * const            iBuilder;
     92    llvm::Module * const                    mMainModule;
     93    llvm::TargetMachine *                   mTarget;
     94    llvm::ExecutionEngine *                 mEngine;
     95    std::unique_ptr<ParabixObjectCache>     mCache;
     96    std::vector<kernel::KernelBuilder *>    mKernelList;
     97    ModuleMap                               mModuleMap;
    8798};
     99
     100namespace {
     101
     102// NOTE: Currently, LLVM TypeBuilder only works for up to 5 arguments. The following templates
     103// avoid that limit but should be deprecated if the TypeBuilder ever supports n-ary functions.
     104
     105template <typename... Args>
     106class ParameterTypeBuilder;
     107
     108template<typename A1, typename... An>
     109struct ParameterTypeBuilder<A1, An...> {
     110    static void get(llvm::LLVMContext & C, std::vector<llvm::Type *> & params) {
     111        ParameterTypeBuilder<A1>::get(C, params);
     112        ParameterTypeBuilder<An...>::get(C, params);
     113    }
     114};
     115
     116template<typename A>
     117struct ParameterTypeBuilder<A> {
     118    static void get(llvm::LLVMContext & C, std::vector<llvm::Type *> & params) {
     119        params.push_back(llvm::TypeBuilder<A, false>::get(C));
     120    }
     121};
     122
     123template<typename T>
     124struct FunctionTypeBuilder;
     125
     126template<typename R, typename... Args>
     127struct FunctionTypeBuilder<R(Args...)> {
     128    static llvm::FunctionType * get(llvm::LLVMContext & C) {
     129        auto result = llvm::TypeBuilder<R, false>::get(C);
     130        std::vector<llvm::Type *> params(0);
     131        params.reserve(sizeof...(Args));
     132        ParameterTypeBuilder<Args...>::get(C, params);
     133        return llvm::FunctionType::get(result, params, false);
     134    }
     135};
     136
     137template<typename R>
     138struct FunctionTypeBuilder<R()> {
     139    static llvm::FunctionType * get(llvm::LLVMContext & C) {
     140        auto result = llvm::TypeBuilder<R, false>::get(C);
     141        return llvm::FunctionType::get(result, false);
     142    }
     143};
     144
     145}
     146
     147template <typename ExternalFunctionType>
     148void ParabixDriver::addExternalLink(kernel::KernelBuilder & kb, llvm::StringRef name, ExternalFunctionType * functionPtr) const {
     149    llvm::FunctionType * const type = FunctionTypeBuilder<ExternalFunctionType>::get(iBuilder->getContext());
     150    assert ("FunctionTypeBuilder did not correctly resolve the current function type." && type);
     151    addExternalLink(kb, name, type, reinterpret_cast<void *>(functionPtr));
     152}
     153
    88154#endif
Note: See TracChangeset for help on using the changeset viewer.