Changeset 5418


Ignore:
Timestamp:
Apr 22, 2017, 4:03:25 PM (2 years ago)
Author:
nmedfort
Message:

Removed non-functional CUDA code from icgrep and consolidated the grep and multigrep modes into a single function; allowed the segment-parallel pipeline to use the calling process as its initial thread; modified MMapSourceKernel to perform the mmap directly and to advise the OS to drop consumed data streams.

Location:
icGREP/icgrep-devel/icgrep
Files:
1 deleted
29 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.cpp

    r5415 r5418  
    1313#include <llvm/Support/raw_ostream.h>
    1414#include <kernels/toolchain.h>
     15#include <llvm/ADT/Triple.h>
    1516#include <fcntl.h>
    1617#include <unistd.h>
    1718#include <sys/mman.h>
     19#include <sys/stat.h>
    1820#include <errno.h>
    19 #include <llvm/ADT/Triple.h>
    2021
    2122using namespace llvm;
     
    3334
    3435// ssize_t write(int fildes, const void *buf, size_t nbyte);
    35 Value * CBuilder::CreateWriteCall(Value * fildes, Value * buf, Value * nbyte) {
     36Value * CBuilder::CreateWriteCall(Value * fileDescriptor, Value * buf, Value * nbyte) {
    3637    Function * write = mMod->getFunction("write");
    3738    if (write == nullptr) {
     
    4344                                                        sizeTy, int32Ty, int8PtrTy, sizeTy, nullptr));
    4445    }
    45     return CreateCall(write, {fildes, buf, nbyte});
     46    return CreateCall(write, {fileDescriptor, buf, nbyte});
    4647}
    4748
     
    5960}
    6061
    61 Value * CBuilder::CreateCloseCall(Value * fildes) {
     62Value * CBuilder::CreateCloseCall(Value * fileDescriptor) {
    6263    Function * closeFn = mMod->getFunction("close");
    6364    if (closeFn == nullptr) {
     
    6667        closeFn = Function::Create(fty, Function::ExternalLinkage, "close", mMod);
    6768    }
    68     return CreateCall(closeFn, {fildes});
     69    return CreateCall(closeFn, {fileDescriptor});
    6970}
    7071
     
    7879    }
    7980    return CreateCall(unlinkFunc, {path});
     81}
     82
     83Value * CBuilder::CreateFileSize(Value * fileDescriptor) {
     84    Function * fileSizeFunc = mMod->getFunction("file_size");
     85    if (fileSizeFunc == nullptr) {
     86        FunctionType * fty = FunctionType::get(getSizeTy(), {getInt32Ty()}, true);
     87        fileSizeFunc = Function::Create(fty, Function::ExternalLinkage, "file_size", mMod);
     88    }
     89    return CreateCall(fileSizeFunc, {fileDescriptor});
    8090}
    8191
     
    212222}
    213223
    214 Value * CBuilder::CreateFileSourceMMap(Value * const fd, Value * size) {
     224Value * CBuilder::CreateFileSourceMMap(Value * fd, Value * size) {
    215225    PointerType * const voidPtrTy = getVoidPtrTy();
    216226    IntegerType * const intTy = getInt32Ty();
     227    fd = CreateZExtOrTrunc(fd, intTy);
    217228    IntegerType * const sizeTy = getSizeTy();
    218229    size = CreateZExtOrTrunc(size, sizeTy);
     
    256267*/
    257268
    258 Value * CBuilder::CreateMMapAdvise(Value * addr, Value * length, std::initializer_list<MADV> advice) {
     269Value * CBuilder::CreateMAdvise(Value * addr, Value * length, std::initializer_list<MAdviceFlags> advice) {
    259270    Triple T(mMod->getTargetTriple());
    260271    Value * result = nullptr;
     
    272283        length = CreateZExtOrTrunc(length, sizeTy);
    273284        int adviceFlags = 0;
    274         for (const MADV adv : advice) {
     285        for (const MAdviceFlags adv : advice) {
    275286            switch (adv) {
    276                 case MADV::NORMAL: adviceFlags |= MADV_NORMAL; break;
    277                 case MADV::RANDOM: adviceFlags |= MADV_RANDOM; break;
    278                 case MADV::SEQUENTIAL: adviceFlags |= MADV_SEQUENTIAL; break;
    279                 case MADV::DONTNEED: adviceFlags |= MADV_DONTNEED; break;
    280                 case MADV::WILLNEED: adviceFlags |= MADV_WILLNEED; break;
     287                case MAdviceFlags::MMAP_NORMAL: adviceFlags |= MADV_NORMAL; break;
     288                case MAdviceFlags::MMAP_RANDOM: adviceFlags |= MADV_RANDOM; break;
     289                case MAdviceFlags::MMAP_SEQUENTIAL: adviceFlags |= MADV_SEQUENTIAL; break;
     290                case MAdviceFlags::MMAP_DONTNEED: adviceFlags |= MADV_DONTNEED; break;
     291                case MAdviceFlags::MMAP_WILLNEED: adviceFlags |= MADV_WILLNEED; break;
    281292//                case MADV::REMOVE: adviceFlags |= MADV_REMOVE; break;
    282293//                case MADV::DONTFORK: adviceFlags |= MADV_DONTFORK; break;
  • icGREP/icgrep-devel/icgrep/IR_Gen/CBuilder.h

    r5415 r5418  
    7575    llvm::Value * CreateOpenCall(llvm::Value * filename, llvm::Value * oflag, llvm::Value * mode);
    7676    //  Create a call to:  ssize_t write(int fildes, const void *buf, size_t nbyte);
    77     llvm::Value * CreateWriteCall(llvm::Value * fildes, llvm::Value * buf, llvm::Value * nbyte);
     77    llvm::Value * CreateWriteCall(llvm::Value * fileDescriptor, llvm::Value * buf, llvm::Value * nbyte);
    7878    //  Create a call to:  ssize_t read(int fildes, void *buf, size_t nbyte);
    79     llvm::Value * CreateReadCall(llvm::Value * fildes, llvm::Value * buf, llvm::Value * nbyte);
     79    llvm::Value * CreateReadCall(llvm::Value * fileDescriptor, llvm::Value * buf, llvm::Value * nbyte);
    8080    //  Create a call to:  int close(int filedes);
    81     llvm::Value * CreateCloseCall(llvm::Value * fildes);
     81    llvm::Value * CreateCloseCall(llvm::Value * fileDescriptor);
    8282    //  Create a call to:  int unlink(const char *path);
    8383    llvm::Value * CreateUnlinkCall(llvm::Value * path);
     84
     85    llvm::Value * CreateFileSize(llvm::Value * fileDescriptor);
    8486
    8587    //  Create calls to stdlib.h functions.
     
    9193    llvm::Value * CreateStrlenCall(llvm::Value * str);
    9294   
    93    
    94    
    9595    llvm::Value * CreateAnonymousMMap(llvm::Value * size);
    9696
    9797    llvm::Value * CreateFileSourceMMap(llvm::Value * fd, llvm::Value * size);
    9898
    99     enum class MADV {
    100         NORMAL
    101         , RANDOM
    102         , SEQUENTIAL
    103         , WILLNEED
    104         , DONTNEED
    105 //        , REMOVE
    106 //        , DONTFORK
    107 //        , DOFORK
    108 //        , HWPOISON
    109 //        , MERGEABLE
    110 //        , UNMERGEABLE
    111 //        , HUGEPAGE
    112 //        , NOHUGEPAGE
    113 //        , DONTDUMP
    114 //        , DODUMP
     99    enum MAdviceFlags {
     100        MMAP_NORMAL
     101        , MMAP_RANDOM
     102        , MMAP_SEQUENTIAL
     103        , MMAP_WILLNEED
     104        , MMAP_DONTNEED
    115105    };
    116106
    117     llvm::Value * CreateMMapAdvise(llvm::Value * addr, llvm::Value * length, MADV advice) {
    118         return CreateMMapAdvise(addr, length, { advice });
     107    llvm::Value * CreateMAdvise(llvm::Value * addr, llvm::Value * length, MAdviceFlags advice) {
     108        return CreateMAdvise(addr, length, { advice });
    119109    }
    120110
    121     llvm::Value * CreateMMapAdvise(llvm::Value * addr, llvm::Value * length, std::initializer_list<MADV> advice);
     111    llvm::Value * CreateMAdvise(llvm::Value * addr, llvm::Value * length, std::initializer_list<MAdviceFlags> advice);
    122112
    123113    llvm::Value * CreateMMap(llvm::Value * const addr, llvm::Value * size, llvm::Value * const prot, llvm::Value * const flags, llvm::Value * const fd, llvm::Value * const offset);
  • icGREP/icgrep-devel/icgrep/base64.cpp

    r5402 r5418  
    1919#include <kernels/radix64.h>
    2020#include <kernels/stdout_kernel.h>
    21 #include <boost/filesystem.hpp>
    22 #include <boost/iostreams/device/mapped_file.hpp>
     21#include <boost/interprocess/mapped_region.hpp>
    2322#include <boost/interprocess/anonymous_shared_memory.hpp>
     23#include <sys/stat.h>
     24#include <fcntl.h>
    2425
    2526using namespace llvm;
     
    4344    Type * mBitBlockType = iBuilder->getBitBlockType();
    4445
    45     Type * const size_ty = iBuilder->getSizeTy();
    4646    Type * const voidTy = Type::getVoidTy(mod->getContext());
    47     Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(mBitBlockType, 8), 1), 0);
     47    Type * const int32Ty = iBuilder->getInt32Ty();
    4848    Type * const outputType = PointerType::get(ArrayType::get(ArrayType::get(mBitBlockType, 8), 1), 0);
    4949   
    5050   
    51     Function * const main = cast<Function>(mod->getOrInsertFunction("Main", voidTy, inputType, outputType, size_ty, nullptr));
     51    Function * const main = cast<Function>(mod->getOrInsertFunction("Main", voidTy, int32Ty, outputType, nullptr));
    5252    main->setCallingConv(CallingConv::C);
    5353    Function::arg_iterator args = main->arg_begin();
    5454   
    55     Value * const inputStream = &*(args++);
    56     inputStream->setName("inputStream");
     55    Value * const fileDescriptor = &*(args++);
     56    fileDescriptor->setName("fileDescriptor");
    5757    Value * const outputStream = &*(args++);
    5858    outputStream->setName("outputStream");
    59     Value * const fileSize = &*(args++);
    60     fileSize->setName("fileSize");
    6159
    6260    //Round up to a multiple of 3.
     
    6563    const unsigned bufferSegments = codegen::BufferSegments;
    6664   
    67     ExternalFileBuffer ByteStream(iBuilder, iBuilder->getStreamSetTy(1, 8));
     65    SourceFileBuffer ByteStream(iBuilder, iBuilder->getStreamSetTy(1, 8));
    6866
    6967    CircularBuffer Expanded3_4Out(iBuilder, iBuilder->getStreamSetTy(1, 8), segmentSize * 4/3 * bufferSegments);
     
    7270   
    7371    MMapSourceKernel mmapK(iBuilder, segmentSize);
    74     mmapK.setInitialArguments({fileSize});
     72    mmapK.setInitialArguments({fileDescriptor});
    7573    pxDriver.addKernelCall(mmapK, {}, {&ByteStream});
    7674   
     
    8987    iBuilder->SetInsertPoint(BasicBlock::Create(mod->getContext(), "entry", main,0));
    9088
    91     ByteStream.setStreamSetBuffer(inputStream);
     89    ByteStream.allocateBuffer();
    9290    Expanded3_4Out.allocateBuffer();
    9391    Radix64out.allocateBuffer();
     
    102100
    103101
    104 typedef void (*base64FunctionType)(char * byte_data, char * output_data, size_t filesize);
     102typedef void (*base64FunctionType)(const uint32_t fd, char * outputBuffer);
    105103
    106104base64FunctionType base64CodeGen(void) {
     
    117115}
    118116
     117size_t file_size(const int fd) {
     118    struct stat st;
     119    if (LLVM_UNLIKELY(fstat(fd, &st) != 0)) {
     120        st.st_size = 0;
     121    }
     122    return st.st_size;
     123}
     124
    119125void base64(base64FunctionType fn_ptr, const std::string & fileName) {
    120     std::string mFileName = fileName;
    121     size_t mFileSize;
    122     char * mFileBuffer;
    123126
    124     const boost::filesystem::path file(mFileName);
    125     if (exists(file)) {
    126         if (is_directory(file)) {
    127             return;
    128         }
    129     } else {
    130         std::cerr << "Error: cannot open " << mFileName << " for processing. Skipped.\n";
     127    const int fd = open(fileName.c_str(), O_RDONLY);
     128    if (LLVM_UNLIKELY(fd == -1)) {
     129        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
    131130        return;
    132131    }
    133    
    134     mFileSize = file_size(file);
    135     boost::iostreams::mapped_file_source mFile;
    136     if (mFileSize == 0) {
    137         mFileBuffer = nullptr;
    138     }
    139     else {
    140         try {
    141             mFile.open(mFileName);
    142         } catch (std::exception &e) {
    143             std::cerr << "Error: Boost mmap of " << mFileName << ": " << e.what() << std::endl;
    144             return;
    145         }
    146         mFileBuffer = const_cast<char *>(mFile.data());
    147     }
    148 
    149132    if (mMapBuffering) {
    150         boost::interprocess::mapped_region outputBuffer(boost::interprocess::anonymous_shared_memory(2*mFileSize));
     133        boost::interprocess::mapped_region outputBuffer(boost::interprocess::anonymous_shared_memory(2 * file_size(fd)));
    151134        outputBuffer.advise(boost::interprocess::mapped_region::advice_willneed);
    152135        outputBuffer.advise(boost::interprocess::mapped_region::advice_sequential);
    153         fn_ptr(mFileBuffer, static_cast<char*>(outputBuffer.get_address()), mFileSize);
    154     }
    155     else if (memAlignBuffering) {
     136        fn_ptr(fd, static_cast<char*>(outputBuffer.get_address()));
     137    } else if (memAlignBuffering) {
    156138        char * outputBuffer;
    157         if (posix_memalign(reinterpret_cast<void **>(&outputBuffer), 32, 2*mFileSize)) {
     139        if (posix_memalign(reinterpret_cast<void **>(&outputBuffer), 32, 2 * file_size(fd))) {
    158140            throw std::bad_alloc();
    159141        }
    160         fn_ptr(mFileBuffer, outputBuffer, mFileSize);
     142        fn_ptr(fd, outputBuffer);
    161143        free(reinterpret_cast<void *>(outputBuffer));
     144    } else { /* No external output buffer */
     145        fn_ptr(fd, nullptr);
    162146    }
    163     else {
    164         /* No external output buffer */
    165         fn_ptr(mFileBuffer, nullptr, mFileSize);
    166     }
    167     mFile.close();
     147    close(fd);
    168148   
    169149}
  • icGREP/icgrep-devel/icgrep/editd/editd.cpp

    r5402 r5418  
    2222#include <kernels/streamset.h>
    2323#include <kernels/mmap_kernel.h>
     24#include <kernels/stdin_kernel.h>
    2425#include <kernels/s2p_kernel.h>
    2526#include <editd/editdscan_kernel.h>
    2627#include <kernels/pipeline.h>
    2728#include <editd/pattern_compiler.h>
    28 #include <boost/filesystem.hpp>
    29 #include <boost/iostreams/device/mapped_file.hpp>
     29#include <sys/stat.h>
     30#include <fcntl.h>
    3031#include <mutex>
    3132#ifdef CUDA_ENABLED
     
    187188}
    188189
    189 void buildPatternKernel(PabloKernel & kernel, IDISA::IDISA_Builder * iBuilder, const std::vector<std::string> & patterns) {
    190     PabloBuilder entry(kernel.getEntryBlock());
    191 
    192     Var * pat = kernel.getInputStreamVar("pat");
     190void buildPatternKernel(PabloKernel * const kernel, const std::vector<std::string> & patterns) {
     191    PabloBuilder entry(kernel->getEntryBlock());
     192
     193    Var * pat = kernel->getInputStreamVar("pat");
    193194
    194195    PabloAST * basisBits[4];
     
    199200    basisBits[3] = entry.createExtract(pat, 3, "G");
    200201
    201     re::Pattern_Compiler pattern_compiler(kernel);
     202    re::Pattern_Compiler pattern_compiler(*kernel);
    202203    pattern_compiler.compile(patterns, entry, basisBits, editDistance, optPosition, stepSize);
    203204
    204     pablo_function_passes(&kernel);
     205    pablo_function_passes(kernel);
    205206}
    206207
     
    224225
    225226
    226     ExternalFileBuffer ChStream(iBuilder, iBuilder->getStreamSetTy(4));
    227     SingleBlockBuffer MatchResults(iBuilder, iBuilder->getStreamSetTy(editDistance + 1));
    228 
    229     MMapSourceKernel mmapK(iBuilder);
    230     mmapK.setInitialArguments({fileSize});
    231     pxDriver.addKernelCall(mmapK, {}, {&ChStream});
    232 
    233     PabloKernel editdk(iBuilder, "editd",
    234                         {Binding{iBuilder->getStreamSetTy(4), "pat"}},
    235                         {Binding{iBuilder->getStreamSetTy(editDistance + 1), "E"}});
    236 
    237     ChStream.setStreamSetBuffer(inputStream);
    238     MatchResults.allocateBuffer();
    239 
    240     buildPatternKernel(editdk, iBuilder, patterns);
    241     pxDriver.addKernelCall(editdk, {&ChStream}, {&MatchResults});
    242 
    243     kernel::editdScanKernel editdScanK(iBuilder, editDistance);
    244     pxDriver.addKernelCall(editdScanK, {&MatchResults}, {});
     227    auto ChStream = pxDriver.addBuffer(make_unique<SourceFileBuffer>(iBuilder, iBuilder->getStreamSetTy(4)));
     228
     229    auto mmapK = pxDriver.addKernelInstance(make_unique<kernel::FileSourceKernel>(iBuilder, inputType));
     230    mmapK->setInitialArguments({inputStream, fileSize});
     231
     232    pxDriver.makeKernelCall(mmapK, {}, {ChStream});
     233
     234    auto MatchResults = pxDriver.addBuffer(make_unique<SingleBlockBuffer>(iBuilder, iBuilder->getStreamSetTy(editDistance + 1)));
     235
     236    auto editdk = pxDriver.addKernelInstance(make_unique<PabloKernel>(
     237        iBuilder, "editd", std::vector<Binding>{{iBuilder->getStreamSetTy(4), "pat"}}, std::vector<Binding>{{iBuilder->getStreamSetTy(editDistance + 1), "E"}}
     238        ));
     239
     240    buildPatternKernel(reinterpret_cast<PabloKernel *>(editdk), patterns);
     241    pxDriver.makeKernelCall(editdk, {ChStream}, {MatchResults});
     242
     243    auto editdScanK = pxDriver.addKernelInstance(make_unique<editdScanKernel>(iBuilder, editDistance));
     244    pxDriver.makeKernelCall(editdScanK, {MatchResults}, {});
    245245       
    246246    pxDriver.generatePipelineIR();
     
    251251}
    252252
    253 void buildPreprocessKernel(PabloKernel & kernel, IDISA::IDISA_Builder * iBuilder) {
    254     cc::CC_Compiler ccc(&kernel, kernel.getInputStreamVar("basis"));
     253void buildPreprocessKernel(PabloKernel * const kernel) {
     254    cc::CC_Compiler ccc(kernel, kernel->getInputStreamVar("basis"));
    255255
    256256    PabloBuilder & pb = ccc.getBuilder();
     
    261261    PabloAST * G = ccc.compileCC(re::makeCC(re::makeCC(0x47), re::makeCC(0x67)), pb);
    262262
    263     Var * const pat = kernel.getOutputStreamVar("pat");
     263    Var * const pat = kernel->getOutputStreamVar("pat");
    264264
    265265    pb.createAssign(pb.createExtract(pat, 0), A);
     
    268268    pb.createAssign(pb.createExtract(pat, 3), G);
    269269
    270     pablo_function_passes(&kernel);
     270    pablo_function_passes(kernel);
    271271}
    272272
     
    277277    Type * mBitBlockType = iBuilder->getBitBlockType();
    278278   
    279     Type * const size_ty = iBuilder->getSizeTy();
     279
    280280    Type * const voidTy = iBuilder->getVoidTy();
    281     Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(mBitBlockType, 8), 1), 0);
     281    Type * const int32Ty = iBuilder->getInt32Ty();
    282282    Type * const outputType = PointerType::get(ArrayType::get(mBitBlockType, 4), 0);
    283283   
    284     Function * const main = cast<Function>(m->getOrInsertFunction("Main", voidTy, inputType, size_ty, outputType, nullptr));
     284    Function * const main = cast<Function>(m->getOrInsertFunction("Main", voidTy, int32Ty, outputType, nullptr));
    285285    main->setCallingConv(CallingConv::C);
    286286    Function::arg_iterator args = main->arg_begin();
    287287   
    288     Value * const inputStream = &*(args++);
    289     inputStream->setName("input");
    290     Value * const fileSize = &*(args++);
    291     fileSize->setName("fileSize");
     288    Value * const fileDescriptor = &*(args++);
     289    fileDescriptor->setName("fileDescriptor");
    292290    Value * const outputStream = &*(args++);
    293291    outputStream->setName("output");
     292
    294293    iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main));
    295294
    296     ExternalFileBuffer ByteStream(iBuilder, iBuilder->getStreamSetTy(1, 8));
    297     SingleBlockBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8));
    298     ExternalFileBuffer CCResults(iBuilder, iBuilder->getStreamSetTy(4));
    299 
    300     MMapSourceKernel mmapK(iBuilder);
    301     mmapK.setInitialArguments({fileSize});
    302     pxDriver.addKernelCall(mmapK, {}, {&ByteStream});
    303    
    304     S2PKernel s2pk(iBuilder);
    305     pxDriver.addKernelCall(s2pk, {&ByteStream}, {&BasisBits});
    306 
    307     PabloKernel ccck(iBuilder, "ccc",
    308                 {{iBuilder->getStreamSetTy(8), "basis"}},
    309                 {{iBuilder->getStreamSetTy(4), "pat"}});
    310 
    311     buildPreprocessKernel(ccck, iBuilder);
    312     pxDriver.addKernelCall(ccck, {&BasisBits}, {&CCResults});
    313          
    314     ByteStream.setStreamSetBuffer(inputStream);
    315 
    316     BasisBits.allocateBuffer();
    317 
    318     CCResults.setStreamSetBuffer(outputStream);
    319    
     295    auto ByteStream = pxDriver.addBuffer(make_unique<SourceFileBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8)));
     296
     297    auto mmapK = pxDriver.addKernelInstance(make_unique<kernel::MMapSourceKernel>(iBuilder, 1));
     298    mmapK->setInitialArguments({fileDescriptor});
     299    pxDriver.makeKernelCall(mmapK, {}, {ByteStream});
     300
     301    auto BasisBits = pxDriver.addBuffer(make_unique<SingleBlockBuffer>(iBuilder, iBuilder->getStreamSetTy(8)));
     302    auto s2pk = pxDriver.addKernelInstance(make_unique<S2PKernel>(iBuilder));
     303    pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
     304
     305    auto CCResults = pxDriver.addExternalBuffer(make_unique<ExternalFileBuffer>(iBuilder, iBuilder->getStreamSetTy(4)), outputStream);
     306
     307
     308
     309    auto ccck = pxDriver.addKernelInstance(make_unique<PabloKernel>(
     310        iBuilder, "ccc", std::vector<Binding>{{iBuilder->getStreamSetTy(8), "basis"}}, std::vector<Binding>{{iBuilder->getStreamSetTy(4), "pat"}}
     311        ));
     312    buildPreprocessKernel(reinterpret_cast<PabloKernel *>(ccck));
     313    pxDriver.makeKernelCall(ccck, {BasisBits}, {CCResults});
     314             
    320315    pxDriver.generatePipelineIR();
    321316
     
    326321
    327322
    328 typedef void (*preprocessFunctionType)(char * byte_data, size_t filesize, char * output_data);
     323typedef void (*preprocessFunctionType)(const int fd, char * output_data);
    329324
    330325preprocessFunctionType preprocessCodeGen() {                           
     
    352347}
    353348
    354 char * chStream;
    355 int size;
     349static char * chStream;
     350static size_t size;
     351
     352size_t file_size(const int fd) {
     353    struct stat st;
     354    if (LLVM_UNLIKELY(fstat(fd, &st) != 0)) {
     355        st.st_size = 0;
     356    }
     357    return st.st_size;
     358}
     359
    356360
    357361char * preprocess(preprocessFunctionType fn_ptr) {
    358362    std::string fileName = inputFiles[0];
    359     size_t fileSize;
    360     char * fileBuffer;
    361    
    362     const boost::filesystem::path file(fileName);
    363     if (exists(file)) {
    364         if (is_directory(file)) {
    365             exit(0);
    366         }
    367     } else {
     363    const int fd = open(inputFiles[0].c_str(), O_RDONLY);
     364    if (LLVM_UNLIKELY(fd == -1)) {
    368365        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
    369         exit(0);
    370     }
    371    
    372     fileSize = file_size(file);
    373     boost::iostreams::mapped_file_source mappedFile;
    374     if (fileSize == 0) {
    375         fileBuffer = nullptr;
    376     }
    377     else {
    378         try {
    379             mappedFile.open(fileName);
    380         } catch (std::exception &e) {
    381             std::cerr << "Error: Boost mmap of " << fileName << ": " << e.what() << std::endl;
    382             exit(0);
    383         }
    384         fileBuffer = const_cast<char *>(mappedFile.data());
    385     }
    386 
    387     int ret = posix_memalign((void**)&chStream, 32, fileSize);
     366        exit(-1);
     367    }
     368    size = file_size(fd);
     369    int ret = posix_memalign((void**)&chStream, 32, size);
    388370    if (ret) {
    389371        std::cerr << "Cannot allocate memory for output.\n";
    390         exit(-1);
    391     }
    392 
    393     fn_ptr(fileBuffer, fileSize, chStream);
    394     size = fileSize;
    395 
    396     mappedFile.close();
    397 
     372        exit(-2);
     373    }
     374    fn_ptr(fd, chStream);
     375    close(fd);
    398376    return chStream;   
    399377}
    400378
    401 void editd(editdFunctionType fn_ptr, char * inputStream, int size) {
     379void editd(editdFunctionType fn_ptr, char * inputStream, size_t size) {
    402380 
    403381    if (size == 0) {
     
    411389std::mutex count_mutex;
    412390size_t groupCount;
    413 void *DoEditd(void *threadid)
     391void * DoEditd(void *)
    414392{
    415393    size_t groupIdx;
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5416 r5418  
    1111#include <llvm/Support/CommandLine.h>
    1212#include <boost/filesystem.hpp>
    13 #include <boost/iostreams/device/mapped_file.hpp>
    1413#include <IR_Gen/idisa_builder.h>
    1514#include <IR_Gen/idisa_target.h>
     
    3332#include <sstream>
    3433#include <cc/multiplex_CCs.h>
    35 
    3634#include <llvm/Support/raw_ostream.h>
     35#include <util/aligned_allocator.h>
    3736#include <sys/stat.h>
    38 
    39 
    40 #ifdef CUDA_ENABLED
    41 #include <IR_Gen/CudaDriver.h>
    42 #include "preprocess.cpp"
    43 #endif
    44 #include <util/aligned_allocator.h>
     37#include <fcntl.h>
    4538
    4639using namespace parabix;
     
    6154static cl::alias ShowLineNumbersLong("line-number", cl::desc("Alias for -n"), cl::aliasopt(ShowLineNumbers));
    6255
    63 #ifdef CUDA_ENABLED
    64 const auto IRFilename = "icgrep.ll";
    65 const auto PTXFilename = "icgrep.ptx";
    66 #endif
    67 
    6856static re::CC * parsedCodePointSet = nullptr;
    6957
    7058static std::vector<std::string> parsedPropertyValues;
    7159
    72 #ifdef CUDA_ENABLED
    73 int blockNo = 0;
    74 size_t * startPoints = nullptr;
    75 size_t * accumBytes = nullptr;
    76 #endif
    77 
    78 void GrepEngine::doGrep(const std::string & fileName, const int fileIdx, bool CountOnly, std::vector<size_t> & total_CountOnly) {
    79     boost::filesystem::path file(fileName);
    80     if (exists(file)) {
    81         if (is_directory(file)) {
    82             return;
    83         }
    84     } else {
    85         if (!SilenceFileErrors) {
    86             std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
    87             return;
    88         }
    89     }
    90 
    91     const auto fileSize = file_size(file);
    92     if (fileSize > 0) {
    93         try {
    94             boost::iostreams::mapped_file_source source(fileName, fileSize, 0);
    95             char * fileBuffer = const_cast<char *>(source.data());
    96            
    97 #ifdef CUDA_ENABLED 
    98             if(codegen::NVPTX){
    99                 codegen::BlockSize = 128;
    100                 char * LineBreak;
    101                 if (posix_memalign((void**)&LineBreak, 32, fileSize)) {
    102                     std::cerr << "Cannot allocate memory for linebreak.\n";
    103                     exit(-1);
    104                 }
    105                 std::vector<size_t> LFPositions = preprocess(fileBuffer, fileSize, LineBreak);
    106 
    107                 const unsigned numOfGroups = codegen::GroupNum;
    108                 if (posix_memalign((void**)&startPoints, 8, (numOfGroups+1)*sizeof(size_t)) ||
    109                     posix_memalign((void**)&accumBytes, 8, (numOfGroups+1)*sizeof(size_t))) {
    110                     std::cerr << "Cannot allocate memory for startPoints or accumBytes.\n";
    111                     exit(-1);
    112                 }
    113 
    114                 ulong * rslt = RunPTX(PTXFilename, fileBuffer, fileSize, CountOnly, LFPositions, startPoints, accumBytes);
    115                 if (CountOnly){
    116                     exit(0);
    117                 }
    118                 else{
    119                     size_t intputSize = startPoints[numOfGroups]-accumBytes[numOfGroups]+accumBytes[numOfGroups-1];
    120                     mGrepFunction_CPU((char *)rslt, LineBreak, fileBuffer, intputSize, fileIdx);
    121                     return;
    122                 }
    123                
    124             }
    125 #endif
    126             if (CountOnly) {
    127                 total_CountOnly[fileIdx] = mGrepFunction_CountOnly(fileBuffer, fileSize, fileIdx);
    128             } else {
    129                 mGrepFunction(fileBuffer, fileSize, fileIdx);
    130             }
    131             source.close();
    132         } catch (std::exception & e) {
    133             if (!SilenceFileErrors) {
    134                 std::cerr << "Boost mmap error: " + fileName + ": " + e.what() + " Skipped.\n";
    135                 return;
    136             }
    137         }
    138     } else {
    139 #ifdef CUDA_ENABLED
    140         if (codegen::NVPTX){
    141             std::cout << 0 << std::endl;
    142             exit(0);
    143         }
    144 #endif
    145         if (CountOnly) {
    146             total_CountOnly[fileIdx] = mGrepFunction_CountOnly(nullptr, 0, fileIdx);
    147         } else {
    148             mGrepFunction(nullptr, 0, fileIdx);
    149         }
    150     }
    151 }
    152 
    153 void GrepEngine::doGrep(const int fileIdx, bool CountOnly, std::vector<size_t> & total_CountOnly) {
    154     if (CountOnly) {
    155         total_CountOnly[fileIdx] = mGrepFunction_CountOnly(nullptr, 0, fileIdx);
    156     } else {
    157         mGrepFunction(nullptr, 0, fileIdx);
    158     }
    159 }
    160 
    161 #ifdef CUDA_ENABLED
    162 Function * generateGPUKernel(NVPTXDriver & nvptxDriver, bool CountOnly){
    163     IDISA::IDISA_Builder * iBuilder = nvptxDriver.getIDISA_Builder();
    164     Module * m = iBuilder->getModule();
    165     Type * const int64ty = iBuilder->getInt64Ty();
    166     Type * const size_ty = iBuilder->getSizeTy();
    167     Type * const int32ty = iBuilder->getInt32Ty();
    168     Type * const sizeTyPtr = PointerType::get(size_ty, 1);
    169     Type * const int64tyPtr = PointerType::get(int64ty, 1);
    170     Type * const inputType = PointerType::get(iBuilder->getInt8Ty(), 1);
    171     Type * const resultTy = iBuilder->getVoidTy();
    172     Function * kernelFunc = cast<Function>(m->getOrInsertFunction("Main", resultTy, inputType, sizeTyPtr, sizeTyPtr, int64tyPtr, nullptr));
    173     kernelFunc->setCallingConv(CallingConv::C);
    174     Function::arg_iterator args = kernelFunc->arg_begin();
    175 
    176     Value * const inputPtr = &*(args++);
    177     inputPtr->setName("inputPtr");
    178     Value * const startPointsPtr = &*(args++);
    179     startPointsPtr->setName("startPointsPtr");
    180     Value * const bufferSizesPtr = &*(args++);
    181     bufferSizesPtr->setName("bufferSizesPtr");
    182     Value * const outputPtr = &*(args++);
    183     outputPtr->setName("resultPtr");
    184 
    185     BasicBlock * entryBlock = BasicBlock::Create(m->getContext(), "entry", kernelFunc, 0);
    186     iBuilder->SetInsertPoint(entryBlock);
    187 
    188     Function * tidFunc = m->getFunction("llvm.nvvm.read.ptx.sreg.tid.x");
    189     Value * tid = iBuilder->CreateCall(tidFunc);
    190     Function * bidFunc = cast<Function>(m->getOrInsertFunction("llvm.nvvm.read.ptx.sreg.ctaid.x", int32ty, nullptr));
    191     Value * bid = iBuilder->CreateCall(bidFunc);
    192 
    193     Value * startPoint = iBuilder->CreateLoad(iBuilder->CreateGEP(startPointsPtr, bid));
    194 
    195     Function * mainFunc = m->getFunction("Main");
    196     Value * startBlock = iBuilder->CreateUDiv(startPoint, ConstantInt::get(int64ty, iBuilder->getBitBlockWidth()));
    197     Type * const inputStreamType = PointerType::get(ArrayType::get(ArrayType::get(iBuilder->getBitBlockType(), 8), 1), 1);   
    198     Value * inputStreamPtr = iBuilder->CreateGEP(iBuilder->CreateBitCast(inputPtr, inputStreamType), startBlock);
    199     Value * inputStream = iBuilder->CreateGEP(inputStreamPtr, tid);
    200     Value * bufferSize = iBuilder->CreateLoad(iBuilder->CreateGEP(bufferSizesPtr, bid));
    201 
    202     if (CountOnly) {
    203         Value * strideBlocks = ConstantInt::get(int32ty, iBuilder->getStride() / iBuilder->getBitBlockWidth());
    204         Value * outputThreadPtr = iBuilder->CreateGEP(outputPtr, iBuilder->CreateAdd(iBuilder->CreateMul(bid, strideBlocks), tid));
    205         Value * result = iBuilder->CreateCall(mainFunc, {inputStream, bufferSize});
    206         iBuilder->CreateStore(result, outputThreadPtr);
    207     } else {
    208         Type * const outputStremType = PointerType::get(ArrayType::get(iBuilder->getBitBlockType(), 1), 1);
    209         Value * outputStreamPtr = iBuilder->CreateGEP(iBuilder->CreateBitCast(outputPtr, outputStremType), startBlock);
    210         Value * outputStream = iBuilder->CreateGEP(outputStreamPtr, tid);
    211         iBuilder->CreateCall(mainFunc, {inputStream, bufferSize, outputStream});
    212     }   
    213 
    214     iBuilder->CreateRetVoid();
    215 
    216     return kernelFunc;
    217 }
    218 
    219 void generateCPUKernel(ParabixDriver & pxDriver, GrepType grepType){
    220     IDISA::IDISA_Builder * iBuilder = pxDriver.getIDISA_Builder();
    221     Module * m = iBuilder->getModule();
    222 
    223     Type * const size_ty = iBuilder->getSizeTy();
    224     Type * const int8PtrTy = iBuilder->getInt8PtrTy();
    225     Type * const rsltType = PointerType::get(ArrayType::get(iBuilder->getBitBlockType(), 1), 0);
    226     Function * const mainCPUFn = cast<Function>(m->getOrInsertFunction("Main", iBuilder->getVoidTy(), rsltType, rsltType, int8PtrTy, size_ty, size_ty, nullptr));
    227     mainCPUFn->setCallingConv(CallingConv::C);
    228     iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", mainCPUFn, 0));
    229     Function::arg_iterator args = mainCPUFn->arg_begin();
    230    
    231     Value * const rsltStream = &*(args++);
    232     rsltStream->setName("rslt");
    233     Value * const lbStream = &*(args++);
    234     lbStream->setName("lb");
    235     Value * const inputStream = &*(args++);
    236     inputStream->setName("input");
    237     Value * const fileSize = &*(args++);
    238     fileSize->setName("fileSize");
    239     Value * const fileIdx = &*(args++);
    240     fileIdx->setName("fileIdx");
    241 
    242     const unsigned segmentSize = codegen::SegmentSize;
    243    
    244     ExternalFileBuffer InputStream(iBuilder, iBuilder->getStreamSetTy(1, 8));
    245     InputStream.setStreamSetBuffer(inputStream);
    246 
    247     ExternalFileBuffer MatchResults(iBuilder, iBuilder->getStreamSetTy(1, 1));
    248     MatchResults.setStreamSetBuffer(rsltStream);
    249 
    250     kernel::MMapSourceKernel mmapK0(iBuilder, segmentSize);
    251     mmapK0.setName("mmap0");
    252     mmapK0.setInitialArguments({fileSize});
    253     pxDriver.addKernelCall(mmapK0, {}, {InputStream});
    254 
    255 
    256     kernel::MMapSourceKernel mmapK1(iBuilder, segmentSize);
    257     mmapK1.setName("mmap1");
    258     mmapK1.setInitialArguments({fileSize});
    259     pxDriver.addKernelCall(mmapK1, {}, {MatchResults});
    260 
    261     ExternalFileBuffer LineBreak(iBuilder, iBuilder->getStreamSetTy(1, 1));
    262     LineBreak.setStreamSetBuffer(lbStream);
    263    
    264     kernel::MMapSourceKernel mmapK2(iBuilder, segmentSize);
    265     mmapK2.setName("mmap2");
    266     mmapK2.setInitialArguments({fileSize});
    267     pxDriver.addKernelCall(mmapK2, {}, {LineBreak});
    268 
    269     kernel::ScanMatchKernel scanMatchK(iBuilder, grepType, 8);
    270     scanMatchK.setInitialArguments({fileIdx});
    271     pxDriver.addKernelCall(scanMatchK, {InputStream, MatchResults, LineBreak}, {});
    272     pxDriver.generatePipelineIR();
    273     iBuilder->CreateRetVoid();
    274 
    275     pxDriver.linkAndFinalize();
    276 }
    277 #endif
     60uint64_t GrepEngine::doGrep(const std::string & fileName, const int fileIdx) const {
     61    const int fd = open(fileName.c_str(), O_RDONLY);
     62    if (LLVM_UNLIKELY(fd == -1)) {
     63        return 0;
     64    }
     65    const auto result = doGrep(fd, fileIdx);
     66    close(fd);
     67    return result;
     68}
     69
     70uint64_t GrepEngine::doGrep(const uint32_t fileDescriptor, const int fileIdx) const {
     71    assert (mGrepFunction);
     72    typedef uint64_t (*GrepFunctionType)(size_t fileDescriptor, const int fileIdx);
     73    return reinterpret_cast<GrepFunctionType>(mGrepFunction)(fileDescriptor, fileIdx);
     74}
     75
     76void GrepEngine::doGrep(const char * buffer, const uint64_t length, const int fileIdx) const {
     77    assert (mGrepFunction);
     78    typedef uint64_t (*GrepFunctionType)(const char * buffer, const uint64_t length, const int fileIdx);
     79    reinterpret_cast<GrepFunctionType>(mGrepFunction)(buffer, length, fileIdx);
     80}
    27881
    27982static int * total_count;
     
    297100template<typename CodeUnit>
    298101void wrapped_report_match(const size_t lineNum, size_t line_start, size_t line_end, const CodeUnit * const buffer, const size_t filesize, const size_t fileIdx) {
     102
     103//    errs().write_hex((size_t)buffer) << " : " << lineNum << " (" << line_start << ", " << line_end << ", " << filesize << ")\n";
     104
    299105    assert (buffer);
    300106    assert (line_start <= line_end);
    301107    assert (line_end <= filesize);
    302 
    303   //  errs().write_hex((size_t)buffer) << " : " << lineNum << " (" << line_start << ", " << line_end << ", " << filesize << ")\n";
    304 
    305     #ifdef CUDA_ENABLED
    306     if (codegen::NVPTX){
    307         while(line_start>startPoints[blockNo]) blockNo++;
    308         line_start -= accumBytes[blockNo-1];
    309         line_end -= accumBytes[blockNo-1];
    310     }
    311     #endif
    312108
    313109    if (ShowFileNames) {
     
    416212}
    417213
    418 void GrepEngine::grepCodeGen(std::string moduleName, re::RE * re_ast, const bool CountOnly, const bool UTF_16, const GrepType grepType, const bool usingStdIn) {
    419     int addrSpace = 0;
    420     bool CPU_Only = true;
    421     Module * M = nullptr;
    422     IDISA::IDISA_Builder * iBuilder = nullptr;
    423 
    424     #ifdef CUDA_ENABLED
    425     setNVPTXOption();
    426     if (codegen::NVPTX) {
    427         Module * gpuM = new Module(moduleName+":gpu", getGlobalContext());
    428         IDISA::IDISA_Builder * GPUBuilder = IDISA::GetIDISA_GPU_Builder(gpuM);
    429         M = gpuM;
    430         iBuilder = GPUBuilder;
    431         M->setDataLayout("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64");
    432         M->setTargetTriple("nvptx64-nvidia-cuda");
    433         addrSpace = 1;
    434         CPU_Only = false;
    435         codegen::BlockSize = 64;
    436     }
    437     #endif
    438 
    439     Module * cpuM = new Module(moduleName + ":cpu", getGlobalContext());
    440     IDISA::IDISA_Builder * CPUBuilder = IDISA::GetIDISA_Builder(cpuM);
    441     if (CPU_Only) {
    442         M = cpuM;
    443         iBuilder = CPUBuilder;
    444     }
     214void GrepEngine::grepCodeGen(std::string moduleName, std::vector<re::RE *> REs, const bool CountOnly, const bool UTF_16, GrepSource grepSource, const GrepType grepType) {
     215
     216    Module * M = new Module(moduleName + ":icgrep", getGlobalContext());;
     217    IDISA::IDISA_Builder * iBuilder = IDISA::GetIDISA_Builder(M);;
    445218    ParabixDriver pxDriver(iBuilder);
    446219
    447     // segment size made available for each call to the mmap source kernel
    448220    const unsigned segmentSize = codegen::SegmentSize;
    449221    const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
    450222    const unsigned encodingBits = UTF_16 ? 16 : 8;
    451223
    452     Type * const size_ty = iBuilder->getSizeTy();
    453     Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(iBuilder->getBitBlockType(), encodingBits), 1), addrSpace);
    454     Type * const resultTy = CountOnly ? size_ty : iBuilder->getVoidTy();
    455 
    456     Function * mainFn = nullptr;
    457     Value * inputStream = nullptr;
    458     Value * fileSize = nullptr;
     224    Type * const int64Ty = iBuilder->getInt64Ty();
     225
     226    Function * mainFunc = nullptr;
    459227    Value * fileIdx = nullptr;
    460 
    461     #ifdef CUDA_ENABLED
    462     Value * outputStream = nullptr;
    463     Type * const outputType = PointerType::get(ArrayType::get(iBuilder->getBitBlockType(), 1), addrSpace);
    464     if (codegen::NVPTX){
    465         if (CountOnly){
    466             mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, size_ty, nullptr));
    467             mainFn->setCallingConv(CallingConv::C);
    468             iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
    469             Function::arg_iterator args = mainFn->arg_begin();
    470 
    471             inputStream = &*(args++);
    472             inputStream->setName("input");
    473             fileSize = &*(args++);
    474             fileSize->setName("fileSize");
    475         } else {
    476             mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, size_ty, outputType, nullptr));
    477             mainFn->setCallingConv(CallingConv::C);
    478             iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
    479             Function::arg_iterator args = mainFn->arg_begin();
    480 
    481             inputStream = &*(args++);
    482             inputStream->setName("input");
    483             fileSize = &*(args++);
    484             fileSize->setName("fileSize");
    485             outputStream = &*(args++);
    486             outputStream->setName("output");
    487         }
    488     }
    489     #endif
    490 
    491     if (CPU_Only) {
    492         mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, size_ty, size_ty, nullptr));
    493         mainFn->setCallingConv(CallingConv::C);
    494         iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
    495         Function::arg_iterator args = mainFn->arg_begin();
    496 
    497         inputStream = &*(args++);
    498         inputStream->setName("input");
    499         fileSize = &*(args++);
    500         fileSize->setName("fileSize");
     228    StreamSetBuffer * ByteStream = nullptr;
     229    kernel::KernelBuilder * sourceK = nullptr;
     230
     231    if (grepSource == GrepSource::Internal) {
     232
     233        mainFunc = cast<Function>(M->getOrInsertFunction("Main", int64Ty, iBuilder->getInt8PtrTy(), int64Ty, int64Ty, nullptr));
     234        mainFunc->setCallingConv(CallingConv::C);
     235        iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
     236        Function::arg_iterator args = mainFunc->arg_begin();
     237
     238        Value * const buffer = &*(args++);
     239        buffer->setName("buffer");
     240        Value * const length = &*(args++);
     241        length->setName("length");
    501242        fileIdx = &*(args++);
    502243        fileIdx->setName("fileIdx");
    503244
    504     }
    505 
    506     StreamSetBuffer * ByteStream = nullptr;
    507     kernel::KernelBuilder * sourceK = nullptr;
    508     if (usingStdIn) {
    509         // TODO: use fstat(STDIN_FILENO) to see if we can mmap the stdin safely and avoid the calls to read
    510         ByteStream = pxDriver.addBuffer(make_unique<ExtensibleBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), segmentSize));
    511         sourceK = pxDriver.addKernelInstance(make_unique<kernel::StdInKernel>(iBuilder, segmentSize));
     245        ByteStream = pxDriver.addBuffer(make_unique<SourceFileBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8)));
     246
     247        sourceK = pxDriver.addKernelInstance(make_unique<kernel::FileSourceKernel>(iBuilder, iBuilder->getInt8PtrTy(), segmentSize));
     248        sourceK->setInitialArguments({buffer, length});
     249
    512250    } else {
    513         ByteStream = pxDriver.addBuffer(make_unique<SourceFileBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8)));
    514         sourceK = pxDriver.addKernelInstance(make_unique<kernel::FileSourceKernel>(iBuilder, inputStream->getType(), segmentSize));
    515         sourceK->setInitialArguments({inputStream, fileSize});
    516     }
    517     pxDriver.makeKernelCall(sourceK, {}, {ByteStream});
    518    
    519     StreamSetBuffer * BasisBits = pxDriver.addBuffer(make_unique<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), segmentSize * bufferSegments));
    520 
    521     kernel::KernelBuilder * s2pk = pxDriver.addKernelInstance(make_unique<kernel::S2PKernel>(iBuilder));
    522     pxDriver.makeKernelCall(s2pk, {ByteStream}, {BasisBits});
    523 
    524     kernel::KernelBuilder * linebreakK = pxDriver.addKernelInstance(make_unique<kernel::LineBreakKernelBuilder>(iBuilder, encodingBits));
    525     StreamSetBuffer * LineBreakStream = pxDriver.addBuffer(make_unique<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments));
    526     pxDriver.makeKernelCall(linebreakK, {BasisBits}, {LineBreakStream});
    527    
    528     StreamSetBuffer * MatchResults = nullptr;
    529 #ifdef CUDA_ENABLED
    530     if (codegen::NVPTX){
    531         MatchResults = pxDriver.addExternalBuffer(make_unique<ExternalFileBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), addrSpace), outputStream);
    532 
    533     }
    534     else {
    535 #endif
    536     MatchResults = pxDriver.addBuffer(make_unique<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments));
    537 #ifdef CUDA_ENABLED
    538     }
    539 #endif
    540     kernel::KernelBuilder * icgrepK = pxDriver.addKernelInstance(make_unique<kernel::ICgrepKernelBuilder>(iBuilder, re_ast));
    541     pxDriver.makeKernelCall(icgrepK, {BasisBits, LineBreakStream}, {MatchResults});
    542    
    543     kernel::KernelBuilder * invertK = pxDriver.addKernelInstance(make_unique<kernel::InvertMatchesKernel>(iBuilder));
    544     if (AlgorithmOptionIsSet(re::InvertMatches)) {
    545         StreamSetBuffer * OriginalMatches = MatchResults;
    546         MatchResults = pxDriver.addBuffer(make_unique<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments));
    547         pxDriver.makeKernelCall(invertK, {OriginalMatches, LineBreakStream}, {MatchResults});
    548     }
    549 
    550     if (CountOnly) {
    551         kernel::KernelBuilder * popcountK = pxDriver.addKernelInstance(make_unique<kernel::PopcountKernel>(iBuilder));
    552         pxDriver.makeKernelCall(popcountK, {MatchResults}, {});
    553         pxDriver.generatePipelineIR();
    554         iBuilder->CreateRet(popcountK->createGetAccumulatorCall("countResult"));
    555 
    556         pxDriver.linkAndFinalize();
    557 
    558     } else {
    559 
    560         #ifdef CUDA_ENABLED
    561         if (codegen::NVPTX){
    562 
    563             pxDriver.generatePipelineIR();
    564 
    565             iBuilder->CreateRetVoid();
    566 
    567             pxDriver.linkAndFinalize();
    568         }
    569         #endif
    570 
    571         if (CPU_Only) {
    572             kernel::KernelBuilder * scanMatchK = pxDriver.addKernelInstance(make_unique<kernel::ScanMatchKernel>(iBuilder, grepType, encodingBits));
    573             scanMatchK->setInitialArguments({fileIdx});
    574 
    575             pxDriver.makeKernelCall(scanMatchK, {MatchResults, LineBreakStream, ByteStream}, {});
    576 
    577             linkGrepFunction(pxDriver, grepType, UTF_16, *scanMatchK);
    578 
    579             pxDriver.generatePipelineIR();
    580 
    581             iBuilder->CreateRetVoid();
    582 
    583             pxDriver.linkAndFinalize();
    584         }
    585     }
    586 
    587     #ifdef CUDA_ENABLED
    588     if(codegen::NVPTX){
    589         NVPTXDriver nvptxDriver(iBuilder);
    590         Function * kernelFunction = generateGPUKernel(nvptxDriver, CountOnly);
    591        
    592         MDNode * Node = MDNode::get(M->getContext(),
    593                                     {llvm::ValueAsMetadata::get(kernelFunction),
    594                                      MDString::get(M->getContext(), "kernel"),
    595                                      ConstantAsMetadata::get(ConstantInt::get(iBuilder->getInt32Ty(), 1))});
    596         NamedMDNode *NMD = M->getOrInsertNamedMetadata("nvvm.annotations");
    597         NMD->addOperand(Node);
    598 
    599         Compile2PTX(M, IRFilename, PTXFilename);
    600        
    601         ParabixDriver pxDriver(CPUBuilder);
    602         generateCPUKernel(pxDriver, grepType);
    603        
    604         mGrepFunction_CPU = reinterpret_cast<GrepFunctionType_CPU>(pxDriver.getPointerToMain());
    605         if (CountOnly) return;
    606     }
    607     #endif
    608 
    609     delete iBuilder;
    610 
    611     if (CountOnly) {
    612         mGrepFunction_CountOnly = reinterpret_cast<GrepFunctionType_CountOnly>(pxDriver.getPointerToMain());
    613     } else {
    614         if (CPU_Only) {
    615             mGrepFunction = reinterpret_cast<GrepFunctionType>(pxDriver.getPointerToMain());
    616         }
    617     }
    618 }
    619 
    620 
    621 
    622 void GrepEngine::grepCodeGen(std::string moduleName, std::vector<re::RE *> REs, const bool CountOnly, const bool UTF_16, const GrepType grepType, const bool usingStdIn) {
    623 
    624     Module * M = new Module(moduleName + ":icgrep", getGlobalContext());;
    625     IDISA::IDISA_Builder * iBuilder = IDISA::GetIDISA_Builder(M);;
    626     ParabixDriver pxDriver(iBuilder);
    627 
    628     const unsigned segmentSize = codegen::SegmentSize;
    629     const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
    630     const unsigned encodingBits = UTF_16 ? 16 : 8;
    631 
    632     Type * const sizeTy = iBuilder->getSizeTy();
    633     Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(iBuilder->getBitBlockType(), encodingBits), 1), 0);
    634     Type * const resultTy = CountOnly ? sizeTy : iBuilder->getVoidTy();
    635 
    636     Function * mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, sizeTy, sizeTy, nullptr));
    637     mainFn->setCallingConv(CallingConv::C);
    638     iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
    639     Function::arg_iterator args = mainFn->arg_begin();
    640 
    641     Value * inputStream = &*(args++);
    642     inputStream->setName("input");
    643     Value * fileSize = &*(args++);
    644     fileSize->setName("fileSize");
    645     Value * fileIdx = &*(args++);
    646     fileIdx->setName("fileIdx");
    647 
    648     StreamSetBuffer * ByteStream = nullptr;
    649     kernel::KernelBuilder * sourceK = nullptr;
    650     if (usingStdIn) {
    651         // TODO: use fstat(STDIN_FILENO) to see if we can mmap the stdin safely and avoid the calls to read
    652         ByteStream = pxDriver.addBuffer(make_unique<ExtensibleBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), segmentSize));
    653         sourceK = pxDriver.addKernelInstance(make_unique<kernel::StdInKernel>(iBuilder, segmentSize));
    654     } else {
    655         ByteStream = pxDriver.addBuffer(make_unique<SourceFileBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8)));
    656         sourceK = pxDriver.addKernelInstance(make_unique<kernel::FileSourceKernel>(iBuilder, inputStream->getType(), segmentSize));
    657         sourceK->setInitialArguments({inputStream, fileSize});
    658     }
     251
     252        mainFunc = cast<Function>(M->getOrInsertFunction("Main", int64Ty, iBuilder->getInt32Ty(), int64Ty, nullptr));
     253        mainFunc->setCallingConv(CallingConv::C);
     254        iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
     255        Function::arg_iterator args = mainFunc->arg_begin();
     256
     257        Value * const fileDescriptor = &*(args++);
     258        fileDescriptor->setName("fileDescriptor");
     259        fileIdx = &*(args++);
     260        fileIdx->setName("fileIdx");
     261
     262        if (grepSource == GrepSource::File) {
     263            ByteStream = pxDriver.addBuffer(make_unique<SourceFileBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8)));
     264            sourceK = pxDriver.addKernelInstance(make_unique<kernel::MMapSourceKernel>(iBuilder, segmentSize));
     265            sourceK->setInitialArguments({fileDescriptor});
     266        } else { // if (grepSource == GrepSource::StdIn) {
     267            ByteStream = pxDriver.addBuffer(make_unique<ExtensibleBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8), segmentSize));
     268            sourceK = pxDriver.addKernelInstance(make_unique<kernel::StdInKernel>(iBuilder, segmentSize));
     269        }
     270    }
     271
    659272    pxDriver.makeKernelCall(sourceK, {}, {ByteStream});
    660273    StreamSetBuffer * BasisBits = pxDriver.addBuffer(make_unique<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1), segmentSize * bufferSegments));
     
    667280    pxDriver.makeKernelCall(linebreakK, {BasisBits}, {LineBreakStream});
    668281   
    669     std::vector<StreamSetBuffer *> MatchResultsBufs;
    670 
    671     for(unsigned i = 0; i < REs.size(); ++i){
     282    const auto n = REs.size();
     283
     284    std::vector<StreamSetBuffer *> MatchResultsBufs(n);
     285
     286    for(unsigned i = 0; i < n; ++i){
    672287        StreamSetBuffer * MatchResults = pxDriver.addBuffer(make_unique<CircularBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 1), segmentSize * bufferSegments));
    673288        kernel::KernelBuilder * icgrepK = pxDriver.addKernelInstance(make_unique<kernel::ICgrepKernelBuilder>(iBuilder, REs[i]));
    674289        pxDriver.makeKernelCall(icgrepK, {BasisBits, LineBreakStream}, {MatchResults});
    675         MatchResultsBufs.push_back(MatchResults);
     290        MatchResultsBufs[i] = MatchResults;
    676291    }
    677292    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
     
    700315        linkGrepFunction(pxDriver, grepType, UTF_16, scanMatchK);
    701316        pxDriver.generatePipelineIR();
    702         iBuilder->CreateRetVoid();
     317        iBuilder->CreateRet(iBuilder->getInt64(0));
    703318        pxDriver.linkAndFinalize();
    704319    }
    705320
    706     //delete iBuilder;
    707 
    708     if (CountOnly) {
    709         mGrepFunction_CountOnly = reinterpret_cast<GrepFunctionType_CountOnly>(pxDriver.getPointerToMain());
    710     } else {
    711         mGrepFunction = reinterpret_cast<GrepFunctionType>(pxDriver.getPointerToMain());
    712     }
     321    mGrepFunction = pxDriver.getPointerToMain();
    713322}
    714323
     
    717326    char * mFileBuffer = getUnicodeNameDataPtr();
    718327    size_t mFileSize = getUnicodeNameDataSize();
    719     mGrepFunction(mFileBuffer, mFileSize, 0);
     328    doGrep(mFileBuffer, mFileSize, 0);
    720329    return parsedCodePointSet;
    721330}
     
    732341    std::memcpy(aligned, str.data(), n);
    733342    std::memset(aligned + n, 0, MaxSupportedVectorWidthInBytes);
    734     mGrepFunction(aligned, n, 0);
     343    doGrep(aligned, n, 0);
    735344    alloc.deallocate(aligned, 0);
    736345    return parsedPropertyValues;
     
    738347
    739348GrepEngine::GrepEngine()
    740 : mGrepFunction(nullptr)
    741 , mGrepFunction_CountOnly(nullptr)
    742 #ifdef CUDA_ENABLED
    743 , mGrepFunction_CPU(nullptr)
    744 #endif
    745 {
    746 
    747 }
     349: mGrepFunction(nullptr) {
     350
     351}
  • icGREP/icgrep-devel/icgrep/grep_engine.h

    r5406 r5418  
    1414namespace re { class RE; }
    1515
    16     typedef void (*GrepFunctionType)(char * byte_data, size_t filesize, const int fileIdx);
    17     typedef uint64_t (*GrepFunctionType_CountOnly)(char * byte_data, size_t filesize, const int fileIdx);
    18     typedef void (*GrepFunctionType_CPU)(char * rslt, char * LineBreak, char * byte_data, size_t filesize, const int fileIdx);
    1916class GrepEngine {
    2017public:
     
    2219    GrepEngine();
    2320
    24     void grepCodeGen(std::string moduleName, re::RE * re_ast, bool CountOnly, bool UTF_16 = false, GrepType grepType = GrepType::Normal, const bool usingStdIn = false);
     21    void grepCodeGen(std::string moduleName, std::vector<re::RE *> REs, bool CountOnly, bool UTF_16, GrepSource grepSource, GrepType grepType = GrepType::Normal);
    2522
    26     void grepCodeGen(std::string moduleName, std::vector<re::RE *> REs, bool CountOnly, bool UTF_16 = false, GrepType grepType = GrepType::Normal, const bool usingStdIn = false);
    27      
    28     void doGrep(const std::string & fileName, const int fileIdx, bool CountOnly, std::vector<size_t> & total_CountOnly);
     23    uint64_t doGrep(const std::string & fileName, const int fileIdx) const;
    2924
    30     void doGrep(const int fileIdx, bool CountOnly, std::vector<size_t> & total_CountOnly);
     25    uint64_t doGrep(const uint32_t fileDescriptor, const int fileIdx) const;
    3126   
    32     re::CC *  grepCodepoints();
     27    void doGrep(const char * buffer, const uint64_t length, const int fileIdx) const;
     28
     29    re::CC * grepCodepoints();
    3330
    3431    const std::vector<std::string> & grepPropertyValues(const std::string & propertyName);
     
    3633private:
    3734   
    38     GrepFunctionType mGrepFunction;
    39     GrepFunctionType_CountOnly mGrepFunction_CountOnly;
    40 #ifdef CUDA_ENABLED
    41     GrepFunctionType_CPU mGrepFunction_CPU;
    42 #endif
     35    void * mGrepFunction;
    4336};
    4437
  • icGREP/icgrep-devel/icgrep/grep_type.h

    r5206 r5418  
    88#define ICGREP_GREP_TYPE_H
    99
    10 enum GrepType {Normal, NameExpression, PropertyValue};
     10enum class GrepType {
     11    Normal
     12    , NameExpression
     13    , PropertyValue
     14};
     15
     16enum class GrepSource {
     17    File
     18    , StdIn
     19    , Internal
     20};
    1121
    1222#endif //ICGREP_GREP_TYPE_H
  • icGREP/icgrep-devel/icgrep/icgrep-devel.files

    r5408 r5418  
    233233hrtime.h
    234234icgrep.cpp
    235 preprocess.cpp
    236235u8u16.cpp
    237236utf16_encoder.cpp
  • icGREP/icgrep-devel/icgrep/icgrep.cpp

    r5402 r5418  
    3131#include <util/papi_helper.hpp>
    3232#endif
     33#include <sys/stat.h>
     34#include <fcntl.h>
    3335
    3436using namespace llvm;
     
    113115static std::string allREs;
    114116static re::ModeFlagSet globalFlags = 0;
    115 std::vector<re::RE *> RELists;
    116 
    117 re::RE * get_icgrep_RE() {
     117
     118std::vector<re::RE *> readExpressions() {
    118119 
    119     //std::vector<std::string> regexVector;
    120120    if (RegexFilename != "") {
    121121        std::ifstream regexFile(RegexFilename.c_str());
     
    136136        inputFiles.erase(inputFiles.begin());
    137137    }
    138     if (CaseInsensitive) globalFlags |= re::CASE_INSENSITIVE_MODE_FLAG;
     138    if (CaseInsensitive) {
     139        globalFlags |= re::CASE_INSENSITIVE_MODE_FLAG;
     140    }
    139141
    140142    std::vector<re::RE *> REs;
    141     re::RE * re_ast = nullptr;
    142143    for (unsigned i = 0; i < regexVector.size(); i++) {
    143144#ifdef FUTURE
    144         re_ast = re::RE_Parser::parse(regexVector[i], globalFlags, RegexpSyntax);
     145        re::RE * re_ast = re::RE_Parser::parse(regexVector[i], globalFlags, RegexpSyntax);
    145146#else
    146         re_ast = re::RE_Parser::parse(regexVector[i], globalFlags);
     147        re::RE * re_ast = re::RE_Parser::parse(regexVector[i], globalFlags);
    147148#endif
    148149        REs.push_back(re_ast);
     
    150151    }
    151152
    152     std::vector<re::RE *>::iterator start = REs.begin();
    153     std::vector<re::RE *>::iterator end = start + REsPerGroup;
    154     while(end < REs.end()) {
    155         RELists.push_back(re::makeAlt(start, end));
    156         start = end;
    157         end += REsPerGroup;
    158     }
    159     if(REs.end()-start>1)
    160         RELists.push_back(re::makeAlt(start, REs.end()));
    161     else
    162         RELists.push_back(*start);
    163 
    164     if (REs.size() > 1) {
    165         re_ast = re::makeAlt(REs.begin(), REs.end());
    166     }
    167     if (WholeWordMatching) {
    168         re_ast = re::makeSeq({re::makeWordBoundary(), re_ast, re::makeWordBoundary()});
    169     }
    170     if (EntireLineMatching) {
    171         re_ast = re::makeSeq({re::makeStart(), re_ast, re::makeEnd()});
    172     }   
    173     return re_ast;
     153    if (MultiGrepKernels) {
     154        std::vector<re::RE *> groups;
     155        auto start = REs.begin();
     156        auto end = start + REsPerGroup;
     157        while (end < REs.end()) {
     158            groups.push_back(re::makeAlt(start, end));
     159            start = end;
     160            end += REsPerGroup;
     161        }
     162        if ((REs.end() - start) > 1) {
     163            groups.push_back(re::makeAlt(start, REs.end()));
     164        } else {
     165            groups.push_back(*start);
     166        }
     167        REs.swap(groups);
     168    } else if (REs.size() > 1) {
     169        re::RE * re_ast = re::makeAlt(REs.begin(), REs.end());
     170        REs.assign({re_ast});
     171    }
     172
     173    for (re::RE *& re_ast : REs) {
     174        if (WholeWordMatching) {
     175            re_ast = re::makeSeq({re::makeWordBoundary(), re_ast, re::makeWordBoundary()});
     176        }
     177        if (EntireLineMatching) {
     178            re_ast = re::makeSeq({re::makeStart(), re_ast, re::makeEnd()});
     179        }
     180    }
     181
     182    return REs;
    174183}
    175184
     
    199208    count_mutex.unlock();
    200209
    201     while (fileIdx < allFiles.size()){
    202         grepEngine->doGrep(allFiles[fileIdx], fileIdx, CountOnly, total_CountOnly);
     210    while (fileIdx < allFiles.size()) {
     211        total_CountOnly[fileIdx] = grepEngine->doGrep(allFiles[fileIdx], fileIdx);
    203212       
    204213        count_mutex.lock();
     
    384393    }
    385394#endif
    386     re::RE * re_ast = get_icgrep_RE();
     395
     396    const auto REs = readExpressions();
     397
    387398    std::string module_name = "grepcode:" + sha1sum(allREs) + ":" + std::to_string(globalFlags);
    388399   
     
    399410    if (allFiles.empty()) {
    400411
    401         grepEngine.grepCodeGen(module_name, re_ast, CountOnly, UTF_16, GrepType::Normal, true);
     412        grepEngine.grepCodeGen(module_name, REs, CountOnly, UTF_16, GrepSource::StdIn);
    402413        allFiles = { "-" };
    403414        initFileResult(allFiles);
    404         total_CountOnly.push_back(0);
    405         grepEngine.doGrep(0, CountOnly, total_CountOnly);
     415        total_CountOnly.resize(1);
     416        total_CountOnly[0] = grepEngine.doGrep(STDIN_FILENO, 0);
    406417
    407418    } else {
    408419
    409         if (MultiGrepKernels) {
    410             grepEngine.grepCodeGen(module_name, RELists, CountOnly, UTF_16);
    411         } else {
    412             grepEngine.grepCodeGen(module_name, re_ast, CountOnly, UTF_16, GrepType::Normal, false);
    413         }
     420        grepEngine.grepCodeGen(module_name, REs, CountOnly, UTF_16, GrepSource::File);
    414421
    415422        if (FileNamesOnly && NonMatchingFileNamesOnly) {
     
    433440        }
    434441        initFileResult(allFiles);
    435 
    436         for (unsigned i=0; i < allFiles.size(); ++i){
    437             total_CountOnly.push_back(0);
    438         }
     442        total_CountOnly.resize(allFiles.size());
    439443
    440444        if (Threads <= 1) {
    441 
    442             #ifdef PRINT_TIMING_INFORMATION
    443             // PAPI_RES_STL, PAPI_STL_CCY, PAPI_FUL_CCY, PAPI_MEM_WCY
    444             // PAPI_RES_STL, PAPI_BR_MSP, PAPI_LST_INS, PAPI_L1_TCM
    445             papi::PapiCounter<4> papiCounters({PAPI_RES_STL, PAPI_STL_CCY, PAPI_FUL_CCY, PAPI_MEM_WCY});
    446             #endif
    447445            for (unsigned i = 0; i != allFiles.size(); ++i) {
    448                 #ifdef PRINT_TIMING_INFORMATION
    449                 papiCounters.start();
    450                 const timestamp_t execution_start = read_cycle_counter();
    451                 #endif
    452                 grepEngine.doGrep(allFiles[i], i, CountOnly, total_CountOnly);
    453                 #ifdef PRINT_TIMING_INFORMATION
    454                 const timestamp_t execution_end = read_cycle_counter();
    455                 papiCounters.stop();
    456                 std::cerr << "EXECUTION TIME: " << allFiles[i] << ":" << "CYCLES|" << (execution_end - execution_start) << papiCounters << std::endl;
    457                 #endif
     446                total_CountOnly[i] = grepEngine.doGrep(allFiles[i], i);
    458447            }
    459448        } else if (Threads > 1) {
     
    467456                }
    468457            }
    469 
    470458            for(unsigned i = 0; i < numOfThreads; ++i) {
    471459                void * status = nullptr;
     
    478466
    479467    }
    480 
    481468   
    482469    PrintResult(CountOnly, total_CountOnly);
  • icGREP/icgrep-devel/icgrep/kernels/interface.cpp

    r5411 r5418  
    1111#include <llvm/IR/Module.h>
    1212#include <IR_Gen/idisa_builder.h>
    13 namespace llvm { class Module; }
    14 namespace llvm { class Type; }
    1513
    1614static const auto INIT_SUFFIX = "_Init";
    1715
    18 static const auto TERMINATE_SUFFIX = "_Terminate";
    19 
    2016static const auto DO_SEGMENT_SUFFIX = "_DoSegment";
    2117
    22 static const auto ACCUMULATOR_INFIX = "_get_";
     18static const auto TERMINATE_SUFFIX = "_Terminate";
    2319
    2420using namespace llvm;
     
    7369
    7470void KernelInterface::addKernelDeclarations(Module * const client) {
    75     Module * saveModule = iBuilder->getModule();
    76     auto savePoint = iBuilder->saveIP();
     71    Module * const saveModule = iBuilder->getModule();
    7772    iBuilder->setModule(client);
    7873    if (mKernelStateType == nullptr) {
     
    8277    IntegerType * const sizeTy = iBuilder->getSizeTy();
    8378    PointerType * const consumerTy = StructType::get(sizeTy, sizeTy->getPointerTo()->getPointerTo(), nullptr)->getPointerTo();
     79    Type * const voidTy = iBuilder->getVoidTy();
    8480
    8581    // Create the initialization function prototype
     
    9086    initParameters.insert(initParameters.end(), mStreamSetOutputs.size(), consumerTy);
    9187
    92     FunctionType * initType = FunctionType::get(iBuilder->getVoidTy(), initParameters, false);
    93     Function * init = Function::Create(initType, GlobalValue::ExternalLinkage, getName() + INIT_SUFFIX, client);
    94     init->setCallingConv(CallingConv::C);
    95     init->setDoesNotThrow();
    96     auto args = init->arg_begin();
     88    FunctionType * const initType = FunctionType::get(voidTy, initParameters, false);
     89    Function * const initFunc = Function::Create(initType, GlobalValue::ExternalLinkage, getName() + INIT_SUFFIX, client);
     90    initFunc->setCallingConv(CallingConv::C);
     91    initFunc->setDoesNotThrow();
     92    auto args = initFunc->arg_begin();
    9793    args->setName("self");
    9894    for (auto binding : mScalarInputs) {
     
    10096    }
    10197    for (auto binding : mStreamSetOutputs) {
    102         args->setName(binding.name + "ConsumerLogicalSegments");       
    103         ++args;
     98        (args++)->setName(binding.name + "ConsumerLocks");
    10499    }
    105100
     
    108103    params.insert(params.end(), mStreamSetInputs.size(), sizeTy);
    109104
    110     FunctionType * const doSegmentType = FunctionType::get(iBuilder->getVoidTy(), params, false);
    111     Function * doSegment = Function::Create(doSegmentType, GlobalValue::ExternalLinkage, getName() + DO_SEGMENT_SUFFIX, client);
     105    FunctionType * const doSegmentType = FunctionType::get(voidTy, params, false);
     106    Function * const doSegment = Function::Create(doSegmentType, GlobalValue::ExternalLinkage, getName() + DO_SEGMENT_SUFFIX, client);
    112107    doSegment->setCallingConv(CallingConv::C);
    113108    doSegment->setDoesNotThrow();
    114     doSegment->setDoesNotCapture(1); // for self parameter only.
     109    doSegment->setDoesNotCapture(1); // for self parameter only.   
    115110    args = doSegment->arg_begin();
    116111    args->setName("self");
     
    121116
    122117    // Create the terminate function prototype
    123     FunctionType * terminateType = FunctionType::get(iBuilder->getVoidTy(), {selfType}, false);
    124     Function * terminateFunc = Function::Create(terminateType, GlobalValue::ExternalLinkage, getName() + TERMINATE_SUFFIX, client);
     118    Type * resultType = nullptr;
     119    if (mScalarOutputs.empty()) {
     120        resultType = iBuilder->getVoidTy();
     121    } else {
     122        const auto n = mScalarOutputs.size();
     123        Type * outputType[n];
     124        for (unsigned i = 0; i < n; ++i) {
     125            outputType[i] = mScalarOutputs[i].type;
     126        }
     127        if (n == 1) {
     128            resultType = outputType[0];
     129        } else {
     130            resultType = StructType::get(iBuilder->getContext(), ArrayRef<Type *>(outputType, n));
     131        }
     132    }
     133    FunctionType * const terminateType = FunctionType::get(resultType, {selfType}, false);
     134    Function * const terminateFunc = Function::Create(terminateType, GlobalValue::ExternalLinkage, getName() + TERMINATE_SUFFIX, client);
    125135    terminateFunc->setCallingConv(CallingConv::C);
    126136    terminateFunc->setDoesNotThrow();
     
    129139    args->setName("self");
    130140
    131     /// INVESTIGATE: replace the accumulator methods with a single Exit method that handles any clean up and returns
    132     /// a struct containing all scalar outputs?
    133 
    134     // Create the accumulator get function prototypes
    135     for (const auto & binding : mScalarOutputs) {
    136         FunctionType * accumFnType = FunctionType::get(binding.type, {selfType}, false);
    137         Function * accumFn = Function::Create(accumFnType, GlobalValue::ExternalLinkage, getName() + ACCUMULATOR_INFIX + binding.name, client);
    138         accumFn->setCallingConv(CallingConv::C);
    139         accumFn->setDoesNotThrow();
    140         accumFn->setDoesNotCapture(1);
    141         auto args = accumFn->arg_begin();
    142         args->setName("self");
    143     }
    144 
    145141    iBuilder->setModule(saveModule);
    146     iBuilder->restoreIP(savePoint);
    147142}
    148143
    149144void KernelInterface::setInitialArguments(std::vector<Value *> args) {
    150145    mInitialArguments = args;
    151 }
    152 
    153 llvm::Function * KernelInterface::getAccumulatorFunction(const std::string & accumName) const {
    154     const auto name = getName() + ACCUMULATOR_INFIX + accumName;
    155     Function * f = iBuilder->getModule()->getFunction(name);
    156     if (LLVM_UNLIKELY(f == nullptr)) {
    157         llvm::report_fatal_error("Cannot find " + name);
    158     }
    159     return f;
    160146}
    161147
  • icGREP/icgrep-devel/icgrep/kernels/interface.h

    r5411 r5418  
    77#define KERNEL_INTERFACE_H
    88
    9 #include <string>  // for string
    10 #include <vector>  // for vector
     9#include <llvm/IR/Constants.h>
     10#include <string>
     11#include <vector>
     12
    1113namespace IDISA { class IDISA_Builder; }
    12 //namespace llvm { class ConstantInt; }
    13 #include <llvm/IR/Constants.h>
    14 namespace llvm { class Function; }
    15 namespace llvm { class Module; }
    16 namespace llvm { class PointerType; }
    17 namespace llvm { class StructType; }
    18 namespace llvm { class Type; }
    19 namespace llvm { class Value; }
    20 
    2114
    2215// Processing rate attributes are required for all stream set bindings for a kernel.
     
    10497    virtual void initializeInstance() = 0;
    10598
    106     virtual void terminateInstance() = 0;
     99    virtual void finalizeInstance() = 0;
    107100
    108101    void setInitialArguments(std::vector<llvm::Value *> args);
     
    128121    virtual void setProcessedItemCount(const std::string & name, llvm::Value * value) const = 0;
    129122
     123    virtual llvm::Value * getConsumedItemCount(const std::string & name) const = 0;
     124
     125    virtual void setConsumedItemCount(const std::string & name, llvm::Value * value) const = 0;
     126
    130127    virtual llvm::Value * getTerminationSignal() const = 0;
    131128
     
    139136
    140137    llvm::Function * getDoSegmentFunction() const;
    141 
    142     llvm::Function * getAccumulatorFunction(const std::string & accumName) const;
    143138
    144139    llvm::Function * getTerminateFunction() const;
  • icGREP/icgrep-devel/icgrep/kernels/kernel.cpp

    r5411 r5418  
    3434static const std::string BUFFER_PTR_SUFFIX = "_bufferPtr";
    3535
    36 static const std::string CONSUMER_LOGICAL_SEGMENT_SUFFIX = "_cls";
     36static const std::string CONSUMER_SUFFIX = "_cls";
    3737
    3838using namespace llvm;
     
    5555unsigned KernelBuilder::addUnnamedScalar(Type * const type) {
    5656    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
    57         report_fatal_error("Cannot add unnamed kernel field after kernel state finalized");
     57        report_fatal_error("Cannot add unnamed field  to " + getName() + " after kernel state finalized");
    5858    }
    5959    const auto index = mKernelFields.size();
     
    7272   
    7373void KernelBuilder::prepareKernel() {
    74 
    7574    if (LLVM_UNLIKELY(mKernelStateType != nullptr)) {
    7675        report_fatal_error("Cannot prepare kernel after kernel state finalized");
    7776    }
    78 
    7977    if (mStreamSetInputs.size() != mStreamSetInputBuffers.size()) {
    8078        std::string tmp;
     
    9795        }
    9896        mScalarInputs.emplace_back(mStreamSetInputBuffers[i]->getPointerType(), mStreamSetInputs[i].name + BUFFER_PTR_SUFFIX);
    99         if ((i == 0) || !mStreamSetInputs[i].rate.isExact()) {
     97        if ((i == 0) || mStreamSetInputs[i].rate.isUnknown()) {
    10098            addScalar(iBuilder->getSizeTy(), mStreamSetInputs[i].name + PROCESSED_ITEM_COUNT_SUFFIX);
    10199        }       
     
    124122    Type * const consumerSetTy = StructType::get(sizeTy, sizeTy->getPointerTo()->getPointerTo(), nullptr)->getPointerTo();
    125123    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
    126         addScalar(consumerSetTy, mStreamSetOutputs[i].name + CONSUMER_LOGICAL_SEGMENT_SUFFIX);
     124        addScalar(consumerSetTy, mStreamSetOutputs[i].name + CONSUMER_SUFFIX);
    127125    }
    128126
     
    130128    addScalar(iBuilder->getInt1Ty(), TERMINATION_SIGNAL);
    131129
     130    for (unsigned i = 0; i < mStreamSetOutputs.size(); i++) {
     131        addScalar(sizeTy, mStreamSetOutputs[i].name + CONSUMED_ITEM_COUNT_SUFFIX);
     132    }
     133
    132134    mKernelStateType = StructType::create(iBuilder->getContext(), mKernelFields, getName());
    133135}
    134136
    135 Module * KernelBuilder::createKernelStub(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs) {
    136 
     137void KernelBuilder::createKernelStub(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs) {
     138
     139    assert (mModule == nullptr);
    137140    assert (mStreamSetInputBuffers.empty());
    138141    assert (mStreamSetOutputBuffers.empty());
     
    188191    prepareKernel();
    189192
    190     Module * const m = new Module(cacheName.str(), iBuilder->getContext());
    191     m->setTargetTriple(iBuilder->getModule()->getTargetTriple());
    192     return m;
     193    mModule = new Module(cacheName.str(), iBuilder->getContext());
     194    mModule->setTargetTriple(iBuilder->getModule()->getTargetTriple());
    193195}
    194196
     
    213215        auto savePoint = iBuilder->saveIP();
    214216        addKernelDeclarations(iBuilder->getModule());
    215         callGenerateInitMethod();
     217        callGenerateInitializeMethod();
    216218        callGenerateDoSegmentMethod();       
    217         // Implement the accumulator get functions
    218         for (auto binding : mScalarOutputs) {
    219             Function * f = getAccumulatorFunction(binding.name);
    220             iBuilder->SetInsertPoint(BasicBlock::Create(iBuilder->getContext(), "get_" + binding.name, f));
    221             Value * self = &*(f->arg_begin());
    222             Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), getScalarIndex(binding.name)});
    223             Value * retVal = iBuilder->CreateLoad(ptr);
    224             iBuilder->CreateRet(retVal);
    225         }
    226         callGenerateTerminateMethod();
     219        callGenerateFinalizeMethod();
    227220        iBuilder->restoreIP(savePoint);
    228221        setInstance(saveInstance);
     
    231224}
    232225
    233 void KernelBuilder::callGenerateDoSegmentMethod() {
    234     mCurrentMethod = getDoSegmentFunction();
    235     iBuilder->SetInsertPoint(CreateBasicBlock(getName() + "_entry"));
    236     auto args = mCurrentMethod->arg_begin();
    237     setInstance(&*(args++));
    238     Value * doFinal = &*(args++);
    239     std::vector<Value *> producerPos;
    240     for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
    241         producerPos.push_back(&*(args++));
    242     }
    243     generateDoSegmentMethod(doFinal, producerPos); // must be overridden by the KernelBuilder subtype
    244     iBuilder->CreateRetVoid();
    245 }
    246 
    247 void KernelBuilder::callGenerateInitMethod() {
     226inline void KernelBuilder::callGenerateInitializeMethod() {
    248227    mCurrentMethod = getInitFunction();
    249228    iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
     
    257236        setConsumerState(binding.name, &*(args++));
    258237    }
    259     generateInitMethod();
     238    generateInitializeMethod();
    260239    iBuilder->CreateRetVoid();
    261240}
    262241
    263 void KernelBuilder::callGenerateTerminateMethod() {
    264     mCurrentMethod = getTerminateFunction();
    265     iBuilder->SetInsertPoint(CreateBasicBlock(getName() + "_entry"));
     242inline void KernelBuilder::callGenerateDoSegmentMethod() {
     243    mCurrentMethod = getDoSegmentFunction();
     244    BasicBlock * const entry = CreateBasicBlock(getName() + "_entry");
     245    iBuilder->SetInsertPoint(entry);
    266246    auto args = mCurrentMethod->arg_begin();
    267247    setInstance(&*(args++));
    268     generateTerminateMethod(); // may be overridden by the KernelBuilder subtype
     248    mIsFinal = &*(args++);
     249    const auto n = mStreamSetInputs.size();
     250    mAvailableItemCount.resize(n, nullptr);
     251    for (unsigned i = 0; i < mStreamSetInputs.size(); i++) {
     252        mAvailableItemCount[i] = &*(args++);
     253    }
     254    generateDoSegmentMethod(); // must be overridden by the KernelBuilder subtype
     255    mIsFinal = nullptr;
     256    mAvailableItemCount.clear();
    269257    iBuilder->CreateRetVoid();
     258}
     259
     260inline void KernelBuilder::callGenerateFinalizeMethod() {
     261    mCurrentMethod = getTerminateFunction();
     262    iBuilder->SetInsertPoint(CreateBasicBlock("entry"));
     263    auto args = mCurrentMethod->arg_begin();
     264    setInstance(&*(args++));
     265    generateFinalizeMethod(); // may be overridden by the KernelBuilder subtype
     266    const auto n = mScalarOutputs.size();
     267    if (n == 0) {
     268        iBuilder->CreateRetVoid();
     269    } else {
     270        Value * outputs[n];
     271        for (unsigned i = 0; i < n; ++i) {
     272            outputs[i] = getScalarField(mScalarOutputs[i].name);
     273        }
     274        if (n == 1) {
     275            iBuilder->CreateRet(outputs[0]);
     276        } else {
     277            iBuilder->CreateAggregateRet(outputs, n);
     278        }
     279    }
    270280}
    271281
     
    307317
    308318llvm::Value * KernelBuilder::getAvailableItemCount(const std::string & name) const {
    309     auto arg = mCurrentMethod->arg_begin();
    310     ++arg; // self
    311     ++arg; // doFinal
    312319    for (unsigned i = 0; i < mStreamSetInputs.size(); ++i) {
    313320        if (mStreamSetInputs[i].name == name) {
    314             return &*arg;
    315         }
    316         ++arg;
     321            return mAvailableItemCount[i];
     322        }
    317323    }
    318324    return nullptr;
     
    334340}
    335341
     342Value * KernelBuilder::getConsumedItemCount(const std::string & name) const {
     343    return getScalarField(name + CONSUMED_ITEM_COUNT_SUFFIX);
     344}
     345
    336346void KernelBuilder::setProducedItemCount(const std::string & name, Value * value) const {
    337347    setScalarField(name + PRODUCED_ITEM_COUNT_SUFFIX, value);
     
    342352}
    343353
     354void KernelBuilder::setConsumedItemCount(const std::string & name, Value * value) const {
     355    setScalarField(name + CONSUMED_ITEM_COUNT_SUFFIX, value);
     356}
     357
    344358Value * KernelBuilder::getTerminationSignal() const {
    345359    return getScalarField(TERMINATION_SIGNAL);
     
    359373
    360374llvm::Value * KernelBuilder::getConsumerState(const std::string & name) const {
    361     return getScalarField(name + CONSUMER_LOGICAL_SEGMENT_SUFFIX);
     375    return getScalarField(name + CONSUMER_SUFFIX);
    362376}
    363377
    364378void KernelBuilder::setConsumerState(const std::string & name, llvm::Value * value) const {
    365     setScalarField(name + CONSUMER_LOGICAL_SEGMENT_SUFFIX, value);
     379    setScalarField(name + CONSUMER_SUFFIX, value);
    366380}
    367381
     
    443457}
    444458
    445 void KernelBuilder::setBaseAddress(const std::string & name, llvm::Value * addr) const {
    446     unsigned index; Port port;
    447     std::tie(port, index) = getStreamPort(name);
    448     const StreamSetBuffer * buf = nullptr;
    449     if (port == Port::Input) {
    450         assert (index < mStreamSetInputBuffers.size());
    451         buf = mStreamSetInputBuffers[index];
    452     } else {
    453         assert (index < mStreamSetOutputBuffers.size());
    454         buf = mStreamSetOutputBuffers[index];
    455     }
    456     return buf->setBaseAddress(getStreamSetBufferPtr(name), addr);
     459Value * KernelBuilder::getBaseAddress(const std::string & name) const {
     460    return getAnyStreamSetBuffer(name)->getBaseAddress(getStreamSetBufferPtr(name));
     461}
     462
     463void KernelBuilder::setBaseAddress(const std::string & name, Value * const addr) const {
     464    return getAnyStreamSetBuffer(name)->setBaseAddress(getStreamSetBufferPtr(name), addr);
    457465}
    458466
    459467Value * KernelBuilder::getBufferedSize(const std::string & name) const {
    460     unsigned index; Port port;
    461     std::tie(port, index) = getStreamPort(name);
    462     const StreamSetBuffer * buf = nullptr;
    463     if (port == Port::Input) {
    464         assert (index < mStreamSetInputBuffers.size());
    465         buf = mStreamSetInputBuffers[index];
    466     } else {
    467         assert (index < mStreamSetOutputBuffers.size());
    468         buf = mStreamSetOutputBuffers[index];
    469     }
    470     return buf->getBufferedSize(getStreamSetBufferPtr(name));
     468    return getAnyStreamSetBuffer(name)->getBufferedSize(getStreamSetBufferPtr(name));
    471469}
    472470
     
    491489}
    492490
    493 KernelBuilder::StreamPort KernelBuilder::getStreamPort(const std::string & name) const {
    494     const auto f = mStreamMap.find(name);
    495     if (LLVM_UNLIKELY(f == mStreamMap.end())) {
    496         report_fatal_error(getName() + " does not contain stream set: " + name);
    497     }
    498     return f->second;
    499 }
    500 
    501491Value * KernelBuilder::getStreamSetBufferPtr(const std::string & name) const {
    502492    return getScalarField(name + BUFFER_PTR_SUFFIX);
     
    513503
    514504CallInst * KernelBuilder::createDoSegmentCall(const std::vector<Value *> & args) const {
    515     assert (getDoSegmentFunction()->getArgumentList().size() == args.size());
    516     return iBuilder->CreateCall(getDoSegmentFunction(), args);
    517 }
    518 
    519 CallInst * KernelBuilder::createGetAccumulatorCall(const std::string & accumName) const {
    520     return iBuilder->CreateCall(getAccumulatorFunction(accumName), { getInstance() });
     505    Function * const doSegment = getDoSegmentFunction();
     506    assert (doSegment->getArgumentList().size() == args.size());
     507    return iBuilder->CreateCall(doSegment, args);
     508}
     509
     510Value * KernelBuilder::getAccumulator(const std::string & accumName) const {
     511    if (LLVM_UNLIKELY(mOutputScalarResult == nullptr)) {
     512        report_fatal_error("Cannot get accumulator " + accumName + " until " + getName() + " has terminated.");
     513    }
     514    const auto n = mScalarOutputs.size();
     515    if (LLVM_UNLIKELY(n == 0)) {
     516        report_fatal_error(getName() + " has no output scalars.");
     517    } else {
     518        for (unsigned i = 0; i < n; ++i) {
     519            const Binding & b = mScalarOutputs[i];
     520            if (b.name == accumName) {
     521                if (n == 1) {
     522                    return mOutputScalarResult;
     523                } else {
     524                    return iBuilder->CreateExtractValue(mOutputScalarResult, {i});
     525                }
     526            }
     527        }
     528        report_fatal_error(getName() + " has no output scalar named " + accumName);
     529    }
    521530}
    522531
     
    576585    StructType * const consumerTy = StructType::get(sizeTy, sizePtrPtrTy, nullptr);
    577586    for (unsigned i = 0; i < mStreamSetOutputBuffers.size(); ++i) {
    578         const auto & consumers = mStreamSetOutputBuffers[i]->getConsumers();
     587        const auto output = mStreamSetOutputBuffers[i];
     588        const auto & consumers = output->getConsumers();
    579589        const auto n = consumers.size();
    580590        AllocaInst * const outputConsumers = iBuilder->CreateAlloca(consumerTy);
     
    582592        for (unsigned i = 0; i < n; ++i) {
    583593            KernelBuilder * const consumer = consumers[i];
    584             assert (consumer->getInstance());
    585             Value * const segNo = consumer->getScalarFieldPtr(consumer->getInstance(), LOGICAL_SEGMENT_NO_SCALAR);
    586             iBuilder->CreateStore(segNo, iBuilder->CreateGEP(consumerSegNoArray, { iBuilder->getInt32(0), iBuilder->getInt32(i) }));
     594            assert ("all instances must be created prior to initialization of any instance" && consumer->getInstance());
     595            Value * const segmentNoPtr = consumer->getScalarFieldPtr(LOGICAL_SEGMENT_NO_SCALAR);
     596            iBuilder->CreateStore(segmentNoPtr, iBuilder->CreateGEP(consumerSegNoArray, { iBuilder->getInt32(0), iBuilder->getInt32(i) }));
    587597        }
    588598        Value * const consumerCountPtr = iBuilder->CreateGEP(outputConsumers, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
     
    592602        args.push_back(outputConsumers);
    593603    }
     604
     605
    594606    iBuilder->CreateCall(getInitFunction(), args);
    595 }
    596 
    597 void KernelBuilder::terminateInstance() {
    598     iBuilder->CreateCall(getTerminateFunction(), { getInstance() });
    599607}
    600608
     
    602610//  each block of the given number of blocksToDo, and then updates counts.
    603611
    604 void BlockOrientedKernel::generateDoSegmentMethod(Value * doFinal, const std::vector<Value *> & producerPos) {
     612void BlockOrientedKernel::generateDoSegmentMethod() {
    605613
    606614    BasicBlock * const entryBlock = iBuilder->GetInsertBlock();
     
    613621    Value * baseTarget = nullptr;
    614622    if (useIndirectBr()) {
    615         baseTarget = iBuilder->CreateSelect(doFinal, BlockAddress::get(doFinalBlock), BlockAddress::get(segmentDone));
     623        baseTarget = iBuilder->CreateSelect(mIsFinal, BlockAddress::get(doFinalBlock), BlockAddress::get(segmentDone));
    616624    }
    617625
    618626    ConstantInt * stride = iBuilder->getSize(iBuilder->getStride());
    619     Value * availablePos = producerPos[0];
     627    Value * availablePos = mAvailableItemCount[0];
    620628    Value * processed = getProcessedItemCount(mStreamSetInputs[0].name);
    621629    Value * itemsAvail = iBuilder->CreateSub(availablePos, processed);
     
    674682        mStrideLoopBranch->addDestination(segmentDone);
    675683    } else {
    676         iBuilder->CreateUnlikelyCondBr(doFinal, doFinalBlock, segmentDone);
     684        iBuilder->CreateUnlikelyCondBr(mIsFinal, doFinalBlock, segmentDone);
    677685    }
    678686
     
    681689    iBuilder->SetInsertPoint(doFinalBlock);
    682690
    683     Value * remainingItems = iBuilder->CreateSub(producerPos[0], getProcessedItemCount(mStreamSetInputs[0].name));
     691    Value * remainingItems = iBuilder->CreateSub(mAvailableItemCount[0], getProcessedItemCount(mStreamSetInputs[0].name));
    684692    writeFinalBlockMethod(remainingItems);
    685693
    686     itemsDone = producerPos[0];
     694    itemsDone = mAvailableItemCount[0];
    687695    setProcessedItemCount(mStreamSetInputs[0].name, itemsDone);
    688696    setTerminationSignal();
     
    839847}
    840848
     849void KernelBuilder::finalizeInstance() {
     850    mOutputScalarResult = iBuilder->CreateCall(getTerminateFunction(), { getInstance() });
     851}
     852
     853KernelBuilder::StreamPort KernelBuilder::getStreamPort(const std::string & name) const {
     854    const auto f = mStreamMap.find(name);
     855    if (LLVM_UNLIKELY(f == mStreamMap.end())) {
     856        report_fatal_error(getName() + " does not contain stream set " + name);
     857    }
     858    return f->second;
     859}
    841860
    842861// CONSTRUCTOR
     
    849868                             std::vector<Binding> && internal_scalars)
    850869: KernelInterface(builder, std::move(kernelName), std::move(stream_inputs), std::move(stream_outputs), std::move(scalar_parameters), std::move(scalar_outputs), std::move(internal_scalars))
     870, mModule(nullptr)
    851871, mCurrentMethod(nullptr)
    852872, mNoTerminateAttribute(false)
    853 , mIsGenerated(false) {
     873, mIsGenerated(false)
     874, mIsFinal(nullptr)
     875, mOutputScalarResult(nullptr) {
    854876
    855877}
     
    886908
    887909}
     910
  • icGREP/icgrep-devel/icgrep/kernels/kernel.h

    r5411 r5418  
    77#define KERNEL_BUILDER_H
    88
    9 #include <string>           // for string
    10 #include <memory>           // for unique_ptr
    119#include "interface.h"      // for KernelInterface
    1210#include <boost/container/flat_map.hpp>
    1311#include <IR_Gen/idisa_builder.h>
    1412#include <kernels/pipeline.h>
    15 
    16 //namespace llvm { class ConstantInt; }
    1713#include <llvm/IR/Constants.h>
     14
     15//#include <string>           // for string
     16//#include <memory>           // for unique_ptr
     17
    1818namespace llvm { class Function; }
    1919namespace llvm { class IntegerType; }
     
    3232    using StreamMap = boost::container::flat_map<std::string, StreamPort>;
    3333    using StreamSetBuffers = std::vector<parabix::StreamSetBuffer *>;
    34 
    35     friend void ::generateSegmentParallelPipeline(IDISA::IDISA_Builder *, const std::vector<KernelBuilder *> &);
    36     friend void ::generatePipelineLoop(IDISA::IDISA_Builder *, const std::vector<KernelBuilder *> &);
    37     friend void ::generateParallelPipeline(IDISA::IDISA_Builder *, const std::vector<KernelBuilder *> &);
     34    using Kernels = std::vector<KernelBuilder *>;
     35
     36    friend void ::generateSegmentParallelPipeline(IDISA::IDISA_Builder *, const Kernels &);
     37    friend void ::generatePipelineLoop(IDISA::IDISA_Builder *, const Kernels &);
     38    friend void ::generateParallelPipeline(IDISA::IDISA_Builder *, const Kernels &);
    3839public:
    3940   
     
    7172    // Create a module stub for the kernel, populated only with its Module ID.     
    7273    //
    73     llvm::Module * createKernelStub(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs);
    74      
     74    void createKernelStub(const StreamSetBuffers & inputs, const StreamSetBuffers & outputs);
     75
     76    llvm::Module * getModule() const {
     77        return mModule;
     78    }
     79
    7580    // Generate the Kernel to the current module (iBuilder->getModule()).
    7681    void generateKernel();
     
    8085    void initializeInstance() final;
    8186
    82     void terminateInstance() final;
     87    void finalizeInstance() final;
    8388
    8489    llvm::Value * getProducedItemCount(const std::string & name, llvm::Value * doFinal = nullptr) const final;
     
    8994
    9095    void setProcessedItemCount(const std::string & name, llvm::Value * value) const final;
     96
     97    llvm::Value * getConsumedItemCount(const std::string & name) const final;
     98
     99    void setConsumedItemCount(const std::string & name, llvm::Value * value) const final;
    91100
    92101    bool hasNoTerminateAttribute() const {
     
    164173    llvm::CallInst * createDoSegmentCall(const std::vector<llvm::Value *> & args) const;
    165174
    166     llvm::CallInst * createGetAccumulatorCall(const std::string & accumName) const;
     175    llvm::Value * getAccumulator(const std::string & accumName) const;
    167176
    168177    virtual ~KernelBuilder() = 0;
     
    198207    virtual void prepareKernel();
    199208
    200     virtual void generateInitMethod() { }
    201    
    202     virtual void generateDoSegmentMethod(llvm::Value * doFinal, const std::vector<llvm::Value *> & producerPos) = 0;
    203 
    204     virtual void generateTerminateMethod() { }
     209    virtual void generateInitializeMethod() { }
     210   
     211    virtual void generateDoSegmentMethod() = 0;
     212
     213    virtual void generateFinalizeMethod() { }
    205214
    206215    // Add an additional scalar field to the KernelState struct.
     
    242251    llvm::Value * getRawOutputPointer(const std::string & name, llvm::Value * streamIndex, llvm::Value * absolutePosition) const;
    243252
     253    llvm::Value * getBaseAddress(const std::string & name) const;
     254
    244255    void setBaseAddress(const std::string & name, llvm::Value * addr) const;
    245256
     
    251262
    252263    llvm::Value * getAvailableItemCount(const std::string & name) const;
     264
     265    llvm::Value * getIsFinal() const {
     266        return mIsFinal;
     267    }
     268
    253269
    254270    llvm::BasicBlock * CreateBasicBlock(std::string && name) const;
     
    266282        return getScalarFieldPtr(instance, getScalarIndex(fieldName));
    267283    }
     284
     285    void callGenerateInitializeMethod();
     286
     287    void callGenerateDoSegmentMethod();
     288
     289    void callGenerateFinalizeMethod();
    268290
    269291    StreamPort getStreamPort(const std::string & name) const;
     
    283305    }
    284306
    285     void callGenerateInitMethod();
    286 
    287     void callGenerateDoSegmentMethod();
    288 
    289     void callGenerateTerminateMethod();
     307    const parabix::StreamSetBuffer * getAnyStreamSetBuffer(const std::string & name) const {
     308        unsigned index; Port port;
     309        std::tie(port, index) = getStreamPort(name);
     310        if (port == Port::Input) {
     311            assert (index < mStreamSetInputBuffers.size());
     312            return mStreamSetInputBuffers[index];
     313        } else {
     314            assert (index < mStreamSetOutputBuffers.size());
     315            return mStreamSetOutputBuffers[index];
     316        }
     317    }
    290318
    291319private:
     
    297325protected:
    298326
    299     llvm::Function *                mCurrentMethod;
    300     std::vector<llvm::Type *>       mKernelFields;
    301     KernelMap                       mKernelMap;
    302     StreamMap                       mStreamMap;
    303     StreamSetBuffers                mStreamSetInputBuffers;
    304     StreamSetBuffers                mStreamSetOutputBuffers;
    305     bool                            mNoTerminateAttribute;
    306     bool                            mIsGenerated;
     327    llvm::Module *                      mModule;
     328    llvm::Function *                    mCurrentMethod;
     329    bool                                mNoTerminateAttribute;
     330    bool                                mIsGenerated;
     331
     332    llvm::Value *                       mIsFinal;
     333    std::vector<llvm::Value *>          mAvailableItemCount;
     334    llvm::Value *                       mOutputScalarResult;
     335
     336
     337    std::vector<llvm::Type *>           mKernelFields;
     338    KernelMap                           mKernelMap;
     339    StreamMap                           mStreamMap;
     340    StreamSetBuffers                    mStreamSetInputBuffers;
     341    StreamSetBuffers                    mStreamSetOutputBuffers;
    307342
    308343};
     
    339374    virtual void generateFinalBlockMethod(llvm::Value * remainingItems);
    340375
    341     void generateDoSegmentMethod(llvm::Value * doFinal, const std::vector<llvm::Value *> & producerPos) override final;
     376    void generateDoSegmentMethod() override final;
    342377
    343378    BlockOrientedKernel(IDISA::IDISA_Builder * builder,
  • icGREP/icgrep-devel/icgrep/kernels/mmap_kernel.cpp

    r5414 r5418  
    44 */
    55#include "mmap_kernel.h"
    6 #include <llvm/IR/Function.h>  // for Function, Function::arg_iterator
    76#include <llvm/IR/Module.h>
    87#include <IR_Gen/idisa_builder.h>
    98#include <kernels/streamset.h>
    10 namespace llvm { class BasicBlock; }
    11 namespace llvm { class Constant; }
    12 namespace llvm { class Module; }
    13 namespace llvm { class Value; }
    149
    1510using namespace llvm;
     
    1712namespace kernel {
    1813
    19 void MMapSourceKernel::generateDoSegmentMethod(Value *doFinal, const std::vector<Value *> &producerPos) {
     14void MMapSourceKernel::generateInitializeMethod() {
     15    Value * fd = getScalarField("fileDescriptor");
     16    Value * fileSize = iBuilder->CreateFileSize(fd);
     17    if (mCodeUnitWidth > 8) {
     18        fileSize = iBuilder->CreateUDiv(fileSize, iBuilder->getSize(mCodeUnitWidth / 8));
     19    }
     20    Value * buffer = iBuilder->CreateFileSourceMMap(fd, fileSize);
     21    setBaseAddress("sourceBuffer", buffer);
     22    setBufferedSize("sourceBuffer", fileSize);   
     23    setScalarField("readableBuffer", buffer);
     24    setScalarField("fileSize", fileSize);
     25    iBuilder->CreateMAdvise(buffer, fileSize, CBuilder::MMAP_WILLNEED);
     26}
    2027
    21     BasicBlock * entryBlock = iBuilder->GetInsertBlock();
     28void MMapSourceKernel::generateDoSegmentMethod() {
     29
     30    BasicBlock * dropPages = CreateBasicBlock("dropPages");
     31    BasicBlock * produceData = CreateBasicBlock("produceData");
    2232    BasicBlock * setTermination = CreateBasicBlock("setTermination");
    2333    BasicBlock * mmapSourceExit = CreateBasicBlock("mmapSourceExit");
     34
     35    // instruct the OS that it can safely drop any fully consumed pages
     36    Value * consumed = getConsumedItemCount("sourceBuffer");
     37    Type * const consumedTy = consumed->getType();
     38    Type * const voidPtrTy = iBuilder->getVoidPtrTy();
     39
     40    // multiply the consumed count by the code unit size then mask off any partial pages
     41    if (mCodeUnitWidth > 8) {
     42        consumed = iBuilder->CreateMul(consumed, iBuilder->getSize(mCodeUnitWidth / 8));
     43    }
     44    const auto pageSize = getpagesize();
     45    if (LLVM_LIKELY((pageSize & (pageSize - 1)) == 0)) {
     46        consumed = iBuilder->CreateAnd(consumed, ConstantExpr::getNot(ConstantInt::get(consumedTy, pageSize - 1)));
     47    } else {
     48        consumed = iBuilder->CreateSub(consumed, iBuilder->CreateURem(consumed, ConstantInt::get(consumedTy, pageSize)));
     49    }
     50    Value * sourceBuffer = getBaseAddress("sourceBuffer");
     51    sourceBuffer = iBuilder->CreatePtrToInt(sourceBuffer, consumedTy);
     52    Value * consumedBuffer = iBuilder->CreateAdd(sourceBuffer, consumed);
     53    Value * readableBuffer = getScalarField("readableBuffer");
     54    readableBuffer = iBuilder->CreatePtrToInt(readableBuffer, consumedTy);
     55    Value * unnecessaryBytes = iBuilder->CreateSub(consumedBuffer, readableBuffer);
     56    // avoid calling madvise unless an actual page table change could occur
     57    Value * hasPagesToDrop = iBuilder->CreateICmpEQ(unnecessaryBytes, ConstantInt::getNullValue(unnecessaryBytes->getType()));
     58    iBuilder->CreateLikelyCondBr(hasPagesToDrop, produceData, dropPages);
     59
     60    iBuilder->SetInsertPoint(dropPages);
     61    iBuilder->CreateMAdvise(iBuilder->CreateIntToPtr(readableBuffer, voidPtrTy), unnecessaryBytes, CBuilder::MMAP_DONTNEED);   
     62    readableBuffer = iBuilder->CreateIntToPtr(iBuilder->CreateAdd(readableBuffer, unnecessaryBytes), voidPtrTy);
     63    setScalarField("readableBuffer", readableBuffer);
     64    iBuilder->CreateBr(produceData);
     65
     66    // determine whether or not we've exhausted the file buffer
     67    iBuilder->SetInsertPoint(produceData);
    2468    ConstantInt * segmentItems = iBuilder->getSize(mSegmentBlocks * iBuilder->getBitBlockWidth());
    25     Value * fileItems = getScalarField("fileSize");
    26     if (mCodeUnitWidth > 8) {
    27         fileItems = iBuilder->CreateUDiv(fileItems, iBuilder->getSize(mCodeUnitWidth / 8));
    28     }
     69    Value * const fileSize = getBufferedSize("sourceBuffer");
    2970    Value * produced = getProducedItemCount("sourceBuffer");
    3071    produced = iBuilder->CreateAdd(produced, segmentItems);
    31     Value * lessThanFullSegment = iBuilder->CreateICmpULT(fileItems, produced);
     72
     73    Value * lessThanFullSegment = iBuilder->CreateICmpULT(fileSize, produced);
    3274    iBuilder->CreateCondBr(lessThanFullSegment, setTermination, mmapSourceExit);
    3375    iBuilder->SetInsertPoint(setTermination);
     76
    3477    setTerminationSignal();
    3578    iBuilder->CreateBr(mmapSourceExit);
    3679
     80    // finally, set the "produced" count to reflect current position in the file
    3781    iBuilder->SetInsertPoint(mmapSourceExit);
     82    PHINode * itemsRead = iBuilder->CreatePHI(produced->getType(), 2);
     83    itemsRead->addIncoming(produced, produceData);
     84    itemsRead->addIncoming(fileSize, setTermination);
     85    setProducedItemCount("sourceBuffer", itemsRead);
     86}
    3887
    39     PHINode * itemsRead = iBuilder->CreatePHI(produced->getType(), 2);
    40     itemsRead->addIncoming(produced, entryBlock);
    41     itemsRead->addIncoming(fileItems, setTermination);
    42     setProducedItemCount("sourceBuffer", itemsRead);
     88void MMapSourceKernel::generateFinalizeMethod() {
     89    Value * buffer = getBaseAddress("sourceBuffer");
     90    Value * fileSize = getBufferedSize("sourceBuffer");
     91    iBuilder->CreateMUnmap(buffer, fileSize);
    4392}
    4493
     
    4796    {},
    4897    {Binding{iBuilder->getStreamSetTy(1, codeUnitWidth), "sourceBuffer"}},
    49     {Binding{iBuilder->getSizeTy(), "fileSize"}}, {}, {})
     98    {Binding{iBuilder->getInt32Ty(), "fileDescriptor"}}, {Binding{iBuilder->getSizeTy(), "fileSize"}}, {Binding{iBuilder->getVoidPtrTy(), "readableBuffer"}})
    5099, mSegmentBlocks(blocksPerSegment)
    51100, mCodeUnitWidth(codeUnitWidth) {
  • icGREP/icgrep-devel/icgrep/kernels/mmap_kernel.h

    r5398 r5418  
    2020    bool moduleIDisSignature() override {return true;}
    2121private:
    22     void generateDoSegmentMethod(llvm::Value * doFinal, const std::vector<llvm::Value *> & producerPos) override;
     22    void generateInitializeMethod() override;
     23    void generateDoSegmentMethod() override;
     24    void generateFinalizeMethod() override;
    2325private:
    2426    const unsigned mSegmentBlocks;
  • icGREP/icgrep-devel/icgrep/kernels/pipeline.cpp

    r5411 r5418  
    4444    IntegerType * const sizeTy = iBuilder->getSizeTy();
    4545    PointerType * const voidPtrTy = iBuilder->getVoidPtrTy();
    46     const unsigned threads = codegen::ThreadNum;
    4746    Constant * nullVoidPtrVal = ConstantPointerNull::getNullValue(voidPtrTy);
    48 
    4947    std::vector<Type *> structTypes;
    5048
     
    8381    segNo->addIncoming(segOffset, entryBlock);
    8482
    85     Value * doFinal = iBuilder->getFalse();
     83    Value * terminated = iBuilder->getFalse();
    8684    Value * const nextSegNo = iBuilder->CreateAdd(segNo, iBuilder->getSize(1));
    8785
     
    9088
    9189    StreamSetBufferMap<Value *> producedPos;
     90    StreamSetBufferMap<Value *> consumedPos;
    9291
    9392    for (unsigned k = 0; k < n; ++k) {
     
    124123        iBuilder->SetInsertPoint(segmentLoopBody);
    125124        const auto & inputs = kernel->getStreamInputs();
    126         std::vector<Value *> args = {kernel->getInstance(), doFinal};
     125        std::vector<Value *> args = {kernel->getInstance(), terminated};
    127126        for (unsigned i = 0; i < inputs.size(); ++i) {
    128127            const auto f = producedPos.find(kernel->getStreamSetInputBuffer(i));
    129             if (LLVM_UNLIKELY(f == producedPos.end())) {
    130                 report_fatal_error(kernel->getName() + " uses stream set " + inputs[i].name + " prior to its definition");
    131             }
     128            assert (f != producedPos.end());
    132129            args.push_back(f->second);
    133130        }
    134131
    135         kernel->createDoSegmentCall(args);
     132        kernel->createDoSegmentCall(args);       
    136133        if (!kernel->hasNoTerminateAttribute()) {
    137             doFinal = iBuilder->CreateOr(doFinal, kernel->getTerminationSignal());
     134            terminated = iBuilder->CreateOr(terminated, kernel->getTerminationSignal());
    138135        }
    139136
    140137        const auto & outputs = kernel->getStreamOutputs();
    141138        for (unsigned i = 0; i < outputs.size(); ++i) {
    142             Value * const produced = kernel->getProducedItemCount(outputs[i].name, doFinal);
     139            Value * const produced = kernel->getProducedItemCount(outputs[i].name, terminated);
    143140            const StreamSetBuffer * const buf = kernel->getStreamSetOutputBuffer(i);
    144141            assert (producedPos.count(buf) == 0);
    145142            producedPos.emplace(buf, produced);
    146143        }
     144        for (unsigned i = 0; i < inputs.size(); ++i) {
     145            Value * const processedItemCount = kernel->getProcessedItemCount(inputs[i].name);
     146            const StreamSetBuffer * const buf = kernel->getStreamSetInputBuffer(i);
     147            auto f = consumedPos.find(buf);
     148            if (f == consumedPos.end()) {
     149                consumedPos.emplace(buf, processedItemCount);
     150            } else {
     151                Value * lesser = iBuilder->CreateICmpULT(processedItemCount, f->second);
     152                f->second = iBuilder->CreateSelect(lesser, processedItemCount, f->second);
     153            }
     154        }
    147155
    148156        kernel->releaseLogicalSegmentNo(nextSegNo);
     
    151159    assert (segmentLoopBody);
    152160    exitThreadBlock->moveAfter(segmentLoopBody);
    153     segNo->addIncoming(iBuilder->CreateAdd(segNo, iBuilder->getSize(threads)), segmentLoopBody);
    154     iBuilder->CreateCondBr(doFinal, exitThreadBlock, segmentLoop);
     161
     162    for (const auto consumed : consumedPos) {
     163        const StreamSetBuffer * const buf = consumed.first;
     164        KernelBuilder * k = buf->getProducer();
     165        const auto & outputs = k->getStreamSetOutputBuffers();
     166        for (unsigned i = 0; i < outputs.size(); ++i) {
     167            if (outputs[i] == buf) {
     168                k->setConsumedItemCount(k->getStreamOutputs()[i].name, consumed.second);
     169                break;
     170            }
     171        }
     172    }
     173
     174    segNo->addIncoming(iBuilder->CreateAdd(segNo, iBuilder->getSize(codegen::ThreadNum)), segmentLoopBody);
     175    iBuilder->CreateCondBr(terminated, exitThreadBlock, segmentLoop);
    155176
    156177    iBuilder->SetInsertPoint(exitThreadBlock);
     178
     179    // only call pthread_exit() within spawned threads; otherwise it'll be equivalent to calling exit() within the process
     180    BasicBlock * const exitThread = BasicBlock::Create(iBuilder->getContext(), "ExitThread", threadFunc);
     181    BasicBlock * const exitFunction = BasicBlock::Create(iBuilder->getContext(), "ExitProcessFunction", threadFunc);
     182
     183    Value * const exitCond = iBuilder->CreateICmpEQ(segOffset, ConstantInt::getNullValue(segOffset->getType()));
     184    iBuilder->CreateCondBr(exitCond, exitFunction, exitThread);
     185    iBuilder->SetInsertPoint(exitThread);
    157186    iBuilder->CreatePThreadExitCall(nullVoidPtrVal);
     187    iBuilder->CreateBr(exitFunction);
     188    iBuilder->SetInsertPoint(exitFunction);
    158189    iBuilder->CreateRetVoid();
    159190
     
    168199    // MAKE SEGMENT PARALLEL PIPELINE DRIVER
    169200    // -------------------------------------------------------------------------------------------------------------------------
     201    const unsigned threads = codegen::ThreadNum - 1;
     202    assert (codegen::ThreadNum > 1);
    170203    Type * const pthreadsTy = ArrayType::get(sizeTy, threads);
    171204    AllocaInst * const pthreads = iBuilder->CreateAlloca(pthreadsTy);
     
    186219    }
    187220
     221    // use the process thread to handle the initial segment function after spawning (n - 1) threads to handle the subsequent offsets
    188222    for (unsigned i = 0; i < threads; ++i) {
    189         AllocaInst * threadState = iBuilder->CreateAlloca(threadStructType);
    190         Value * const sharedStatePtr = iBuilder->CreateGEP(threadState, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
    191         iBuilder->CreateStore(sharedStruct, sharedStatePtr);
    192         Value * const segmentOffsetPtr = iBuilder->CreateGEP(threadState, {iBuilder->getInt32(0), iBuilder->getInt32(1)});
    193         iBuilder->CreateStore(iBuilder->getSize(i), segmentOffsetPtr);
     223        AllocaInst * const threadState = iBuilder->CreateAlloca(threadStructType);
     224        iBuilder->CreateStore(sharedStruct, iBuilder->CreateGEP(threadState, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
     225        iBuilder->CreateStore(iBuilder->getSize(i + 1), iBuilder->CreateGEP(threadState, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
    194226        iBuilder->CreatePThreadCreateCall(threadIdPtr[i], nullVoidPtrVal, threadFunc, threadState);
    195227    }
     228
     229    AllocaInst * const threadState = iBuilder->CreateAlloca(threadStructType);
     230    iBuilder->CreateStore(sharedStruct, iBuilder->CreateGEP(threadState, {iBuilder->getInt32(0), iBuilder->getInt32(0)}));
     231    iBuilder->CreateStore(iBuilder->getSize(0), iBuilder->CreateGEP(threadState, {iBuilder->getInt32(0), iBuilder->getInt32(1)}));
     232    iBuilder->CreateCall(threadFunc, iBuilder->CreatePointerCast(threadState, voidPtrTy));
    196233
    197234    AllocaInst * const status = iBuilder->CreateAlloca(voidPtrTy);
     
    369406
    370407        iBuilder->SetInsertPoint(exitThreadBlock);
     408
    371409        iBuilder->CreatePThreadExitCall(nullVoidPtrVal);
     410
    372411        iBuilder->CreateRetVoid();
    373412
     
    406445
    407446    StreamSetBufferMap<Value *> producedPos;
     447    StreamSetBufferMap<Value *> consumedPos;
    408448
    409449    iBuilder->CreateBr(pipelineLoop);
     
    412452    Value * terminated = iBuilder->getFalse();
    413453    for (auto & kernel : kernels) {
     454
    414455        const auto & inputs = kernel->getStreamInputs();
     456        const auto & outputs = kernel->getStreamOutputs();
     457
    415458        std::vector<Value *> args = {kernel->getInstance(), terminated};
    416459        for (unsigned i = 0; i < inputs.size(); ++i) {
     
    421464            args.push_back(f->second);
    422465        }
    423         Value * const segNo = kernel->acquireLogicalSegmentNo();
     466
    424467        kernel->createDoSegmentCall(args);
    425468        if (!kernel->hasNoTerminateAttribute()) {
    426469            terminated = iBuilder->CreateOr(terminated, kernel->getTerminationSignal());
    427470        }
    428         const auto & outputs = kernel->getStreamOutputs();
    429471        for (unsigned i = 0; i < outputs.size(); ++i) {
    430472            Value * const produced = kernel->getProducedItemCount(outputs[i].name, terminated);
     
    434476        }
    435477
     478        for (unsigned i = 0; i < inputs.size(); ++i) {
     479            Value * const processedItemCount = kernel->getProcessedItemCount(inputs[i].name);
     480            const StreamSetBuffer * const buf = kernel->getStreamSetInputBuffer(i);
     481            auto f = consumedPos.find(buf);
     482            if (f == consumedPos.end()) {
     483                consumedPos.emplace(buf, processedItemCount);
     484            } else {
     485                Value * lesser = iBuilder->CreateICmpULT(processedItemCount, f->second);
     486                f->second = iBuilder->CreateSelect(lesser, processedItemCount, f->second);
     487            }
     488        }
     489
     490        Value * const segNo = kernel->acquireLogicalSegmentNo();
    436491        kernel->releaseLogicalSegmentNo(iBuilder->CreateAdd(segNo, iBuilder->getSize(1)));
     492    }
     493
     494    for (const auto consumed : consumedPos) {
     495        const StreamSetBuffer * const buf = consumed.first;
     496        KernelBuilder * k = buf->getProducer();
     497        const auto & outputs = k->getStreamSetOutputBuffers();
     498        for (unsigned i = 0; i < outputs.size(); ++i) {
     499            if (outputs[i] == buf) {
     500                k->setConsumedItemCount(k->getStreamOutputs()[i].name, consumed.second);
     501                break;
     502            }
     503        }
    437504    }
    438505
  • icGREP/icgrep-devel/icgrep/kernels/radix64.cpp

    r5402 r5418  
    4141// a continuous buffer for the full segment (number of blocks).
    4242
    43 void expand3_4Kernel::generateDoSegmentMethod(Value *doFinal, const std::vector<Value *> &) {
     43void expand3_4Kernel::generateDoSegmentMethod() {
    4444
    4545    BasicBlock * expand2_3entry = iBuilder->GetInsertBlock();
     
    9595    // process in multiples of 3 full blocks of data.
    9696    //
    97     Value * loopDivisor = iBuilder->CreateSelect(doFinal, triplePackSize, tripleBlockSize);
     97    Value * loopDivisor = iBuilder->CreateSelect(getIsFinal(), triplePackSize, tripleBlockSize);
    9898    Value * excessItems = iBuilder->CreateURem(itemsAvail, loopDivisor);
    9999    Value * loopItemsToDo = iBuilder->CreateSub(itemsAvail, excessItems);
     
    162162
    163163    // Except for final segment processing, we are done.
    164     iBuilder->CreateCondBr(doFinal, expand3_4_final, expand3_4_exit);
     164    iBuilder->CreateCondBr(getIsFinal(), expand3_4_final, expand3_4_exit);
    165165
    166166    // Final segment processing.   Less than a triplePack remains.
  • icGREP/icgrep-devel/icgrep/kernels/radix64.h

    r5297 r5418  
    2323    expand3_4Kernel(IDISA::IDISA_Builder * iBuilder);
    2424private:
    25     void generateDoSegmentMethod(llvm::Value *doFinal, const std::vector<llvm::Value *> &producerPos) override final;
     25    void generateDoSegmentMethod() override final;
    2626};
    2727
  • icGREP/icgrep-devel/icgrep/kernels/stdin_kernel.cpp

    r5411 r5418  
    1717namespace kernel {
    1818
    19 void StdInKernel::generateDoSegmentMethod(Value * /* doFinal */, const std::vector<Value *> & /* producerPos */) {
     19void StdInKernel::generateDoSegmentMethod() {
    2020
    2121    BasicBlock * const entryBlock = iBuilder->GetInsertBlock();
     
    3535    iBuilder->SetInsertPoint(readBlock);
    3636
     37//    Value * consumed = getConsumedItemCount("InputStream");
     38//    Value * remaining = iBuilder->CreateSub(itemsAlreadyRead, consumed);
     39
    3740    // how many pages are required to have enough data for the segment plus one overflow block?
    3841    const auto PageAlignedSegmentSize = round_up_to_nearest((mSegmentBlocks + 1) * iBuilder->getBitBlockWidth() * (mCodeUnitWidth / 8), getpagesize());
     
    4043    reserveBytes("InputStream", bytesToRead);
    4144    BasicBlock * const readExit = iBuilder->GetInsertBlock();
     45
     46
    4247    Value * const ptr = getRawOutputPointer("InputStream", iBuilder->getInt32(0), bufferedSize);
    4348    Value * const bytePtr = iBuilder->CreatePointerCast(ptr, iBuilder->getInt8PtrTy());
     
    7580}
    7681
    77 void FileSourceKernel::generateDoSegmentMethod(Value *doFinal, const std::vector<Value *> &) {
     82void FileSourceKernel::generateDoSegmentMethod() {
    7883
    7984    BasicBlock * entryBlock = iBuilder->GetInsertBlock();
     
    101106}
    102107
    103 void FileSourceKernel::generateInitMethod() {
     108void FileSourceKernel::generateInitializeMethod() {
    104109    setBaseAddress("sourceBuffer", getScalarField("fileSource"));
    105110    setBufferedSize("sourceBuffer", getScalarField("fileSize"));
  • icGREP/icgrep-devel/icgrep/kernels/stdin_kernel.h

    r5398 r5418  
    1919    bool moduleIDisSignature() override { return true; }
    2020protected:
    21     void generateDoSegmentMethod(llvm::Value * doFinal, const std::vector<llvm::Value *> & producerPos) override;
     21    void generateDoSegmentMethod() override;
    2222private:
    2323    unsigned mSegmentBlocks;
     
    3030    bool moduleIDisSignature() override { return true; }
    3131protected:
    32     void generateInitMethod() override;
    33     void generateDoSegmentMethod(llvm::Value * doFinal, const std::vector<llvm::Value *> & producerPos) override;
     32    void generateInitializeMethod() override;
     33    void generateDoSegmentMethod() override;
    3434private:
    3535    unsigned mSegmentBlocks;
  • icGREP/icgrep-devel/icgrep/kernels/stdout_kernel.cpp

    r5415 r5418  
    1818// However, if the segment spans two memory areas (e.g., because of wraparound),
    1919// then two write calls are made.
    20 void StdOutKernel::generateDoSegmentMethod(Value *doFinal, const std::vector<Value *> &producerPos) {
     20void StdOutKernel::generateDoSegmentMethod() {
    2121    PointerType * i8PtrTy = iBuilder->getInt8PtrTy();
    2222
     
    2424    Constant * itemBytes = iBuilder->getSize(mCodeUnitWidth / 8);
    2525    Value * processed = getProcessedItemCount("codeUnitBuffer");
    26     Value * itemsToDo = iBuilder->CreateSub(producerPos[0], processed);
     26    Value * itemsToDo = iBuilder->CreateSub(mAvailableItemCount[0], processed);
    2727    // There may be two memory areas if we are at the physical end of a circular buffer.
    2828    const auto b  = getInputStreamSetBuffer("codeUnitBuffer");
     
    5656        bytePtr = iBuilder->CreateGEP(bytePtr, byteOffset);
    5757
    58         itemsToDo = iBuilder->CreateSub(producerPos[0], processed);
     58        itemsToDo = iBuilder->CreateSub(mAvailableItemCount[0], processed);
    5959        iBuilder->CreateWriteCall(iBuilder->getInt32(1), bytePtr, iBuilder->CreateMul(itemsToDo, itemBytes));
    6060        processed = iBuilder->CreateAdd(processed, itemsToDo);
    61         setProcessedItemCount("codeUnitBuffer", producerPos[0]);
     61        setProcessedItemCount("codeUnitBuffer", mAvailableItemCount[0]);
    6262        iBuilder->CreateBr(stdoutExit);
    6363        iBuilder->SetInsertPoint(stdoutExit);
     
    7171}
    7272
    73 void FileSink::generateInitMethod() {
     73void FileSink::generateInitializeMethod() {
    7474    BasicBlock * setTerminationOnFailure = CreateBasicBlock("setTerminationOnFailure");
    7575    BasicBlock * fileSinkInitExit = CreateBasicBlock("fileSinkInitExit");
     
    9999}
    100100
    101 void FileSink::generateDoSegmentMethod(Value *doFinal, const std::vector<Value *> &producerPos) {
     101void FileSink::generateDoSegmentMethod() {
    102102
    103103    PointerType * i8PtrTy = iBuilder->getInt8PtrTy();
     
    148148        iBuilder->SetInsertPoint(checkFinal);
    149149    }
    150     iBuilder->CreateCondBr(doFinal, closeFile, fileOutExit);
     150    iBuilder->CreateCondBr(mIsFinal, closeFile, fileOutExit);
    151151
    152152    iBuilder->SetInsertPoint(closeFile);
  • icGREP/icgrep-devel/icgrep/kernels/stdout_kernel.h

    r5292 r5418  
    1616    StdOutKernel(IDISA::IDISA_Builder * iBuilder, unsigned codeUnitWidth);
    1717private:
    18     void generateDoSegmentMethod(llvm::Value * doFinal, const std::vector<llvm::Value *> & producerPos) override final;
     18    void generateDoSegmentMethod() override final;
    1919private:
    2020    const unsigned mCodeUnitWidth;
     
    2727    FileSink(IDISA::IDISA_Builder * iBuilder, unsigned codeUnitWidth);
    2828protected:
    29     void generateInitMethod() override final;
    30     void generateDoSegmentMethod(llvm::Value * doFinal, const std::vector<llvm::Value *> & producerPos) override final;
     29    void generateInitializeMethod() override final;
     30    void generateDoSegmentMethod() override final;
    3131private:
    3232    const unsigned mCodeUnitWidth;
  • icGREP/icgrep-devel/icgrep/kernels/streamset.cpp

    r5411 r5418  
    178178void SourceFileBuffer::setBaseAddress(Value * self, Value * addr) const {
    179179    Value * ptr = iBuilder->CreateGEP(self, {iBuilder->getInt32(0), iBuilder->getInt32(0)});
    180     iBuilder->CreateStore(addr, ptr);
     180    iBuilder->CreateStore(iBuilder->CreatePointerCast(addr, ptr->getType()->getPointerElementType()), ptr);
    181181}
    182182
  • icGREP/icgrep-devel/icgrep/kernels/toolchain.cpp

    r5414 r5418  
    2929#include <kernels/pipeline.h>
    3030#include <kernels/kernel.h>
    31 #ifdef CUDA_ENABLED
    32 #include <IR_Gen/llvm2ptx.h>
    33 #endif
     31#include <sys/stat.h>
    3432
    3533using namespace llvm;
     
    229227}
    230228
    231 
    232229void ParabixDriver::addKernelCall(kernel::KernelBuilder & kb, const std::vector<parabix::StreamSetBuffer *> & inputs, const std::vector<parabix::StreamSetBuffer *> & outputs) {
    233     assert (mModuleMap.count(&kb) == 0);
     230    assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kb.getModule() == nullptr));
    234231    mPipeline.push_back(&kb);
    235     mModuleMap.emplace(&kb, kb.createKernelStub(inputs, outputs));
     232    kb.createKernelStub(inputs, outputs);
    236233}
    237234
    238235void ParabixDriver::makeKernelCall(kernel::KernelBuilder * kb, const std::vector<parabix::StreamSetBuffer *> & inputs, const std::vector<parabix::StreamSetBuffer *> & outputs) {
    239     assert (mModuleMap.count(kb) == 0);
     236    assert ("addKernelCall or makeKernelCall was already run on this kernel." && (kb->getModule() == nullptr));
    240237    mPipeline.push_back(kb);
    241     mModuleMap.emplace(kb, kb->createKernelStub(inputs, outputs));
     238    kb->createKernelStub(inputs, outputs);
    242239}
    243240
     
    272269    }
    273270    for (const auto & k : mPipeline) {
    274         k->terminateInstance();
    275     }
    276 }
    277 
    278 void ParabixDriver::addExternalLink(kernel::KernelBuilder & kb, llvm::StringRef name, FunctionType *type, void * functionPtr) const {
    279     const auto f = mModuleMap.find(&kb);
    280     assert ("addKernelCall(kb, ...) must be called before addExternalLink(kb, ...)" && f != mModuleMap.end());
    281     mEngine->addGlobalMapping(cast<Function>(f->second->getOrInsertFunction(name, type)), functionPtr);
     271        k->finalizeInstance();
     272    }
     273}
     274
     275void ParabixDriver::addExternalLink(kernel::KernelBuilder & kb, llvm::StringRef name, FunctionType * type, void * functionPtr) const {
     276    assert ("addKernelCall or makeKernelCall must be called before addExternalLink" && (kb.getModule() != nullptr));
     277    mEngine->addGlobalMapping(cast<Function>(kb.getModule()->getOrInsertFunction(name, type)), functionPtr);
     278}
     279
     280uint64_t file_size(const uint32_t fd) {
     281    struct stat st;
     282    if (LLVM_UNLIKELY(fstat(fd, &st) != 0)) {
     283        st.st_size = 0;
     284    }
     285    return st.st_size;
    282286}
    283287
     
    322326    #endif
    323327
     328    FunctionType * fileSizeType = FunctionType::get(iBuilder->getInt64Ty(), { iBuilder->getInt32Ty() });
     329    mEngine->addGlobalMapping(cast<Function>(mMainModule->getOrInsertFunction("file_size", fileSizeType)), (void *)&file_size);
     330
    324331    PM.run(*m);
    325     for (auto pair : mModuleMap) {
    326         kernel::KernelBuilder * const kb = std::get<0>(pair);
    327         m = std::get<1>(pair);
     332    for (kernel::KernelBuilder * const kb : mPipeline) {
     333        m = kb->getModule();
    328334        bool uncachedObject = true;
    329335        if (mCache) {
     
    352358    } catch (...) { m->dump(); throw; }
    353359    #endif
    354     mModuleMap.clear();
    355360}
    356361
  • icGREP/icgrep-devel/icgrep/kernels/toolchain.h

    r5414 r5418  
    1010#include <IR_Gen/idisa_builder.h>
    1111#include <llvm/IR/TypeBuilder.h>
     12#include <kernels/kernel.h>
     13#include <kernels/streamset.h>
    1214#include <boost/container/flat_map.hpp>
    1315
     
    1921namespace IDISA { class IDISA_Builder; }
    2022namespace kernel { class KernelBuilder; }
    21 //namespace parabix { class StreamSetBuffer; }
    22 #include <kernels/streamset.h>
     23
    2324class ParabixObjectCache;
    2425
     
    8283    void addExternalLink(kernel::KernelBuilder & kb, llvm::StringRef name, ExternalFunctionType * functionPtr) const;
    8384
    84     void addExternalLink(kernel::KernelBuilder & kb, llvm::StringRef name, llvm::FunctionType * type, void * functionPtr) const;
    85 
    8685    void linkAndFinalize();
    8786   
    8887    void * getPointerToMain();
     88
     89private:
     90
     91
     92    void addExternalLink(kernel::KernelBuilder & kb, llvm::StringRef name, llvm::FunctionType * type, void * functionPtr) const;
     93
    8994
    9095private:
     
    98103    std::vector<std::unique_ptr<kernel::KernelBuilder>> mOwnedKernels;
    99104    std::vector<std::unique_ptr<parabix::StreamSetBuffer>> mOwnedBuffers;
    100     ModuleMap                               mModuleMap;
    101105};
    102106
  • icGREP/icgrep-devel/icgrep/pablo/pablo_kernel.cpp

    r5398 r5418  
    1010#include <pablo/pe_zeroes.h>
    1111#include <pablo/pe_ones.h>
     12#include <pablo/pablo_toolchain.h>
    1213#include "llvm/Support/Debug.h"
    1314
     
    168169        result->setScalar();
    169170    }
     171    if (DebugOptionIsSet(DumpTrace)) {
     172        setName(getName() + "_DumpTrace");
     173    }
    170174}
    171175
  • icGREP/icgrep-devel/icgrep/re/re_parser.cpp

    r5308 r5418  
    644644    RE * propValueRe = RE_Parser::parse("^" + regexValue + "$", fModeFlagSet, mReSyntax);
    645645    GrepEngine engine;
    646     engine.grepCodeGen("NamePattern", propValueRe, false, false, GrepType::PropertyValue);
     646    engine.grepCodeGen("NamePattern", { propValueRe }, false, false, GrepSource::Internal, GrepType::PropertyValue);
    647647    const auto matches = engine.grepPropertyValues(propName);
    648648    if (matches.empty()) {
     
    677677   
    678678    GrepEngine engine;
    679     engine.grepCodeGen("NamePattern", embedded, false, false, GrepType::NameExpression);
     679    engine.grepCodeGen("NamePattern", { embedded }, false, false, GrepSource::Internal, GrepType::NameExpression);
    680680    CC * codepoints = engine.grepCodepoints();
    681681   
  • icGREP/icgrep-devel/icgrep/u8u16.cpp

    r5414 r5418  
    2424#include <pablo/pe_zeroes.h>
    2525#include <kernels/toolchain.h>
    26 #include <boost/iostreams/device/mapped_file.hpp>  // for mapped_file_source
    27 #include <boost/filesystem.hpp>
    28 #include <boost/interprocess/anonymous_shared_memory.hpp>
    2926#include "kernels/streamset.h"                     // for CircularBuffer
    3027#include <kernels/pipeline.h>
     
    3633#include "llvm/Support/Compiler.h"                 // for LLVM_UNLIKELY
    3734#include <pablo/builder.hpp>                       // for PabloBuilder
     35#include <boost/interprocess/anonymous_shared_memory.hpp>
     36#include <boost/interprocess/mapped_region.hpp>
    3837#include <iostream>
    3938
     
    271270    assert (iBuilder);
    272271
    273     Type * const size_ty = iBuilder->getSizeTy();
    274272    Type * const voidTy = iBuilder->getVoidTy();
    275273    Type * const bitBlockType = iBuilder->getBitBlockType();
    276     Type * const inputType = ArrayType::get(ArrayType::get(bitBlockType, 8), 1)->getPointerTo();
    277274    Type * const outputType = ArrayType::get(ArrayType::get(bitBlockType, 16), 1)->getPointerTo();
    278    
    279     Function * const main = cast<Function>(mod->getOrInsertFunction("Main", voidTy, inputType, outputType, size_ty, nullptr));
     275
     276    Function * const main = cast<Function>(mod->getOrInsertFunction("Main", voidTy, iBuilder->getInt32Ty(), outputType, nullptr));
    280277    main->setCallingConv(CallingConv::C);
    281278    Function::arg_iterator args = main->arg_begin();
    282    
    283     Value * const inputStream = &*(args++);
    284     inputStream->setName("inputStream");
     279
     280    Value * const fileDecriptor = &*(args++);
     281    fileDecriptor->setName("fileDecriptor");
    285282    Value * const outputStream = &*(args++);
    286283    outputStream->setName("outputStream");
    287     Value * const fileSize = &*(args++);
    288     fileSize->setName("fileSize");
    289284
    290285    iBuilder->SetInsertPoint(BasicBlock::Create(mod->getContext(), "entry", main,0));
    291286   
    292287    // File data from mmap
    293     StreamSetBuffer * ByteStream = pxDriver.addExternalBuffer(make_unique<ExternalFileBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8)), inputStream);
     288    StreamSetBuffer * ByteStream = pxDriver.addBuffer(make_unique<SourceFileBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8)));
    294289   
    295290    KernelBuilder * mmapK = pxDriver.addKernelInstance(make_unique<MMapSourceKernel>(iBuilder, segmentSize));
    296     mmapK->setInitialArguments({fileSize});
     291    mmapK->setInitialArguments({fileDecriptor});
    297292    pxDriver.makeKernelCall(mmapK, {}, {ByteStream});
    298293   
     
    371366    assert (iBuilder);
    372367   
    373     Type * const size_ty = iBuilder->getSizeTy();
    374368    Type * const voidTy = iBuilder->getVoidTy();
    375369    Type * const bitBlockType = iBuilder->getBitBlockType();
    376     Type * const inputType = ArrayType::get(ArrayType::get(bitBlockType, 8), 1)->getPointerTo();
    377370    Type * const outputType = ArrayType::get(ArrayType::get(bitBlockType, 16), 1)->getPointerTo();
    378371   
    379     Function * const main = cast<Function>(mod->getOrInsertFunction("Main", voidTy, inputType, outputType, size_ty, nullptr));
     372    Function * const main = cast<Function>(mod->getOrInsertFunction("Main", voidTy, iBuilder->getInt32Ty(), outputType, nullptr));
    380373    main->setCallingConv(CallingConv::C);
    381374    Function::arg_iterator args = main->arg_begin();
    382375   
    383     Value * const inputStream = &*(args++);
    384     inputStream->setName("inputStream");
     376    Value * const fileDecriptor = &*(args++);
     377    fileDecriptor->setName("fileDecriptor");
    385378    Value * const outputStream = &*(args++);
    386379    outputStream->setName("outputStream");
    387     Value * const fileSize = &*(args++);
    388     fileSize->setName("fileSize");
    389    
     380
    390381    iBuilder->SetInsertPoint(BasicBlock::Create(mod->getContext(), "entry", main,0));
    391382
    392383    // File data from mmap
    393     StreamSetBuffer * ByteStream = pxDriver.addExternalBuffer(make_unique<ExternalFileBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8)), inputStream);
     384    StreamSetBuffer * ByteStream = pxDriver.addBuffer(make_unique<SourceFileBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8)));
    394385   
    395386    KernelBuilder * mmapK = pxDriver.addKernelInstance(make_unique<MMapSourceKernel>(iBuilder, segmentSize));
    396     mmapK->setInitialArguments({fileSize});
     387    mmapK->setInitialArguments({fileDecriptor});
    397388    pxDriver.makeKernelCall(mmapK, {}, {ByteStream});
    398389   
     
    443434
    444435
    445 typedef void (*u8u16FunctionType)(char * byte_data, char * output_data, size_t filesize);
     436typedef void (*u8u16FunctionType)(uint32_t fd, char * output_data);
    446437
    447438u8u16FunctionType u8u16CodeGen(void) {
     
    465456}
    466457
     458size_t file_size(const int fd) {
     459    struct stat st;
     460    if (LLVM_UNLIKELY(fstat(fd, &st) != 0)) {
     461        st.st_size = 0;
     462    }
     463    return st.st_size;
     464}
     465
    467466void u8u16(u8u16FunctionType fn_ptr, const std::string & fileName) {
    468 
    469     const boost::filesystem::path file(fileName);
    470     if (exists(file)) {
    471         if (is_directory(file)) {
    472             return;
     467    const int fd = open(fileName.c_str(), O_RDONLY);
     468    if (LLVM_UNLIKELY(fd == -1)) {
     469        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
     470    } else {
     471        const auto fileSize = file_size(fd);
     472        if (mMapBuffering) {
     473            boost::interprocess::mapped_region outputBuffer(boost::interprocess::anonymous_shared_memory(2 * fileSize));
     474            outputBuffer.advise(boost::interprocess::mapped_region::advice_willneed);
     475            outputBuffer.advise(boost::interprocess::mapped_region::advice_sequential);
     476            fn_ptr(fd, static_cast<char*>(outputBuffer.get_address()));
     477        } else if (memAlignBuffering) {
     478            char * outputBuffer;
     479            const auto r = posix_memalign(reinterpret_cast<void **>(&outputBuffer), 32, 2 * fileSize);
     480            if (LLVM_UNLIKELY(r != 0)) {
     481                throw std::runtime_error("posix_memalign failed with return code " + std::to_string(r));
     482            }
     483            fn_ptr(fd, outputBuffer);
     484            free(reinterpret_cast<void *>(outputBuffer));
     485        } else { /* No external output buffer */
     486            fn_ptr(fd, nullptr);
    473487        }
    474     } else {
    475         std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
    476         return;
    477     }
    478    
    479     size_t fileSize = file_size(file);
    480     boost::iostreams::mapped_file_source input;
    481 
    482     char * fileBuffer = nullptr;
    483     if (fileSize) {
    484         try {
    485             input.open(fileName);
    486             fileBuffer = const_cast<char *>(input.data());
    487         } catch (std::exception & e) {
    488             throw std::runtime_error("Boost mmap error: " + fileName + ": " + e.what());
    489         }       
    490     }
    491 
    492     if (mMapBuffering) {
    493         boost::interprocess::mapped_region outputBuffer(boost::interprocess::anonymous_shared_memory(2*fileSize));
    494         outputBuffer.advise(boost::interprocess::mapped_region::advice_willneed);
    495         outputBuffer.advise(boost::interprocess::mapped_region::advice_sequential);
    496         fn_ptr(fileBuffer, static_cast<char*>(outputBuffer.get_address()), fileSize);
    497     } else if (memAlignBuffering) {
    498         char * outputBuffer;
    499         const auto r = posix_memalign(reinterpret_cast<void **>(&outputBuffer), 32, 2*fileSize);
    500         if (LLVM_UNLIKELY(r != 0)) {
    501             throw std::runtime_error("posix_memalign failed with return code " + std::to_string(r));
    502         }
    503         fn_ptr(fileBuffer, outputBuffer, fileSize);
    504         free(reinterpret_cast<void *>(outputBuffer));
    505     } else {
    506         /* No external output buffer */
    507         fn_ptr(fileBuffer, nullptr, fileSize);
    508     }
    509     input.close();
    510    
     488        close(fd);
     489    }
    511490}
    512491
  • icGREP/icgrep-devel/icgrep/wc.cpp

    r5414 r5418  
    2525#include <pablo/pablo_compiler.h>
    2626#include <pablo/pablo_toolchain.h>
    27 #include <boost/filesystem.hpp>
    28 #include <boost/iostreams/device/mapped_file.hpp>
    29 
     27#include <fcntl.h>
    3028
    3129using namespace llvm;
     
    135133
    136134
    137 typedef void (*wcFunctionType)(char * byte_data, size_t filesize, size_t fileIdx);
     135typedef void (*WordCountFunctionType)(uint32_t fd, size_t fileIdx);
    138136
    139137void wcPipelineGen(ParabixDriver & pxDriver) {
     
    142140    Module * m = iBuilder->getModule();
    143141   
    144     Type * mBitBlockType = iBuilder->getBitBlockType();
    145     Constant * record_counts_routine;
    146     Type * const size_ty = iBuilder->getSizeTy();
     142    Type * const int32Ty = iBuilder->getInt32Ty();
     143    Type * const sizeTy = iBuilder->getSizeTy();
    147144    Type * const voidTy = iBuilder->getVoidTy();
    148     record_counts_routine = m->getOrInsertFunction("record_counts", voidTy, size_ty, size_ty, size_ty, size_ty, size_ty, nullptr);
    149     Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(mBitBlockType, 8), 1), 0);
    150    
    151     Function * const main = cast<Function>(m->getOrInsertFunction("Main", voidTy, inputType, size_ty, size_ty, nullptr));
     145
     146    FunctionType * const recordCountsType = FunctionType::get(voidTy, {sizeTy, sizeTy, sizeTy, sizeTy, sizeTy}, false);
     147    Constant * const recordCounts = m->getOrInsertFunction("record_counts", recordCountsType);
     148
     149    FunctionType * const mainType = FunctionType::get(voidTy, {int32Ty, sizeTy}, false);
     150    Function * const main = cast<Function>(m->getOrInsertFunction("Main", mainType));
    152151    main->setCallingConv(CallingConv::C);
    153     Function::arg_iterator args = main->arg_begin();
    154    
    155     Value * const inputStream = &*(args++);
    156     inputStream->setName("input");
    157     Value * const fileSize = &*(args++);
    158     fileSize->setName("fileSize");
     152    Function::arg_iterator args = main->arg_begin();   
     153    Value * const fileDecriptor = &*(args++);
     154    fileDecriptor->setName("fileDecriptor");
    159155    Value * const fileIdx = &*(args++);
    160156    fileIdx->setName("fileIdx");
     157
    161158    iBuilder->SetInsertPoint(BasicBlock::Create(m->getContext(), "entry", main,0));
    162159
    163     StreamSetBuffer * ByteStream = pxDriver.addExternalBuffer(make_unique<ExternalFileBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8)), inputStream);
    164 
    165     StreamSetBuffer * BasisBits = pxDriver.addBuffer(make_unique<SingleBlockBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1)));
     160    StreamSetBuffer * const ByteStream = pxDriver.addBuffer(make_unique<SourceFileBuffer>(iBuilder, iBuilder->getStreamSetTy(1, 8)));
     161
     162    StreamSetBuffer * const BasisBits = pxDriver.addBuffer(make_unique<SingleBlockBuffer>(iBuilder, iBuilder->getStreamSetTy(8, 1)));
    166163
    167164    KernelBuilder * mmapK = pxDriver.addKernelInstance(make_unique<MMapSourceKernel>(iBuilder));
    168     mmapK->setInitialArguments({fileSize});
     165    mmapK->setInitialArguments({fileDecriptor});
    169166    pxDriver.makeKernelCall(mmapK, {}, {ByteStream});
    170167
     
    178175    pxDriver.generatePipelineIR();
    179176   
    180     Value * lineCount = wck->createGetAccumulatorCall("lineCount");
    181     Value * wordCount = wck->createGetAccumulatorCall("wordCount");
    182     Value * charCount = wck->createGetAccumulatorCall("charCount");
    183 
    184     iBuilder->CreateCall(record_counts_routine, std::vector<Value *>({lineCount, wordCount, charCount, fileSize, fileIdx}));
     177    Value * const fileSize = mmapK->getAccumulator("fileSize");
     178    Value * const lineCount = wck->getAccumulator("lineCount");
     179    Value * const wordCount = wck->getAccumulator("wordCount");
     180    Value * const charCount = wck->getAccumulator("charCount");
     181
     182    iBuilder->CreateCall(recordCounts, {lineCount, wordCount, charCount, fileSize, fileIdx});
    185183   
    186184    iBuilder->CreateRetVoid();
     
    190188
    191189
    192 wcFunctionType wcCodeGen(void) {
     190WordCountFunctionType wcCodeGen() {
    193191    Module * M = new Module("wc", getGlobalContext());
    194192    IDISA::IDISA_Builder * idb = IDISA::GetIDISA_Builder(M);
     
    197195    wcPipelineGen(pxDriver);
    198196
    199     wcFunctionType main = reinterpret_cast<wcFunctionType>(pxDriver.getPointerToMain());
     197    WordCountFunctionType main = reinterpret_cast<WordCountFunctionType>(pxDriver.getPointerToMain());
    200198    delete idb;
    201199    return main;
    202200}
    203201
    204 void wc(wcFunctionType fn_ptr, const int64_t fileIdx) {
     202void wc(WordCountFunctionType fn_ptr, const int64_t fileIdx) {
    205203    std::string fileName = inputFiles[fileIdx];
    206     size_t fileSize;
    207     char * fileBuffer;
    208    
    209     const boost::filesystem::path file(fileName);
    210     if (exists(file)) {
    211         if (is_directory(file)) {
    212             return;
    213         }
     204    const int fd = open(fileName.c_str(), O_RDONLY);
     205    if (LLVM_UNLIKELY(fd == -1)) {
     206        std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
    214207    } else {
    215         std::cerr << "Error: cannot open " << fileName << " for processing. Skipped.\n";
    216         return;
    217     }
    218    
    219     fileSize = file_size(file);
    220     boost::iostreams::mapped_file_source mappedFile;
    221     if (fileSize == 0) {
    222         fileBuffer = nullptr;
    223     }
    224     else {
    225         try {
    226             mappedFile.open(fileName);
    227         } catch (std::exception &e) {
    228             std::cerr << "Error: Boost mmap of " << fileName << ": " << e.what() << std::endl;
    229             return;
    230         }
    231         fileBuffer = const_cast<char *>(mappedFile.data());
    232     }
    233     fn_ptr(fileBuffer, fileSize, fileIdx);
    234 
    235     mappedFile.close();
    236    
    237 }
    238 
    239 
     208        fn_ptr(fd, fileIdx);
     209        close(fd);
     210    }
     211}
    240212
    241213int main(int argc, char *argv[]) {
     
    247219        CountWords = true;
    248220        CountBytes = true;
    249     }
    250     else {
     221    } else {
    251222        CountLines = false;
    252223        CountWords = false;
     
    263234    }
    264235   
    265    
    266     wcFunctionType fn_ptr = wcCodeGen();
    267 
    268     int fileCount = inputFiles.size();
     236    WordCountFunctionType wordCountFunctionPtr = wcCodeGen();
     237
     238    const auto fileCount = inputFiles.size();
    269239    lineCount.resize(fileCount);
    270240    wordCount.resize(fileCount);
     
    272242    byteCount.resize(fileCount);
    273243   
    274     for (unsigned i = 0; i < inputFiles.size(); ++i) {
    275         wc(fn_ptr, i);
     244    for (unsigned i = 0; i < fileCount; ++i) {
     245        wc(wordCountFunctionPtr, i);
    276246    }
    277247   
Note: See TracChangeset for help on using the changeset viewer.