Ignore:
Timestamp:
Mar 29, 2017, 2:29:52 PM (2 years ago)
Author:
nmedfort
Message:

Support for stdin. Needs more testing.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5369 r5377  
    2424#include <kernels/scanmatchgen.h>
    2525#include <kernels/streamset.h>
    26 #include <kernels/interface.h>
     26#include <kernels/stdin_kernel.h>
    2727#include <pablo/pablo_compiler.h>
    2828#include <pablo/pablo_kernel.h>
     
    3434#include <sstream>
    3535#include <cc/multiplex_CCs.h>
     36
     37#include <llvm/Support/raw_ostream.h>
     38
    3639#ifdef CUDA_ENABLED
    3740#include <IR_Gen/CudaDriver.h>
     
    4043#include <util/aligned_allocator.h>
    4144
    42 
    4345using namespace parabix;
    4446using namespace llvm;
     
    5759static cl::opt<bool> ShowLineNumbers("n", cl::desc("Show the line number with each matching line."), cl::cat(bGrepOutputOptions));
    5860static cl::alias ShowLineNumbersLong("line-number", cl::desc("Alias for -n"), cl::aliasopt(ShowLineNumbers));
     61
     62/// INVESTIGATE: why does icgrep report that stdin is not empty even when nothing is being piped into it?
     63static cl::opt<bool> UseStdIn("stdin", cl::desc("Read from standard input."), cl::cat(bGrepOutputOptions));
    5964
    6065bool isUTF_16 = false;
     
    7176#endif
    7277
    73 void GrepEngine::doGrep(const std::string & fileName, const int fileIdx, bool CountOnly, std::vector<size_t> & total_CountOnly, bool UTF_16) {
     78void GrepEngine::doGrep(const std::string & fileName, const int fileIdx, bool CountOnly, std::vector<size_t> & total_CountOnly) {
    7479    boost::filesystem::path file(fileName);
    7580    if (exists(file)) {
     
    146151}
    147152
     153void GrepEngine::doGrep(const int fileIdx, bool CountOnly, std::vector<size_t> & total_CountOnly) {
     154    if (CountOnly) {
     155        total_CountOnly[fileIdx] = mGrepFunction_CountOnly(nullptr, 0, fileIdx);
     156    } else {
     157        mGrepFunction(nullptr, 0, fileIdx);
     158    }
     159}
    148160
    149161Function * generateGPUKernel(Module * m, IDISA::IDISA_Builder * iBuilder, bool CountOnly){
     
    223235    const unsigned segmentSize = codegen::SegmentSize;
    224236   
     237    ExternalFileBuffer InputStream(iBuilder, iBuilder->getStreamSetTy(1, 8));
     238    InputStream.setStreamSetBuffer(inputStream);
     239
    225240    ExternalFileBuffer MatchResults(iBuilder, iBuilder->getStreamSetTy(1, 1));
    226     MatchResults.setStreamSetBuffer(rsltStream, fileSize);
     241    MatchResults.setStreamSetBuffer(rsltStream);
    227242
    228243    kernel::MMapSourceKernel mmapK1(iBuilder, segmentSize);
     
    232247
    233248    ExternalFileBuffer LineBreak(iBuilder, iBuilder->getStreamSetTy(1, 1));
    234     LineBreak.setStreamSetBuffer(lbStream, fileSize);
     249    LineBreak.setStreamSetBuffer(lbStream);
    235250   
    236251    kernel::MMapSourceKernel mmapK2(iBuilder, segmentSize);
     
    239254    mmapK2.setInitialArguments({fileSize});
    240255
    241     kernel::ScanMatchKernel scanMatchK(iBuilder, grepType);
    242     scanMatchK.generateKernel({&MatchResults, &LineBreak}, {});
    243     scanMatchK.setInitialArguments({iBuilder->CreateBitCast(inputStream, int8PtrTy), fileSize, fileIdx});
     256    kernel::ScanMatchKernel scanMatchK(iBuilder, grepType, 8);
     257    scanMatchK.generateKernel({&InputStream, &MatchResults, &LineBreak}, {});
     258    scanMatchK.setInitialArguments({fileIdx});
    244259   
    245260    generatePipeline(iBuilder, {&mmapK1, &mmapK2, &scanMatchK});
     
    261276    mGrepType = grepType;
    262277
    263     Type * const size_ty = iBuilder->getSizeTy();
    264     Type * const int8PtrTy = iBuilder->getInt8PtrTy();
     278    Type * const sizeTy = iBuilder->getSizeTy();
    265279    Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(iBuilder->getBitBlockType(), encodingBits), 1), 0);
    266     Type * const resultTy = CountOnly ? size_ty : iBuilder->getVoidTy();
    267 
    268     Function * mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, size_ty, size_ty, nullptr));
     280    Type * const resultTy = CountOnly ? sizeTy : iBuilder->getVoidTy();
     281
     282    Function * mainFn = cast<Function>(M->getOrInsertFunction("Main", resultTy, inputType, sizeTy, sizeTy, nullptr));
    269283    mainFn->setCallingConv(CallingConv::C);
    270284    iBuilder->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFn, 0));
     
    278292    fileIdx->setName("fileIdx");
    279293
    280     ExternalFileBuffer ByteStream(iBuilder, iBuilder->getStreamSetTy(1, 8));   
     294    StreamSetBuffer * byteStream = nullptr;
     295    kernel::KernelBuilder * sourceK = nullptr;
     296//    if (usingStdIn) {
     297//        byteStream = new ExtensibleBuffer(iBuilder, iBuilder->getStreamSetTy(1, 8));
     298//        cast<ExtensibleBuffer>(byteStream)->allocateBuffer();
     299//        sourceK = new kernel::StdInKernel(iBuilder, segmentSize);
     300//        sourceK->generateKernel({}, {byteStream});
     301//    } else {
     302        byteStream = new ExternalFileBuffer(iBuilder, iBuilder->getStreamSetTy(1, 8));
     303        cast<ExternalFileBuffer>(byteStream)->setStreamSetBuffer(inputStream);
     304        sourceK = new kernel::MMapSourceKernel(iBuilder, segmentSize);
     305        sourceK->generateKernel({}, {byteStream});
     306        sourceK->setInitialArguments({fileSize});
     307//    }
     308
    281309    CircularBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8), segmentSize * bufferSegments);
    282     ByteStream.setStreamSetBuffer(inputStream, fileSize);
    283310    BasisBits.allocateBuffer();
    284    
    285     kernel::MMapSourceKernel mmapK(iBuilder, segmentSize);
    286     mmapK.generateKernel({}, {&ByteStream});
    287     mmapK.setInitialArguments({fileSize});
    288311
    289312    kernel::S2PKernel  s2pk(iBuilder);
    290     s2pk.generateKernel({&ByteStream}, {&BasisBits});
     313    s2pk.generateKernel({byteStream}, {&BasisBits});
    291314   
    292315    std::vector<pablo::PabloKernel *> icgrepKs;
     
    304327
    305328    std::vector<kernel::KernelBuilder *> KernelList;
    306     KernelList.push_back(&mmapK);
     329    KernelList.push_back(sourceK);
    307330    KernelList.push_back(&s2pk);
    308331
     
    335358
    336359    } else {
    337         kernel::ScanMatchKernel scanMatchK(iBuilder, mGrepType);
    338         scanMatchK.generateKernel({&mergedResults, &LineBreakStream}, {});               
    339         scanMatchK.setInitialArguments({iBuilder->CreateBitCast(inputStream, int8PtrTy), fileSize, fileIdx});
     360        kernel::ScanMatchKernel scanMatchK(iBuilder, mGrepType, encodingBits);
     361        scanMatchK.generateKernel({byteStream, &mergedResults, &LineBreakStream}, {});
     362        scanMatchK.setInitialArguments({fileIdx});
    340363
    341364        KernelList.push_back(&scanMatchK);
     
    356379    mEngine->finalizeObject();
    357380    delete iBuilder;
    358    
     381    delete sourceK;
     382    delete byteStream;
     383
    359384    if (CountOnly) {
    360385        mGrepFunction_CountOnly = reinterpret_cast<GrepFunctionType_CountOnly>(mEngine->getPointerToFunction(mainFn));
     
    365390}
    366391
    367 void GrepEngine::grepCodeGen(std::string moduleName, re::RE * re_ast, bool CountOnly, bool UTF_16, GrepType grepType) {
     392void GrepEngine::grepCodeGen(std::string moduleName, re::RE * re_ast, bool CountOnly, bool UTF_16, GrepType grepType, const bool usingStdIn) {
    368393    isUTF_16 = UTF_16;
    369394    int addrSpace = 0;
     
    403428
    404429    Type * const size_ty = iBuilder->getSizeTy();
    405     Type * const int8PtrTy = iBuilder->getInt8PtrTy();
    406430    Type * const inputType = PointerType::get(ArrayType::get(ArrayType::get(iBuilder->getBitBlockType(), encodingBits), 1), addrSpace);
    407431    Type * const resultTy = CountOnly ? size_ty : iBuilder->getVoidTy();
     
    453477        fileIdx = &*(args++);
    454478        fileIdx->setName("fileIdx");
    455     }
    456        
    457     ExternalFileBuffer ByteStream(iBuilder, iBuilder->getStreamSetTy(1, 8));
    458    
    459     kernel::MMapSourceKernel mmapK(iBuilder, segmentSize);
    460     mmapK.generateKernel({}, {&ByteStream});
    461     mmapK.setInitialArguments({fileSize});
     479
     480    } 
     481
     482    StreamSetBuffer * byteStream = nullptr;
     483    kernel::KernelBuilder * sourceK = nullptr;
     484    if (usingStdIn) {
     485        byteStream = new ExtensibleBuffer(iBuilder, iBuilder->getStreamSetTy(1, 8), segmentSize * bufferSegments);
     486        cast<ExtensibleBuffer>(byteStream)->allocateBuffer();
     487        sourceK = new kernel::StdInKernel(iBuilder, segmentSize);
     488        sourceK->generateKernel({}, {byteStream});
     489    } else {
     490        byteStream = new ExternalFileBuffer(iBuilder, iBuilder->getStreamSetTy(1, 8));
     491        cast<ExternalFileBuffer>(byteStream)->setStreamSetBuffer(inputStream);
     492        sourceK = new kernel::MMapSourceKernel(iBuilder, segmentSize);
     493        sourceK->generateKernel({}, {byteStream});
     494        sourceK->setInitialArguments({fileSize});
     495    }
    462496   
    463497    CircularBuffer BasisBits(iBuilder, iBuilder->getStreamSetTy(8), segmentSize * bufferSegments);
    464498
    465499    kernel::S2PKernel  s2pk(iBuilder);
    466     s2pk.generateKernel({&ByteStream}, {&BasisBits});
     500    s2pk.generateKernel({byteStream}, {&BasisBits});
    467501   
    468502    kernel::LineBreakKernelBuilder linebreakK(iBuilder, "lb", encodingBits);
     
    476510    pablo_function_passes(&icgrepK);
    477511
    478     ByteStream.setStreamSetBuffer(inputStream, fileSize);
     512
    479513    BasisBits.allocateBuffer();
    480514
    481515    if (CountOnly) {
    482516        icgrepK.generateKernel({&BasisBits, &LineBreakStream}, {});
    483         generatePipeline(iBuilder, {&mmapK, &s2pk, &linebreakK, &icgrepK});
     517        generatePipeline(iBuilder, {sourceK, &s2pk, &linebreakK, &icgrepK});
    484518        iBuilder->CreateRet(icgrepK.createGetAccumulatorCall(icgrepK.getInstance(), "matchedLineCount"));
    485519    } else {
     
    487521        if (codegen::NVPTX){
    488522            ExternalFileBuffer MatchResults(iBuilder, iBuilder->getStreamSetTy(1, 1), addrSpace);
    489             MatchResults.setStreamSetBuffer(outputStream, fileSize);
     523            MatchResults.setStreamSetBuffer(outputStream);
    490524
    491525            icgrepK.generateKernel({&BasisBits, &LineBreakStream},  {&MatchResults});
    492             generatePipelineLoop(iBuilder, {&mmapK, &s2pk, &linebreakK, &icgrepK});
     526            generatePipelineLoop(iBuilder, {sourceK, &s2pk, &linebreakK, &icgrepK});
    493527
    494528        }
     
    500534            icgrepK.generateKernel({&BasisBits, &LineBreakStream}, {&MatchResults});
    501535
    502             kernel::ScanMatchKernel scanMatchK(iBuilder, mGrepType);
    503             scanMatchK.generateKernel({&MatchResults, &LineBreakStream}, {});               
    504             scanMatchK.setInitialArguments({iBuilder->CreateBitCast(inputStream, int8PtrTy), fileSize, fileIdx});
     536            kernel::ScanMatchKernel scanMatchK(iBuilder, mGrepType, encodingBits);
     537            scanMatchK.generateKernel({byteStream, &MatchResults, &LineBreakStream}, {});
     538            scanMatchK.setInitialArguments({fileIdx});
    505539           
    506             generatePipeline(iBuilder, {&mmapK, &s2pk, &linebreakK, &icgrepK, &scanMatchK});
     540            generatePipeline(iBuilder, {sourceK, &s2pk, &linebreakK, &icgrepK, &scanMatchK});
    507541        }
    508542        iBuilder->CreateRetVoid();
     
    537571    mEngine->finalizeObject();
    538572    delete iBuilder;
     573    delete sourceK;
     574    delete byteStream;
    539575   
    540576    if (CountOnly) {
     
    581617static std::vector<std::string> inputFiles;
    582618
    583 void initResult(std::vector<std::string> filenames){
     619void initFileResult(std::vector<std::string> filenames){
    584620    const int n = filenames.size();
    585621    if (n > 1) {
     
    595631}
    596632
    597 extern "C" {
    598     void wrapped_report_match(size_t lineNum, size_t line_start, size_t line_end, const char * buffer, size_t filesize, int fileIdx) {
    599         assert (buffer);
    600 #ifdef CUDA_ENABLED
    601     if (codegen::NVPTX){
    602         while(line_start>startPoints[blockNo]) blockNo++;
    603         line_start -= accumBytes[blockNo-1];
    604         line_end -= accumBytes[blockNo-1];
    605     }
    606 #endif
    607         int index = isUTF_16 ? 2 : 1;
    608         int idx = fileIdx;
    609          
    610         if (ShowFileNames) {
    611             resultStrs[idx] << inputFiles[idx] << ':';
    612         }
    613         if (ShowLineNumbers) {
    614             resultStrs[idx] << lineNum << ":";
    615         }
    616        
    617         if ((!isUTF_16 && buffer[line_start] == 0xA) && (line_start != line_end)) {
    618             // The line "starts" on the LF of a CRLF.  Really the end of the last line.
    619             line_start++;
    620         }
    621         if (((isUTF_16 && buffer[line_start] == 0x0) && buffer[line_start + 1] == 0xA) && (line_start != line_end)) {
    622             // The line "starts" on the LF of a CRLF.  Really the end of the last line.
    623             line_start += 2;
    624         }
    625         if (line_end == filesize) {
    626             // The match position is at end-of-file.   We have a final unterminated line.
    627             resultStrs[idx].write(&buffer[line_start * index], (line_end - line_start) * index);
    628             if (NormalizeLineBreaks) {
    629                 resultStrs[idx] << '\n';  // terminate it
     633template<typename CodeUnit>
     634void wrapped_report_match(const size_t lineNum, size_t line_start, size_t line_end, const CodeUnit * const buffer, const size_t filesize, const int fileIdx) {
     635    assert (buffer);
     636    assert (line_start <= line_end);
     637    assert (line_end < filesize);
     638#ifdef CUDA_ENABLED
     639if (codegen::NVPTX){
     640    while(line_start>startPoints[blockNo]) blockNo++;
     641    line_start -= accumBytes[blockNo-1];
     642    line_end -= accumBytes[blockNo-1];
     643}
     644#endif
     645
     646    if (ShowFileNames) {
     647        resultStrs[fileIdx] << inputFiles[fileIdx] << ':';
     648    }
     649    if (ShowLineNumbers) {
     650        resultStrs[fileIdx] << lineNum << ":";
     651    }
     652
     653    // If the line "starts" on the LF of a CRLF, it is actually the end of the last line.
     654    if ((buffer[line_start] == 0xA) && (line_start != line_end)) {
     655        ++line_start;
     656    }
     657
     658    if (LLVM_UNLIKELY(line_end == filesize)) {
     659        // The match position is at end-of-file.   We have a final unterminated line.
     660        resultStrs[fileIdx].write((char *)&buffer[line_start], (line_end - line_start) * sizeof(CodeUnit));
     661        if (NormalizeLineBreaks) {
     662            resultStrs[fileIdx] << '\n';  // terminate it
     663        }
     664    } else {
     665        const auto end_byte = buffer[line_end];
     666        if (NormalizeLineBreaks) {
     667            if (LLVM_UNLIKELY(end_byte == 0x85)) {
     668                // Line terminated with NEL, on the second byte.  Back up 1.
     669                line_end -= 1;
     670            } else if (LLVM_UNLIKELY(end_byte > 0xD)) {
     671                // Line terminated with PS or LS, on the third byte.  Back up 2.
     672                line_end -= 2;
    630673            }
    631             return;
    632         }
    633         unsigned char end_byte = (unsigned char)buffer[line_end];
    634         unsigned char penult_byte = (unsigned char)(buffer[line_end - 1]);
    635         if (NormalizeLineBreaks) {
    636             if (end_byte == 0x85) {
    637                 // Line terminated with NEL, on the second byte.  Back up 1.
    638                 line_end--;
    639             } else if (end_byte > 0xD) {
    640                 // Line terminated with PS or LS, on the third byte.  Back up 2.
    641                 isUTF_16 ? line_end-- : line_end -= 2;
    642             }
    643             resultStrs[idx].write(&buffer[line_start * index], (line_end - line_start) * index);
    644             resultStrs[idx] << '\n';
     674            resultStrs[fileIdx].write((char *)&buffer[line_start], (line_end - line_start) * sizeof(CodeUnit));
     675            resultStrs[fileIdx] << '\n';
    645676        } else {
    646             if ((!isUTF_16 && end_byte == 0x0D) || (isUTF_16 && (end_byte == 0x0D && penult_byte == 0x0))) {
    647                 // Check for line_end on first byte of CRLF;  note that we don't
    648                 // want to access past the end of buffer.
    649                 if (line_end + 1 < filesize) {
    650                     if (!isUTF_16 && buffer[line_end + 1] == 0x0A) {
     677            if (end_byte == 0x0D) {
     678                // Check for line_end on first byte of CRLF; we don't want to access past the end of buffer.
     679                if ((line_end + 1) < filesize) {
     680                    if (buffer[line_end + 1] == 0x0A) {
    651681                        // Found CRLF; preserve both bytes.
    652                         line_end++;
    653                     }
    654                     if (isUTF_16 && buffer[line_end + 1] == 0x0 && buffer[line_end + 2] == 0x0A) {
    655                         // Found CRLF; preserve both bytes.
    656                         line_end += 2;
     682                        ++line_end;
    657683                    }
    658684                }
    659685            }
    660             resultStrs[idx].write(&buffer[line_start * index], (line_end - line_start + 1) * index);
     686            resultStrs[fileIdx].write((char *)&buffer[line_start], (line_end - line_start + 1) * sizeof(CodeUnit));
    661687        }
    662688    }
     
    684710}
    685711
    686 extern "C" {
    687     void insert_codepoints(size_t lineNum, size_t line_start, size_t line_end, const char * buffer) {
    688         assert (buffer);
    689         re::codepoint_t c = 0;
    690         ssize_t line_pos = line_start;
    691         while (isxdigit(buffer[line_pos])) {
    692             if (isdigit(buffer[line_pos])) {
    693                 c = (c << 4) | (buffer[line_pos] - '0');
    694             }
    695             else {
    696                 c = (c << 4) | (tolower(buffer[line_pos]) - 'a' + 10);
    697             }
    698             line_pos++;
    699         }
    700         assert(((line_pos - line_start) >= 4) && ((line_pos - line_start) <= 6)); // UCD format 4 to 6 hex digits.       
    701         parsedCodePointSet->insert(c);
    702     }
    703 }
    704 
    705 extern "C" {
    706     void insert_property_values(size_t lineNum, size_t line_start, size_t line_end, const char * buffer) {
    707         parsedPropertyValues.emplace_back(buffer + line_start, buffer + line_end);
    708     }
     712void insert_codepoints(const size_t lineNum, const size_t line_start, const size_t line_end, const char * const buffer) {
     713    assert (buffer);
     714    assert (line_start <= line_end);
     715    re::codepoint_t c = 0;
     716    size_t line_pos = line_start;
     717    while (isxdigit(buffer[line_pos])) {
     718        assert (line_pos < line_end);
     719        if (isdigit(buffer[line_pos])) {
     720            c = (c << 4) | (buffer[line_pos] - '0');
     721        }
     722        else {
     723            c = (c << 4) | (tolower(buffer[line_pos]) - 'a' + 10);
     724        }
     725        line_pos++;
     726    }
     727    assert(((line_pos - line_start) >= 4) && ((line_pos - line_start) <= 6)); // UCD format 4 to 6 hex digits.
     728    parsedCodePointSet->insert(c);
     729}
     730
     731void insert_property_values(size_t lineNum, size_t line_start, size_t line_end, const char * buffer) {
     732    assert (line_start <= line_end);
     733    parsedPropertyValues.emplace_back(buffer + line_start, buffer + line_end);
    709734}
    710735
     
    717742        if (fnName == "process_block_initialize_carries") continue;
    718743       
    719         if (fnName == "wrapped_report_match") {
    720             e->addGlobalMapping(cast<GlobalValue>(it), (void *)&wrapped_report_match);
     744        if (fnName == "wrapped_report_match8") {
     745            e->addGlobalMapping(cast<GlobalValue>(it), (void *)&wrapped_report_match<uint8_t>);
     746        }
     747        if (fnName == "wrapped_report_match16") {
     748            e->addGlobalMapping(cast<GlobalValue>(it), (void *)&wrapped_report_match<uint16_t>);
    721749        }
    722750        if (fnName == "insert_codepoints") {
Note: See TracChangeset for help on using the changeset viewer.