Changeset 5704


Ignore:
Timestamp:
Oct 21, 2017, 9:13:48 PM (18 months ago)
Author:
cameron
Message:

Further refinement of grep engine

Location:
icGREP/icgrep-devel/icgrep
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5703 r5704  
    4646namespace grep {
    4747
    48 
    49 // DoGrep thread function.
    50 void *GrepEngine::DoGrepThreadFunction(void *args) {
    51     size_t fileIdx;
    52     grep::GrepEngine * grepEngine = (grep::GrepEngine *)args;
    53 
    54     grepEngine->count_mutex.lock();
    55     fileIdx = grepEngine->fileCount;
    56     grepEngine->fileCount++;
    57     grepEngine->count_mutex.unlock();
    58 
    59     while (fileIdx < grepEngine->inputFiles.size()) {
    60         size_t grepResult = grepEngine->doGrep(grepEngine->inputFiles[fileIdx], fileIdx);
    61        
    62         grepEngine->count_mutex.lock();
    63         if (grepResult > 0) grepEngine->grepMatchFound = true;
    64         fileIdx = grepEngine->fileCount;
    65         grepEngine->fileCount++;
    66         grepEngine->count_mutex.unlock();
    67         if (QuietMode && grepEngine->grepMatchFound) pthread_exit(nullptr);
    68     }
    69     pthread_exit(nullptr);
    70 }
    71    
    72 bool GrepEngine::searchAllFiles() {
    73    
    74     if (Threads <= 1) {
    75         for (unsigned i = 0; i != inputFiles.size(); ++i) {
    76             size_t grepResult = doGrep(inputFiles[i], i);
    77             if (grepResult > 0) {
    78                 grepMatchFound = true;
    79                 if (QuietMode) break;
    80             }
    81         }
    82     } else if (Threads > 1) {
    83         const unsigned numOfThreads = Threads; // <- convert the command line value into an integer to allow stack allocation
    84         pthread_t threads[numOfThreads];
    85        
    86         for(unsigned long i = 0; i < numOfThreads; ++i){
    87             const int rc = pthread_create(&threads[i], nullptr, DoGrepThreadFunction, (void *)this);
    88             if (rc) {
    89                 llvm::report_fatal_error("Failed to create thread: code " + std::to_string(rc));
    90             }
    91         }
    92         for(unsigned i = 0; i < numOfThreads; ++i) {
    93             void * status = nullptr;
    94             const int rc = pthread_join(threads[i], &status);
    95             if (rc) {
    96                 llvm::report_fatal_error("Failed to join thread: code " + std::to_string(rc));
    97             }
    98         }
    99     }
    100     return grepMatchFound;
    101 }
    102        
     48// Grep Engine construction and initialization.
     49   
     50GrepEngine::GrepEngine() :
     51    mGrepDriver(nullptr),
     52    grepMatchFound(false),
     53    fileCount(0),
     54    mMoveMatchesToEOL(true) {}
     55   
     56GrepEngine::~GrepEngine() {
     57    delete mGrepDriver;
     58}
     59   
     60QuietModeEngine::QuietModeEngine() : GrepEngine() {
     61    mMoveMatchesToEOL = false;
     62}
     63
     64MatchOnlyEngine::MatchOnlyEngine(bool showFilesWithoutMatch) :
     65    GrepEngine(), mRequiredCount(showFilesWithoutMatch) {
     66    mFileSuffix = NullFlag ? std::string("\0", 1) : "\n";
     67    mMoveMatchesToEOL = false;
     68}
     69
     70CountOnlyEngine::CountOnlyEngine() : GrepEngine() {
     71    mFileSuffix = ":";
     72}
     73
     74EmitMatchesEngine::EmitMatchesEngine() : GrepEngine() {
     75    mFileSuffix = InitialTabFlag ? "\t:" : ":";
     76    if (LineRegexpFlag) mMoveMatchesToEOL = false;
     77}
     78
     79void GrepEngine::initFileResult(std::vector<std::string> & filenames) {
     80    const int n = filenames.size();
     81    mResultStrs.resize(n);
     82    inputFiles = filenames;
     83}
     84   
     85
     86// Code Generation
    10387//
    104 //  Default Report Match:  lines are emitted with whatever line terminators are found in the
    105 //  input.  However, if the final line is not terminated, a new line is appended.
     88// All engines share a common pipeline to compute a stream of Matches from a given input Bytestream.
     89
     90std::pair<StreamSetBuffer *, StreamSetBuffer *> GrepEngine::grepPipeline(std::vector<re::RE *> & REs, StreamSetBuffer * ByteStream) {
     91    auto & idb = mGrepDriver->getBuilder();
     92    const unsigned segmentSize = codegen::SegmentSize;
     93    const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
     94    const unsigned encodingBits = 8;
     95   
     96    StreamSetBuffer * BasisBits = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(encodingBits, 1), segmentSize * bufferSegments));
     97    kernel::Kernel * s2pk = mGrepDriver->addKernelInstance(make_unique<kernel::S2PKernel>(idb));
     98    mGrepDriver->makeKernelCall(s2pk, {ByteStream}, {BasisBits});
     99   
     100    StreamSetBuffer * LineBreakStream = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
     101    kernel::Kernel * linebreakK = mGrepDriver->addKernelInstance(make_unique<kernel::LineBreakKernelBuilder>(idb, encodingBits));
     102    mGrepDriver->makeKernelCall(linebreakK, {BasisBits}, {LineBreakStream});
     103   
     104    kernel::Kernel * requiredStreamsK = mGrepDriver->addKernelInstance(make_unique<kernel::RequiredStreams_UTF8>(idb));
     105    StreamSetBuffer * RequiredStreams = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(4, 1), segmentSize * bufferSegments));
     106    mGrepDriver->makeKernelCall(requiredStreamsK, {BasisBits}, {RequiredStreams});
     107   
     108    const auto n = REs.size();
     109   
     110    std::vector<std::vector<UCD::UnicodeSet>> charclasses(n);
     111   
     112    for (unsigned i = 0; i < n; i++) {
     113        REs[i] = re::resolveNames(REs[i]);
     114        std::vector<UCD::UnicodeSet> UnicodeSets = re::collect_UnicodeSets(REs[i]);
     115        std::vector<std::vector<unsigned>> exclusiveSetIDs;
     116        doMultiplexCCs(UnicodeSets, exclusiveSetIDs, charclasses[i]);
     117        REs[i] = multiplex(REs[i], UnicodeSets, exclusiveSetIDs);
     118    }
     119   
     120    std::vector<StreamSetBuffer *> MatchResultsBufs(n);
     121   
     122    for(unsigned i = 0; i < n; ++i){
     123        const auto numOfCharacterClasses = charclasses[i].size();
     124        StreamSetBuffer * CharClasses = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), segmentSize * bufferSegments));
     125        kernel::Kernel * ccK = mGrepDriver->addKernelInstance(make_unique<kernel::CharClassesKernel>(idb, std::move(charclasses[i])));
     126        mGrepDriver->makeKernelCall(ccK, {BasisBits}, {CharClasses});
     127        StreamSetBuffer * MatchResults = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
     128        kernel::Kernel * icgrepK = mGrepDriver->addKernelInstance(make_unique<kernel::ICGrepKernel>(idb, REs[i], numOfCharacterClasses));
     129        mGrepDriver->makeKernelCall(icgrepK, {CharClasses, LineBreakStream, RequiredStreams}, {MatchResults});
     130        MatchResultsBufs[i] = MatchResults;
     131    }
     132    StreamSetBuffer * MergedResults = MatchResultsBufs[0];
     133    if (REs.size() > 1) {
     134        MergedResults = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
     135        kernel::Kernel * streamsMergeK = mGrepDriver->addKernelInstance(make_unique<kernel::StreamsMerge>(idb, 1, REs.size()));
     136        mGrepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
     137    }
     138    StreamSetBuffer * Matches = MergedResults;
     139   
     140    if (mMoveMatchesToEOL) {
     141        StreamSetBuffer * OriginalMatches = Matches;
     142        kernel::Kernel * matchedLinesK = mGrepDriver->addKernelInstance(make_unique<kernel::MatchedLinesKernel>(idb));
     143        Matches = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
     144        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
     145    }
     146   
     147    if (InvertMatchFlag) {
     148        kernel::Kernel * invertK = mGrepDriver->addKernelInstance(make_unique<kernel::InvertMatchesKernel>(idb));
     149        StreamSetBuffer * OriginalMatches = Matches;
     150        Matches = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
     151        mGrepDriver->makeKernelCall(invertK, {OriginalMatches, LineBreakStream}, {Matches});
     152    }
     153    if (MaxCountFlag > 0) {
     154        kernel::Kernel * untilK = mGrepDriver->addKernelInstance(make_unique<kernel::UntilNkernel>(idb));
     155        untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
     156        StreamSetBuffer * AllMatches = Matches;
     157        Matches = mGrepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
     158        mGrepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
     159    }
     160    return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
     161}
     162
     163// The QuietMode, MatchOnly and CountOnly engines share a common code generation main function,
     164// which returns a count of the matches found (possibly subject to a MaxCount).
     165//
     166
     167void GrepEngine::grepCodeGen(std::vector<re::RE *> REs) {
     168   
     169    assert (mGrepDriver == nullptr);
     170    mGrepDriver = new ParabixDriver("engine");
     171    auto & idb = mGrepDriver->getBuilder();
     172    Module * M = idb->getModule();
     173   
     174    const unsigned segmentSize = codegen::SegmentSize;
     175    const unsigned encodingBits = 8;
     176   
     177    Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", idb->getInt64Ty(), idb->getInt32Ty(), nullptr));
     178    mainFunc->setCallingConv(CallingConv::C);
     179    idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
     180    auto args = mainFunc->arg_begin();
     181   
     182    Value * const fileDescriptor = &*(args++);
     183    fileDescriptor->setName("fileDescriptor");
     184   
     185    StreamSetBuffer * ByteStream = mGrepDriver->addBuffer(make_unique<SourceBuffer>(idb, idb->getStreamSetTy(1, encodingBits)));
     186    kernel::Kernel * sourceK = mGrepDriver->addKernelInstance(make_unique<kernel::FDSourceKernel>(idb, segmentSize));
     187    sourceK->setInitialArguments({fileDescriptor});
     188    mGrepDriver->makeKernelCall(sourceK, {}, {ByteStream});
     189   
     190    StreamSetBuffer * LineBreakStream;
     191    StreamSetBuffer * Matches;
     192    std::tie(LineBreakStream, Matches) = grepPipeline(REs, ByteStream);
     193   
     194    kernel::Kernel * matchCountK = mGrepDriver->addKernelInstance(make_unique<kernel::PopcountKernel>(idb));
     195    mGrepDriver->makeKernelCall(matchCountK, {Matches}, {});
     196    mGrepDriver->generatePipelineIR();
     197    idb->setKernel(matchCountK);
     198    Value * matchedLineCount = idb->getAccumulator("countResult");
     199    matchedLineCount = idb->CreateZExt(matchedLineCount, idb->getInt64Ty());
     200    mGrepDriver->deallocateBuffers();
     201    idb->CreateRet(matchedLineCount);
     202    mGrepDriver->finalizeObject();
     203}
     204
     205//
     206// The EmitMatches engine uses an EmitMatch object (a MatchAccumulator subclass) to
     207// concatenate together matched lines.
    106208
    107209class EmitMatch : public MatchAccumulator {
     
    118220};
    119221
     222//
     223//  Default Report Match:  lines are emitted with whatever line terminators are found in the
     224//  input.  However, if the final line is not terminated, a newline is appended.
    120225void EmitMatch::accumulate_match (const size_t lineNum, char * line_start, char * line_end) {
    121226    if (!(WithFilenameFlag | LineNumberFlag) && (line_start == mPrevious_line_end + 1)) {
     
    165270}
    166271
    167 
    168 
    169 bool matchesNeedToBeMovedToEOL() {
    170     if ((Mode == QuietMode) | (Mode == FilesWithMatch) | (Mode == FilesWithoutMatch)) {
    171         return false;
    172     }
    173     else if (LineRegexpFlag) {
    174         return false;
    175     }
    176     // TODO: return false for other cases based on regexp analysis, e.g., regexp ends with $.
    177     return true;
     272void EmitMatchesEngine::grepCodeGen(std::vector<re::RE *> REs) {
     273    assert (mGrepDriver == nullptr);
     274    mGrepDriver = new ParabixDriver("engine");
     275    auto & idb = mGrepDriver->getBuilder();
     276    Module * M = idb->getModule();
     277   
     278    const unsigned segmentSize = codegen::SegmentSize;
     279    const unsigned encodingBits = 8;
     280   
     281    Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", idb->getInt64Ty(), idb->getInt32Ty(), idb->getIntAddrTy(), nullptr));
     282    mainFunc->setCallingConv(CallingConv::C);
     283    idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
     284    auto args = mainFunc->arg_begin();
     285   
     286    Value * const fileDescriptor = &*(args++);
     287    fileDescriptor->setName("fileDescriptor");
     288    Value * match_accumulator = &*(args++);
     289    match_accumulator->setName("match_accumulator");
     290   
     291    StreamSetBuffer * ByteStream = mGrepDriver->addBuffer(make_unique<SourceBuffer>(idb, idb->getStreamSetTy(1, encodingBits)));
     292    kernel::Kernel * sourceK = mGrepDriver->addKernelInstance(make_unique<kernel::FDSourceKernel>(idb, segmentSize));
     293    sourceK->setInitialArguments({fileDescriptor});
     294    mGrepDriver->makeKernelCall(sourceK, {}, {ByteStream});
     295   
     296    StreamSetBuffer * LineBreakStream;
     297    StreamSetBuffer * Matches;
     298    std::tie(LineBreakStream, Matches) = grepPipeline(REs, ByteStream);
     299   
     300    kernel::Kernel * scanMatchK = mGrepDriver->addKernelInstance(make_unique<kernel::ScanMatchKernel>(idb));
     301    scanMatchK->setInitialArguments({match_accumulator});
     302    mGrepDriver->makeKernelCall(scanMatchK, {Matches, LineBreakStream, ByteStream}, {});
     303    mGrepDriver->LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
     304    mGrepDriver->LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
     305   
     306    mGrepDriver->generatePipelineIR();
     307    mGrepDriver->deallocateBuffers();
     308    idb->CreateRet(idb->getInt64(0));
     309    mGrepDriver->finalizeObject();
     310}
     311
     312
     313//
     314//  The doGrep methods apply a GrepEngine to a single file, processing the results
     315//  differently based on the engine type.
     316   
     317uint64_t GrepEngine::doGrep(const std::string & fileName, const uint32_t fileIdx) {
     318    typedef uint64_t (*GrepFunctionType)(int32_t fileDescriptor);
     319    auto f = reinterpret_cast<GrepFunctionType>(mGrepDriver->getMain());
     320   
     321    int32_t fileDescriptor = openFile(fileName, mResultStrs[fileIdx]);
     322    if (fileDescriptor == -1) return 0;
     323   
     324    uint64_t grepResult = f(fileDescriptor);
     325    close(fileDescriptor);
     326    return grepResult;
     327}
     328
     329uint64_t CountOnlyEngine::doGrep(const std::string & fileName, const uint32_t fileIdx) {
     330    uint64_t grepResult = GrepEngine::doGrep(fileName, fileIdx);
     331   
     332    if (WithFilenameFlag) mResultStrs[fileIdx] << linePrefix(fileName);
     333    mResultStrs[fileIdx] << grepResult << "\n";
     334    return grepResult;
     335}
     336
     337std::string GrepEngine::linePrefix(std::string fileName) {
     338    if (fileName == "-") {
     339        return LabelFlag + mFileSuffix;
     340    }
     341    else {
     342        return fileName + mFileSuffix;
     343    }
     344}
     345   
     346uint64_t MatchOnlyEngine::doGrep(const std::string & fileName, const uint32_t fileIdx) {
     347    uint64_t grepResult = GrepEngine::doGrep(fileName, fileIdx);
     348    if (grepResult == mRequiredCount) {
     349        mResultStrs[fileIdx] << linePrefix(fileName);
     350    }
     351    return grepResult;
     352}
     353
     354uint64_t EmitMatchesEngine::doGrep(const std::string & fileName, const uint32_t fileIdx) {
     355    typedef uint64_t (*GrepFunctionType)(int32_t fileDescriptor, intptr_t accum_addr);
     356    auto f = reinterpret_cast<GrepFunctionType>(mGrepDriver->getMain());
     357   
     358    int32_t fileDescriptor = openFile(fileName, mResultStrs[fileIdx]);
     359    if (fileDescriptor == -1) return 0;
     360    EmitMatch accum(linePrefix(fileName), &mResultStrs[fileIdx]);
     361    uint64_t grepResult = f(fileDescriptor, reinterpret_cast<intptr_t>(&accum));
     362    close(fileDescriptor);
     363    if (accum.mLineCount > 0) grepMatchFound = true;
     364    return accum.mLineCount;
    178365}
    179366
     
    205392            }
    206393            close(fileDescriptor);
    207             return -1; 
     394            return -1;
    208395        }
    209396        return fileDescriptor;
     
    211398}
    212399
    213 std::string GrepEngine::linePrefix(std::string fileName) {
    214     if (fileName == "-") {
    215         return LabelFlag + mFileSuffix;
    216     }
    217     else {
    218         return fileName + mFileSuffix;
    219     }
    220 }
    221 
    222 uint64_t EmitMatchesEngine::doGrep(const std::string & fileName, const uint32_t fileIdx) {
    223     int32_t fileDescriptor = openFile(fileName, mResultStrs[fileIdx]);
    224    
    225     if (fileDescriptor == -1) return 0;
    226    
    227     EmitMatch accum(linePrefix(fileName), &mResultStrs[fileIdx]);
    228    
    229     typedef uint64_t (*GrepFunctionType)(int32_t fileDescriptor, intptr_t accum_addr);
    230     auto f = reinterpret_cast<GrepFunctionType>(mGrepDriver->getMain());
    231    
    232     uint64_t grepResult = f(fileDescriptor, reinterpret_cast<intptr_t>(&accum));
    233     close(fileDescriptor);
    234     if (accum.mLineCount > 0) grepMatchFound = true;
    235     return grepResult;
    236 }
    237 
    238 uint64_t CountOnlyEngine::doGrep(const std::string & fileName, const uint32_t fileIdx) {
    239     int32_t fileDescriptor = openFile(fileName, mResultStrs[fileIdx]);
    240     if (fileDescriptor == -1) return 0;
    241    
    242     typedef uint64_t (*GrepFunctionType)(int32_t fileDescriptor);
    243     auto f = reinterpret_cast<GrepFunctionType>(mGrepDriver->getMain());
    244    
    245     uint64_t grepResult = f(fileDescriptor);
    246     close(fileDescriptor);
    247    
    248     if (WithFilenameFlag) mResultStrs[fileIdx] << linePrefix(fileName);
    249     mResultStrs[fileIdx] << grepResult << "\n";
    250     return grepResult;
    251 }
    252 
    253 uint64_t MatchOnlyEngine::doGrep(const std::string & fileName, const uint32_t fileIdx) {
    254     int32_t fileDescriptor = openFile(fileName, mResultStrs[fileIdx]);
    255     if (fileDescriptor == -1) return 0;
    256    
    257     typedef uint64_t (*GrepFunctionType)(int32_t fileDescriptor);
    258     auto f = reinterpret_cast<GrepFunctionType>(mGrepDriver->getMain());
    259    
    260     uint64_t grepResult = f(fileDescriptor);
    261     close(fileDescriptor);
    262    
    263     if (QuietMode) {
    264         if (grepResult > 0) exit(MatchFoundExitCode);
    265     }
    266     else {
    267         if (grepResult == mRequiredCount) {
    268             mResultStrs[fileIdx] << linePrefix(fileName);
    269         }
    270     }
    271     return grepResult;
    272 }
    273 
    274 void GrepEngine::initFileResult(std::vector<std::string> & filenames){
    275     const int n = filenames.size();
    276     mResultStrs.resize(n);
    277     inputFiles = filenames;
    278 }
    279 
    280 std::pair<StreamSetBuffer *, StreamSetBuffer *> grepPipeline(Driver * grepDriver, std::vector<re::RE *> & REs, StreamSetBuffer * ByteStream) {
    281     auto & idb = grepDriver->getBuilder();
    282     const unsigned segmentSize = codegen::SegmentSize;
    283     const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
    284     const unsigned encodingBits = 8;
    285 
    286     StreamSetBuffer * BasisBits = grepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(encodingBits, 1), segmentSize * bufferSegments));
    287     kernel::Kernel * s2pk = grepDriver->addKernelInstance(make_unique<kernel::S2PKernel>(idb));
    288     grepDriver->makeKernelCall(s2pk, {ByteStream}, {BasisBits});
    289    
    290     StreamSetBuffer * LineBreakStream = grepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
    291     kernel::Kernel * linebreakK = grepDriver->addKernelInstance(make_unique<kernel::LineBreakKernelBuilder>(idb, encodingBits));
    292     grepDriver->makeKernelCall(linebreakK, {BasisBits}, {LineBreakStream});
    293    
    294     kernel::Kernel * requiredStreamsK = grepDriver->addKernelInstance(make_unique<kernel::RequiredStreams_UTF8>(idb));
    295     StreamSetBuffer * RequiredStreams = grepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(4, 1), segmentSize * bufferSegments));
    296     grepDriver->makeKernelCall(requiredStreamsK, {BasisBits}, {RequiredStreams});
    297    
    298     const auto n = REs.size();
    299    
    300     std::vector<std::vector<UCD::UnicodeSet>> charclasses(n);
    301 
    302     for (unsigned i = 0; i < n; i++) {
    303         REs[i] = re::resolveNames(REs[i]);
    304         std::vector<UCD::UnicodeSet> UnicodeSets = re::collect_UnicodeSets(REs[i]);
    305         std::vector<std::vector<unsigned>> exclusiveSetIDs;
    306         doMultiplexCCs(UnicodeSets, exclusiveSetIDs, charclasses[i]);
    307         REs[i] = multiplex(REs[i], UnicodeSets, exclusiveSetIDs);
    308     }
    309 
    310     std::vector<StreamSetBuffer *> MatchResultsBufs(n);
    311 
    312     for(unsigned i = 0; i < n; ++i){
    313         const auto numOfCharacterClasses = charclasses[i].size();
    314         StreamSetBuffer * CharClasses = grepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), segmentSize * bufferSegments));
    315         kernel::Kernel * ccK = grepDriver->addKernelInstance(make_unique<kernel::CharClassesKernel>(idb, std::move(charclasses[i])));
    316         grepDriver->makeKernelCall(ccK, {BasisBits}, {CharClasses});
    317         StreamSetBuffer * MatchResults = grepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
    318         kernel::Kernel * icgrepK = grepDriver->addKernelInstance(make_unique<kernel::ICGrepKernel>(idb, REs[i], numOfCharacterClasses));
    319         grepDriver->makeKernelCall(icgrepK, {CharClasses, LineBreakStream, RequiredStreams}, {MatchResults});
    320         MatchResultsBufs[i] = MatchResults;
    321     }
    322     StreamSetBuffer * MergedResults = MatchResultsBufs[0];
    323     if (REs.size() > 1) {
    324         MergedResults = grepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
    325         kernel::Kernel * streamsMergeK = grepDriver->addKernelInstance(make_unique<kernel::StreamsMerge>(idb, 1, REs.size()));
    326         grepDriver->makeKernelCall(streamsMergeK, MatchResultsBufs, {MergedResults});
    327     }
    328     StreamSetBuffer * Matches = MergedResults;
    329    
    330     if (matchesNeedToBeMovedToEOL()) {
    331         StreamSetBuffer * OriginalMatches = Matches;
    332         kernel::Kernel * matchedLinesK = grepDriver->addKernelInstance(make_unique<kernel::MatchedLinesKernel>(idb));
    333         Matches = grepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
    334         grepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
    335     }
    336    
    337     if (InvertMatchFlag) {
    338         kernel::Kernel * invertK = grepDriver->addKernelInstance(make_unique<kernel::InvertMatchesKernel>(idb));
    339         StreamSetBuffer * OriginalMatches = Matches;
    340         Matches = grepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
    341         grepDriver->makeKernelCall(invertK, {OriginalMatches, LineBreakStream}, {Matches});
    342     }
    343     if (MaxCountFlag > 0) {
    344         kernel::Kernel * untilK = grepDriver->addKernelInstance(make_unique<kernel::UntilNkernel>(idb));
    345         untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
    346         StreamSetBuffer * AllMatches = Matches;
    347         Matches = grepDriver->addBuffer(make_unique<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), segmentSize * bufferSegments));
    348         grepDriver->makeKernelCall(untilK, {AllMatches}, {Matches});
    349     }
    350     return std::pair<StreamSetBuffer *, StreamSetBuffer *>(LineBreakStream, Matches);
    351 }
    352 
    353 void EmitMatchesEngine::grepCodeGen(std::vector<re::RE *> REs) {
    354     assert (mGrepDriver == nullptr);
    355     mGrepDriver = new ParabixDriver("engine");
    356     auto & idb = mGrepDriver->getBuilder();
    357     Module * M = idb->getModule();
    358    
    359     const unsigned segmentSize = codegen::SegmentSize;
    360     const unsigned encodingBits = 8;
    361    
    362     Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", idb->getInt64Ty(), idb->getInt32Ty(), idb->getIntAddrTy(), nullptr));
    363     mainFunc->setCallingConv(CallingConv::C);
    364     idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
    365     auto args = mainFunc->arg_begin();
    366    
    367     Value * const fileDescriptor = &*(args++);
    368     fileDescriptor->setName("fileDescriptor");
    369     Value * match_accumulator = &*(args++);
    370     match_accumulator->setName("match_accumulator");
    371    
    372     StreamSetBuffer * ByteStream = mGrepDriver->addBuffer(make_unique<SourceBuffer>(idb, idb->getStreamSetTy(1, encodingBits)));
    373     kernel::Kernel * sourceK = mGrepDriver->addKernelInstance(make_unique<kernel::FDSourceKernel>(idb, segmentSize));
    374     sourceK->setInitialArguments({fileDescriptor});
    375     mGrepDriver->makeKernelCall(sourceK, {}, {ByteStream});
    376    
    377     StreamSetBuffer * LineBreakStream;
    378     StreamSetBuffer * Matches;
    379     std::tie(LineBreakStream, Matches) = grepPipeline(mGrepDriver, REs, ByteStream);
    380    
    381     kernel::Kernel * scanMatchK = mGrepDriver->addKernelInstance(make_unique<kernel::ScanMatchKernel>(idb));
    382     scanMatchK->setInitialArguments({match_accumulator});
    383     mGrepDriver->makeKernelCall(scanMatchK, {Matches, LineBreakStream, ByteStream}, {});
    384     mGrepDriver->LinkFunction(*scanMatchK, "accumulate_match_wrapper", &accumulate_match_wrapper);
    385     mGrepDriver->LinkFunction(*scanMatchK, "finalize_match_wrapper", &finalize_match_wrapper);
    386    
    387     mGrepDriver->generatePipelineIR();
    388     mGrepDriver->deallocateBuffers();
    389     idb->CreateRet(idb->getInt64(0));
    390     mGrepDriver->finalizeObject();
    391 }
    392 
    393 void GrepEngine::grepCodeGen(std::vector<re::RE *> REs) {
    394    
    395     assert (mGrepDriver == nullptr);
    396     mGrepDriver = new ParabixDriver("engine");
    397     auto & idb = mGrepDriver->getBuilder();
    398     Module * M = idb->getModule();
    399    
    400     const unsigned segmentSize = codegen::SegmentSize;
    401     const unsigned encodingBits = 8;
    402    
    403     Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", idb->getInt64Ty(), idb->getInt32Ty(), nullptr));
    404     mainFunc->setCallingConv(CallingConv::C);
    405     idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
    406     auto args = mainFunc->arg_begin();
    407    
    408     Value * const fileDescriptor = &*(args++);
    409     fileDescriptor->setName("fileDescriptor");
    410    
    411     StreamSetBuffer * ByteStream = mGrepDriver->addBuffer(make_unique<SourceBuffer>(idb, idb->getStreamSetTy(1, encodingBits)));
    412     kernel::Kernel * sourceK = mGrepDriver->addKernelInstance(make_unique<kernel::FDSourceKernel>(idb, segmentSize));
    413     sourceK->setInitialArguments({fileDescriptor});
    414     mGrepDriver->makeKernelCall(sourceK, {}, {ByteStream});
    415    
    416     StreamSetBuffer * LineBreakStream;
    417     StreamSetBuffer * Matches;
    418     std::tie(LineBreakStream, Matches) = grepPipeline(mGrepDriver, REs, ByteStream);
    419    
    420     kernel::Kernel * matchCountK = mGrepDriver->addKernelInstance(make_unique<kernel::PopcountKernel>(idb));
    421     mGrepDriver->makeKernelCall(matchCountK, {Matches}, {});
    422     mGrepDriver->generatePipelineIR();
    423     idb->setKernel(matchCountK);
    424     Value * matchedLineCount = idb->getAccumulator("countResult");
    425     matchedLineCount = idb->CreateZExt(matchedLineCount, idb->getInt64Ty());
    426     mGrepDriver->deallocateBuffers();
    427     idb->CreateRet(matchedLineCount);
    428     mGrepDriver->finalizeObject();
    429 }
    430 
    431 void GrepEngine::writeMatches(){
    432     for (unsigned i = 0; i < inputFiles.size(); ++i){
     400// The process of searching a group of files may use either a sequential or a
     401// task-parallel approach.
     402
     403bool GrepEngine::searchAllFiles() {
     404    if (Threads <= 1) {
     405        for (unsigned i = 0; i != inputFiles.size(); ++i) {
     406            size_t grepResult = doGrep(inputFiles[i], i);
     407            if (grepResult > 0) {
     408                grepMatchFound = true;
     409                if (QuietMode) break;
     410            }
     411        }
     412    } else if (Threads > 1) {
     413        const unsigned numOfThreads = Threads; // <- convert the command line value into an integer to allow stack allocation
     414        pthread_t threads[numOfThreads];
     415       
     416        for(unsigned long i = 0; i < numOfThreads; ++i) {
     417            const int rc = pthread_create(&threads[i], nullptr, DoGrepThreadFunction, (void *)this);
     418            if (rc) {
     419                llvm::report_fatal_error("Failed to create thread: code " + std::to_string(rc));
     420            }
     421        }
     422        for(unsigned i = 0; i < numOfThreads; ++i) {
     423            void * status = nullptr;
     424            const int rc = pthread_join(threads[i], &status);
     425            if (rc) {
     426                llvm::report_fatal_error("Failed to join thread: code " + std::to_string(rc));
     427            }
     428        }
     429    }
     430    return grepMatchFound;
     431}
     432
     433
     434// DoGrep thread function.
     435void * GrepEngine::DoGrepThreadFunction(void *args) {
     436    size_t fileIdx;
     437    grep::GrepEngine * grepEngine = (grep::GrepEngine *)args;
     438
     439    grepEngine->count_mutex.lock();
     440    fileIdx = grepEngine->fileCount;
     441    grepEngine->fileCount++;
     442    grepEngine->count_mutex.unlock();
     443
     444    while (fileIdx < grepEngine->inputFiles.size()) {
     445        size_t grepResult = grepEngine->doGrep(grepEngine->inputFiles[fileIdx], fileIdx);
     446       
     447        grepEngine->count_mutex.lock();
     448        if (grepResult > 0) grepEngine->grepMatchFound = true;
     449        fileIdx = grepEngine->fileCount;
     450        grepEngine->fileCount++;
     451        grepEngine->count_mutex.unlock();
     452        if (QuietMode && grepEngine->grepMatchFound) pthread_exit(nullptr);
     453    }
     454    pthread_exit(nullptr);
     455}
     456   
     457void GrepEngine::writeMatches() {
     458    for (unsigned i = 0; i < inputFiles.size(); ++i) {
    433459        std::cout << mResultStrs[i].str();
    434460    }
    435461}
    436462
    437 GrepEngine::GrepEngine() :
    438     mGrepDriver(nullptr),
    439     grepMatchFound(false),
    440     fileCount(0) {}
    441    
    442 GrepEngine::~GrepEngine() {
    443     delete mGrepDriver;
    444 }
    445 
    446 EmitMatchesEngine::EmitMatchesEngine() : GrepEngine()
    447     {mFileSuffix = InitialTabFlag ? "\t:" : ":";}
    448    
    449 CountOnlyEngine::CountOnlyEngine() :
    450     GrepEngine() {mFileSuffix = ":";}
    451 
    452 MatchOnlyEngine::MatchOnlyEngine(bool showFilesWithoutMatch) :
    453     GrepEngine(), mRequiredCount(showFilesWithoutMatch)
    454     {mFileSuffix = NullFlag ? std::string("\0", 1) : "\n";}
    455 }
     463}
     464
  • icGREP/icgrep-devel/icgrep/grep_engine.h

    r5703 r5704  
    77#define GREP_ENGINE_H
    88#include <grep_interface.h>
     9#include <kernels/streamset.h>
    910#include <toolchain/grep_pipeline.h>
    1011#include <string>       // for string
     
    3334   
    3435protected:
     36    std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> grepPipeline(std::vector<re::RE *> & REs, parabix::StreamSetBuffer * ByteStream);
     37
    3538    static void * DoGrepThreadFunction(void *args);
    36     virtual uint64_t doGrep(const std::string & fileName, const uint32_t fileIdx) = 0;
     39    virtual uint64_t doGrep(const std::string & fileName, const uint32_t fileIdx);
    3740    std::string linePrefix(std::string fileName);
    3841    int32_t openFile(const std::string & fileName, std::stringstream & msgstrm);
     
    4851    std::mutex count_mutex;
    4952    size_t fileCount;
     53    bool mMoveMatchesToEOL;
    5054};
    5155
     
    7074private:
    7175    uint64_t doGrep(const std::string & fileName, const uint32_t fileIdx) override;
    72     unsigned mRequiredCount;       
     76    unsigned mRequiredCount;
    7377};
    74    
     78
     79class QuietModeEngine : public GrepEngine {
     80public:
     81    QuietModeEngine();
     82};
     83
    7584}
    7685
  • icGREP/icgrep-devel/icgrep/icgrep.cpp

    r5703 r5704  
    191191        case grep::FilesWithMatch:
    192192        case grep::FilesWithoutMatch:
     193            grepEngine = new grep::MatchOnlyEngine(grep::Mode == grep::FilesWithoutMatch); break;
    193194        case grep::QuietMode:
    194             grepEngine = new grep::MatchOnlyEngine(grep::Mode == grep::FilesWithoutMatch); break;
     195            grepEngine = new grep::QuietModeEngine(); break;
    195196    }
    196197               
Note: See TracChangeset for help on using the changeset viewer.