Changeset 5700


Ignore:
Timestamp:
Oct 20, 2017, 12:40:18 PM (18 months ago)
Author:
cameron
Message:

Refactoring of grep_engine based on mode

Location:
icGREP/icgrep-devel/icgrep
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5698 r5700  
    2828#include <re/re_collect_unicodesets.h>
    2929#include <re/re_multiplex.h>
     30#include <toolchain/toolchain.h>
    3031#include <toolchain/cpudriver.h>
    31 #include <toolchain/NVPTXDriver.h>
    3232#include <iostream>
    3333#include <cc/multiplex_CCs.h>
     
    3838#include <errno.h>
    3939#include <llvm/ADT/STLExtras.h> // for make_unique
     40#include <llvm/Support/CommandLine.h>
    4041
    4142using namespace parabix;
    4243using namespace llvm;
     44static cl::opt<int> Threads("t", cl::desc("Total number of threads."), cl::init(1));
     45
    4346
    4447namespace grep {
     
    4649
    4750// DoGrep thread function.
    48 void *DoGrepThreadFunction(void *args)
    49 {
     51void *GrepEngine::DoGrepThreadFunction(void *args) {
    5052    size_t fileIdx;
    5153    grep::GrepEngine * grepEngine = (grep::GrepEngine *)args;
     
    6668        if (QuietMode && grepEngine->grepMatchFound) pthread_exit(nullptr);
    6769    }
    68 
    6970    pthread_exit(nullptr);
    7071}
    7172   
    72     //
    73     //  Default Report Match:  lines are emitted with whatever line terminators are found in the
    74     //  input.  However, if the final line is not terminated, a new line is appended.
    75    
    76     void NonNormalizingReportMatch::accumulate_match (const size_t lineNum, char * line_start, char * line_end) {
    77         if (!(WithFilenameFlag | LineNumberFlag) && (line_start == mPrevious_line_end + 1)) {
    78             // Consecutive matches: only one write call needed.
    79             mResultStr.write(mPrevious_line_end, line_end - mPrevious_line_end);
    80         }
    81         else {
    82             if (mLineCount > 0) {
    83                 // deal with the final byte of the previous line.
    84                 mResultStr.write(mPrevious_line_end, 1);
    85             }
    86             if (WithFilenameFlag) {
    87                 mResultStr << mLinePrefix;
    88             }
    89             if (LineNumberFlag) {
    90                 // Internally line numbers are counted from 0.  For display, adjust
    91                 // the line number so that lines are numbered from 1.
    92                 if (InitialTabFlag) {
    93                     mResultStr << lineNum+1 << "\t:";
    94                 }
    95                 else {
    96                     mResultStr << lineNum+1 << ":";
    97                 }
    98             }
    99             mResultStr.write(line_start, line_end - line_start);
    100         }
    101         mPrevious_line_end = line_end;
    102         mLineCount++;
    103     }
    104    
    105     void NonNormalizingReportMatch::finalize_match(char * buffer_end) {
    106         if (mLineCount == 0) return;  // No matches.
    107         if (mPrevious_line_end < buffer_end) {
    108             mResultStr.write(mPrevious_line_end, 1);
    109         }
    110         else {
    111             // Likely unterminated final line.
    112             char last_byte = mPrevious_line_end[-1];
    113             if (last_byte == 0x0D) {
    114                 // The final CR is acceptable as a line_end.
    115                 return;
    116             }
    117             // Terminate the line with an LF
    118             // (Even if we had an incomplete UTF-8 sequence.)
    119             mResultStr << "\n";
    120         }
    121     }
    122    
    123    
     73void GrepEngine::run() {
     74   
     75    if (Threads <= 1) {
     76        for (unsigned i = 0; i != inputFiles.size(); ++i) {
     77            size_t grepResult = doGrep(inputFiles[i], i);
     78            if (grepResult > 0) grepMatchFound = true;
     79        }
     80    } else if (Threads > 1) {
     81        const unsigned numOfThreads = Threads; // <- convert the command line value into an integer to allow stack allocation
     82        pthread_t threads[numOfThreads];
     83       
     84        for(unsigned long i = 0; i < numOfThreads; ++i){
     85            const int rc = pthread_create(&threads[i], nullptr, DoGrepThreadFunction, (void *)this);
     86            if (rc) {
     87                llvm::report_fatal_error("Failed to create thread: code " + std::to_string(rc));
     88            }
     89        }
     90        for(unsigned i = 0; i < numOfThreads; ++i) {
     91            void * status = nullptr;
     92            const int rc = pthread_join(threads[i], &status);
     93            if (rc) {
     94                llvm::report_fatal_error("Failed to join thread: code " + std::to_string(rc));
     95            }
     96        }
     97    }
     98}
     99       
     100//
     101//  Default Report Match:  lines are emitted with whatever line terminators are found in the
     102//  input.  However, if the final line is not terminated, a new line is appended.
     103
     104
     105class EmitMatch : public MatchAccumulator {
     106public:
     107    EmitMatch(std::string linePrefix, std::stringstream * strm) : mLinePrefix(linePrefix), mLineCount(0), mPrevious_line_end(nullptr), mResultStr(strm) {}
     108    void accumulate_match(const size_t lineNum, char * line_start, char * line_end) override;
     109    void finalize_match(char * buffer_end) override;
     110    std::string mLinePrefix;
     111    size_t mLineCount;
     112    char * mPrevious_line_end;
     113    std::stringstream* mResultStr;
     114   
     115};
     116
     117
     118void EmitMatch::accumulate_match (const size_t lineNum, char * line_start, char * line_end) {
     119    if (!(WithFilenameFlag | LineNumberFlag) && (line_start == mPrevious_line_end + 1)) {
     120        // Consecutive matches: only one write call needed.
     121        mResultStr->write(mPrevious_line_end, line_end - mPrevious_line_end);
     122    }
     123    else {
     124        if (mLineCount > 0) {
     125            // deal with the final byte of the previous line.
     126            mResultStr->write(mPrevious_line_end, 1);
     127        }
     128        if (WithFilenameFlag) {
     129            *mResultStr << mLinePrefix;
     130        }
     131        if (LineNumberFlag) {
     132            // Internally line numbers are counted from 0.  For display, adjust
     133            // the line number so that lines are numbered from 1.
     134            if (InitialTabFlag) {
     135                *mResultStr << lineNum+1 << "\t:";
     136            }
     137            else {
     138                *mResultStr << lineNum+1 << ":";
     139            }
     140        }
     141        mResultStr->write(line_start, line_end - line_start);
     142    }
     143    mPrevious_line_end = line_end;
     144    mLineCount++;
     145}
     146
     147void EmitMatch::finalize_match(char * buffer_end) {
     148    if (mLineCount == 0) return;  // No matches.
     149    if (mPrevious_line_end < buffer_end) {
     150        mResultStr->write(mPrevious_line_end, 1);
     151    }
     152    else {
     153        // Likely unterminated final line.
     154        char last_byte = mPrevious_line_end[-1];
     155        if (last_byte == 0x0D) {
     156            // The final CR is acceptable as a line_end.
     157            return;
     158        }
     159        // Terminate the line with an LF
     160        // (Even if we had an incomplete UTF-8 sequence.)
     161        *mResultStr << "\n";
     162    }
     163}
     164
     165
    124166
    125167bool matchesNeedToBeMovedToEOL() {
     
    133175    return true;
    134176}
    135    
     177
     178   
     179int32_t openFile(const std::string & fileName, std::stringstream & msgstrm) {
     180    if (fileName == "-") {
     181        return STDIN_FILENO;
     182    }
     183    else {
     184        struct stat sb;
     185        int32_t fileDescriptor = open(fileName.c_str(), O_RDONLY);
     186        if (LLVM_UNLIKELY(fileDescriptor == -1)) {
     187            if (!NoMessagesFlag) {
     188                if (errno == EACCES) {
     189                    msgstrm << "icgrep: " << fileName << ": Permission denied.\n";
     190                }
     191                else if (errno == ENOENT) {
     192                    msgstrm << "icgrep: " << fileName << ": No such file.\n";
     193                }
     194                else {
     195                    msgstrm << "icgrep: " << fileName << ": Failed.\n";
     196                }
     197            }
     198            return fileDescriptor;
     199        }
     200        if (stat(fileName.c_str(), &sb) == 0 && S_ISDIR(sb.st_mode)) {
     201            if (!NoMessagesFlag) {
     202                msgstrm << "icgrep: " << fileName << ": Is a directory.\n";
     203            }
     204            close(fileDescriptor);
     205            return -1; 
     206        }
     207        return fileDescriptor;
     208    }
     209}
     210
     211std::string GrepEngine::linePrefix(std::string fileName) {
     212    if (fileName == "-") {
     213        return LabelFlag + mFileSuffix;
     214    }
     215    else {
     216        return fileName + mFileSuffix;
     217    }
     218}
     219
    136220uint64_t GrepEngine::doGrep(const std::string & fileName, const uint32_t fileIdx) {
    137     if (fileName == "-") {
    138         return doGrep(STDIN_FILENO, fileIdx);
    139     }
    140     struct stat sb;
    141     const int32_t fd = open(fileName.c_str(), O_RDONLY);
    142     if (LLVM_UNLIKELY(fd == -1)) {
    143         if (!NoMessagesFlag) {
    144             if (errno == EACCES) {
    145                 resultAccums[fileIdx]->mResultStr << "icgrep: " << fileName << ": Permission denied.\n";
    146             }
    147             else if (errno == ENOENT) {
    148                 resultAccums[fileIdx]->mResultStr << "icgrep: " << fileName << ": No such file.\n";
    149             }
    150             else {
    151                 resultAccums[fileIdx]->mResultStr << "icgrep: " << fileName << ": Failed.\n";
    152             }
    153         }
    154         return 0;
    155     }
    156     if (stat(fileName.c_str(), &sb) == 0 && S_ISDIR(sb.st_mode)) {
    157         if (!NoMessagesFlag) {
    158             resultAccums[fileIdx]->mResultStr << "icgrep: " << fileName << ": Is a directory.\n";
    159         }
    160         close(fd);
    161         return 0;
    162     }
    163     const auto result = doGrep(fd, fileIdx);
    164     close(fd);
    165     return result;
    166 }
    167 
    168 uint64_t GrepEngine::doGrep(const int32_t fileDescriptor, const uint32_t fileIdx) {
     221    int32_t fileDescriptor = openFile(fileName, mResultStrs[fileIdx]);
     222   
     223    if (fileDescriptor == -1) return 0;
     224   
     225    EmitMatch accum(linePrefix(fileName), &mResultStrs[fileIdx]);
     226   
    169227    typedef uint64_t (*GrepFunctionType)(int32_t fileDescriptor, intptr_t accum_addr);
    170228    auto f = reinterpret_cast<GrepFunctionType>(mGrepDriver->getMain());
    171229   
    172     uint64_t grepResult = f(fileDescriptor, reinterpret_cast<intptr_t>(resultAccums[fileIdx].get()));
    173     if (grepResult > 0) grepMatchFound = true;
    174     else if ((Mode == NormalMode) && !resultAccums[fileIdx]->mResultStr.str().empty()) grepMatchFound = true;
    175    
    176     if (Mode == CountOnly) {
    177         resultAccums[fileIdx]->mResultStr << resultAccums[fileIdx]->mLinePrefix << grepResult << "\n";
    178     }
    179     else if (Mode == FilesWithMatch || Mode == FilesWithoutMatch ) {
    180         size_t requiredCount = Mode == FilesWithMatch ? 1 : 0;
    181         if (grepResult == requiredCount) {
    182             resultAccums[fileIdx]->mResultStr << resultAccums[fileIdx]->mLinePrefix;
    183         }
    184     }
    185     else if (Mode == QuietMode) {
    186         if (grepMatchFound) exit(MatchFoundExitCode);
     230    uint64_t grepResult = f(fileDescriptor, reinterpret_cast<intptr_t>(&accum));
     231    close(fileDescriptor);
     232    if (accum.mLineCount > 0) grepMatchFound = true;
     233    return grepResult;
     234}
     235
     236uint64_t CountOnlyGrepEngine::doGrep(const std::string & fileName, const uint32_t fileIdx) {
     237    int32_t fileDescriptor = openFile(fileName, mResultStrs[fileIdx]);
     238   
     239    if (fileDescriptor == -1) return 0;
     240   
     241    typedef uint64_t (*GrepFunctionType)(int32_t fileDescriptor);
     242    auto f = reinterpret_cast<GrepFunctionType>(mGrepDriver->getMain());
     243   
     244    uint64_t grepResult = f(fileDescriptor);
     245    close(fileDescriptor);
     246   
     247    if (WithFilenameFlag) mResultStrs[fileIdx] << linePrefix(fileName);
     248    mResultStrs[fileIdx] << grepResult << "\n";
     249    return grepResult;
     250}
     251
     252uint64_t MatchOnlyGrepEngine::doGrep(const std::string & fileName, const uint32_t fileIdx) {
     253    int32_t fileDescriptor = openFile(fileName, mResultStrs[fileIdx]);
     254   
     255    if (fileDescriptor == -1) return 0;
     256   
     257    typedef uint64_t (*GrepFunctionType)(int32_t fileDescriptor);
     258    auto f = reinterpret_cast<GrepFunctionType>(mGrepDriver->getMain());
     259   
     260    uint64_t grepResult = f(fileDescriptor);
     261    close(fileDescriptor);
     262   
     263    if (QuietMode) {
     264        if (grepResult > 0) exit(MatchFoundExitCode);
     265    }
     266    else {
     267        if (grepResult == mRequiredCount) {
     268            mResultStrs[fileIdx] << linePrefix(fileName);
     269        }
    187270    }
    188271    return grepResult;
     
    192275    grepMatchFound = false;
    193276    const int n = filenames.size();
    194     if ((n > 1) && !NoFilenameFlag) {
    195         WithFilenameFlag = true;
    196     }
    197     std::string fileSuffix = "";
    198     bool setLinePrefix = WithFilenameFlag || (Mode == FilesWithMatch) || (Mode == FilesWithoutMatch);
    199     if (setLinePrefix) {
    200         if (NullFlag) {
    201             fileSuffix = std::string("\0", 1);
    202         }
    203         else if ((Mode == NormalMode) && InitialTabFlag && !(LineNumberFlag || ByteOffsetFlag)) {
    204             fileSuffix = "\t:";
    205         }
    206         else if ((Mode == NormalMode) || (Mode == CountOnly)) {
    207             fileSuffix = ":";
    208         }
    209         else if ((Mode == FilesWithMatch) || (Mode == FilesWithoutMatch)) {
    210             fileSuffix = "\n";
    211         }
    212     }
     277    mResultStrs.resize(n);
    213278    inputFiles = filenames;
    214     for (unsigned i = 0; i < inputFiles.size(); ++i) {
    215         std::string linePrefix;
    216         if (setLinePrefix) {
    217             if (inputFiles[i] == "-") {
    218                 linePrefix = LabelFlag + fileSuffix;
    219             }
    220             else {
    221                 linePrefix = inputFiles[i] + fileSuffix;
    222             }
    223         }
    224         resultAccums.push_back(make_unique<NonNormalizingReportMatch>(linePrefix));
    225     }
    226 }
    227 
    228 
     279}
     280
     281   
    229282void GrepEngine::PrintResults(){
    230283   
    231284    for (unsigned i = 0; i < inputFiles.size(); ++i){
    232         std::cout << resultAccums[i]->mResultStr.str();
     285        std::cout << mResultStrs[i].str();
    233286    }
    234287    exit(grepMatchFound ? MatchFoundExitCode : MatchNotFoundExitCode);
    235288}
    236289
     290   
     291   
    237292   
    238293std::pair<StreamSetBuffer *, StreamSetBuffer *> grepPipeline(Driver * grepDriver, std::vector<re::RE *> & REs, StreamSetBuffer * ByteStream) {
     
    362417        const unsigned encodingBits = 8;
    363418       
    364         Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", idb->getInt64Ty(), idb->getInt32Ty(), idb->getIntAddrTy(), nullptr));
     419        Function * mainFunc = cast<Function>(M->getOrInsertFunction("Main", idb->getInt64Ty(), idb->getInt32Ty(), nullptr));
    365420        mainFunc->setCallingConv(CallingConv::C);
    366421        idb->SetInsertPoint(BasicBlock::Create(M->getContext(), "entry", mainFunc, 0));
     
    369424        Value * const fileDescriptor = &*(args++);
    370425        fileDescriptor->setName("fileDescriptor");
    371         Value * match_accumulator = &*(args++);
    372         match_accumulator->setName("match_accumulator");
    373426       
    374427        StreamSetBuffer * ByteStream = mGrepDriver->addBuffer(make_unique<SourceBuffer>(idb, idb->getStreamSetTy(1, encodingBits)));
  • icGREP/icgrep-devel/icgrep/grep_engine.h

    r5698 r5700  
    2121namespace grep {
    2222
    23 // Thread function only.
    24 void *DoGrepThreadFunction(void *args);
    25    
    26 
    27 class NonNormalizingReportMatch : public MatchAccumulator {
    28 public:
    29     NonNormalizingReportMatch(std::string linePrefix) : mLinePrefix(linePrefix), mLineCount(0), mPrevious_line_end(nullptr) {}
    30     void accumulate_match(const size_t lineNum, char * line_start, char * line_end) override;
    31     void finalize_match(char * buffer_end) override;
    32     std::string mLinePrefix;
    33     size_t mLineCount;
    34     char * mPrevious_line_end;
    35     std::stringstream mResultStr;
    36    
    37 };
    38 
    39 
    4023class GrepEngine {
    4124public:
    4225
    43     GrepEngine() : mGrepDriver(nullptr), grepMatchFound(false), fileCount(0) {}
    44 
     26    GrepEngine() : mGrepDriver(nullptr), mFileSuffix(InitialTabFlag ? "\t:" : ":"), grepMatchFound(false), fileCount(0) {}
    4527
    4628    virtual ~GrepEngine();
     
    5032    virtual void grepCodeGen(std::vector<re::RE *> REs);
    5133
    52     uint64_t doGrep(const std::string & fileName, const uint32_t fileIdx);
    53 
    54     uint64_t doGrep(const int32_t fileDescriptor, const uint32_t fileIdx);
     34    void run();
    5535   
    5636    void PrintResults();
    5737   
    5838   
     39   
     40protected:
     41    static void * DoGrepThreadFunction(void *args);
     42    virtual uint64_t doGrep(const std::string & fileName, const uint32_t fileIdx);
     43    std::string linePrefix(std::string fileName);
     44   
     45    Driver * mGrepDriver;
    5946
    60     Driver * mGrepDriver;
     47    std::vector<std::string> inputFiles;
     48    std::vector<std::stringstream> mResultStrs;
     49   
     50    std::string mFileSuffix;
     51   
    6152    bool grepMatchFound;
    62 
    63     std::vector<std::unique_ptr<NonNormalizingReportMatch>> resultAccums;
    64     std::vector<std::string> inputFiles;
    65    
    6653    std::mutex count_mutex;
    6754    size_t fileCount;
    68    
    6955};
    7056
     
    7258public:
    7359   
    74     CountOnlyGrepEngine() : GrepEngine() {}
     60    CountOnlyGrepEngine() : GrepEngine() {mFileSuffix = ":";}
    7561    void grepCodeGen(std::vector<re::RE *> REs) override;
     62private:
     63    uint64_t doGrep(const std::string & fileName, const uint32_t fileIdx) override;
    7664   
    7765};
    7866
     67class MatchOnlyGrepEngine : public CountOnlyGrepEngine {
     68public:
     69   
     70    MatchOnlyGrepEngine() : CountOnlyGrepEngine(), mRequiredCount(Mode != FilesWithoutMatch) {mFileSuffix = NullFlag ? std::string("\0", 1) : "\n";}
     71private:
     72    uint64_t doGrep(const std::string & fileName, const uint32_t fileIdx) override;
     73    unsigned mRequiredCount;       
     74};
     75   
    7976}
    8077
  • icGREP/icgrep-devel/icgrep/icgrep.cpp

    r5698 r5700  
    3737static cl::list<std::string> inputFiles(cl::Positional, cl::desc("<regex> <input file ...>"), cl::OneOrMore);
    3838
    39 static cl::opt<int> Threads("t", cl::desc("Total number of threads."), cl::init(1));
    40 
    4139static cl::opt<bool> ByteMode("enable-byte-mode", cl::desc("Process regular expressions in byte mode"));
    4240
     
    4442static cl::opt<int> REsPerGroup("re-num", cl::desc("Number of regular expressions processed by each kernel."), cl::init(1));
    4543
    46 static std::vector<std::string> allFiles;
    4744static re::ModeFlagSet globalFlags = 0;
    4845
     
    177174    const auto REs = readExpressions();
    178175
    179     allFiles = getFullFileList(inputFiles);
     176    std::vector<std::string> allFiles = getFullFileList(inputFiles);
    180177    if (allFiles.empty()) {
    181178        allFiles = { "-" };
     
    187184    grep::GrepEngine * grepEngine;
    188185   
    189     if (grep::Mode == grep::NormalMode) {
    190         grepEngine = new grep::GrepEngine();
    191     }
    192     else {
    193         grepEngine = new grep::CountOnlyGrepEngine();
    194     }
    195 
     186    switch (grep::Mode) {
     187        case grep::NormalMode:
     188            grepEngine = new grep::GrepEngine(); break;
     189        case grep::CountOnly:
     190            grepEngine = new grep::CountOnlyGrepEngine(); break;
     191        case grep::FilesWithMatch:
     192        case grep::FilesWithoutMatch:
     193        case grep::QuietMode:
     194            grepEngine = new grep::MatchOnlyGrepEngine(); break;
     195    }
    196196               
    197197    grepEngine->grepCodeGen(REs);
    198198
    199199    grepEngine->initFileResult(allFiles);
    200 
    201     if (Threads <= 1) {
    202         for (unsigned i = 0; i != allFiles.size(); ++i) {
    203             grepEngine->doGrep(allFiles[i], i);
    204         }
    205     } else if (Threads > 1) {
    206         const unsigned numOfThreads = Threads; // <- convert the command line value into an integer to allow stack allocation
    207         pthread_t threads[numOfThreads];
    208 
    209         for(unsigned long i = 0; i < numOfThreads; ++i){
    210             const int rc = pthread_create(&threads[i], nullptr, grep::DoGrepThreadFunction, (void *)grepEngine);
    211             if (rc) {
    212                 llvm::report_fatal_error("Failed to create thread: code " + std::to_string(rc));
    213             }
    214         }
    215         for(unsigned i = 0; i < numOfThreads; ++i) {
    216             void * status = nullptr;
    217             const int rc = pthread_join(threads[i], &status);
    218             if (rc) {
    219                 llvm::report_fatal_error("Failed to join thread: code " + std::to_string(rc));
    220             }
    221         }
    222     }
    223 
     200   
     201    grepEngine->run();
    224202   
    225203    grepEngine->PrintResults();
Note: See TracChangeset for help on using the changeset viewer.