Changeset 5945


Ignore:
Timestamp:
Apr 5, 2018, 9:18:57 PM (14 months ago)
Author:
cameron
Message:

Decouple GrepEngine and the re utilities from command-line flags

Location:
icGREP/icgrep-devel/icgrep
Files:
8 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/grep/grep_engine.cpp

    r5944 r5945  
    66#include <set>
    77#include "grep_engine.h"
    8 #include "grep_interface.h"
    98#include <llvm/IR/Module.h>
    109#include <boost/filesystem.hpp>
     
    4544#include <cc/multiplex_CCs.h>
    4645#include <llvm/Support/raw_ostream.h>
     46#include <util/file_select.h>
    4747#include <util/aligned_allocator.h>
    48 #include <util/file_select.h>
    4948#include <sys/stat.h>
    5049#include <fcntl.h>
     
    5352#include <llvm/Support/CommandLine.h>
    5453#include <llvm/Support/Debug.h>
     54#include <llvm/Support/Casting.h>
    5555#include <sched.h>
    5656
     
    7979extern "C" void finalize_match_wrapper(intptr_t accum_addr, char * buffer_end) {
    8080    reinterpret_cast<MatchAccumulator *>(accum_addr)->finalize_match(buffer_end);
     81}
     82   
     83inline static size_t ceil_log2(const size_t v) {
     84    assert ("log2(0) is undefined!" && v != 0);
     85    return (sizeof(size_t) * CHAR_BIT) - __builtin_clzll(v - 1U);
     86}
     87
     88void SearchableBuffer::addSearchCandidate(char * string_ptr, size_t length) {
     89    if (space_used + length >= allocated_capacity) {
     90        size_t new_capacity = size_t{1} << (ceil_log2(space_used + length + 1));
     91        char * new_buffer = (char *) boost::alignment::aligned_alloc(BUFFER_ALIGNMENT, new_capacity);
     92        memcpy(new_buffer, buffer_base, space_used);
     93        memset(&new_buffer[space_used], 0, new_capacity-space_used);
     94        if (buffer_base != initial_buffer) {
     95            free(buffer_base);
     96        }
     97        buffer_base = new_buffer;
     98        allocated_capacity = new_capacity;
     99    }
     100    memcpy((void * ) &buffer_base[space_used], string_ptr, length);
     101    space_used += length;
     102    buffer_base[space_used] = '\0';
     103    space_used++;
     104    entries++;
     105}
     106
     107SearchableBuffer::SearchableBuffer() :
     108allocated_capacity(INITIAL_CAPACITY), buffer_base(initial_buffer) {
     109    memset(buffer_base, 0, INITIAL_CAPACITY);
     110}
     111
     112SearchableBuffer::~SearchableBuffer() {
     113    if (buffer_base != initial_buffer) {
     114        free(buffer_base);
     115    }
    81116}
    82117
     
    154189
    155190GrepEngine::GrepEngine() :
     191    mSuppressFileMessages(false),
     192    mPreferMMap(true),
     193    mShowFileNames(false),
     194    mStdinLabel("(stdin)"),
     195    mShowLineNumbers(false),
     196    mInitialTab(false),
     197    mCaseInsensitive(false),
     198    mInvertMatches(false),
     199    mMaxCount(0),
    156200    mGrepDriver(nullptr),
    157201    mNextFileToGrep(0),
     
    167211
    168212QuietModeEngine::QuietModeEngine() : GrepEngine() {
     213    mEngineKind = EngineKind::QuietMode;
    169214    mMoveMatchesToEOL = false;
    170 }
    171 
    172 MatchOnlyEngine::MatchOnlyEngine(bool showFilesWithoutMatch) :
     215    mMaxCount = 1;
     216}
     217
     218MatchOnlyEngine::MatchOnlyEngine(bool showFilesWithoutMatch, bool useNullSeparators) :
    173219    GrepEngine(), mRequiredCount(showFilesWithoutMatch) {
    174     mFileSuffix = NullFlag ? std::string("\0", 1) : "\n";
     220    mEngineKind = EngineKind::MatchOnly;
     221    mFileSuffix = useNullSeparators ? std::string("\0", 1) : "\n";
    175222    mMoveMatchesToEOL = false;
     223    mMaxCount = 1;
    176224}
    177225
    178226CountOnlyEngine::CountOnlyEngine() : GrepEngine() {
     227    mEngineKind = EngineKind::CountOnly;
    179228    mFileSuffix = ":";
    180229}
    181230
    182231EmitMatchesEngine::EmitMatchesEngine() : GrepEngine() {
    183     mFileSuffix = InitialTabFlag ? "\t:" : ":";
    184     if (LineRegexpFlag) mMoveMatchesToEOL = false;
     232    mEngineKind = EngineKind::EmitMatches;
     233    mFileSuffix = mInitialTab ? "\t:" : ":";
    185234}
    186235
     
    219268    for(unsigned i = 0; i < mREs.size(); ++i) {
    220269        if (!hasEndAnchor(mREs[i])) allAnchored = false;
    221         mREs[i] = resolveModesAndExternalSymbols(mREs[i]);
     270        mREs[i] = resolveModesAndExternalSymbols(mREs[i], mCaseInsensitive);
    222271        mREs[i] = re::exclude_CC(mREs[i], mBreakCC);
    223272        mREs[i] = resolveAnchors(mREs[i], anchorRE);
     
    245294    const unsigned bufferSegments = codegen::BufferSegments * codegen::ThreadNum;
    246295    // TODO: until we automate stream buffer sizing, use this calculation to determine how large our matches buffer needs to be.
    247     const unsigned baseBufferSize = segmentSize * (MaxCountFlag > 0 ? (std::max(bufferSegments, calculateMaxCountRate(idb))) : bufferSegments);
     296    const unsigned baseBufferSize = segmentSize * (mMaxCount > 0 ? (std::max(bufferSegments, calculateMaxCountRate(idb))) : bufferSegments);
    248297    const unsigned encodingBits = 8;
    249298   
     
    429478        mGrepDriver->makeKernelCall(matchedLinesK, {OriginalMatches, LineBreakStream}, {Matches});
    430479    }
    431     if (InvertMatchFlag) {
     480    if (mInvertMatches) {
    432481        kernel::Kernel * invertK = mGrepDriver->addKernelInstance<kernel::InvertMatchesKernel>(idb);
    433482        StreamSetBuffer * OriginalMatches = Matches;
     
    435484        mGrepDriver->makeKernelCall(invertK, {OriginalMatches, LineBreakStream}, {Matches});
    436485    }
    437     if (MaxCountFlag > 0) {
     486    if (mMaxCount > 0) {
    438487        kernel::Kernel * untilK = mGrepDriver->addKernelInstance<kernel::UntilNkernel>(idb);
    439         untilK->setInitialArguments({idb->getSize(MaxCountFlag)});
     488        untilK->setInitialArguments({idb->getSize(mMaxCount)});
    440489        StreamSetBuffer * const AllMatches = Matches;
    441490        Matches = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(1, 1), baseBufferSize);
     
    495544//
    496545void EmitMatch::accumulate_match (const size_t lineNum, char * line_start, char * line_end) {
    497     if (WithFilenameFlag) {
    498         mResultStr << mLinePrefix;
    499     }
    500     if (LineNumberFlag) {
     546    mResultStr << mLinePrefix;
     547    if (mShowLineNumbers) {
    501548        // Internally line numbers are counted from 0.  For display, adjust
    502549        // the line number so that lines are numbered from 1.
    503         if (InitialTabFlag) {
     550        if (mInitialTab) {
    504551            mResultStr << lineNum+1 << "\t:";
    505552        }
     
    580627    using namespace boost::filesystem;
    581628    path p(fileName);
    582     bool useMMap = argv::MmapFlag;
     629    bool useMMap = mPreferMMap;
    583630    if (p == "-") useMMap = false;
    584631    if (!is_regular_file(p)) useMMap = false;
     
    596643uint64_t CountOnlyEngine::doGrep(const std::string & fileName, const uint32_t fileIdx) {
    597644    uint64_t grepResult = GrepEngine::doGrep(fileName, fileIdx);
    598     if (WithFilenameFlag) mResultStrs[fileIdx] << linePrefix(fileName);
     645    if (mShowFileNames) mResultStrs[fileIdx] << linePrefix(fileName);
    599646    mResultStrs[fileIdx] << grepResult << "\n";
    600647    return grepResult;
     
    602649
    603650std::string GrepEngine::linePrefix(std::string fileName) {
     651    if (!mShowFileNames) return "";
    604652    if (fileName == "-") {
    605         return LabelFlag + mFileSuffix;
     653        return mStdinLabel + mFileSuffix;
    606654    }
    607655    else {
     
    622670    using namespace boost::filesystem;
    623671    path p(fileName);
    624     bool useMMap = argv::MmapFlag;
     672    bool useMMap = mPreferMMap;
    625673    if (p == "-") useMMap = false;
    626674    if (!is_regular_file(p)) useMMap = false;
     
    628676    int32_t fileDescriptor = openFile(fileName, mResultStrs[fileIdx]);
    629677    if (fileDescriptor == -1) return 0;
    630     EmitMatch accum(linePrefix(fileName), mResultStrs[fileIdx]);
     678    EmitMatch accum(linePrefix(fileName), mShowLineNumbers, mInitialTab, mResultStrs[fileIdx]);
    631679    f(useMMap, fileDescriptor, reinterpret_cast<intptr_t>(&accum));
    632680    close(fileDescriptor);
     
    644692        int32_t fileDescriptor = open(fileName.c_str(), O_RDONLY);
    645693        if (LLVM_UNLIKELY(fileDescriptor == -1)) {
    646             if (!NoMessagesFlag) {
     694            if (!mSuppressFileMessages) {
    647695                if (errno == EACCES) {
    648696                    msgstrm << "icgrep: " << fileName << ": Permission denied.\n";
     
    658706        }
    659707        if (stat(fileName.c_str(), &sb) == 0 && S_ISDIR(sb.st_mode)) {
    660             if (!NoMessagesFlag) {
     708            if (!mSuppressFileMessages) {
    661709                msgstrm << "icgrep: " << fileName << ": Is a directory.\n";
    662710            }
     
    703751    unsigned fileIdx = mNextFileToGrep++;
    704752    while (fileIdx < inputFiles.size()) {
     753        if (codegen::DebugOptionIsSet(codegen::TraceCounts)) {
     754            errs() << "Tracing " << inputFiles[fileIdx] << "\n";
     755        }
    705756        const auto grepResult = doGrep(inputFiles[fileIdx], fileIdx);
    706757        mFileStatus[fileIdx] = FileStatus::GrepComplete;
     
    708759            grepMatchFound = true;
    709760        }
    710         if (QuietMode && grepMatchFound) {
     761        if ((mEngineKind == EngineKind::QuietMode) && grepMatchFound) {
    711762            if (pthread_self() != mEngineThread) {
    712763                pthread_exit(nullptr);
  • icGREP/icgrep-devel/icgrep/grep/grep_engine.h

    r5941 r5945  
    3535extern "C" void finalize_match_wrapper(intptr_t accum_addr, char * buffer_end);
    3636
     37   
     38#define MAX_SIMD_WIDTH_SUPPORTED 512
     39#define INITIAL_CAPACITY 1024
     40   
     41class SearchableBuffer  {
     42    SearchableBuffer();
     43    void addSearchCandidate(char * string_ptr, size_t length);
     44    ~SearchableBuffer();
     45private:
     46    static const unsigned BUFFER_ALIGNMENT = MAX_SIMD_WIDTH_SUPPORTED/8;
     47    size_t allocated_capacity;
     48    char * buffer_base;
     49    alignas(BUFFER_ALIGNMENT) char initial_buffer[INITIAL_CAPACITY];
     50    size_t space_used;
     51    size_t entries;
     52};
     53
    3754void grepBuffer(re::RE * pattern, const char * buffer, size_t bufferLength, MatchAccumulator * accum);
    3855
     
    4461    virtual ~GrepEngine();
    4562   
     63    void setPreferMMap() {mPreferMMap = true;}
     64   
     65    void showFileNames() {mShowFileNames = true;}
     66    void setStdinLabel(std::string lbl) {mStdinLabel = lbl;}
     67    void showLineNumbers() {mShowLineNumbers = true;}
     68    void setInitialTab() {mInitialTab = true;}
     69
     70    void setMaxCount(int m) {mMaxCount = m;}
     71    void setInvertMatches() {mInvertMatches = true;}
     72    void setCaseInsensitive()  {mCaseInsensitive = true;}
     73
     74    void suppressFileMessages() {mSuppressFileMessages = true;}
     75
    4676    void setRecordBreak(GrepRecordBreakKind b);
    4777    void initFileResult(std::vector<std::string> & filenames);
     
    5585
    5686    virtual uint64_t doGrep(const std::string & fileName, const uint32_t fileIdx);
    57     std::string linePrefix(std::string fileName);
    5887    int32_t openFile(const std::string & fileName, std::ostringstream & msgstrm);
    5988
     89    enum class EngineKind {QuietMode, MatchOnly, CountOnly, EmitMatches};
     90    EngineKind mEngineKind;
     91   
     92    std::string linePrefix(std::string fileName);
     93
     94    bool mSuppressFileMessages;
     95    bool mPreferMMap;
     96    bool mShowFileNames;
     97    std::string mStdinLabel;
     98    bool mShowLineNumbers;
     99    bool mInitialTab;
     100    bool mCaseInsensitive;
     101    bool mInvertMatches;
     102    int mMaxCount;
     103   
    60104    Driver * mGrepDriver;
    61105
     
    85129    friend class EmitMatchesEngine;
    86130public:
    87     EmitMatch(std::string linePrefix, std::ostringstream & strm) : mLinePrefix(linePrefix), mLineCount(0), mTerminated(true), mResultStr(strm) {}
     131    EmitMatch(std::string linePrefix, bool showLineNumbers, bool initialTab, std::ostringstream & strm) : mLinePrefix(linePrefix),
     132        mShowLineNumbers(showLineNumbers),
     133        mInitialTab(initialTab),
     134        mLineCount(0),
     135        mTerminated(true),
     136        mResultStr(strm) {}
    88137    void accumulate_match(const size_t lineNum, char * line_start, char * line_end) override;
    89138    void finalize_match(char * buffer_end) override;
    90139protected:
    91140    std::string mLinePrefix;
     141    bool mShowLineNumbers;
     142    bool mInitialTab;
    92143    size_t mLineCount;
    93144    bool mTerminated;
     
    112163class MatchOnlyEngine : public GrepEngine {
    113164public:
    114     MatchOnlyEngine(bool showFilesWithoutMatch);
     165    MatchOnlyEngine(bool showFilesWithoutMatch, bool useNullSeparators);
    115166private:
    116167    uint64_t doGrep(const std::string & fileName, const uint32_t fileIdx) override;
  • icGREP/icgrep-devel/icgrep/grep_interface.cpp

    r5944 r5945  
    2222using namespace llvm;
    2323
    24 namespace grep {
     24namespace argv {
    2525
    2626/*
  • icGREP/icgrep-devel/icgrep/grep_interface.h

    r5944 r5945  
    2020
    2121
    22 namespace grep {
     22namespace argv {
    2323   
    2424
  • icGREP/icgrep-devel/icgrep/icgrep.cpp

    r5944 r5945  
    4040std::vector<re::RE *> readExpressions() {
    4141 
    42     if (grep::FileFlag != "") {
    43         std::ifstream regexFile(grep::FileFlag.c_str());
     42    if (argv::FileFlag != "") {
     43        std::ifstream regexFile(argv::FileFlag.c_str());
    4444        std::string r;
    4545        if (regexFile.is_open()) {
    4646            while (std::getline(regexFile, r)) {
    47                 grep::RegexpVector.push_back(r);
     47                argv::RegexpVector.push_back(r);
    4848            }
    4949            regexFile.close();
     
    5454    // must be a regex, not an input file.
    5555   
    56     if (grep::RegexpVector.size() == 0) {
    57         grep::RegexpVector.push_back(inputFiles[0]);
     56    if (argv::RegexpVector.size() == 0) {
     57        argv::RegexpVector.push_back(inputFiles[0]);
    5858        inputFiles.erase(inputFiles.begin());
    5959    }
    60     if (grep::IgnoreCaseFlag) {
     60    if (argv::IgnoreCaseFlag) {
    6161        globalFlags |= re::CASE_INSENSITIVE_MODE_FLAG;
    6262    }
    6363
    6464    std::vector<re::RE *> REs;
    65     for (unsigned i = 0; i < grep::RegexpVector.size(); i++) {
    66         re::RE * re_ast = re::RE_Parser::parse(grep::RegexpVector[i], globalFlags, grep::RegexpSyntax, ByteMode);
     65    for (unsigned i = 0; i < argv::RegexpVector.size(); i++) {
     66        re::RE * re_ast = re::RE_Parser::parse(argv::RegexpVector[i], globalFlags, argv::RegexpSyntax, ByteMode);
    6767        REs.push_back(re_ast);
    6868    }
     
    9494    for (re::RE *& re_ast : REs) {
    9595        assert (re_ast);
    96         if (grep::WordRegexpFlag) {
     96        if (argv::WordRegexpFlag) {
    9797            re_ast = re::makeSeq({re::makeWordBoundary(), re_ast, re::makeWordBoundary()});
    9898        }
    99         if (grep::LineRegexpFlag) {
     99        if (argv::LineRegexpFlag) {
    100100            re_ast = re::makeSeq({re::makeStart(), re_ast, re::makeEnd()});
    101101        }
     
    108108int main(int argc, char *argv[]) {
    109109
    110     grep::InitializeCommandLineInterface(argc, argv);
     110    argv::InitializeCommandLineInterface(argc, argv);
    111111   
    112112    auto REs = readExpressions();
     
    116116        allFiles = { "-" };
    117117    }
    118     else if ((allFiles.size() > 1) && !grep::NoFilenameFlag) {
    119         grep::WithFilenameFlag = true;
     118    else if ((allFiles.size() > 1) && !argv::NoFilenameFlag) {
     119        argv::WithFilenameFlag = true;
    120120    }
    121121
    122122    grep::GrepEngine * grepEngine = nullptr;
    123123   
    124     switch (grep::Mode) {
    125         case grep::NormalMode:
    126             grepEngine = new grep::EmitMatchesEngine(); break;
    127         case grep::CountOnly:
    128             grepEngine = new grep::CountOnlyEngine(); break;
    129         case grep::FilesWithMatch:
    130         case grep::FilesWithoutMatch:
    131             grepEngine = new grep::MatchOnlyEngine(grep::Mode == grep::FilesWithoutMatch); break;
    132         case grep::QuietMode:
     124    switch (argv::Mode) {
     125        case argv::NormalMode:
     126            grepEngine = new grep::EmitMatchesEngine();
     127            if (argv::MaxCountFlag) grepEngine->setMaxCount(argv::MaxCountFlag);
     128            if (argv::WithFilenameFlag) grepEngine->showFileNames();
     129            if (argv::LineNumberFlag) grepEngine->showLineNumbers();
     130            if (argv::InitialTabFlag) grepEngine->setInitialTab();
     131           break;
     132        case argv::CountOnly:
     133            grepEngine = new grep::CountOnlyEngine();
     134            if (argv::WithFilenameFlag) grepEngine->showFileNames();
     135            if (argv::MaxCountFlag) grepEngine->setMaxCount(argv::MaxCountFlag);
     136           break;
     137        case argv::FilesWithMatch:
     138        case argv::FilesWithoutMatch:
     139            grepEngine = new grep::MatchOnlyEngine(argv::Mode == argv::FilesWithoutMatch, argv::NullFlag);
     140            break;
     141        case argv::QuietMode:
    133142            grepEngine = new grep::QuietModeEngine(); break;
    134143        default: llvm_unreachable("Invalid grep mode!");
    135144    }
    136                
    137     if (grep::UnicodeLinesFlag) {
     145    if (argv::IgnoreCaseFlag) grepEngine->setCaseInsensitive();
     146    if (argv::InvertMatchFlag) grepEngine->setInvertMatches();
     147    if (argv::UnicodeLinesFlag) {
    138148        grepEngine->setRecordBreak(grep::GrepRecordBreakKind::Unicode);
    139     } else if (grep::NullDataFlag) {
     149    } else if (argv::NullDataFlag) {
    140150        grepEngine->setRecordBreak(grep::GrepRecordBreakKind::Null);
    141151    } else {
    142152        grepEngine->setRecordBreak(grep::GrepRecordBreakKind::LF);
    143153    }
     154    grepEngine->setStdinLabel(argv::LabelFlag);
     155    if (argv::NoMessagesFlag) grepEngine->suppressFileMessages();
     156    if (argv::MmapFlag) grepEngine->setPreferMMap();
    144157    grepEngine->initREs(REs);
    145158    grepEngine->grepCodeGen();
     
    148161    delete(grepEngine);
    149162   
    150     return matchFound ? grep::MatchFoundExitCode : grep::MatchNotFoundExitCode;
     163    return matchFound ? argv::MatchFoundExitCode : argv::MatchNotFoundExitCode;
    151164}
  • icGREP/icgrep-devel/icgrep/re/re_toolchain.cpp

    r5835 r5945  
    6262                         cl::cat(RegexOptions));
    6363
    64 RE * resolveModesAndExternalSymbols(RE * r) {
     64RE * resolveModesAndExternalSymbols(RE * r, bool globallyCaseInsensitive) {
    6565    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowREs)) {
    6666        errs() << "Parser:\n" << Printer_RE::PrintRE(r) << '\n';
     
    7474        errs() << "resolveUnicodeProperties:\n" << Printer_RE::PrintRE(r) << '\n';
    7575    }
    76     r = resolveCaseInsensitiveMode(r, grep::IgnoreCaseFlag);
     76    r = resolveCaseInsensitiveMode(r, globallyCaseInsensitive);
    7777    if (PrintOptions.isSet(ShowAllREs)) {
    7878        errs() << "resolveCaseInsensitiveMode:\n" << Printer_RE::PrintRE(r) << '\n';
  • icGREP/icgrep-devel/icgrep/re/re_toolchain.h

    r5835 r5945  
    11/*
    2  *  Copyright (c) 2016 International Characters.
     2 *  Copyright (c) 2018 International Characters.
    33 *  This software is licensed to the public under the Open Software License 3.0.
    44 *  icgrep is a trademark of International Characters.
     
    2929const llvm::cl::OptionCategory * LLVM_READONLY re_toolchain_flags();
    3030
    31 RE * resolveModesAndExternalSymbols(RE * r);
     31RE * resolveModesAndExternalSymbols(RE * r, bool globallyCaseInsensitive = false);
    3232
    3333RE * excludeUnicodeLineBreak(RE * r);
  • icGREP/icgrep-devel/icgrep/util/file_select.cpp

    r5944 r5945  
    1515#include <re/re_alt.h>
    1616#include <re/re_toolchain.h>
     17#include <grep/grep_engine.h>
    1718#include <fstream>
    1819#include <string>
Note: See TracChangeset for help on using the changeset viewer.