source: icGREP/icgrep-devel/icgrep/grep/grep_engine.h @ 5945

Last change on this file since 5945 was 5945, checked in by cameron, 14 months ago

Decouple GrepEngine and the re utilities from command-line flags

File size: 5.1 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6#ifndef GREP_ENGINE_H
7#define GREP_ENGINE_H
#include <grep_interface.h>
#include <kernels/streamset.h>
#include <cc/multiplex_CCs.h>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <set>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
15
// Forward declarations of types that this header only names through pointers.
// re::Name is listed because GrepEngine stores a std::set<re::Name *>; declaring
// it here keeps the header from depending on a transitive include for that name.
namespace re { class CC; }
namespace re { class Name; }
namespace re { class RE; }
namespace llvm { namespace cl { class OptionCategory; } }
class Driver;
20
21
22namespace grep {
23   
// Kind of record break used to delimit records; passed to
// GrepEngine::setRecordBreak().
// NOTE(review): presumably Null = NUL-separated records, LF = line-feed
// separated lines, Unicode = Unicode line breaks — confirm against the
// implementation.
enum class GrepRecordBreakKind {
    Null,
    LF,
    Unicode
};
25
// Abstract callback interface through which matches are reported.
//
// Implementations must override accumulate_match(); finalize_match() is an
// optional hook whose default does nothing.
class MatchAccumulator {
public:
    MatchAccumulator() = default;
    // Polymorphic base: the destructor must be virtual so that destroying a
    // derived accumulator through a MatchAccumulator pointer is well defined.
    virtual ~MatchAccumulator() = default;
    // Reports one match: its line number and the [line_start, line_end] pointers
    // delimiting the matched line.
    virtual void accumulate_match(const size_t lineNum, char * line_start, char * line_end) = 0;
    // Hook receiving the end-of-buffer pointer.  Default: no op.
    virtual void finalize_match(char * /* buffer_end */) {}
};
32
// C-linkage entry points mirroring MatchAccumulator::accumulate_match() and
// MatchAccumulator::finalize_match().
// NOTE(review): presumably accum_addr carries the address of a MatchAccumulator
// and the call is forwarded to it — confirm against the definitions.
extern "C" {

void accumulate_match_wrapper(intptr_t accum_addr, size_t lineNum, char * line_start, char * line_end);

void finalize_match_wrapper(intptr_t accum_addr, char * buffer_end);

}
36
37   
#define MAX_SIMD_WIDTH_SUPPORTED 512
#define INITIAL_CAPACITY 1024

// Buffer to which search candidates (pointer + length) are added.
//
// The object embeds an INITIAL_CAPACITY-byte array aligned to BUFFER_ALIGNMENT
// (MAX_SIMD_WIDTH_SUPPORTED bits expressed in bytes, i.e. 64).
// NOTE(review): buffer_base presumably points either at initial_buffer or at a
// larger allocation released by the destructor; if so, copying an instance
// would alias that storage — consider deleting the copy operations once the
// implementation has been checked.
class SearchableBuffer  {
public:
    // The constructor, destructor and addSearchCandidate() must be public:
    // under the class's default (private) access no caller could create, use
    // or destroy a SearchableBuffer.
    SearchableBuffer();
    // Adds the candidate of `length` bytes starting at `string_ptr`.
    void addSearchCandidate(char * string_ptr, size_t length);
    ~SearchableBuffer();
private:
    // Alignment in bytes: widest supported SIMD register width (bits) / 8.
    static const unsigned BUFFER_ALIGNMENT = MAX_SIMD_WIDTH_SUPPORTED/8;
    size_t allocated_capacity;
    char * buffer_base;
    // In-object storage, aligned for SIMD access.
    alignas(BUFFER_ALIGNMENT) char initial_buffer[INITIAL_CAPACITY];
    size_t space_used;
    size_t entries;
};
53
54void grepBuffer(re::RE * pattern, const char * buffer, size_t bufferLength, MatchAccumulator * accum);
55
56class GrepEngine {
57    enum class FileStatus {Pending, GrepComplete, PrintComplete};
58public:
59
60    GrepEngine();
61    virtual ~GrepEngine();
62   
63    void setPreferMMap() {mPreferMMap = true;}
64   
65    void showFileNames() {mShowFileNames = true;}
66    void setStdinLabel(std::string lbl) {mStdinLabel = lbl;}
67    void showLineNumbers() {mShowLineNumbers = true;}
68    void setInitialTab() {mInitialTab = true;}
69
70    void setMaxCount(int m) {mMaxCount = m;}
71    void setInvertMatches() {mInvertMatches = true;}
72    void setCaseInsensitive()  {mCaseInsensitive = true;}
73
74    void suppressFileMessages() {mSuppressFileMessages = true;}
75
76    void setRecordBreak(GrepRecordBreakKind b);
77    void initFileResult(std::vector<std::string> & filenames);
78    void initREs(std::vector<re::RE *> & REs);
79    virtual void grepCodeGen();
80    bool searchAllFiles();
81    void * DoGrepThreadMethod();
82
83protected:
84    std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> grepPipeline(parabix::StreamSetBuffer * ByteStream);
85
86    virtual uint64_t doGrep(const std::string & fileName, const uint32_t fileIdx);
87    int32_t openFile(const std::string & fileName, std::ostringstream & msgstrm);
88
89    enum class EngineKind {QuietMode, MatchOnly, CountOnly, EmitMatches};
90    EngineKind mEngineKind;
91   
92    std::string linePrefix(std::string fileName);
93
94    bool mSuppressFileMessages;
95    bool mPreferMMap;
96    bool mShowFileNames;
97    std::string mStdinLabel;
98    bool mShowLineNumbers;
99    bool mInitialTab;
100    bool mCaseInsensitive;
101    bool mInvertMatches;
102    int mMaxCount;
103   
104    Driver * mGrepDriver;
105
106    std::atomic<unsigned> mNextFileToGrep;
107    std::atomic<unsigned> mNextFileToPrint;
108    std::vector<std::string> inputFiles;
109    std::vector<std::ostringstream> mResultStrs;
110    std::vector<FileStatus> mFileStatus;
111    bool grepMatchFound;
112    GrepRecordBreakKind mGrepRecordBreak;
113   
114    std::vector<re:: RE *> mREs;
115    std::set<re::Name *> mUnicodeProperties;
116    re::CC * mBreakCC;
117    std::unique_ptr<cc::MultiplexedAlphabet> mpx;
118    std::string mFileSuffix;
119    bool mMoveMatchesToEOL;
120    pthread_t mEngineThread;
121};
122
123
//
// The EmitMatches engine uses an EmitMatch accumulator object to concatenate
// matched lines.
127
128class EmitMatch : public MatchAccumulator {
129    friend class EmitMatchesEngine;
130public:
131    EmitMatch(std::string linePrefix, bool showLineNumbers, bool initialTab, std::ostringstream & strm) : mLinePrefix(linePrefix),
132        mShowLineNumbers(showLineNumbers),
133        mInitialTab(initialTab),
134        mLineCount(0),
135        mTerminated(true),
136        mResultStr(strm) {}
137    void accumulate_match(const size_t lineNum, char * line_start, char * line_end) override;
138    void finalize_match(char * buffer_end) override;
139protected:
140    std::string mLinePrefix;
141    bool mShowLineNumbers;
142    bool mInitialTab;
143    size_t mLineCount;
144    bool mTerminated;
145    std::ostringstream & mResultStr;
146};
147
148class EmitMatchesEngine : public GrepEngine {
149public:
150    EmitMatchesEngine();
151    void grepCodeGen() override;
152private:
153    uint64_t doGrep(const std::string & fileName, const uint32_t fileIdx) override;
154};
155
156class CountOnlyEngine : public GrepEngine {
157public:
158    CountOnlyEngine();
159private:
160    uint64_t doGrep(const std::string & fileName, const uint32_t fileIdx) override;
161};
162
163class MatchOnlyEngine : public GrepEngine {
164public:
165    MatchOnlyEngine(bool showFilesWithoutMatch, bool useNullSeparators);
166private:
167    uint64_t doGrep(const std::string & fileName, const uint32_t fileIdx) override;
168    unsigned mRequiredCount;
169};
170
171class QuietModeEngine : public GrepEngine {
172public:
173    QuietModeEngine();
174};
175
176}
177
178#endif
Note: See TracBrowser for help on using the repository browser.