1 | /* |
---|
2 | * Copyright (c) 2017 International Characters. |
---|
3 | * This software is licensed to the public under the Open Software License 3.0. |
---|
4 | * icgrep is a trademark of International Characters. |
---|
5 | */ |
---|
6 | #ifndef GREP_ENGINE_H |
---|
7 | #define GREP_ENGINE_H |
---|
8 | #include <grep_interface.h> |
---|
9 | #include <kernels/streamset.h> |
---|
10 | #include <cc/multiplex_CCs.h> |
---|
11 | #include <string> |
---|
12 | #include <vector> |
---|
13 | #include <sstream> |
---|
14 | #include <atomic> |
---|
15 | |
---|
16 | namespace re { class CC; } |
---|
17 | namespace re { class RE; } |
---|
18 | namespace llvm { namespace cl { class OptionCategory; } } |
---|
19 | class Driver; |
---|
20 | |
---|
21 | |
---|
22 | namespace grep { |
---|
23 | |
---|
// Kind of record separator the engine is configured with (passed to
// GrepEngine::setRecordBreak and stored in mGrepRecordBreak).
// NOTE(review): presumably Null = NUL-terminated records, LF = line-feed
// terminated lines, Unicode = Unicode line breaks -- confirm against the
// implementation of setRecordBreak.
enum class GrepRecordBreakKind {
    Null,
    LF,
    Unicode
};
---|
25 | |
---|
// Abstract callback interface through which matches are reported.
//
// Implementations must override accumulate_match(); finalize_match() has a
// do-nothing default and only needs overriding when end-of-buffer handling is
// required.
class MatchAccumulator {
public:
    MatchAccumulator() {}

    // Virtual so that destroying a derived accumulator through a
    // MatchAccumulator pointer runs the derived destructor instead of being
    // undefined behaviour.
    virtual ~MatchAccumulator() = default;

    // Called with the line number of a match and the [line_start, line_end]
    // pointers delimiting the matched line.
    // NOTE(review): whether line_end is inclusive or one-past-the-end is not
    // visible in this header -- confirm against the implementations.
    virtual void accumulate_match(const size_t lineNum, char * line_start, char * line_end) = 0;

    // Called with a pointer to the end of the searched buffer.
    virtual void finalize_match(char * buffer_end) {}  // default: no op
};
---|
32 | |
---|
// C-linkage entry points mirroring the two MatchAccumulator callbacks.
// NOTE(review): accum_addr is presumably the address of a MatchAccumulator
// passed as an integer so that generated code can call back into it --
// confirm against the definitions.
extern "C" {

void accumulate_match_wrapper(intptr_t accum_addr,
                              const size_t lineNum,
                              char * line_start,
                              char * line_end);

void finalize_match_wrapper(intptr_t accum_addr, char * buffer_end);

}
---|
36 | |
---|
37 | |
---|
// Widest SIMD register width catered for; divided by 8 below to obtain the
// alignment of SearchableBuffer's embedded storage.
#define MAX_SIMD_WIDTH_SUPPORTED 512
// Number of chars in the storage embedded directly in each SearchableBuffer.
#define INITIAL_CAPACITY 1024

// Aligned character buffer to which search candidates are added.
//
// The object embeds INITIAL_CAPACITY chars of storage aligned to
// BUFFER_ALIGNMENT and additionally tracks a base pointer, an allocated
// capacity, the space used and the number of entries.
class SearchableBuffer {
public:
    // The members below were previously declared before any access specifier
    // and were therefore private, which made the class impossible to create
    // or use from outside.
    SearchableBuffer();
    ~SearchableBuffer();

    // NOTE(review): the class declares its own destructor and holds a raw
    // buffer pointer next to an embedded buffer, so a memberwise copy is
    // presumably unsafe; copying is disabled -- confirm against the
    // implementation file.
    SearchableBuffer(const SearchableBuffer &) = delete;
    SearchableBuffer & operator=(const SearchableBuffer &) = delete;

    // Adds the `length` chars starting at `string_ptr` as a search candidate.
    void addSearchCandidate(char * string_ptr, size_t length);

    // Returns the current value of the entry counter.
    size_t getCandidateCount() const {return mEntries;}

private:
    // Alignment, in bytes, of the embedded buffer.
    static const unsigned BUFFER_ALIGNMENT = MAX_SIMD_WIDTH_SUPPORTED/8;
    size_t mAllocated_capacity;
    char * mBuffer_base;
    alignas(BUFFER_ALIGNMENT) char mInitial_buffer[INITIAL_CAPACITY];
    size_t mSpace_used;
    size_t mEntries;
};
---|
54 | |
---|
55 | void grepBuffer(re::RE * pattern, const char * buffer, size_t bufferLength, MatchAccumulator * accum); |
---|
56 | |
---|
57 | class GrepEngine { |
---|
58 | enum class FileStatus {Pending, GrepComplete, PrintComplete}; |
---|
59 | public: |
---|
60 | |
---|
61 | GrepEngine(); |
---|
62 | virtual ~GrepEngine(); |
---|
63 | |
---|
64 | void setPreferMMap() {mPreferMMap = true;} |
---|
65 | |
---|
66 | void showFileNames() {mShowFileNames = true;} |
---|
67 | void setStdinLabel(std::string lbl) {mStdinLabel = lbl;} |
---|
68 | void showLineNumbers() {mShowLineNumbers = true;} |
---|
69 | void setInitialTab() {mInitialTab = true;} |
---|
70 | |
---|
71 | void setMaxCount(int m) {mMaxCount = m;} |
---|
72 | void setInvertMatches() {mInvertMatches = true;} |
---|
73 | void setCaseInsensitive() {mCaseInsensitive = true;} |
---|
74 | |
---|
75 | void suppressFileMessages() {mSuppressFileMessages = true;} |
---|
76 | |
---|
77 | void setRecordBreak(GrepRecordBreakKind b); |
---|
78 | void initFileResult(std::vector<std::string> & filenames); |
---|
79 | void initREs(std::vector<re::RE *> & REs); |
---|
80 | virtual void grepCodeGen(); |
---|
81 | bool searchAllFiles(); |
---|
82 | void * DoGrepThreadMethod(); |
---|
83 | |
---|
84 | protected: |
---|
85 | std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> grepPipeline(parabix::StreamSetBuffer * ByteStream); |
---|
86 | |
---|
87 | virtual uint64_t doGrep(const std::string & fileName, const uint32_t fileIdx); |
---|
88 | int32_t openFile(const std::string & fileName, std::ostringstream & msgstrm); |
---|
89 | |
---|
90 | enum class EngineKind {QuietMode, MatchOnly, CountOnly, EmitMatches}; |
---|
91 | EngineKind mEngineKind; |
---|
92 | |
---|
93 | std::string linePrefix(std::string fileName); |
---|
94 | |
---|
95 | bool mSuppressFileMessages; |
---|
96 | bool mPreferMMap; |
---|
97 | bool mShowFileNames; |
---|
98 | std::string mStdinLabel; |
---|
99 | bool mShowLineNumbers; |
---|
100 | bool mInitialTab; |
---|
101 | bool mCaseInsensitive; |
---|
102 | bool mInvertMatches; |
---|
103 | int mMaxCount; |
---|
104 | |
---|
105 | Driver * mGrepDriver; |
---|
106 | |
---|
107 | std::atomic<unsigned> mNextFileToGrep; |
---|
108 | std::atomic<unsigned> mNextFileToPrint; |
---|
109 | std::vector<std::string> inputFiles; |
---|
110 | std::vector<std::ostringstream> mResultStrs; |
---|
111 | std::vector<FileStatus> mFileStatus; |
---|
112 | bool grepMatchFound; |
---|
113 | GrepRecordBreakKind mGrepRecordBreak; |
---|
114 | |
---|
115 | std::vector<re:: RE *> mREs; |
---|
116 | std::set<re::Name *> mUnicodeProperties; |
---|
117 | re::CC * mBreakCC; |
---|
118 | std::unique_ptr<cc::MultiplexedAlphabet> mpx; |
---|
119 | std::string mFileSuffix; |
---|
120 | bool mMoveMatchesToEOL; |
---|
121 | pthread_t mEngineThread; |
---|
122 | }; |
---|
123 | |
---|
124 | |
---|
125 | // |
---|
126 | // The EmitMatches engine uses an EmitMatch accumulator object to concatenate together |
---|
127 | // matched lines. |
---|
128 | |
---|
129 | class EmitMatch : public MatchAccumulator { |
---|
130 | friend class EmitMatchesEngine; |
---|
131 | public: |
---|
132 | EmitMatch(std::string linePrefix, bool showLineNumbers, bool initialTab, std::ostringstream & strm) : mLinePrefix(linePrefix), |
---|
133 | mShowLineNumbers(showLineNumbers), |
---|
134 | mInitialTab(initialTab), |
---|
135 | mLineCount(0), |
---|
136 | mTerminated(true), |
---|
137 | mResultStr(strm) {} |
---|
138 | void accumulate_match(const size_t lineNum, char * line_start, char * line_end) override; |
---|
139 | void finalize_match(char * buffer_end) override; |
---|
140 | protected: |
---|
141 | std::string mLinePrefix; |
---|
142 | bool mShowLineNumbers; |
---|
143 | bool mInitialTab; |
---|
144 | size_t mLineCount; |
---|
145 | bool mTerminated; |
---|
146 | std::ostringstream & mResultStr; |
---|
147 | }; |
---|
148 | |
---|
149 | class EmitMatchesEngine : public GrepEngine { |
---|
150 | public: |
---|
151 | EmitMatchesEngine(); |
---|
152 | void grepCodeGen() override; |
---|
153 | private: |
---|
154 | uint64_t doGrep(const std::string & fileName, const uint32_t fileIdx) override; |
---|
155 | }; |
---|
156 | |
---|
157 | class CountOnlyEngine : public GrepEngine { |
---|
158 | public: |
---|
159 | CountOnlyEngine(); |
---|
160 | private: |
---|
161 | uint64_t doGrep(const std::string & fileName, const uint32_t fileIdx) override; |
---|
162 | }; |
---|
163 | |
---|
164 | class MatchOnlyEngine : public GrepEngine { |
---|
165 | public: |
---|
166 | MatchOnlyEngine(bool showFilesWithoutMatch, bool useNullSeparators); |
---|
167 | private: |
---|
168 | uint64_t doGrep(const std::string & fileName, const uint32_t fileIdx) override; |
---|
169 | unsigned mRequiredCount; |
---|
170 | }; |
---|
171 | |
---|
172 | class QuietModeEngine : public GrepEngine { |
---|
173 | public: |
---|
174 | QuietModeEngine(); |
---|
175 | }; |
---|
176 | |
---|
177 | } |
---|
178 | |
---|
179 | #endif |
---|