source: icGREP/icgrep-devel/icgrep/grep/grep_engine.h @ 6184

Last change on this file since 6184 was 6184, checked in by nmedfort, 7 months ago

Initial version of PipelineKernel + revised StreamSet model.

File size: 7.1 KB
Line 
1
2/*
3 *  Copyright (c) 2018 International Characters.
4 *  This software is licensed to the public under the Open Software License 3.0.
5 *  icgrep is a trademark of International Characters.
6 */
7#ifndef GREP_ENGINE_H
8#define GREP_ENGINE_H
9#include <grep_interface.h>
10//#include <kernels/streamset.h>
11#include <cc/multiplex_CCs.h>
12#include <string>
13#include <vector>
14#include <sstream>
15#include <atomic>
16#include <util/aligned_allocator.h>
17#include <boost/filesystem.hpp>
18
19namespace re { class CC; }
20namespace re { class RE; }
21namespace llvm { namespace cl { class OptionCategory; } }
22namespace kernel { class PipelineBuilder; }
23namespace kernel { class StreamSet; }
24class BaseDriver;
25
26
27namespace grep {
28   
// Record-break kinds accepted by GrepEngine::setRecordBreak() and
// InternalSearchEngine::setRecordBreak().
// NOTE(review): the enumerator names suggest NUL-, LF- and Unicode-delimited
// records respectively; confirm against the implementation.
enum class GrepRecordBreakKind {
    Null,
    LF,
    Unicode
};
30
31class InternalSearchEngine;
32
// Signal codes with an unsigned underlying type.
// NOTE(review): presumably these are the values passed as the `signal`
// argument of GrepCallBackObject::handle_signal(); confirm in the .cpp.
enum GrepSignal : unsigned {
    BinaryFile
};
34
// Base class for callback objects that can be sent a signal through
// handle_signal().  It carries a single "binary file" flag that starts
// out false.
class GrepCallBackObject {
public:
    GrepCallBackObject() : mBinaryFile(false) {}

    // This class is a polymorphic base (it declares a virtual function and is
    // derived from by MatchAccumulator), so the destructor is virtual to keep
    // destruction through a base-class pointer well defined.
    virtual ~GrepCallBackObject() = default;

    // Receives a signal code.  Defined out of line.
    virtual void handle_signal(unsigned signal);

    // Returns the current value of the binary-file flag.  Read-only, hence const.
    bool binaryFileSignalled() const {return mBinaryFile;}

private:
    bool mBinaryFile;
};
43   
44class MatchAccumulator : public GrepCallBackObject {
45public:
46    MatchAccumulator() {}
47    virtual void accumulate_match(const size_t lineNum, char * line_start, char * line_end) = 0;
48    virtual void finalize_match(char * buffer_end) {}  // default: no op
49};
50
// C-linkage entry points.  Each takes the address of an object as an intptr_t
// followed by the remaining arguments.
// NOTE(review): presumably they forward to handle_signal(), accumulate_match()
// and finalize_match() on the addressed object; the definitions are not in
// this header.
extern "C" {

void signal_dispatcher(intptr_t callback_object_addr, unsigned signal);

void accumulate_match_wrapper(intptr_t accum_addr, const size_t lineNum, char * line_start, char * line_end);

void finalize_match_wrapper(intptr_t accum_addr, char * buffer_end);

}
56
57class GrepEngine {
58    enum class FileStatus {Pending, GrepComplete, PrintComplete};
59    friend class InternalSearchEngine;
60public:
61
62    enum class EngineKind {QuietMode, MatchOnly, CountOnly, EmitMatches};
63
64    GrepEngine(BaseDriver & driver);
65
66    virtual ~GrepEngine() = 0;
67   
68    void setPreferMMap() {mPreferMMap = true;}
69   
70    void showFileNames() {mShowFileNames = true;}
71    void setStdinLabel(std::string lbl) {mStdinLabel = lbl;}
72    void showLineNumbers() {mShowLineNumbers = true;}
73    void setInitialTab() {mInitialTab = true;}
74
75    void setMaxCount(int m) {mMaxCount = m;}
76    void setGrepStdIn() {mGrepStdIn = true;}
77    void setInvertMatches() {mInvertMatches = true;}
78    void setCaseInsensitive()  {mCaseInsensitive = true;}
79
80    void suppressFileMessages() {mSuppressFileMessages = true;}
81    void setBinaryFilesOption(argv::BinaryFilesMode mode) {mBinaryFilesMode = mode;}
82    void setRecordBreak(GrepRecordBreakKind b);
83    void initFileResult(std::vector<boost::filesystem::path> & filenames);
84    void initREs(std::vector<re::RE *> & REs);
85    virtual void grepCodeGen();
86    bool searchAllFiles();
87    void * DoGrepThreadMethod();
88    virtual void showResult(uint64_t grepResult, const std::string & fileName, std::ostringstream & strm);
89
90protected:
91    std::pair<kernel::StreamSet *, kernel::StreamSet *> grepPipeline(const std::unique_ptr<kernel::PipelineBuilder> & P,
92                                                                     kernel::StreamSet * ByteStream);
93
94    virtual uint64_t doGrep(const std::string & fileName, std::ostringstream & strm);
95    int32_t openFile(const std::string & fileName, std::ostringstream & msgstrm);
96
97    std::string linePrefix(std::string fileName);
98
99protected:
100
101    EngineKind mEngineKind;
102    bool mSuppressFileMessages;
103    argv::BinaryFilesMode mBinaryFilesMode;
104    bool mPreferMMap;
105    bool mShowFileNames;
106    std::string mStdinLabel;
107    bool mShowLineNumbers;
108    bool mInitialTab;
109    bool mCaseInsensitive;
110    bool mInvertMatches;
111    int mMaxCount;
112    bool mGrepStdIn;
113    BaseDriver & mGrepDriver;
114    void * mMainMethod;
115
116    std::atomic<unsigned> mNextFileToGrep;
117    std::atomic<unsigned> mNextFileToPrint;
118    std::vector<boost::filesystem::path> inputPaths;
119    std::vector<std::ostringstream> mResultStrs;
120    std::vector<FileStatus> mFileStatus;
121    bool grepMatchFound;
122    GrepRecordBreakKind mGrepRecordBreak;
123   
124    std::vector<re:: RE *> mREs;
125    std::set<re::Name *> mUnicodeProperties;
126    re::CC * mBreakCC;
127    std::string mFileSuffix;
128    bool mMoveMatchesToEOL;
129    pthread_t mEngineThread;
130};
131
132
133//
134// The EmitMatches engine uses an EmitMatchesAccumulator object to concatenate together
135// matched lines.
136
137class EmitMatch : public MatchAccumulator {
138    friend class EmitMatchesEngine;
139public:
140    EmitMatch(std::string linePrefix, bool showLineNumbers, bool initialTab, std::ostringstream & strm)
141        : mLinePrefix(linePrefix),
142        mShowLineNumbers(showLineNumbers),
143        mInitialTab(initialTab),
144        mLineCount(0),
145        mTerminated(true),
146        mResultStr(strm) {}
147    void accumulate_match(const size_t lineNum, char * line_start, char * line_end) override;
148    void finalize_match(char * buffer_end) override;
149protected:
150    std::string mLinePrefix;
151    bool mShowLineNumbers;
152    bool mInitialTab;
153    size_t mLineCount;
154    bool mTerminated;
155    std::ostringstream & mResultStr;
156};
157
158class EmitMatchesEngine final : public GrepEngine {
159public:
160    EmitMatchesEngine(BaseDriver & driver);
161    void grepCodeGen() override;
162private:
163    uint64_t doGrep(const std::string & fileName, std::ostringstream & strm) override;
164};
165
166class CountOnlyEngine final : public GrepEngine {
167public:
168    CountOnlyEngine(BaseDriver & driver);
169private:
170    void showResult(uint64_t grepResult, const std::string & fileName, std::ostringstream & strm) override;
171};
172
173class MatchOnlyEngine final : public GrepEngine {
174public:
175    MatchOnlyEngine(BaseDriver & driver, bool showFilesWithoutMatch, bool useNullSeparators);
176private:
177    void showResult(uint64_t grepResult, const std::string & fileName, std::ostringstream & strm) override;
178    unsigned mRequiredCount;
179};
180
181class QuietModeEngine final : public GrepEngine {
182public:
183    QuietModeEngine(BaseDriver & driver);
184};
185
186   
187   
188class InternalSearchEngine {
189public:
190    InternalSearchEngine(BaseDriver & driver);
191
192    InternalSearchEngine(const std::unique_ptr<grep::GrepEngine> & engine);
193
194    ~InternalSearchEngine();
195   
196    void setRecordBreak(GrepRecordBreakKind b) {mGrepRecordBreak = b;}
197    void setCaseInsensitive()  {mCaseInsensitive = true;}
198   
199    void grepCodeGen(re::RE * matchingRE, re::RE * invertedRE);
200   
201    void doGrep(const char * search_buffer, size_t bufferLength, MatchAccumulator & accum);
202   
203private:
204    GrepRecordBreakKind mGrepRecordBreak;
205    bool mCaseInsensitive;
206    bool mSaveSegmentPipelineParallel;
207    BaseDriver & mGrepDriver;
208    void * mMainMethod;
209};
210   
211   
212#define MAX_SIMD_WIDTH_SUPPORTED 512
213#define INITIAL_CAPACITY (MAX_SIMD_WIDTH_SUPPORTED * 4)
214   
215class SearchableBuffer  {
216public:
217    SearchableBuffer();
218    void addSearchCandidate(const char * string_ptr);
219    size_t getCandidateCount() {return mEntries;}
220    char * getBufferBase() {return mBuffer_base;}
221    size_t getBufferSize() {return mSpace_used;}
222    ~SearchableBuffer();
223private:
224    static const unsigned BUFFER_ALIGNMENT = MAX_SIMD_WIDTH_SUPPORTED/8;
225    AlignedAllocator<char, BUFFER_ALIGNMENT> mAllocator;
226    size_t mAllocated_capacity;
227    size_t mSpace_used;
228    size_t mEntries;
229    char * mBuffer_base;
230    alignas(BUFFER_ALIGNMENT) char mInitial_buffer[INITIAL_CAPACITY];
231};
232
233}
234
235#endif
Note: See TracBrowser for help on using the repository browser.