source: icGREP/icgrep-devel/icgrep/grep/grep_engine.h @ 5994

Last change on this file since 5994 was 5994, checked in by cameron, 12 months ago

INITIAL_CAPACITY must be a multiple of BlockSize?

File size: 6.7 KB
Line 
1
2/*
3 *  Copyright (c) 2018 International Characters.
4 *  This software is licensed to the public under the Open Software License 3.0.
5 *  icgrep is a trademark of International Characters.
6 */
7#ifndef GREP_ENGINE_H
8#define GREP_ENGINE_H
#include <grep_interface.h>
#include <kernels/streamset.h>
#include <cc/multiplex_CCs.h>
#include <atomic>
#include <cstdint>
#include <memory>
#include <set>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include <util/aligned_allocator.h>
#include <boost/filesystem.hpp>
18
// Forward declarations of types that are defined in other headers.
namespace re {
class CC;
class RE;
}
namespace llvm {
namespace cl {
class OptionCategory;
}
}
class Driver;
23
24
25namespace grep {
26   
// The kind of record break a grep engine is configured with
// (see GrepEngine::setRecordBreak / InternalSearchEngine::setRecordBreak).
enum class GrepRecordBreakKind {
    Null,
    LF,
    Unicode
};
28
// Signal codes with an unsigned underlying type; presumably the values passed
// as the `signal` argument of handle_signal / signal_dispatcher (confirm).
enum GrepSignal : unsigned {
    BinaryFile
};
30
// Base class for objects that receive signals via handle_signal().
// It is a polymorphic base (MatchAccumulator derives from it), so it declares
// a virtual destructor to make destruction through a base pointer well defined.
class GrepCallBackObject {
public:
    // Starts with the binary-file flag cleared.
    GrepCallBackObject() : mBinaryFile(false) {}
    virtual ~GrepCallBackObject() = default;

    // Handles a signal code; defined out of line.
    // NOTE(review): presumably sets the binary-file flag on a BinaryFile signal - confirm in the .cpp.
    virtual void handle_signal(unsigned signal);

    // Returns the current value of the binary-file flag.
    bool binaryFileSignalled() const {return mBinaryFile;}
private:
    bool mBinaryFile;
};
39   
40class MatchAccumulator : public GrepCallBackObject {
41public:
42    MatchAccumulator() {}
43    virtual void accumulate_match(const size_t lineNum, char * line_start, char * line_end) = 0;
44    virtual void finalize_match(char * buffer_end) {}  // default: no op
45};
46
// C-linkage entry points, defined out of line. Each takes the address of a
// callback object as an intptr_t plus the arguments of the like-named member
// function (handle_signal / accumulate_match / finalize_match).
// NOTE(review): presumably each forwards to that member function - confirm in the .cpp.
extern "C" {

void signal_dispatcher(intptr_t callback_object_addr, unsigned signal);

void accumulate_match_wrapper(intptr_t accum_addr, const size_t lineNum, char * line_start, char * line_end);

void finalize_match_wrapper(intptr_t accum_addr, char * buffer_end);

}
52
53class GrepEngine {
54    enum class FileStatus {Pending, GrepComplete, PrintComplete};
55public:
56
57    GrepEngine();
58    virtual ~GrepEngine();
59   
60    void setPreferMMap() {mPreferMMap = true;}
61   
62    void showFileNames() {mShowFileNames = true;}
63    void setStdinLabel(std::string lbl) {mStdinLabel = lbl;}
64    void showLineNumbers() {mShowLineNumbers = true;}
65    void setInitialTab() {mInitialTab = true;}
66
67    void setMaxCount(int m) {mMaxCount = m;}
68    void setGrepStdIn() {mGrepStdIn = true;}
69    void setInvertMatches() {mInvertMatches = true;}
70    void setCaseInsensitive()  {mCaseInsensitive = true;}
71
72    void suppressFileMessages() {mSuppressFileMessages = true;}
73    void setBinaryFilesOption(argv::BinaryFilesMode mode) {mBinaryFilesMode = mode;}
74    void setRecordBreak(GrepRecordBreakKind b);
75    void initFileResult(std::vector<boost::filesystem::path> & filenames);
76    void initREs(std::vector<re::RE *> & REs);
77    virtual void grepCodeGen();
78    bool searchAllFiles();
79    void * DoGrepThreadMethod();
80    virtual void showResult(uint64_t grepResult, const std::string & fileName, std::ostringstream & strm);
81
82protected:
83    std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> grepPipeline(parabix::StreamSetBuffer * ByteStream, llvm::Value * callback_object_addr);
84
85    virtual uint64_t doGrep(const std::string & fileName, std::ostringstream & strm);
86    int32_t openFile(const std::string & fileName, std::ostringstream & msgstrm);
87
88    enum class EngineKind {QuietMode, MatchOnly, CountOnly, EmitMatches};
89    EngineKind mEngineKind;
90   
91    std::string linePrefix(std::string fileName);
92
93    bool mSuppressFileMessages;
94    argv::BinaryFilesMode mBinaryFilesMode;
95    bool mPreferMMap;
96    bool mShowFileNames;
97    std::string mStdinLabel;
98    bool mShowLineNumbers;
99    bool mInitialTab;
100    bool mCaseInsensitive;
101    bool mInvertMatches;
102    int mMaxCount;
103    bool mGrepStdIn;
104    std::unique_ptr<Driver> mGrepDriver;
105
106    std::atomic<unsigned> mNextFileToGrep;
107    std::atomic<unsigned> mNextFileToPrint;
108    std::vector<boost::filesystem::path> inputPaths;
109    std::vector<std::ostringstream> mResultStrs;
110    std::vector<FileStatus> mFileStatus;
111    bool grepMatchFound;
112    GrepRecordBreakKind mGrepRecordBreak;
113   
114    std::vector<re:: RE *> mREs;
115    std::set<re::Name *> mUnicodeProperties;
116    re::CC * mBreakCC;
117    std::unique_ptr<cc::MultiplexedAlphabet> mpx;
118    std::string mFileSuffix;
119    bool mMoveMatchesToEOL;
120    pthread_t mEngineThread;
121};
122
123
//
// The EmitMatches engine uses an EmitMatch accumulator object to concatenate together
// matched lines.
127
128class EmitMatch : public MatchAccumulator {
129    friend class EmitMatchesEngine;
130public:
131    EmitMatch(std::string linePrefix, bool showLineNumbers, bool initialTab, std::ostringstream & strm) : mLinePrefix(linePrefix),
132        mShowLineNumbers(showLineNumbers),
133        mInitialTab(initialTab),
134        mLineCount(0),
135        mTerminated(true),
136        mResultStr(strm) {}
137    void accumulate_match(const size_t lineNum, char * line_start, char * line_end) override;
138    void finalize_match(char * buffer_end) override;
139protected:
140    std::string mLinePrefix;
141    bool mShowLineNumbers;
142    bool mInitialTab;
143    size_t mLineCount;
144    bool mTerminated;
145    std::ostringstream & mResultStr;
146};
147
148class EmitMatchesEngine : public GrepEngine {
149public:
150    EmitMatchesEngine();
151    void grepCodeGen() override;
152private:
153    uint64_t doGrep(const std::string & fileName, std::ostringstream & strm) override;
154};
155
156class CountOnlyEngine : public GrepEngine {
157public:
158    CountOnlyEngine();
159private:
160    void showResult(uint64_t grepResult, const std::string & fileName, std::ostringstream & strm) override;
161};
162
163class MatchOnlyEngine : public GrepEngine {
164public:
165    MatchOnlyEngine(bool showFilesWithoutMatch, bool useNullSeparators);
166private:
167    void showResult(uint64_t grepResult, const std::string & fileName, std::ostringstream & strm) override;
168    unsigned mRequiredCount;
169};
170
171class QuietModeEngine : public GrepEngine {
172public:
173    QuietModeEngine();
174};
175
176   
177   
178class InternalSearchEngine {
179public:
180    InternalSearchEngine();
181    ~InternalSearchEngine();
182   
183    void setRecordBreak(GrepRecordBreakKind b) {mGrepRecordBreak = b;}
184    void setCaseInsensitive()  {mCaseInsensitive = true;}
185   
186    void grepCodeGen(re::RE * matchingRE, re::RE * invertedRE, MatchAccumulator * accum);
187   
188    void doGrep(const char * search_buffer, size_t bufferLength);
189   
190private:
191    GrepRecordBreakKind mGrepRecordBreak;
192    bool mCaseInsensitive;
193    bool mSaveSegmentPipelineParallel;
194
195    std::unique_ptr<Driver> mGrepDriver;
196};
197   
198   
// Largest SIMD width supported. SearchableBuffer divides this by 8 to get its
// buffer alignment, so the value is presumably in bits.
#define MAX_SIMD_WIDTH_SUPPORTED 512
// Number of chars in SearchableBuffer's built-in initial buffer.
#define INITIAL_CAPACITY (4 * MAX_SIMD_WIDTH_SUPPORTED)
201   
202class SearchableBuffer  {
203public:
204    SearchableBuffer();
205    void addSearchCandidate(const char * string_ptr);
206    size_t getCandidateCount() {return mEntries;}
207    char * getBufferBase() {return mBuffer_base;}
208    size_t getBufferSize() {return mSpace_used;}
209    ~SearchableBuffer();
210private:
211    static const unsigned BUFFER_ALIGNMENT = MAX_SIMD_WIDTH_SUPPORTED/8;
212    AlignedAllocator<char, BUFFER_ALIGNMENT> mAllocator;
213    size_t mAllocated_capacity;
214    size_t mSpace_used;
215    size_t mEntries;
216    char * mBuffer_base;
217    alignas(BUFFER_ALIGNMENT) char mInitial_buffer[INITIAL_CAPACITY];
218};
219
220}
221
222#endif
Note: See TracBrowser for help on using the repository browser.