source: icGREP/icgrep-devel/icgrep/grep/grep_engine.h @ 5965

Last change on this file since 5965 was 5965, checked in by cameron, 10 months ago

Support for file/directory include/exclude

File size: 5.9 KB
Line 
1
2/*
3 *  Copyright (c) 2017 International Characters.
4 *  This software is licensed to the public under the Open Software License 3.0.
5 *  icgrep is a trademark of International Characters.
6 */
7#ifndef GREP_ENGINE_H
8#define GREP_ENGINE_H
#include <grep_interface.h>
#include <kernels/streamset.h>
#include <cc/multiplex_CCs.h>
#include <string>
#include <vector>
#include <sstream>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <set>
#include <utility>
#include <pthread.h>
#include <util/aligned_allocator.h>
#include <boost/filesystem.hpp>
18
19namespace re { class CC; }
20namespace re { class RE; }
21namespace llvm { namespace cl { class OptionCategory; } }
22class Driver;
23
24
25namespace grep {
26   
// Selects which kind of record break an engine is configured with
// (see the setRecordBreak() members declared in this header).
enum class GrepRecordBreakKind {
    Null,     // NOTE(review): presumably NUL-delimited records -- confirm in the implementation
    LF,       // NOTE(review): presumably line-feed-delimited records -- confirm
    Unicode   // NOTE(review): presumably Unicode line-break handling -- confirm
};
28
// Abstract callback interface for receiving matches.
//
// accumulate_match() must be provided by every concrete accumulator;
// finalize_match() is optional and does nothing unless overridden.
class MatchAccumulator {
public:
    MatchAccumulator() = default;

    // The class is used polymorphically, so the destructor must be virtual:
    // destroying a derived accumulator through a MatchAccumulator pointer is
    // undefined behaviour otherwise.
    virtual ~MatchAccumulator() = default;

    // Reports one match.
    //   lineNum    - line number passed through by the caller.
    //   line_start - pointer associated with the start of the matched line.
    //   line_end   - pointer associated with the end of the matched line.
    // NOTE(review): whether lineNum is 0- or 1-based and whether line_end is
    // inclusive are not determined by this header -- confirm with the callers.
    virtual void accumulate_match(const size_t lineNum, char * line_start, char * line_end) = 0;

    // Hook taking the end-of-buffer pointer; the default implementation is a no-op.
    virtual void finalize_match(char * buffer_end) {}  // default: no op
};
35
// Free functions with C linkage; only declared here, defined elsewhere.
// Their parameter lists mirror MatchAccumulator::accumulate_match() and
// MatchAccumulator::finalize_match(), with an extra leading integer.
// NOTE(review): accum_addr presumably carries the address of a
// MatchAccumulator to forward the call to -- confirm against the definitions.
extern "C" {

void accumulate_match_wrapper(intptr_t accum_addr, const size_t lineNum, char * line_start, char * line_end);

void finalize_match_wrapper(intptr_t accum_addr, char * buffer_end);

}
39
40
41class GrepEngine {
42    enum class FileStatus {Pending, GrepComplete, PrintComplete};
43public:
44
45    GrepEngine();
46    virtual ~GrepEngine();
47   
48    void setPreferMMap() {mPreferMMap = true;}
49   
50    void showFileNames() {mShowFileNames = true;}
51    void setStdinLabel(std::string lbl) {mStdinLabel = lbl;}
52    void showLineNumbers() {mShowLineNumbers = true;}
53    void setInitialTab() {mInitialTab = true;}
54
55    void setMaxCount(int m) {mMaxCount = m;}
56    void setGrepStdIn() {mGrepStdIn = true;}
57    void setInvertMatches() {mInvertMatches = true;}
58    void setCaseInsensitive()  {mCaseInsensitive = true;}
59
60    void suppressFileMessages() {mSuppressFileMessages = true;}
61
62    void setRecordBreak(GrepRecordBreakKind b);
63    void initFileResult(std::vector<boost::filesystem::path> & filenames);
64    void initREs(std::vector<re::RE *> & REs);
65    virtual void grepCodeGen();
66    bool searchAllFiles();
67    void * DoGrepThreadMethod();
68
69protected:
70    std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> grepPipeline(parabix::StreamSetBuffer * ByteStream);
71
72    virtual uint64_t doGrep(const std::string & fileName, std::ostringstream & strm);
73    int32_t openFile(const std::string & fileName, std::ostringstream & msgstrm);
74
75    enum class EngineKind {QuietMode, MatchOnly, CountOnly, EmitMatches};
76    EngineKind mEngineKind;
77   
78    std::string linePrefix(std::string fileName);
79
80    bool mSuppressFileMessages;
81    bool mPreferMMap;
82    bool mShowFileNames;
83    std::string mStdinLabel;
84    bool mShowLineNumbers;
85    bool mInitialTab;
86    bool mCaseInsensitive;
87    bool mInvertMatches;
88    int mMaxCount;
89    bool mGrepStdIn;
90   
91    Driver * mGrepDriver;
92
93    std::atomic<unsigned> mNextFileToGrep;
94    std::atomic<unsigned> mNextFileToPrint;
95    std::vector<boost::filesystem::path> inputPaths;
96    std::vector<std::ostringstream> mResultStrs;
97    std::vector<FileStatus> mFileStatus;
98    bool grepMatchFound;
99    GrepRecordBreakKind mGrepRecordBreak;
100   
101    std::vector<re:: RE *> mREs;
102    std::set<re::Name *> mUnicodeProperties;
103    re::CC * mBreakCC;
104    std::unique_ptr<cc::MultiplexedAlphabet> mpx;
105    std::string mFileSuffix;
106    bool mMoveMatchesToEOL;
107    pthread_t mEngineThread;
108};
109
110
//
// The EmitMatches engine uses an EmitMatch accumulator object to concatenate together
// matched lines.
114
115class EmitMatch : public MatchAccumulator {
116    friend class EmitMatchesEngine;
117public:
118    EmitMatch(std::string linePrefix, bool showLineNumbers, bool initialTab, std::ostringstream & strm) : mLinePrefix(linePrefix),
119        mShowLineNumbers(showLineNumbers),
120        mInitialTab(initialTab),
121        mLineCount(0),
122        mTerminated(true),
123        mResultStr(strm) {}
124    void accumulate_match(const size_t lineNum, char * line_start, char * line_end) override;
125    void finalize_match(char * buffer_end) override;
126protected:
127    std::string mLinePrefix;
128    bool mShowLineNumbers;
129    bool mInitialTab;
130    size_t mLineCount;
131    bool mTerminated;
132    std::ostringstream & mResultStr;
133};
134
135class EmitMatchesEngine : public GrepEngine {
136public:
137    EmitMatchesEngine();
138    void grepCodeGen() override;
139private:
140    uint64_t doGrep(const std::string & fileName, std::ostringstream & strm) override;
141};
142
143class CountOnlyEngine : public GrepEngine {
144public:
145    CountOnlyEngine();
146private:
147    uint64_t doGrep(const std::string & fileName, std::ostringstream & strm) override;
148};
149
150class MatchOnlyEngine : public GrepEngine {
151public:
152    MatchOnlyEngine(bool showFilesWithoutMatch, bool useNullSeparators);
153private:
154    uint64_t doGrep(const std::string & fileName, std::ostringstream & strm) override;
155    unsigned mRequiredCount;
156};
157
158class QuietModeEngine : public GrepEngine {
159public:
160    QuietModeEngine();
161};
162
163   
164   
165class InternalSearchEngine {
166public:
167    InternalSearchEngine();
168    ~InternalSearchEngine();
169   
170    void setRecordBreak(GrepRecordBreakKind b) {mGrepRecordBreak = b;}
171    void setCaseInsensitive()  {mCaseInsensitive = true;}
172   
173    void grepCodeGen(re::RE * matchingRE, re::RE * invertedRE, MatchAccumulator * accum);
174   
175    void doGrep(const char * search_buffer, size_t bufferLength);
176   
177private:
178    GrepRecordBreakKind mGrepRecordBreak;
179    bool mCaseInsensitive;
180
181    Driver * mGrepDriver;
182};
183   
184   
185#define MAX_SIMD_WIDTH_SUPPORTED 256
186#define INITIAL_CAPACITY 64
187   
188class SearchableBuffer  {
189public:
190    SearchableBuffer();
191    void addSearchCandidate(const char * string_ptr);
192    size_t getCandidateCount() {return mEntries;}
193    char * getBufferBase() {return mBuffer_base;}
194    size_t getBufferSize() {return mSpace_used;}
195    ~SearchableBuffer();
196private:
197    static const unsigned BUFFER_ALIGNMENT = MAX_SIMD_WIDTH_SUPPORTED/8;
198    AlignedAllocator<char, BUFFER_ALIGNMENT> mAllocator;
199    size_t mAllocated_capacity;
200    size_t mSpace_used;
201    size_t mEntries;
202    char * mBuffer_base;
203    alignas(BUFFER_ALIGNMENT) char mInitial_buffer[INITIAL_CAPACITY];
204};
205
206}
207
208#endif
Note: See TracBrowser for help on using the repository browser.