source: icGREP/icgrep-devel/icgrep/grep/grep_engine.h @ 5954

Last change on this file since 5954 was 5954, checked in by cameron, 11 months ago

InternalSearchEngine?

File size: 5.8 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6#ifndef GREP_ENGINE_H
7#define GREP_ENGINE_H
8#include <grep_interface.h>
9#include <kernels/streamset.h>
10#include <cc/multiplex_CCs.h>
11#include <string>
12#include <vector>
13#include <sstream>
14#include <atomic>
15
16namespace re { class CC; }
17namespace re { class RE; }
18namespace llvm { namespace cl { class OptionCategory; } }
19class Driver;
20
21
22namespace grep {
23   
// Selects the record-break convention; stored by GrepEngine::setRecordBreak()
// and InternalSearchEngine::setRecordBreak().
// NOTE(review): presumably Null = NUL-terminated records, LF = line-feed
// terminated lines, Unicode = Unicode line breaks -- confirm in grep_engine.cpp.
enum class GrepRecordBreakKind {
    Null,
    LF,
    Unicode
};
25
// Abstract callback interface for receiving matches.
//
// accumulate_match() is pure virtual and must be supplied by every concrete
// accumulator; finalize_match() is an optional hook whose default does nothing.
class MatchAccumulator {
public:
    MatchAccumulator() = default;

    // Accumulators are used polymorphically (through MatchAccumulator *), so the
    // destructor must be virtual: destroying a derived object through a base
    // pointer is undefined behaviour otherwise.
    virtual ~MatchAccumulator() = default;

    // Reports one match.
    //   lineNum    - line number associated with the match.
    //   line_start - pointer to the start of the matched line.
    //   line_end   - pointer to the end of the matched line.
    virtual void accumulate_match(const size_t lineNum, char * line_start, char * line_end) = 0;

    // Optional hook receiving the end of the buffer; the default is a no-op.
    virtual void finalize_match(char * buffer_end) {}
};
32
// C-linkage entry points that take the accumulator as an integer address.
// NOTE(review): presumably each one casts accum_addr back to a
// MatchAccumulator * and forwards to the method of the same name; the
// definitions are not in this header -- confirm in grep_engine.cpp.
extern "C" {

void accumulate_match_wrapper(intptr_t accum_addr, const size_t lineNum, char * line_start, char * line_end);

void finalize_match_wrapper(intptr_t accum_addr, char * buffer_end);

}
36
37
38class GrepEngine {
39    enum class FileStatus {Pending, GrepComplete, PrintComplete};
40public:
41
42    GrepEngine();
43    virtual ~GrepEngine();
44   
45    void setPreferMMap() {mPreferMMap = true;}
46   
47    void showFileNames() {mShowFileNames = true;}
48    void setStdinLabel(std::string lbl) {mStdinLabel = lbl;}
49    void showLineNumbers() {mShowLineNumbers = true;}
50    void setInitialTab() {mInitialTab = true;}
51
52    void setMaxCount(int m) {mMaxCount = m;}
53    void setInvertMatches() {mInvertMatches = true;}
54    void setCaseInsensitive()  {mCaseInsensitive = true;}
55
56    void suppressFileMessages() {mSuppressFileMessages = true;}
57
58    void setRecordBreak(GrepRecordBreakKind b);
59    void initFileResult(std::vector<std::string> & filenames);
60    void initREs(std::vector<re::RE *> & REs);
61    virtual void grepCodeGen();
62    bool searchAllFiles();
63    void * DoGrepThreadMethod();
64
65protected:
66    std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> grepPipeline(parabix::StreamSetBuffer * ByteStream);
67
68    virtual uint64_t doGrep(const std::string & fileName, const uint32_t fileIdx);
69    int32_t openFile(const std::string & fileName, std::ostringstream & msgstrm);
70
71    enum class EngineKind {QuietMode, MatchOnly, CountOnly, EmitMatches};
72    EngineKind mEngineKind;
73   
74    std::string linePrefix(std::string fileName);
75
76    bool mSuppressFileMessages;
77    bool mPreferMMap;
78    bool mShowFileNames;
79    std::string mStdinLabel;
80    bool mShowLineNumbers;
81    bool mInitialTab;
82    bool mCaseInsensitive;
83    bool mInvertMatches;
84    int mMaxCount;
85   
86    Driver * mGrepDriver;
87
88    std::atomic<unsigned> mNextFileToGrep;
89    std::atomic<unsigned> mNextFileToPrint;
90    std::vector<std::string> inputFiles;
91    std::vector<std::ostringstream> mResultStrs;
92    std::vector<FileStatus> mFileStatus;
93    bool grepMatchFound;
94    GrepRecordBreakKind mGrepRecordBreak;
95   
96    std::vector<re:: RE *> mREs;
97    std::set<re::Name *> mUnicodeProperties;
98    re::CC * mBreakCC;
99    std::unique_ptr<cc::MultiplexedAlphabet> mpx;
100    std::string mFileSuffix;
101    bool mMoveMatchesToEOL;
102    pthread_t mEngineThread;
103};
104
105
106//
107// The EmitMatches engine uses an EmitMatchesAccumulator object to concatenate together
108// matched lines.
109
110class EmitMatch : public MatchAccumulator {
111    friend class EmitMatchesEngine;
112public:
113    EmitMatch(std::string linePrefix, bool showLineNumbers, bool initialTab, std::ostringstream & strm) : mLinePrefix(linePrefix),
114        mShowLineNumbers(showLineNumbers),
115        mInitialTab(initialTab),
116        mLineCount(0),
117        mTerminated(true),
118        mResultStr(strm) {}
119    void accumulate_match(const size_t lineNum, char * line_start, char * line_end) override;
120    void finalize_match(char * buffer_end) override;
121protected:
122    std::string mLinePrefix;
123    bool mShowLineNumbers;
124    bool mInitialTab;
125    size_t mLineCount;
126    bool mTerminated;
127    std::ostringstream & mResultStr;
128};
129
130class EmitMatchesEngine : public GrepEngine {
131public:
132    EmitMatchesEngine();
133    void grepCodeGen() override;
134private:
135    uint64_t doGrep(const std::string & fileName, const uint32_t fileIdx) override;
136};
137
138class CountOnlyEngine : public GrepEngine {
139public:
140    CountOnlyEngine();
141private:
142    uint64_t doGrep(const std::string & fileName, const uint32_t fileIdx) override;
143};
144
145class MatchOnlyEngine : public GrepEngine {
146public:
147    MatchOnlyEngine(bool showFilesWithoutMatch, bool useNullSeparators);
148private:
149    uint64_t doGrep(const std::string & fileName, const uint32_t fileIdx) override;
150    unsigned mRequiredCount;
151};
152
153class QuietModeEngine : public GrepEngine {
154public:
155    QuietModeEngine();
156};
157
158   
159   
160class InternalSearchEngine {
161public:
162    InternalSearchEngine();
163    ~InternalSearchEngine();
164   
165    void setRecordBreak(GrepRecordBreakKind b) {mGrepRecordBreak = b;}
166    void setCaseInsensitive()  {mCaseInsensitive = true;}
167   
168    void grepCodeGen(re::RE * matchingRE, re::RE * invertedRE, MatchAccumulator * accum);
169   
170    void doGrep(const char * search_buffer, size_t bufferLength);
171   
172private:
173    GrepRecordBreakKind mGrepRecordBreak;
174    bool mCaseInsensitive;
175
176    Driver * mGrepDriver;
177    bool grepMatchFound;
178};
179   
180   
// Sizing constants for SearchableBuffer: the widest SIMD register width (in
// bits) the buffer alignment is derived from, and the size in bytes of the
// in-object initial buffer.
#define MAX_SIMD_WIDTH_SUPPORTED 512
#define INITIAL_CAPACITY 1024

    // A buffer to which search candidates are added with addSearchCandidate().
    // It carries an in-object initial buffer aligned to BUFFER_ALIGNMENT bytes.
    //
    // The interface is explicitly public: with the `class` key and no access
    // label, every member (constructor and destructor included) was private,
    // which made the type impossible to use.
    class SearchableBuffer  {
    public:
        SearchableBuffer();
        ~SearchableBuffer();

        // The object holds a raw buffer pointer and has a user-provided
        // destructor, so member-wise copies are disallowed.
        // NOTE(review): presumably mBuffer_base points at mInitial_buffer or
        // at memory released by the destructor -- confirm in the .cpp.
        SearchableBuffer(const SearchableBuffer &) = delete;
        SearchableBuffer & operator=(const SearchableBuffer &) = delete;

        // Adds a candidate string of `length` bytes starting at `string_ptr`.
        void addSearchCandidate(char * string_ptr, size_t length);

        // Read-only accessors; const so they work on const objects.
        size_t getCandidateCount() const {return mEntries;}
        char * getBufferBase() const {return mBuffer_base;}
        size_t getBufferSize() const {return mSpace_used;}
    private:
        // Alignment in bytes: SIMD width in bits divided by 8.
        static const unsigned BUFFER_ALIGNMENT = MAX_SIMD_WIDTH_SUPPORTED/8;
        size_t mAllocated_capacity;
        char * mBuffer_base;
        alignas(BUFFER_ALIGNMENT) char mInitial_buffer[INITIAL_CAPACITY];
        size_t mSpace_used;
        size_t mEntries;
    };
199
200}
201
202#endif
Note: See TracBrowser for help on using the repository browser.