source: icGREP/icgrep-devel/icgrep/grep/grep_engine.h @ 5953

Last change on this file since 5953 was 5953, checked in by cameron, 15 months ago

Use AlignedAllocator when boost::align is unavailable (Jenkins test server).

File size: 5.7 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6#ifndef GREP_ENGINE_H
7#define GREP_ENGINE_H
#include <grep_interface.h>
#include <kernels/streamset.h>
#include <cc/multiplex_CCs.h>
#include <pthread.h>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <set>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
15
// Forward declarations for types that this header only uses through pointers
// (or as template arguments of pointer type).  re::Name is included because
// GrepEngine stores a std::set<re::Name *>.
namespace re { class CC; class Name; class RE; }
namespace llvm { namespace cl { class OptionCategory; } }
class Driver;
20
21
22namespace grep {
23   
// Selects which kind of record break an engine uses; stored by
// GrepEngine::setRecordBreak and InternalSearchEngine::setRecordBreak.
// NOTE(review): presumably Null = NUL byte, LF = line feed, Unicode = any
// Unicode line break -- confirm against the implementation.
enum class GrepRecordBreakKind {
    Null,
    LF,
    Unicode
};
25
// Abstract callback interface through which matches are reported.
// Implementations must provide accumulate_match(); finalize_match() is
// optional and does nothing unless overridden.
class MatchAccumulator {
public:
    MatchAccumulator() = default;
    // Virtual so that an accumulator destroyed through a MatchAccumulator
    // pointer runs the derived class's destructor.
    virtual ~MatchAccumulator() = default;
    // Receives one match: a line number plus pointers delimiting the line.
    virtual void accumulate_match(const size_t lineNum, char * line_start, char * line_end) = 0;
    virtual void finalize_match(char * buffer_end) {}  // default: no op
};
32
// C-linkage entry points mirroring MatchAccumulator's two methods.  The
// accumulator is passed as an integer-typed address (accum_addr).
// NOTE(review): presumably these cast accum_addr back to a MatchAccumulator*
// and forward the call -- confirm in the definitions.
extern "C" {

void accumulate_match_wrapper(intptr_t accum_addr, const size_t lineNum, char * line_start, char * line_end);

void finalize_match_wrapper(intptr_t accum_addr, char * buffer_end);

}
36
37   
// Divided by 8 below to obtain the buffer alignment, i.e. presumably a width in bits.
#define MAX_SIMD_WIDTH_SUPPORTED 512
// Number of chars in SearchableBuffer's in-object buffer.
#define INITIAL_CAPACITY 1024

// Buffer of search candidates with an aligned in-object initial storage area.
// The constructor, destructor and addSearchCandidate() are defined out of line.
class SearchableBuffer  {
public:
    SearchableBuffer();
    ~SearchableBuffer();
    // The object holds a raw buffer pointer and has a user-declared destructor,
    // so a member-wise copy would alias the same storage.  Copying is disabled.
    // NOTE(review): confirm the ownership of mBuffer_base in the implementation.
    SearchableBuffer(const SearchableBuffer &) = delete;
    SearchableBuffer & operator=(const SearchableBuffer &) = delete;

    void addSearchCandidate(char * string_ptr, size_t length);
    // Returns the current value of the entry counter.
    size_t getCandidateCount() const {return mEntries;}
private:
    // Alignment in bytes of the in-object buffer (512 / 8 = 64).
    static const unsigned BUFFER_ALIGNMENT = MAX_SIMD_WIDTH_SUPPORTED/8;
    size_t mAllocated_capacity;
    char * mBuffer_base;
    alignas(BUFFER_ALIGNMENT) char mInitial_buffer[INITIAL_CAPACITY];
    size_t mSpace_used;
    size_t mEntries;
};
54
55void grepBuffer(re::RE * pattern, const char * buffer, size_t bufferLength, MatchAccumulator * accum);
56
57class GrepEngine {
58    enum class FileStatus {Pending, GrepComplete, PrintComplete};
59public:
60
61    GrepEngine();
62    virtual ~GrepEngine();
63   
64    void setPreferMMap() {mPreferMMap = true;}
65   
66    void showFileNames() {mShowFileNames = true;}
67    void setStdinLabel(std::string lbl) {mStdinLabel = lbl;}
68    void showLineNumbers() {mShowLineNumbers = true;}
69    void setInitialTab() {mInitialTab = true;}
70
71    void setMaxCount(int m) {mMaxCount = m;}
72    void setInvertMatches() {mInvertMatches = true;}
73    void setCaseInsensitive()  {mCaseInsensitive = true;}
74
75    void suppressFileMessages() {mSuppressFileMessages = true;}
76
77    void setRecordBreak(GrepRecordBreakKind b);
78    void initFileResult(std::vector<std::string> & filenames);
79    void initREs(std::vector<re::RE *> & REs);
80    virtual void grepCodeGen();
81    bool searchAllFiles();
82    void * DoGrepThreadMethod();
83
84protected:
85    std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> grepPipeline(parabix::StreamSetBuffer * ByteStream);
86
87    virtual uint64_t doGrep(const std::string & fileName, const uint32_t fileIdx);
88    int32_t openFile(const std::string & fileName, std::ostringstream & msgstrm);
89
90    enum class EngineKind {QuietMode, MatchOnly, CountOnly, EmitMatches};
91    EngineKind mEngineKind;
92   
93    std::string linePrefix(std::string fileName);
94
95    bool mSuppressFileMessages;
96    bool mPreferMMap;
97    bool mShowFileNames;
98    std::string mStdinLabel;
99    bool mShowLineNumbers;
100    bool mInitialTab;
101    bool mCaseInsensitive;
102    bool mInvertMatches;
103    int mMaxCount;
104   
105    Driver * mGrepDriver;
106
107    std::atomic<unsigned> mNextFileToGrep;
108    std::atomic<unsigned> mNextFileToPrint;
109    std::vector<std::string> inputFiles;
110    std::vector<std::ostringstream> mResultStrs;
111    std::vector<FileStatus> mFileStatus;
112    bool grepMatchFound;
113    GrepRecordBreakKind mGrepRecordBreak;
114   
115    std::vector<re:: RE *> mREs;
116    std::set<re::Name *> mUnicodeProperties;
117    re::CC * mBreakCC;
118    std::unique_ptr<cc::MultiplexedAlphabet> mpx;
119    std::string mFileSuffix;
120    bool mMoveMatchesToEOL;
121    pthread_t mEngineThread;
122};
123
124
125//
126// The EmitMatches engine uses an EmitMatchesAccumulator object to concatenate together
127// matched lines.
128
129class EmitMatch : public MatchAccumulator {
130    friend class EmitMatchesEngine;
131public:
132    EmitMatch(std::string linePrefix, bool showLineNumbers, bool initialTab, std::ostringstream & strm) : mLinePrefix(linePrefix),
133        mShowLineNumbers(showLineNumbers),
134        mInitialTab(initialTab),
135        mLineCount(0),
136        mTerminated(true),
137        mResultStr(strm) {}
138    void accumulate_match(const size_t lineNum, char * line_start, char * line_end) override;
139    void finalize_match(char * buffer_end) override;
140protected:
141    std::string mLinePrefix;
142    bool mShowLineNumbers;
143    bool mInitialTab;
144    size_t mLineCount;
145    bool mTerminated;
146    std::ostringstream & mResultStr;
147};
148
149class EmitMatchesEngine : public GrepEngine {
150public:
151    EmitMatchesEngine();
152    void grepCodeGen() override;
153private:
154    uint64_t doGrep(const std::string & fileName, const uint32_t fileIdx) override;
155};
156
157class CountOnlyEngine : public GrepEngine {
158public:
159    CountOnlyEngine();
160private:
161    uint64_t doGrep(const std::string & fileName, const uint32_t fileIdx) override;
162};
163
164class MatchOnlyEngine : public GrepEngine {
165public:
166    MatchOnlyEngine(bool showFilesWithoutMatch, bool useNullSeparators);
167private:
168    uint64_t doGrep(const std::string & fileName, const uint32_t fileIdx) override;
169    unsigned mRequiredCount;
170};
171
172class QuietModeEngine : public GrepEngine {
173public:
174    QuietModeEngine();
175};
176
177   
178   
179class InternalSearchEngine {
180public:
181    InternalSearchEngine();
182    ~InternalSearchEngine();
183   
184    void setRecordBreak(GrepRecordBreakKind b) {mGrepRecordBreak = b;}
185    void setCaseInsensitive()  {mCaseInsensitive = true;}
186   
187    void grepCodeGen(re::RE * matchingRE, re::RE * invertedRE, MatchAccumulator * accum);
188   
189    void doGrep(const char * search_buffer, size_t bufferLength);
190   
191private:
192    GrepRecordBreakKind mGrepRecordBreak;
193    bool mCaseInsensitive;
194
195    Driver * mGrepDriver;
196    bool grepMatchFound;
197};
198   
199
200}
201
202#endif
Note: See TracBrowser for help on using the repository browser.