source: icGREP/icgrep-devel/icgrep/grep/grep_engine.h @ 5963

Last change on this file since 5963 was 5963, checked in by cameron, 12 months ago

Fixes for internal buffer search

File size: 5.8 KB
Line 
1
2/*
3 *  Copyright (c) 2017 International Characters.
4 *  This software is licensed to the public under the Open Software License 3.0.
5 *  icgrep is a trademark of International Characters.
6 */
7#ifndef GREP_ENGINE_H
8#define GREP_ENGINE_H
#include <grep_interface.h>
#include <kernels/streamset.h>
#include <cc/multiplex_CCs.h>
#include <pthread.h>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <set>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include <util/aligned_allocator.h>
17
18namespace re { class CC; }
19namespace re { class RE; }
20namespace llvm { namespace cl { class OptionCategory; } }
21class Driver;
22
23
24namespace grep {
25   
// Record-break kinds accepted by GrepEngine::setRecordBreak() and
// InternalSearchEngine::setRecordBreak().
// NOTE(review): the per-enumerator meanings below are inferred from the names only -
// confirm against the implementation.
enum class GrepRecordBreakKind {
    Null,     // presumably NUL-separated records
    LF,       // presumably line-feed-separated records
    Unicode   // presumably records separated by any Unicode line break
};
27
// Abstract callback interface for receiving match reports.
// Implementations must provide accumulate_match(); finalize_match() has a default
// no-op body and only needs overriding when end-of-buffer handling is required.
class MatchAccumulator {
public:
    MatchAccumulator() = default;
    // Virtual so that an implementation can be destroyed safely through a
    // MatchAccumulator pointer.
    virtual ~MatchAccumulator() = default;
    // Receives a line number together with pointers delimiting the line.
    // NOTE(review): whether line_end is inclusive or one-past-the-end is not visible
    // in this header - confirm against the caller.
    virtual void accumulate_match(const size_t lineNum, char * line_start, char * line_end) = 0;
    virtual void finalize_match(char * buffer_end) {}  // default: no op
};
34
// C-linkage entry points that take the accumulator as an integer address.
// NOTE(review): accum_addr is presumably a MatchAccumulator pointer converted to
// intptr_t, with each wrapper forwarding to the method of the same name - confirm
// against the definitions, which are not in this header.
extern "C" {

void accumulate_match_wrapper(intptr_t accum_addr, const size_t lineNum, char * line_start, char * line_end);

void finalize_match_wrapper(intptr_t accum_addr, char * buffer_end);

}
38
39
40class GrepEngine {
41    enum class FileStatus {Pending, GrepComplete, PrintComplete};
42public:
43
44    GrepEngine();
45    virtual ~GrepEngine();
46   
47    void setPreferMMap() {mPreferMMap = true;}
48   
49    void showFileNames() {mShowFileNames = true;}
50    void setStdinLabel(std::string lbl) {mStdinLabel = lbl;}
51    void showLineNumbers() {mShowLineNumbers = true;}
52    void setInitialTab() {mInitialTab = true;}
53
54    void setMaxCount(int m) {mMaxCount = m;}
55    void setInvertMatches() {mInvertMatches = true;}
56    void setCaseInsensitive()  {mCaseInsensitive = true;}
57
58    void suppressFileMessages() {mSuppressFileMessages = true;}
59
60    void setRecordBreak(GrepRecordBreakKind b);
61    void initFileResult(std::vector<std::string> & filenames);
62    void initREs(std::vector<re::RE *> & REs);
63    virtual void grepCodeGen();
64    bool searchAllFiles();
65    void * DoGrepThreadMethod();
66
67protected:
68    std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> grepPipeline(parabix::StreamSetBuffer * ByteStream);
69
70    virtual uint64_t doGrep(const std::string & fileName, const uint32_t fileIdx);
71    int32_t openFile(const std::string & fileName, std::ostringstream & msgstrm);
72
73    enum class EngineKind {QuietMode, MatchOnly, CountOnly, EmitMatches};
74    EngineKind mEngineKind;
75   
76    std::string linePrefix(std::string fileName);
77
78    bool mSuppressFileMessages;
79    bool mPreferMMap;
80    bool mShowFileNames;
81    std::string mStdinLabel;
82    bool mShowLineNumbers;
83    bool mInitialTab;
84    bool mCaseInsensitive;
85    bool mInvertMatches;
86    int mMaxCount;
87   
88    Driver * mGrepDriver;
89
90    std::atomic<unsigned> mNextFileToGrep;
91    std::atomic<unsigned> mNextFileToPrint;
92    std::vector<std::string> inputFiles;
93    std::vector<std::ostringstream> mResultStrs;
94    std::vector<FileStatus> mFileStatus;
95    bool grepMatchFound;
96    GrepRecordBreakKind mGrepRecordBreak;
97   
98    std::vector<re:: RE *> mREs;
99    std::set<re::Name *> mUnicodeProperties;
100    re::CC * mBreakCC;
101    std::unique_ptr<cc::MultiplexedAlphabet> mpx;
102    std::string mFileSuffix;
103    bool mMoveMatchesToEOL;
104    pthread_t mEngineThread;
105};
106
107
108//
109// The EmitMatches engine uses an EmitMatchesAccumulator object to concatenate together
110// matched lines.
111
112class EmitMatch : public MatchAccumulator {
113    friend class EmitMatchesEngine;
114public:
115    EmitMatch(std::string linePrefix, bool showLineNumbers, bool initialTab, std::ostringstream & strm) : mLinePrefix(linePrefix),
116        mShowLineNumbers(showLineNumbers),
117        mInitialTab(initialTab),
118        mLineCount(0),
119        mTerminated(true),
120        mResultStr(strm) {}
121    void accumulate_match(const size_t lineNum, char * line_start, char * line_end) override;
122    void finalize_match(char * buffer_end) override;
123protected:
124    std::string mLinePrefix;
125    bool mShowLineNumbers;
126    bool mInitialTab;
127    size_t mLineCount;
128    bool mTerminated;
129    std::ostringstream & mResultStr;
130};
131
132class EmitMatchesEngine : public GrepEngine {
133public:
134    EmitMatchesEngine();
135    void grepCodeGen() override;
136private:
137    uint64_t doGrep(const std::string & fileName, const uint32_t fileIdx) override;
138};
139
140class CountOnlyEngine : public GrepEngine {
141public:
142    CountOnlyEngine();
143private:
144    uint64_t doGrep(const std::string & fileName, const uint32_t fileIdx) override;
145};
146
147class MatchOnlyEngine : public GrepEngine {
148public:
149    MatchOnlyEngine(bool showFilesWithoutMatch, bool useNullSeparators);
150private:
151    uint64_t doGrep(const std::string & fileName, const uint32_t fileIdx) override;
152    unsigned mRequiredCount;
153};
154
155class QuietModeEngine : public GrepEngine {
156public:
157    QuietModeEngine();
158};
159
160   
161   
162class InternalSearchEngine {
163public:
164    InternalSearchEngine();
165    ~InternalSearchEngine();
166   
167    void setRecordBreak(GrepRecordBreakKind b) {mGrepRecordBreak = b;}
168    void setCaseInsensitive()  {mCaseInsensitive = true;}
169   
170    void grepCodeGen(re::RE * matchingRE, re::RE * invertedRE, MatchAccumulator * accum);
171   
172    void doGrep(const char * search_buffer, size_t bufferLength);
173   
174private:
175    GrepRecordBreakKind mGrepRecordBreak;
176    bool mCaseInsensitive;
177
178    Driver * mGrepDriver;
179    bool grepMatchFound;
180};
181   
182   
183#define MAX_SIMD_WIDTH_SUPPORTED 512
184#define INITIAL_CAPACITY 1024
185   
186class SearchableBuffer  {
187public:
188    SearchableBuffer();
189    void addSearchCandidate(const char * string_ptr);
190    size_t getCandidateCount() {return mEntries;}
191    char * getBufferBase() {return mBuffer_base;}
192    size_t getBufferSize() {return mSpace_used;}
193    ~SearchableBuffer();
194private:
195    static const unsigned BUFFER_ALIGNMENT = MAX_SIMD_WIDTH_SUPPORTED/8;
196    AlignedAllocator<char, BUFFER_ALIGNMENT> mAllocator;
197    size_t mAllocated_capacity;
198    size_t mSpace_used;
199    size_t mEntries;
200    char * mBuffer_base;
201    alignas(BUFFER_ALIGNMENT) char mInitial_buffer[INITIAL_CAPACITY];
202};
203
204}
205
206#endif
Note: See TracBrowser for help on using the repository browser.