source: icGREP/icgrep-devel/icgrep/grep/grep_engine.h @ 5998

Last change on this file since 5998 was 5998, checked in by nmedfort, 13 months ago

Added temporary buffer functionality to the pipeline for single stream source buffers. Fixed memory leak from UCD::UnicodeBreakRE()

File size: 6.7 KB
Line 
1
2/*
3 *  Copyright (c) 2018 International Characters.
4 *  This software is licensed to the public under the Open Software License 3.0.
5 *  icgrep is a trademark of International Characters.
6 */
7#ifndef GREP_ENGINE_H
8#define GREP_ENGINE_H
9#include <grep_interface.h>
10#include <kernels/streamset.h>
11#include <cc/multiplex_CCs.h>
12#include <string>
13#include <vector>
14#include <sstream>
15#include <atomic>
16#include <util/aligned_allocator.h>
17#include <boost/filesystem.hpp>
18
// Forward declarations of types defined elsewhere in the project / in LLVM.
// The declarations in this header do not need their full definitions.
namespace re {
class CC;
class RE;
}
namespace llvm {
namespace cl {
class OptionCategory;
}
}
class Driver;
23
24
25namespace grep {
26   
// Record-break kinds that can be selected through setRecordBreak().
enum class GrepRecordBreakKind {
    Null,
    LF,
    Unicode
};
28
// Signal codes with an unsigned underlying type.
// NOTE(review): presumably these are the values passed as the `signal` argument
// of GrepCallBackObject::handle_signal / signal_dispatcher -- confirm in the .cpp.
enum GrepSignal : unsigned {
    BinaryFile
};
30
// Base class for objects that can receive signals (see handle_signal) and that
// record whether a binary file has been signalled.
class GrepCallBackObject {
public:
    // Starts with the binary-file flag cleared.
    GrepCallBackObject() : mBinaryFile(false) {}
    // Virtual destructor: this class is a polymorphic base (it declares a virtual
    // function), so derived objects must be safely destructible through a
    // GrepCallBackObject pointer.
    virtual ~GrepCallBackObject() = default;
    // Handles a signal code. Declared here; the definition lives outside this header.
    virtual void handle_signal(unsigned signal);
    // Returns the current value of the binary-file flag. Does not modify the object.
    bool binaryFileSignalled() const {return mBinaryFile;}
private:
    // NOTE(review): presumably set by handle_signal() when it receives
    // GrepSignal::BinaryFile -- confirm against the implementation file.
    bool mBinaryFile;
};
39   
40class MatchAccumulator : public GrepCallBackObject {
41public:
42    MatchAccumulator() {}
43    virtual void accumulate_match(const size_t lineNum, char * line_start, char * line_end) = 0;
44    virtual void finalize_match(char * buffer_end) {}  // default: no op
45};
46
// C-linkage entry points. Each receives the address of a callback object as an
// integer (intptr_t).
// NOTE(review): presumably each converts the address back to the object and
// forwards to handle_signal / accumulate_match / finalize_match respectively --
// confirm against the implementation file.
extern "C" {
void signal_dispatcher(intptr_t callback_object_addr, unsigned signal);
void accumulate_match_wrapper(intptr_t accum_addr, const size_t lineNum, char * line_start, char * line_end);
void finalize_match_wrapper(intptr_t accum_addr, char * buffer_end);
}
52
53class GrepEngine {
54    enum class FileStatus {Pending, GrepComplete, PrintComplete};
55public:
56
57    enum class EngineKind {QuietMode, MatchOnly, CountOnly, EmitMatches};
58
59    GrepEngine();
60
61    virtual ~GrepEngine() = 0;
62   
63    void setPreferMMap() {mPreferMMap = true;}
64   
65    void showFileNames() {mShowFileNames = true;}
66    void setStdinLabel(std::string lbl) {mStdinLabel = lbl;}
67    void showLineNumbers() {mShowLineNumbers = true;}
68    void setInitialTab() {mInitialTab = true;}
69
70    void setMaxCount(int m) {mMaxCount = m;}
71    void setGrepStdIn() {mGrepStdIn = true;}
72    void setInvertMatches() {mInvertMatches = true;}
73    void setCaseInsensitive()  {mCaseInsensitive = true;}
74
75    void suppressFileMessages() {mSuppressFileMessages = true;}
76    void setBinaryFilesOption(argv::BinaryFilesMode mode) {mBinaryFilesMode = mode;}
77    void setRecordBreak(GrepRecordBreakKind b);
78    void initFileResult(std::vector<boost::filesystem::path> & filenames);
79    void initREs(std::vector<re::RE *> & REs);
80    virtual void grepCodeGen();
81    bool searchAllFiles();
82    void * DoGrepThreadMethod();
83    virtual void showResult(uint64_t grepResult, const std::string & fileName, std::ostringstream & strm);
84
85protected:
86    std::pair<parabix::StreamSetBuffer *, parabix::StreamSetBuffer *> grepPipeline(parabix::StreamSetBuffer * ByteStream, llvm::Value * callback_object_addr);
87
88    virtual uint64_t doGrep(const std::string & fileName, std::ostringstream & strm);
89    int32_t openFile(const std::string & fileName, std::ostringstream & msgstrm);
90
91    std::string linePrefix(std::string fileName);
92
93protected:
94
95    EngineKind mEngineKind;
96    bool mSuppressFileMessages;
97    argv::BinaryFilesMode mBinaryFilesMode;
98    bool mPreferMMap;
99    bool mShowFileNames;
100    std::string mStdinLabel;
101    bool mShowLineNumbers;
102    bool mInitialTab;
103    bool mCaseInsensitive;
104    bool mInvertMatches;
105    int mMaxCount;
106    bool mGrepStdIn;
107    std::unique_ptr<Driver> mGrepDriver;
108
109    std::atomic<unsigned> mNextFileToGrep;
110    std::atomic<unsigned> mNextFileToPrint;
111    std::vector<boost::filesystem::path> inputPaths;
112    std::vector<std::ostringstream> mResultStrs;
113    std::vector<FileStatus> mFileStatus;
114    bool grepMatchFound;
115    GrepRecordBreakKind mGrepRecordBreak;
116   
117    std::vector<re:: RE *> mREs;
118    std::set<re::Name *> mUnicodeProperties;
119    re::CC * mBreakCC;
120    std::unique_ptr<cc::MultiplexedAlphabet> mpx;
121    std::string mFileSuffix;
122    bool mMoveMatchesToEOL;
123    pthread_t mEngineThread;
124};
125
126
//
// The EmitMatches engine uses an EmitMatch accumulator object to concatenate together
// matched lines.
130
131class EmitMatch : public MatchAccumulator {
132    friend class EmitMatchesEngine;
133public:
134    EmitMatch(std::string linePrefix, bool showLineNumbers, bool initialTab, std::ostringstream & strm) : mLinePrefix(linePrefix),
135        mShowLineNumbers(showLineNumbers),
136        mInitialTab(initialTab),
137        mLineCount(0),
138        mTerminated(true),
139        mResultStr(strm) {}
140    void accumulate_match(const size_t lineNum, char * line_start, char * line_end) override;
141    void finalize_match(char * buffer_end) override;
142protected:
143    std::string mLinePrefix;
144    bool mShowLineNumbers;
145    bool mInitialTab;
146    size_t mLineCount;
147    bool mTerminated;
148    std::ostringstream & mResultStr;
149};
150
151class EmitMatchesEngine : public GrepEngine {
152public:
153    EmitMatchesEngine();
154    void grepCodeGen() override;
155private:
156    uint64_t doGrep(const std::string & fileName, std::ostringstream & strm) override;
157};
158
159class CountOnlyEngine : public GrepEngine {
160public:
161    CountOnlyEngine();
162private:
163    void showResult(uint64_t grepResult, const std::string & fileName, std::ostringstream & strm) override;
164};
165
166class MatchOnlyEngine : public GrepEngine {
167public:
168    MatchOnlyEngine(bool showFilesWithoutMatch, bool useNullSeparators);
169private:
170    void showResult(uint64_t grepResult, const std::string & fileName, std::ostringstream & strm) override;
171    unsigned mRequiredCount;
172};
173
174class QuietModeEngine : public GrepEngine {
175public:
176    QuietModeEngine();
177};
178
179   
180   
181class InternalSearchEngine {
182public:
183    InternalSearchEngine();
184    ~InternalSearchEngine();
185   
186    void setRecordBreak(GrepRecordBreakKind b) {mGrepRecordBreak = b;}
187    void setCaseInsensitive()  {mCaseInsensitive = true;}
188   
189    void grepCodeGen(re::RE * matchingRE, re::RE * invertedRE, MatchAccumulator * accum);
190   
191    void doGrep(const char * search_buffer, size_t bufferLength);
192   
193private:
194    GrepRecordBreakKind mGrepRecordBreak;
195    bool mCaseInsensitive;
196    bool mSaveSegmentPipelineParallel;
197
198    std::unique_ptr<Driver> mGrepDriver;
199};
200   
201   
// Largest SIMD width supported (presumably in bits: SearchableBuffer divides it
// by 8 to obtain its buffer alignment).
#define MAX_SIMD_WIDTH_SUPPORTED 512
// Size of the inline buffer embedded in SearchableBuffer: four times the value above.
#define INITIAL_CAPACITY (4 * MAX_SIMD_WIDTH_SUPPORTED)
204   
205class SearchableBuffer  {
206public:
207    SearchableBuffer();
208    void addSearchCandidate(const char * string_ptr);
209    size_t getCandidateCount() {return mEntries;}
210    char * getBufferBase() {return mBuffer_base;}
211    size_t getBufferSize() {return mSpace_used;}
212    ~SearchableBuffer();
213private:
214    static const unsigned BUFFER_ALIGNMENT = MAX_SIMD_WIDTH_SUPPORTED/8;
215    AlignedAllocator<char, BUFFER_ALIGNMENT> mAllocator;
216    size_t mAllocated_capacity;
217    size_t mSpace_used;
218    size_t mEntries;
219    char * mBuffer_base;
220    alignas(BUFFER_ALIGNMENT) char mInitial_buffer[INITIAL_CAPACITY];
221};
222
223}
224
225#endif
Note: See TracBrowser for help on using the repository browser.