source: icGREP/icgrep-devel/icgrep/grep_interface.cpp @ 5772

Last change on this file since 5772 was 5753, checked in by cameron, 18 months ago

Parser for fixed strings (-F) mode

File size: 18.5 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <grep_interface.h>
8#include <llvm/Support/CommandLine.h>
9#include <llvm/Support/ErrorHandling.h>
10#include <llvm/Support/Signals.h>
11#include <llvm/Support/raw_ostream.h>
12#include <toolchain/toolchain.h>
13#include <re/re_toolchain.h>
14#include <pablo/pablo_toolchain.h>
15
16using namespace llvm;
17
18namespace grep {
19
20/*
21 *  A.  Regular expression syntax, interpretation and processing.
22 */
23
24static cl::OptionCategory RE_Options("A. Regular Expression Interpretation", "These options control regular expression parsing and interpretation");
25
26re::RE_Syntax RegexpSyntax;
27static cl::opt<re::RE_Syntax, true> RegexpSyntaxOption(cl::desc("Regular expression syntax: (default PCRE)"),
28    cl::values(
29        clEnumValN(re::RE_Syntax::ERE, "E", "Posix extended regular expression (ERE) syntax"),
30        clEnumValN(re::RE_Syntax::FixedStrings, "F", "Fixed strings, separated by newlines"),
31        clEnumValN(re::RE_Syntax::BRE, "G", "Posix basic regular expression (BRE) syntax"),
32        clEnumValN(re::RE_Syntax::PCRE, "P", "Perl-compatible regular expression (PCRE) syntax"),
33        clEnumValN(re::RE_Syntax::ERE, "extended-regexp", "Alias for -E"),
34        clEnumValN(re::RE_Syntax::FixedStrings, "fixed-strings", "Alias for -F"),
35        clEnumValN(re::RE_Syntax::BRE, "basic-regexp", "Alias for -G"),
36        clEnumValN(re::RE_Syntax::PCRE, "perl-regexp", "Alias for -P"),
37        clEnumValN(re::RE_Syntax::PROSITE, "PROSITE", "PROSITE protein patterns syntax")
38        CL_ENUM_VAL_SENTINEL), cl::cat(RE_Options), cl::Grouping, cl::location(RegexpSyntax), cl::init(re::RE_Syntax::PCRE));
39
40bool IgnoreCaseFlag;
41static cl::opt<bool, true> IgnoreCaseOption("i", cl::location(IgnoreCaseFlag), cl::desc("Ignore case distinctions in the pattern and the file."), cl::cat(RE_Options), cl::Grouping);
42static cl::alias IgnoreCaseAlias("ignore-case", cl::desc("Alias for -i"), cl::aliasopt(IgnoreCaseOption));
43
44bool InvertMatchFlag;
45static cl::opt<bool, true> InvertMatchOption("v", cl::location(InvertMatchFlag), cl::desc("Invert match results: select non-matching lines."), cl::cat(RE_Options), cl::Grouping);
46static cl::alias InvertMatchAlias("invert-match", cl::desc("Alias for -v"), cl::aliasopt(InvertMatchOption));
47
48bool LineRegexpFlag;
49static cl::opt<bool, true> LineRegexpOption("x", cl::location(LineRegexpFlag), cl::desc("Require that entire lines be matched."), cl::cat(RE_Options), cl::Grouping);
50static cl::alias LineRegexpAlias("line-regexp", cl::desc("Alias for -x"), cl::aliasopt(LineRegexpOption));
51
52bool WordRegexpFlag;
53static cl::opt<bool, true> WordRegexpOption("w", cl::location(WordRegexpFlag), cl::desc("Require that that whole words be matched."), cl::cat(RE_Options), cl::Grouping);
54static cl::alias WordRegexpAlias("word-regexp", cl::desc("Alias for -w"), cl::aliasopt(WordRegexpOption));
55
56std::vector<std::string> RegexpVector;
57static cl::list<std::string, std::vector<std::string>> RegexpOption("e", cl::location(RegexpVector), cl::desc("Regular expression"), cl::ZeroOrMore, cl::cat(RE_Options), cl::Grouping);
58static cl::alias RegexpAlias("regexp", cl::desc("Alias for -e"), cl::aliasopt(RegexpOption));
59
60std::string FileFlag;
61static cl::opt<std::string, true> FileOption("f", cl::location(FileFlag), cl::desc("Take regular expressions (one per line) from a file."), cl::cat(RE_Options), cl::Grouping);
62static cl::alias FileAlias("file", cl::desc("Alias for -f"), cl::aliasopt(FileOption));
63   
64/*
65 *  B.  Grep input sources and interpretation.
66 */
67   
68static cl::OptionCategory Input_Options("B. Input Options", "These options control the input.");
69
70bool RecursiveFlag;
71static cl::opt<bool, true> RecursiveOption("r", cl::location(RecursiveFlag), cl::desc("Recursively process files within directories, (but follow only top-level symlinks unless -R)."), cl::cat(Input_Options), cl::Grouping);
72static cl::alias RecursiveAlias("recursive", cl::desc("Alias for -r"), cl::aliasopt(RecursiveOption));
73
74bool DereferenceRecursiveFlag;
75static cl::opt<bool, true> DereferenceRecursiveOption("R", cl::location(DereferenceRecursiveFlag), cl::desc("Recursively process files within directories, following symlinks at all levels."), cl::cat(Input_Options), cl::Grouping);
76static cl::alias DereferenceRecursiveAlias("dereference-recursive", cl::desc("Alias for -R"), cl::aliasopt(DereferenceRecursiveOption));
77
78bool TextFlag;
79static cl::opt<bool, true> TextOption("a", cl::location(TextFlag), cl::desc("Treat each input file as text, even if it is a binary file."), cl::cat(Input_Options), cl::Grouping);
80static cl::alias TextAlias("text", cl::desc("Alias for -a"), cl::aliasopt(TextOption));
81
82bool BinaryNonMatchingFlag;
83static cl::opt<bool, true> BinaryNonMatchingOption("I", cl::location(BinaryNonMatchingFlag), cl::desc("Treat binary files as non-matching."), cl::cat(Input_Options), cl::Grouping);
84static cl::alias BinaryNonMatchingAlias("binary-non-matching", cl::desc("Alias for -I"), cl::aliasopt(BinaryNonMatchingOption));
85
86bool BinaryFlag;
87static cl::opt<bool, true> BinaryOption("U", cl::location(BinaryFlag), cl::desc("Treat each input file as a binary file, without CRLF normalization."), cl::cat(Input_Options), cl::Grouping);
88static cl::alias BinaryAlias("binary", cl::desc("Alias for -U"), cl::aliasopt(BinaryOption));
89
90bool NullDataFlag;
91static cl::opt<bool, true> NullDataOption("z", cl::location(NullDataFlag), cl::desc("Use the NUL character (codepoint 00) as the line-break character for input."), cl::cat(Input_Options), cl::Grouping);
92static cl::alias NullDataAlias("null-data", cl::desc("Alias for -z"), cl::aliasopt(NullDataOption));
93
94bool MmapFlag;
95static cl::opt<bool, true> MmapOption("mmap", cl::location(MmapFlag), cl::desc("Use mmap for file input."), cl::cat(Input_Options));
96
97std::string ExcludeFlag;
98static cl::opt<std::string, true> ExcludeOption("exclude", cl::location(ExcludeFlag), cl::desc("Exclude files matching the given filename GLOB pattern."), cl::cat(Input_Options));
99
100std::string ExcludeFromFlag;
101static cl::opt<std::string, true> ExcludeFromOption("exclude-from", cl::location(ExcludeFromFlag), cl::desc("Exclude files matching filename GLOB patterns from the given file."), cl::cat(Input_Options));
102
103std::string ExcludeDirFlag;
104static cl::opt<std::string, true> ExcludeDirOption("exclude-dir", cl::location(ExcludeDirFlag), cl::desc("Exclude directories matching the given pattern."), cl::cat(Input_Options));
105
106std::string IncludeFlag;
107static cl::opt<std::string, true> IncludeOption("include", cl::location(IncludeFlag), cl::desc("Include only files matching the given filename GLOB pattern."), cl::cat(Input_Options));
108
109DevDirAction DevicesFlag;
110static cl::opt<DevDirAction, true> DevicesOption("D", cl::desc("Processing mode for devices:"),
111                                                 cl::values(clEnumValN(Read, "read", "Treat devices as files to be searched."),
112                                                            clEnumValN(Skip, "skip", "Silently skip devices.")
113                                                            CL_ENUM_VAL_SENTINEL), cl::cat(Input_Options), cl::location(DevicesFlag), cl::init(Read));
114static cl::alias DevicesAlias("devices", cl::desc("Alias for -D"), cl::aliasopt(DevicesOption));
115
116DevDirAction DirectoriesFlag;
117static cl::opt<DevDirAction, true> DirectoriesOption("d", cl::desc("Processing mode for directories:"),
118                                                     cl::values(clEnumValN(Read, "read", "Print an error message for any listed directories."),
119                                                                clEnumValN(Skip, "skip", "Silently skip directories."),
120                                                                clEnumValN(Recurse, "recurse", "Recursive process directories, equivalent to -r.")
121                                                                CL_ENUM_VAL_SENTINEL), cl::cat(Input_Options), cl::location(DirectoriesFlag), cl::init(Read));
122static cl::alias DirectoriesAlias("directories", cl::desc("Alias for -d"), cl::aliasopt(DirectoriesOption));
123
124BinaryFilesMode BinaryFilesFlag;
125static cl::opt<BinaryFilesMode, true> BinaryFilesOption("binary-files", cl::desc("Processing mode for binary files:"),
126                                                     cl::values(clEnumValN(Binary, "binary", "Report match/non-match without printing matches."),
127                                                                clEnumValN(WithoutMatch, "without-match", "Always report as non-matching."),
128                                                                clEnumValN(Text, "text", "Treat binary files as text.")
129                                                                CL_ENUM_VAL_SENTINEL), cl::cat(Input_Options), cl::location(BinaryFilesFlag), cl::init(Binary));
130   
131/*
132 *  C.  Grep output modes and options.
133 */
134   
135   
136static cl::OptionCategory Output_Options("C. Output Options",
137                                            "These options control the output.");
138   
139GrepModeType Mode;
140static cl::opt<GrepModeType, true> GrepModeOption(cl::desc("Abbreviated output mode options:"),
141    cl::values(
142        clEnumValN(CountOnly, "c", "Display only the count of matching lines per file."),
143        clEnumValN(FilesWithMatch, "l", "Display only the names of files that have at least one match to the pattern."),
144        clEnumValN(FilesWithoutMatch, "L", "Display only the names of files that do not match the pattern."),
145        clEnumValN(QuietMode, "q", "Do not generate any output and ignore errors; set the return to zero status if a match is found."),
146        clEnumValN(CountOnly, "count", "Alias for -c"),
147        clEnumValN(FilesWithMatch, "files-with-match", "Alias for -l"),
148        clEnumValN(FilesWithoutMatch, "files-without-match", "Alias for -L"),
149        clEnumValN(QuietMode, "quiet", "Alias for -q"),
150        clEnumValN(QuietMode, "silent", "Alias for -q")
151        CL_ENUM_VAL_SENTINEL), cl::cat(Output_Options), cl::Grouping, cl::location(Mode), cl::init(NormalMode));
152
153bool NoMessagesFlag;
154static cl::opt<bool, true> NoMessagesOption("s", cl::location(NoMessagesFlag), cl::desc("Suppress messages for file errors."), cl::cat(Output_Options), cl::Grouping);
155static cl::alias NoMessagesAlias("no-messages", cl::desc("Alias for -s"), cl::aliasopt(NoMessagesOption));
156
157bool WithFilenameFlag;
158static cl::opt<bool, true> WithFilenameOption("H", cl::location(WithFilenameFlag), cl::desc("Show the file name with each matching line."), cl::cat(Output_Options), cl::Grouping);
159static cl::alias WithFilenameAlias("with-filename", cl::desc("Alias for -H"), cl::aliasopt(WithFilenameOption));
160
161bool NoFilenameFlag;
162static cl::opt<bool, true> NoFilenameOption("h", cl::location(NoFilenameFlag), cl::desc("Do not show filenames with maches."), cl::cat(Output_Options), cl::Grouping);
163static cl::alias NoFilenameAlias("no-filename", cl::desc("Alias for -h"), cl::aliasopt(NoFilenameOption));
164
165bool NullFlag;
166static cl::opt<bool, true> NullOption("Z", cl::location(NullFlag), cl::desc("Write NUL characters after filenames generated to output."), cl::cat(Output_Options), cl::Grouping);
167static cl::alias NullAlias("null", cl::desc("Alias for -Z"), cl::aliasopt(NullOption));
168
169bool LineNumberFlag;
170static cl::opt<bool, true> LineNumberOption("n", cl::location(LineNumberFlag), cl::desc("Show the line number with each matching line."), cl::cat(Output_Options), cl::Grouping);
171static cl::alias LineNumberAlias("line-number", cl::desc("Alias for -n"), cl::aliasopt(LineNumberOption));
172
173bool ByteOffsetFlag;
174static cl::opt<bool, true> ByteOffsetOption("b", cl::location(ByteOffsetFlag), cl::desc("Show the byte offset within the file for each matching line."), cl::cat(Output_Options), cl::Grouping);
175static cl::alias ByteOffsetAlias("byte-offset", cl::desc("Alias for -b"), cl::aliasopt(ByteOffsetOption));
176
177bool UnixByteOffsetsFlag;
178static cl::opt<bool, true> UnixByteOffsetsOption("u", cl::location(UnixByteOffsetsFlag), cl::desc("If byte offsets are displayed, report offsets as if all lines are terminated with a single LF."), cl::cat(Output_Options), cl::Grouping);
179static cl::alias UnixByteOffsetsAlias("unix-byte-offsets", cl::desc("Alias for -u"), cl::aliasopt(UnixByteOffsetsOption));
180
181bool InitialTabFlag;
182static cl::opt<bool, true> InitialTabOption("T", cl::location(InitialTabFlag), cl::desc("Line up matched line content using an inital tab character."), cl::cat(Output_Options), cl::Grouping);
183static cl::alias InitialTabAlias("initial-tab", cl::desc("Alias for -T"), cl::aliasopt(InitialTabOption));
184
185bool OnlyMatchingFlag;
186static cl::opt<bool, true> OnlyMatchingOption("o", cl::location(OnlyMatchingFlag), cl::desc("Display only the exact strings that match the pattern, with possibly multiple matches per line."), cl::cat(Output_Options), cl::Grouping);
187static cl::alias OnlyMatchingAlias("only-matching", cl::desc("Alias for -o"), cl::aliasopt(OnlyMatchingOption));
188
189std::string LabelFlag;
190    static cl::opt<std::string, true> LabelOption("label", cl::location(LabelFlag), cl::init("(standard input)"),
191                                              cl::desc("Set a label for input lines matched from stdin."), cl::cat(Output_Options));
192
193bool LineBufferedFlag;
194static cl::opt<bool, true> LineBufferedOption("line-buffered", cl::location(LineBufferedFlag), cl::desc("Buffer lines to output."), cl::cat(Output_Options));
195
196int AfterContextFlag;
197static cl::opt<int, true> AfterContextOption("A", cl::location(AfterContextFlag), cl::desc("Print <num> lines of context after each matching line."), cl::cat(Output_Options), cl::Grouping);
198static cl::alias AfterContextAlias("after-context", cl::desc("Alias for -A"), cl::aliasopt(AfterContextOption));
199
200int BeforeContextFlag;
201static cl::opt<int, true> BeforeContextOption("B", cl::location(BeforeContextFlag), cl::desc("Print <num>lines of context before each matching line."), cl::cat(Output_Options), cl::Grouping);
202static cl::alias BeforeContextAlias("before-context", cl::desc("Alias for -B"), cl::aliasopt(BeforeContextOption));
203
204int ContextFlag;
205static cl::opt<int, true> ContextOption("C", cl::location(ContextFlag), cl::desc("Print <num> lines of context before and after each matching line."), cl::cat(Output_Options), cl::Grouping);
206static cl::alias ContextAlias("context", cl::desc("Alias for -C"), cl::aliasopt(ContextOption));
207
208int MaxCountFlag;
209static cl::opt<int, true> MaxCountOption("m", cl::location(MaxCountFlag),
210                                         cl::desc("Process only the first <num> matches per file3."),
211                                         cl::cat(Output_Options), cl::Grouping);
212static cl::alias MaxCountAlias("max-count", cl::desc("Alias for -m"), cl::aliasopt(MaxCountOption));
213   
214ColoringType ColorFlag;
215static cl::opt<ColoringType, true> Color("color", cl::desc("Set colorization of the output"), cl::location(ColorFlag), cl::cat(Output_Options), cl::init(neverColor),
216                                 cl::values(clEnumValN(alwaysColor, "always", "Enable colorization"),
217                                            clEnumValN(autoColor,   "auto", "Colorize output to stdout"),
218                                            clEnumValN(neverColor,  "never", "Disable colorization")
219                                            CL_ENUM_VAL_SENTINEL));
220static cl::alias ColorAlias("colour", cl::desc("Alias for -color"), cl::aliasopt(Color));
//
// Handler for errors raised through llvm::report_fatal_error.
//
// When NDEBUG is defined, the message is written to file descriptor 2 with an
// "icgrep ERROR: " prefix, the LLVM interrupt handlers are run, and the process
// exits with InternalFailureCode.  Otherwise the message is rethrown as a
// std::runtime_error.  UserData and GenCrashDiag are not used.
//
static void icgrep_error_handler(void *UserData, const std::string &Message, bool GenCrashDiag) {
#ifdef NDEBUG
    // Modified from LLVM's internal report_fatal_error logic.
    SmallVector<char, 64> Storage;
    raw_svector_ostream Stream(Storage);
    Stream << "icgrep ERROR: " << Message << "\n";
    const StringRef Text = Stream.str();
    // The result of the write to descriptor 2 is captured and then discarded:
    // if the write failed there is nothing further to be done about it.
    const ssize_t Ignored = ::write(2, Text.data(), Text.size());
    (void)Ignored;
    // Run the interrupt handlers to make sure any special cleanups get done, in
    // particular that we remove files registered with RemoveFileOnSignal.
    llvm::sys::RunInterruptHandlers();
    exit(InternalFailureCode);
#else
    throw std::runtime_error(Message);
#endif
}
242   
243
244void InitializeCommandLineInterface(int argc, char *argv[]) {
245    llvm::install_fatal_error_handler(&icgrep_error_handler);
246    codegen::ParseCommandLineOptions(argc, argv, {&RE_Options, &Input_Options, &Output_Options, re::re_toolchain_flags(), pablo::pablo_toolchain_flags(), codegen::codegen_flags()});
247    if (RecursiveFlag || DereferenceRecursiveFlag) {
248        DirectoriesFlag = Recurse;
249    }
250   
251    if (TextFlag) {
252        if (BinaryNonMatchingFlag || (BinaryFilesFlag == WithoutMatch)) {
253            llvm::report_fatal_error("Conflicting options for binary files.\n");
254        }
255        BinaryFilesFlag = Text;
256    }
257    if (BinaryNonMatchingFlag) {
258        if (BinaryFilesFlag == Text) {
259            llvm::report_fatal_error("Conflicting options for binary files.\n");
260        }
261        BinaryFilesFlag = WithoutMatch;
262    }
263    if (BinaryFlag) {
264        llvm::report_fatal_error("Sorry, -U is not yet supported.\n");
265    }
266    if (NullDataFlag) {
267        llvm::report_fatal_error("Sorry, -z is not yet supported.\n");
268    }
269    if (ExcludeFlag!="") {
270        llvm::report_fatal_error("Sorry, -exclude is not yet supported.\n");
271    }
272    if (ExcludeFromFlag!="") {
273        llvm::report_fatal_error("Sorry, -exclude-from is not yet supported.\n");
274    }
275    if (ExcludeDirFlag!="") {
276        llvm::report_fatal_error("Sorry, -exclude-dir is not yet supported.\n");
277    }
278    if (IncludeFlag!="") {
279        llvm::report_fatal_error("Sorry, -include is not yet supported.\n");
280    }   
281    if (ByteOffsetFlag) {
282        llvm::report_fatal_error("Sorry, -b is not yet supported.\n");
283    }
284    if (UnixByteOffsetsFlag) {
285        llvm::report_fatal_error("Sorry, -u is not yet supported.\n");
286    }
287    if (OnlyMatchingFlag) {
288        llvm::report_fatal_error("Sorry, -o is not yet supported.\n");
289    }
290    if (LineBufferedFlag) {
291        llvm::report_fatal_error("Sorry, -line-buffered is not yet supported.\n");
292    }
293    if (AfterContextFlag) {
294        llvm::report_fatal_error("Sorry, -A is not yet supported.\n");
295    }
296    if (BeforeContextFlag) {
297        llvm::report_fatal_error("Sorry, -B is not yet supported.\n");
298    }
299    if (ContextFlag) {
300        llvm::report_fatal_error("Sorry, -C is not yet supported.\n");
301    }
302    if (ColorFlag!=neverColor) {
303        llvm::report_fatal_error("Sorry, -color is not yet supported.\n");
304    }
305    if (Mode == QuietMode) {
306        NoMessagesFlag = true;
307    }
308    if ((Mode == QuietMode) | (Mode == FilesWithMatch) | (Mode == FilesWithoutMatch)) {
309        MaxCountFlag = 1;
310    }
311}
312}
Note: See TracBrowser for help on using the repository browser.