source: icGREP/icgrep-devel/icgrep/grep_interface.cpp @ 5609

Last change on this file since 5609 was 5486, checked in by nmedfort, 2 years ago

Initial attempt to improve debugging capabilities with compilation stack traces on error.

File size: 18.6 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <grep_interface.h>
8#include <llvm/Support/CommandLine.h>
9#include <llvm/Support/ErrorHandling.h>
10#include <llvm/Support/Signals.h>
11#include <llvm/Support/raw_ostream.h>
12#include <toolchain/toolchain.h>
13#include <re/re_toolchain.h>
14#include <pablo/pablo_toolchain.h>
15
16using namespace llvm;
17
18namespace grep {
19
20/*
21 *  A.  Regular expression syntax, interpretation and processing.
22 */
23
24static cl::OptionCategory RE_Options("A. Regular Expression Interpretation", "These options control regular expression parsing and interpretation");
25
26re::RE_Syntax RegexpSyntax;
27static cl::opt<re::RE_Syntax, true> RegexpSyntaxOption(cl::desc("Regular expression syntax: (default PCRE)"),
28    cl::values(
29        clEnumValN(re::RE_Syntax::ERE, "E", "Posix extended regular expression (ERE) syntax"),
30        clEnumValN(re::RE_Syntax::FixedStrings, "F", "Fixed strings, separated by newlines"),
31        clEnumValN(re::RE_Syntax::BRE, "G", "Posix basic regular expression (BRE) syntax"),
32        clEnumValN(re::RE_Syntax::PCRE, "P", "Perl-compatible regular expression (PCRE) syntax"),
33        clEnumValN(re::RE_Syntax::ERE, "extended-regexp", "Alias for -E"),
34        clEnumValN(re::RE_Syntax::FixedStrings, "fixed-strings", "Alias for -F"),
35        clEnumValN(re::RE_Syntax::BRE, "basic-regexp", "Alias for -G"),
36        clEnumValN(re::RE_Syntax::PCRE, "perl-regexp", "Alias for -P"),
37        clEnumValN(re::RE_Syntax::PROSITE, "PROSITE", "PROSITE protein patterns syntax"),
38        clEnumValEnd), cl::cat(RE_Options), cl::Grouping, cl::location(RegexpSyntax), cl::init(re::RE_Syntax::PCRE));
39
40bool IgnoreCaseFlag;
41static cl::opt<bool, true> IgnoreCaseOption("i", cl::location(IgnoreCaseFlag), cl::desc("Ignore case distinctions in the pattern and the file."), cl::cat(RE_Options), cl::Grouping);
42static cl::alias IgnoreCaseAlias("ignore-case", cl::desc("Alias for -i"), cl::aliasopt(IgnoreCaseOption));
43
44bool InvertMatchFlag;
45static cl::opt<bool, true> InvertMatchOption("v", cl::location(InvertMatchFlag), cl::desc("Invert match results: select non-matching lines."), cl::cat(RE_Options), cl::Grouping);
46static cl::alias InvertMatchAlias("invert-match", cl::desc("Alias for -v"), cl::aliasopt(InvertMatchOption));
47
48bool LineRegexpFlag;
49static cl::opt<bool, true> LineRegexpOption("x", cl::location(LineRegexpFlag), cl::desc("Require that entire lines be matched."), cl::cat(RE_Options), cl::Grouping);
50static cl::alias LineRegexpAlias("line-regexp", cl::desc("Alias for -x"), cl::aliasopt(LineRegexpOption));
51
52bool WordRegexpFlag;
53static cl::opt<bool, true> WordRegexpOption("w", cl::location(WordRegexpFlag), cl::desc("Require that that whole words be matched."), cl::cat(RE_Options), cl::Grouping);
54static cl::alias WordRegexpAlias("word-regexp", cl::desc("Alias for -w"), cl::aliasopt(WordRegexpOption));
55
56std::vector<std::string> RegexpVector;
57static cl::list<std::string, std::vector<std::string>> RegexpOption("e", cl::location(RegexpVector), cl::desc("Regular expression"), cl::ZeroOrMore, cl::cat(RE_Options), cl::Grouping);
58static cl::alias RegexpAlias("regexp", cl::desc("Alias for -e"), cl::aliasopt(RegexpOption));
59
60std::string FileFlag;
61static cl::opt<std::string, true> FileOption("f", cl::location(FileFlag), cl::desc("Take regular expressions (one per line) from a file."), cl::cat(RE_Options), cl::Grouping);
62static cl::alias FileAlias("file", cl::desc("Alias for -f"), cl::aliasopt(FileOption));
63   
64/*
65 *  B.  Grep input sources and interpretation.
66 */
67   
68static cl::OptionCategory Input_Options("B. Input Options", "These options control the input.");
69
70bool RecursiveFlag;
71static cl::opt<bool, true> RecursiveOption("r", cl::location(RecursiveFlag), cl::desc("Recursively process files within directories, (but follow only top-level symlinks unless -R)."), cl::cat(Input_Options), cl::Grouping);
72static cl::alias RecursiveAlias("recursive", cl::desc("Alias for -r"), cl::aliasopt(RecursiveOption));
73
74bool DereferenceRecursiveFlag;
75static cl::opt<bool, true> DereferenceRecursiveOption("R", cl::location(DereferenceRecursiveFlag), cl::desc("Recursively process files within directories, following symlinks at all levels."), cl::cat(Input_Options), cl::Grouping);
76static cl::alias DereferenceRecursiveAlias("dereference-recursive", cl::desc("Alias for -R"), cl::aliasopt(DereferenceRecursiveOption));
77
78bool TextFlag;
79static cl::opt<bool, true> TextOption("a", cl::location(TextFlag), cl::desc("Treat each input file as text, even if it is a binary file."), cl::cat(Input_Options), cl::Grouping);
80static cl::alias TextAlias("text", cl::desc("Alias for -a"), cl::aliasopt(TextOption));
81
82bool BinaryNonMatchingFlag;
83static cl::opt<bool, true> BinaryNonMatchingOption("I", cl::location(BinaryNonMatchingFlag), cl::desc("Treat binary files as non-matching."), cl::cat(Input_Options), cl::Grouping);
84static cl::alias BinaryNonMatchingAlias("binary-non-matching", cl::desc("Alias for -I"), cl::aliasopt(BinaryNonMatchingOption));
85
86bool BinaryFlag;
87static cl::opt<bool, true> BinaryOption("U", cl::location(BinaryFlag), cl::desc("Treat each input file as a binary file, without CRLF normalization."), cl::cat(Input_Options), cl::Grouping);
88static cl::alias BinaryAlias("binary", cl::desc("Alias for -U"), cl::aliasopt(BinaryOption));
89
90bool NullDataFlag;
91static cl::opt<bool, true> NullDataOption("z", cl::location(NullDataFlag), cl::desc("Use the NUL character (codepoint 00) as the line-break character for input."), cl::cat(Input_Options), cl::Grouping);
92static cl::alias NullDataAlias("null-data", cl::desc("Alias for -z"), cl::aliasopt(NullDataOption));
93
94bool MmapFlag;
95static cl::opt<bool, true> MmapOption("mmap", cl::location(MmapFlag), cl::desc("Use mmap for file input."), cl::cat(Input_Options));
96
97std::string ExcludeFlag;
98static cl::opt<std::string, true> ExcludeOption("exclude", cl::location(ExcludeFlag), cl::desc("Exclude files matching the given filename GLOB pattern."), cl::cat(Input_Options));
99
100std::string ExcludeFromFlag;
101static cl::opt<std::string, true> ExcludeFromOption("exclude-from", cl::location(ExcludeFromFlag), cl::desc("Exclude files matching filename GLOB patterns from the given file."), cl::cat(Input_Options));
102
103std::string ExcludeDirFlag;
104static cl::opt<std::string, true> ExcludeDirOption("exclude-dir", cl::location(ExcludeDirFlag), cl::desc("Exclude directories matching the given pattern."), cl::cat(Input_Options));
105
106std::string IncludeFlag;
107static cl::opt<std::string, true> IncludeOption("include", cl::location(IncludeFlag), cl::desc("Include only files matching the given filename GLOB pattern."), cl::cat(Input_Options));
108
109DevDirAction DevicesFlag;
110static cl::opt<DevDirAction, true> DevicesOption("D", cl::desc("Processing mode for devices:"),
111                                                 cl::values(clEnumValN(Read, "read", "Treat devices as files to be searched."),
112                                                            clEnumValN(Skip, "skip", "Silently skip devices."),
113                                                            clEnumValEnd), cl::cat(Input_Options), cl::location(DevicesFlag), cl::init(Read));
114static cl::alias DevicesAlias("devices", cl::desc("Alias for -D"), cl::aliasopt(DevicesOption));
115
116DevDirAction DirectoriesFlag;
117static cl::opt<DevDirAction, true> DirectoriesOption("d", cl::desc("Processing mode for directories:"),
118                                                     cl::values(clEnumValN(Read, "read", "Print an error message for any listed directories."),
119                                                                clEnumValN(Skip, "skip", "Silently skip directories."),
120                                                                clEnumValN(Recurse, "recurse", "Recursive process directories, equivalent to -r."),
121                                                                clEnumValEnd), cl::cat(Input_Options), cl::location(DirectoriesFlag), cl::init(Read));
122static cl::alias DirectoriesAlias("directories", cl::desc("Alias for -d"), cl::aliasopt(DirectoriesOption));
123
124BinaryFilesMode BinaryFilesFlag;
125static cl::opt<BinaryFilesMode, true> BinaryFilesOption("binary-files", cl::desc("Processing mode for binary files:"),
126                                                     cl::values(clEnumValN(Binary, "binary", "Report match/non-match without printing matches."),
127                                                                clEnumValN(WithoutMatch, "without-match", "Always report as non-matching."),
128                                                                clEnumValN(Text, "text", "Treat binary files as text."),
129                                                                clEnumValEnd), cl::cat(Input_Options), cl::location(BinaryFilesFlag), cl::init(Binary));
130   
131/*
132 *  C.  Grep output modes and options.
133 */
134   
135   
136static cl::OptionCategory Output_Options("C. Output Options",
137                                            "These options control the output.");
138   
139GrepModeType Mode;
140static cl::opt<GrepModeType, true> GrepModeOption(cl::desc("Abbreviated output mode options:"),
141    cl::values(
142        clEnumValN(CountOnly, "c", "Display only the count of matching lines per file."),
143        clEnumValN(FilesWithMatch, "l", "Display only the names of files that have at least one match to the pattern."),
144        clEnumValN(FilesWithoutMatch, "L", "Display only the names of files that do not match the pattern."),
145        clEnumValN(QuietMode, "q", "Do not generate any output and ignore errors; set the return to zero status if a match is found."),
146        clEnumValN(CountOnly, "count", "Alias for -c"),
147        clEnumValN(FilesWithMatch, "files-with-match", "Alias for -l"),
148        clEnumValN(FilesWithoutMatch, "files-without-match", "Alias for -L"),
149        clEnumValN(QuietMode, "quiet", "Alias for -q"),
150        clEnumValN(QuietMode, "silent", "Alias for -q"),
151        clEnumValEnd), cl::cat(Output_Options), cl::Grouping, cl::location(Mode), cl::init(NormalMode));
152
153bool NoMessagesFlag;
154static cl::opt<bool, true> NoMessagesOption("s", cl::location(NoMessagesFlag), cl::desc("Suppress messages for file errors."), cl::cat(Output_Options), cl::Grouping);
155static cl::alias NoMessagesAlias("no-messages", cl::desc("Alias for -s"), cl::aliasopt(NoMessagesOption));
156
157bool WithFilenameFlag;
158static cl::opt<bool, true> WithFilenameOption("H", cl::location(WithFilenameFlag), cl::desc("Show the file name with each matching line."), cl::cat(Output_Options), cl::Grouping);
159static cl::alias WithFilenameAlias("with-filename", cl::desc("Alias for -H"), cl::aliasopt(WithFilenameOption));
160
161bool NoFilenameFlag;
162static cl::opt<bool, true> NoFilenameOption("h", cl::location(NoFilenameFlag), cl::desc("Do not show filenames with maches."), cl::cat(Output_Options), cl::Grouping);
163static cl::alias NoFilenameAlias("no-filename", cl::desc("Alias for -h"), cl::aliasopt(NoFilenameOption));
164
165bool NullFlag;
166static cl::opt<bool, true> NullOption("Z", cl::location(NullFlag), cl::desc("Write NUL characters after filenames generated to output."), cl::cat(Output_Options), cl::Grouping);
167static cl::alias NullAlias("null", cl::desc("Alias for -Z"), cl::aliasopt(NullOption));
168
169bool LineNumberFlag;
170static cl::opt<bool, true> LineNumberOption("n", cl::location(LineNumberFlag), cl::desc("Show the line number with each matching line."), cl::cat(Output_Options), cl::Grouping);
171static cl::alias LineNumberAlias("line-number", cl::desc("Alias for -n"), cl::aliasopt(LineNumberOption));
172
173bool ByteOffsetFlag;
174static cl::opt<bool, true> ByteOffsetOption("b", cl::location(ByteOffsetFlag), cl::desc("Show the byte offset within the file for each matching line."), cl::cat(Output_Options), cl::Grouping);
175static cl::alias ByteOffsetAlias("byte-offset", cl::desc("Alias for -b"), cl::aliasopt(ByteOffsetOption));
176
177bool UnixByteOffsetsFlag;
178static cl::opt<bool, true> UnixByteOffsetsOption("u", cl::location(UnixByteOffsetsFlag), cl::desc("If byte offsets are displayed, report offsets as if all lines are terminated with a single LF."), cl::cat(Output_Options), cl::Grouping);
179static cl::alias UnixByteOffsetsAlias("unix-byte-offsets", cl::desc("Alias for -u"), cl::aliasopt(UnixByteOffsetsOption));
180
181bool InitialTabFlag;
182static cl::opt<bool, true> InitialTabOption("T", cl::location(InitialTabFlag), cl::desc("Line up matched line content using an inital tab character."), cl::cat(Output_Options), cl::Grouping);
183static cl::alias InitialTabAlias("initial-tab", cl::desc("Alias for -T"), cl::aliasopt(InitialTabOption));
184
185bool OnlyMatchingFlag;
186static cl::opt<bool, true> OnlyMatchingOption("o", cl::location(OnlyMatchingFlag), cl::desc("Display only the exact strings that match the pattern, with possibly multiple matches per line."), cl::cat(Output_Options), cl::Grouping);
187static cl::alias OnlyMatchingAlias("only-matching", cl::desc("Alias for -o"), cl::aliasopt(OnlyMatchingOption));
188
189std::string LabelFlag;
190    static cl::opt<std::string, true> LabelOption("label", cl::location(LabelFlag), cl::init("(standard input)"),
191                                              cl::desc("Set a label for input lines matched from stdin."), cl::cat(Output_Options));
192
193bool LineBufferedFlag;
194static cl::opt<bool, true> LineBufferedOption("line-buffered", cl::location(LineBufferedFlag), cl::desc("Buffer lines to output."), cl::cat(Output_Options));
195
196bool NormalizeLineBreaksFlag;
197static cl::opt<bool, true> NormalizeLineBreaksOption("normalize-line-breaks", cl::location(NormalizeLineBreaksFlag), cl::desc("Normalize line breaks to LF."), cl::cat(Output_Options));
198
199int AfterContextFlag;
200static cl::opt<int, true> AfterContextOption("A", cl::location(AfterContextFlag), cl::desc("Print <num> lines of context after each matching line."), cl::cat(Output_Options), cl::Grouping);
201static cl::alias AfterContextAlias("after-context", cl::desc("Alias for -A"), cl::aliasopt(AfterContextOption));
202
203int BeforeContextFlag;
204static cl::opt<int, true> BeforeContextOption("B", cl::location(BeforeContextFlag), cl::desc("Print <num>lines of context before each matching line."), cl::cat(Output_Options), cl::Grouping);
205static cl::alias BeforeContextAlias("before-context", cl::desc("Alias for -B"), cl::aliasopt(BeforeContextOption));
206
207int ContextFlag;
208static cl::opt<int, true> ContextOption("C", cl::location(ContextFlag), cl::desc("Print <num> lines of context before and after each matching line."), cl::cat(Output_Options), cl::Grouping);
209static cl::alias ContextAlias("context", cl::desc("Alias for -C"), cl::aliasopt(ContextOption));
210
211int MaxCountFlag;
212static cl::opt<int, true> MaxCountOption("m", cl::location(MaxCountFlag), cl::desc("Process only the first <num> matches per file."), cl::cat(Output_Options), cl::Grouping);
213static cl::alias MaxCountAlias("max-count", cl::desc("Alias for -m"), cl::aliasopt(MaxCountOption));
214   
215ColoringType ColorFlag;
216static cl::opt<ColoringType, true> Color("color", cl::desc("Set colorization of the output"), cl::location(ColorFlag), cl::cat(Output_Options), cl::init(neverColor),
217                                 cl::values(clEnumValN(alwaysColor, "always", "Enable colorization"),
218                                            clEnumValN(autoColor,   "auto", "Colorize output to stdout"),
219                                            clEnumValN(neverColor,  "never", "Disable colorization"),
220                                            clEnumValEnd));
221static cl::alias ColorAlias("colour", cl::desc("Alias for -color"), cl::aliasopt(Color));
//
// Handler for errors reported through llvm::report_fatal_error.
//
// Debug builds (NDEBUG undefined) throw the message as a std::runtime_error.
// Release builds write "icgrep ERROR: <message>" to file descriptor 2, run
// LLVM's interrupt handlers and exit with InternalFailureCode.
// UserData and GenCrashDiag are not used.
//
static void icgrep_error_handler(void *UserData, const std::string &Message, bool GenCrashDiag) {
#ifdef NDEBUG
    // Modified from LLVM's internal report_fatal_error logic.
    SmallVector<char, 64> storage;
    raw_svector_ostream out(storage);
    out << "icgrep ERROR: ";
    out << Message;
    out << "\n";
    const StringRef text = out.str();
    // The result of write() is captured and discarded: if the write fails
    // there is nothing more to be done, so we deliberately just give up.
    const ssize_t ignored = ::write(2, text.data(), text.size());
    (void)ignored;
    // Run the interrupt handlers so that any special cleanups get done, in
    // particular removal of files registered with RemoveFileOnSignal.
    llvm::sys::RunInterruptHandlers();
    exit(InternalFailureCode);
#else
    throw std::runtime_error(Message);
#endif
}
243   
244
245void InitializeCommandLineInterface(int argc, char *argv[]) {
246    llvm::install_fatal_error_handler(&icgrep_error_handler);
247    codegen::ParseCommandLineOptions(argc, argv, {&RE_Options, &Input_Options, &Output_Options, re::re_toolchain_flags(), pablo::pablo_toolchain_flags(), codegen::codegen_flags()});
248    if (RecursiveFlag || DereferenceRecursiveFlag) {
249        DirectoriesFlag = Recurse;
250    }
251   
252    if (RegexpSyntax == re::RE_Syntax::FixedStrings) {
253        llvm::report_fatal_error("Sorry, FixedStrings syntax is not fully supported.\n");
254    }
255    if (TextFlag) {
256        if (BinaryNonMatchingFlag || (BinaryFilesFlag == WithoutMatch)) {
257            llvm::report_fatal_error("Conflicting options for binary files.\n");
258        }
259        BinaryFilesFlag = Text;
260    }
261    if (BinaryNonMatchingFlag) {
262        if (BinaryFilesFlag == Text) {
263            llvm::report_fatal_error("Conflicting options for binary files.\n");
264        }
265        BinaryFilesFlag = WithoutMatch;
266    }
267    if (BinaryFlag) {
268        llvm::report_fatal_error("Sorry, -U is not yet supported.\n");
269    }
270    if (NullDataFlag) {
271        llvm::report_fatal_error("Sorry, -z is not yet supported.\n");
272    }
273    if (ExcludeFlag!="") {
274        llvm::report_fatal_error("Sorry, -exclude is not yet supported.\n");
275    }
276    if (ExcludeFromFlag!="") {
277        llvm::report_fatal_error("Sorry, -exclude-from is not yet supported.\n");
278    }
279    if (ExcludeDirFlag!="") {
280        llvm::report_fatal_error("Sorry, -exclude-dir is not yet supported.\n");
281    }
282    if (IncludeFlag!="") {
283        llvm::report_fatal_error("Sorry, -include is not yet supported.\n");
284    }   
285    if (ByteOffsetFlag) {
286        llvm::report_fatal_error("Sorry, -b is not yet supported.\n");
287    }
288    if (UnixByteOffsetsFlag) {
289        llvm::report_fatal_error("Sorry, -u is not yet supported.\n");
290    }
291    if (OnlyMatchingFlag) {
292        llvm::report_fatal_error("Sorry, -o is not yet supported.\n");
293    }
294    if (LineBufferedFlag) {
295        llvm::report_fatal_error("Sorry, -line-buffered is not yet supported.\n");
296    }
297    if (AfterContextFlag) {
298        llvm::report_fatal_error("Sorry, -A is not yet supported.\n");
299    }
300    if (BeforeContextFlag) {
301        llvm::report_fatal_error("Sorry, -B is not yet supported.\n");
302    }
303    if (ContextFlag) {
304        llvm::report_fatal_error("Sorry, -C is not yet supported.\n");
305    }
306    if (ColorFlag!=neverColor) {
307        llvm::report_fatal_error("Sorry, -color is not yet supported.\n");
308    }
309}
310}
Note: See TracBrowser for help on using the repository browser.