source: icGREP/icgrep-devel/icgrep/grep_interface.cpp @ 5923

Last change on this file since 5923 was 5900, checked in by cameron, 16 months ago

Unicode-lines option; set default linebreak to LF

File size: 18.8 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <grep_interface.h>
8#include <llvm/Support/CommandLine.h>
9#include <llvm/Support/ErrorHandling.h>
10#include <llvm/Support/Signals.h>
11#include <llvm/Support/raw_ostream.h>
12#include <toolchain/toolchain.h>
13#include <re/re_toolchain.h>
14#include <pablo/pablo_toolchain.h>
15
16using namespace llvm;
17
18namespace grep {
19
20/*
21 *  A.  Regular expression syntax, interpretation and processing.
22 */
23
24static cl::OptionCategory RE_Options("A. Regular Expression Interpretation", "These options control regular expression parsing and interpretation");
25
26re::RE_Syntax RegexpSyntax;
27static cl::opt<re::RE_Syntax, true> RegexpSyntaxOption(cl::desc("Regular expression syntax: (default PCRE)"),
28    cl::values(
29        clEnumValN(re::RE_Syntax::ERE, "E", "Posix extended regular expression (ERE) syntax"),
30        clEnumValN(re::RE_Syntax::FixedStrings, "F", "Fixed strings, separated by newlines"),
31        clEnumValN(re::RE_Syntax::BRE, "G", "Posix basic regular expression (BRE) syntax"),
32        clEnumValN(re::RE_Syntax::PCRE, "P", "Perl-compatible regular expression (PCRE) syntax"),
33        clEnumValN(re::RE_Syntax::ERE, "extended-regexp", "Alias for -E"),
34        clEnumValN(re::RE_Syntax::FixedStrings, "fixed-strings", "Alias for -F"),
35        clEnumValN(re::RE_Syntax::BRE, "basic-regexp", "Alias for -G"),
36        clEnumValN(re::RE_Syntax::PCRE, "perl-regexp", "Alias for -P"),
37        clEnumValN(re::RE_Syntax::PROSITE, "PROSITE", "PROSITE protein patterns syntax")
38        CL_ENUM_VAL_SENTINEL), cl::cat(RE_Options), cl::Grouping, cl::location(RegexpSyntax), cl::init(re::RE_Syntax::PCRE));
39
40bool IgnoreCaseFlag;
41static cl::opt<bool, true> IgnoreCaseOption("i", cl::location(IgnoreCaseFlag), cl::desc("Ignore case distinctions in the pattern and the file."), cl::cat(RE_Options), cl::Grouping);
42static cl::alias IgnoreCaseAlias("ignore-case", cl::desc("Alias for -i"), cl::aliasopt(IgnoreCaseOption));
43
44bool InvertMatchFlag;
45static cl::opt<bool, true> InvertMatchOption("v", cl::location(InvertMatchFlag), cl::desc("Invert match results: select non-matching lines."), cl::cat(RE_Options), cl::Grouping);
46static cl::alias InvertMatchAlias("invert-match", cl::desc("Alias for -v"), cl::aliasopt(InvertMatchOption));
47
48bool LineRegexpFlag;
49static cl::opt<bool, true> LineRegexpOption("x", cl::location(LineRegexpFlag), cl::desc("Require that entire lines be matched."), cl::cat(RE_Options), cl::Grouping);
50static cl::alias LineRegexpAlias("line-regexp", cl::desc("Alias for -x"), cl::aliasopt(LineRegexpOption));
51
52bool WordRegexpFlag;
53static cl::opt<bool, true> WordRegexpOption("w", cl::location(WordRegexpFlag), cl::desc("Require that that whole words be matched."), cl::cat(RE_Options), cl::Grouping);
54static cl::alias WordRegexpAlias("word-regexp", cl::desc("Alias for -w"), cl::aliasopt(WordRegexpOption));
55
56std::vector<std::string> RegexpVector;
57static cl::list<std::string, std::vector<std::string>> RegexpOption("e", cl::location(RegexpVector), cl::desc("Regular expression"), cl::ZeroOrMore, cl::cat(RE_Options), cl::Grouping);
58static cl::alias RegexpAlias("regexp", cl::desc("Alias for -e"), cl::aliasopt(RegexpOption));
59
60std::string FileFlag;
61static cl::opt<std::string, true> FileOption("f", cl::location(FileFlag), cl::desc("Take regular expressions (one per line) from a file."), cl::cat(RE_Options), cl::Grouping);
62static cl::alias FileAlias("file", cl::desc("Alias for -f"), cl::aliasopt(FileOption));
63   
64/*
65 *  B.  Grep input sources and interpretation.
66 */
67   
68static cl::OptionCategory Input_Options("B. Input Options", "These options control the input.");
69
70bool RecursiveFlag;
71static cl::opt<bool, true> RecursiveOption("r", cl::location(RecursiveFlag), cl::desc("Recursively process files within directories, (but follow only top-level symlinks unless -R)."), cl::cat(Input_Options), cl::Grouping);
72static cl::alias RecursiveAlias("recursive", cl::desc("Alias for -r"), cl::aliasopt(RecursiveOption));
73
74bool DereferenceRecursiveFlag;
75static cl::opt<bool, true> DereferenceRecursiveOption("R", cl::location(DereferenceRecursiveFlag), cl::desc("Recursively process files within directories, following symlinks at all levels."), cl::cat(Input_Options), cl::Grouping);
76static cl::alias DereferenceRecursiveAlias("dereference-recursive", cl::desc("Alias for -R"), cl::aliasopt(DereferenceRecursiveOption));
77
78bool TextFlag;
79static cl::opt<bool, true> TextOption("a", cl::location(TextFlag), cl::desc("Treat each input file as text, even if it is a binary file."), cl::cat(Input_Options), cl::Grouping);
80static cl::alias TextAlias("text", cl::desc("Alias for -a"), cl::aliasopt(TextOption));
81
82bool BinaryNonMatchingFlag;
83static cl::opt<bool, true> BinaryNonMatchingOption("I", cl::location(BinaryNonMatchingFlag), cl::desc("Treat binary files as non-matching."), cl::cat(Input_Options), cl::Grouping);
84static cl::alias BinaryNonMatchingAlias("binary-non-matching", cl::desc("Alias for -I"), cl::aliasopt(BinaryNonMatchingOption));
85
86bool BinaryFlag;
87static cl::opt<bool, true> BinaryOption("U", cl::location(BinaryFlag), cl::desc("Treat each input file as a binary file, without CRLF normalization."), cl::cat(Input_Options), cl::Grouping);
88static cl::alias BinaryAlias("binary", cl::desc("Alias for -U"), cl::aliasopt(BinaryOption));
89
90bool NullDataFlag;
91static cl::opt<bool, true> NullDataOption("z", cl::location(NullDataFlag), cl::desc("Use the NUL character (codepoint 00) as the line-break character for input."), cl::cat(Input_Options), cl::Grouping);
92static cl::alias NullDataAlias("null-data", cl::desc("Alias for -z"), cl::aliasopt(NullDataOption));
93
94bool UnicodeLinesFlag;
95static cl::opt<bool, true> UnicodeLinesOption("Unicode-lines", cl::location(UnicodeLinesFlag), cl::desc("Enable Unicode line breaks (LF/VT/FF/CR/NEL/LS/PS/CRLF)"), cl::cat(Input_Options));
96
97bool MmapFlag;
98static cl::opt<bool, true> MmapOption("mmap", cl::location(MmapFlag), cl::desc("Use mmap for file input."), cl::cat(Input_Options));
99
100std::string ExcludeFlag;
101static cl::opt<std::string, true> ExcludeOption("exclude", cl::location(ExcludeFlag), cl::desc("Exclude files matching the given filename GLOB pattern."), cl::cat(Input_Options));
102
103std::string ExcludeFromFlag;
104static cl::opt<std::string, true> ExcludeFromOption("exclude-from", cl::location(ExcludeFromFlag), cl::desc("Exclude files matching filename GLOB patterns from the given file."), cl::cat(Input_Options));
105
106std::string ExcludeDirFlag;
107static cl::opt<std::string, true> ExcludeDirOption("exclude-dir", cl::location(ExcludeDirFlag), cl::desc("Exclude directories matching the given pattern."), cl::cat(Input_Options));
108
109std::string IncludeFlag;
110static cl::opt<std::string, true> IncludeOption("include", cl::location(IncludeFlag), cl::desc("Include only files matching the given filename GLOB pattern."), cl::cat(Input_Options));
111
112DevDirAction DevicesFlag;
113static cl::opt<DevDirAction, true> DevicesOption("D", cl::desc("Processing mode for devices:"),
114                                                 cl::values(clEnumValN(Read, "read", "Treat devices as files to be searched."),
115                                                            clEnumValN(Skip, "skip", "Silently skip devices.")
116                                                            CL_ENUM_VAL_SENTINEL), cl::cat(Input_Options), cl::location(DevicesFlag), cl::init(Read));
117static cl::alias DevicesAlias("devices", cl::desc("Alias for -D"), cl::aliasopt(DevicesOption));
118
119DevDirAction DirectoriesFlag;
120static cl::opt<DevDirAction, true> DirectoriesOption("d", cl::desc("Processing mode for directories:"),
121                                                     cl::values(clEnumValN(Read, "read", "Print an error message for any listed directories."),
122                                                                clEnumValN(Skip, "skip", "Silently skip directories."),
123                                                                clEnumValN(Recurse, "recurse", "Recursive process directories, equivalent to -r.")
124                                                                CL_ENUM_VAL_SENTINEL), cl::cat(Input_Options), cl::location(DirectoriesFlag), cl::init(Read));
125static cl::alias DirectoriesAlias("directories", cl::desc("Alias for -d"), cl::aliasopt(DirectoriesOption));
126
127BinaryFilesMode BinaryFilesFlag;
128static cl::opt<BinaryFilesMode, true> BinaryFilesOption("binary-files", cl::desc("Processing mode for binary files:"),
129                                                     cl::values(clEnumValN(Binary, "binary", "Report match/non-match without printing matches."),
130                                                                clEnumValN(WithoutMatch, "without-match", "Always report as non-matching."),
131                                                                clEnumValN(Text, "text", "Treat binary files as text.")
132                                                                CL_ENUM_VAL_SENTINEL), cl::cat(Input_Options), cl::location(BinaryFilesFlag), cl::init(Binary));
133   
134/*
135 *  C.  Grep output modes and options.
136 */
137   
138   
139static cl::OptionCategory Output_Options("C. Output Options",
140                                            "These options control the output.");
141   
142GrepModeType Mode;
143static cl::opt<GrepModeType, true> GrepModeOption(cl::desc("Abbreviated output mode options:"),
144    cl::values(
145        clEnumValN(CountOnly, "c", "Display only the count of matching lines per file."),
146        clEnumValN(FilesWithMatch, "l", "Display only the names of files that have at least one match to the pattern."),
147        clEnumValN(FilesWithoutMatch, "L", "Display only the names of files that do not match the pattern."),
148        clEnumValN(QuietMode, "q", "Do not generate any output and ignore errors; set the return to zero status if a match is found."),
149        clEnumValN(CountOnly, "count", "Alias for -c"),
150        clEnumValN(FilesWithMatch, "files-with-match", "Alias for -l"),
151        clEnumValN(FilesWithoutMatch, "files-without-match", "Alias for -L"),
152        clEnumValN(QuietMode, "quiet", "Alias for -q"),
153        clEnumValN(QuietMode, "silent", "Alias for -q")
154        CL_ENUM_VAL_SENTINEL), cl::cat(Output_Options), cl::Grouping, cl::location(Mode), cl::init(NormalMode));
155
156bool NoMessagesFlag;
157static cl::opt<bool, true> NoMessagesOption("s", cl::location(NoMessagesFlag), cl::desc("Suppress messages for file errors."), cl::cat(Output_Options), cl::Grouping);
158static cl::alias NoMessagesAlias("no-messages", cl::desc("Alias for -s"), cl::aliasopt(NoMessagesOption));
159
160bool WithFilenameFlag;
161static cl::opt<bool, true> WithFilenameOption("H", cl::location(WithFilenameFlag), cl::desc("Show the file name with each matching line."), cl::cat(Output_Options), cl::Grouping);
162static cl::alias WithFilenameAlias("with-filename", cl::desc("Alias for -H"), cl::aliasopt(WithFilenameOption));
163
164bool NoFilenameFlag;
165static cl::opt<bool, true> NoFilenameOption("h", cl::location(NoFilenameFlag), cl::desc("Do not show filenames with maches."), cl::cat(Output_Options), cl::Grouping);
166static cl::alias NoFilenameAlias("no-filename", cl::desc("Alias for -h"), cl::aliasopt(NoFilenameOption));
167
168bool NullFlag;
169static cl::opt<bool, true> NullOption("Z", cl::location(NullFlag), cl::desc("Write NUL characters after filenames generated to output."), cl::cat(Output_Options), cl::Grouping);
170static cl::alias NullAlias("null", cl::desc("Alias for -Z"), cl::aliasopt(NullOption));
171
172bool LineNumberFlag;
173static cl::opt<bool, true> LineNumberOption("n", cl::location(LineNumberFlag), cl::desc("Show the line number with each matching line."), cl::cat(Output_Options), cl::Grouping);
174static cl::alias LineNumberAlias("line-number", cl::desc("Alias for -n"), cl::aliasopt(LineNumberOption));
175
176bool ByteOffsetFlag;
177static cl::opt<bool, true> ByteOffsetOption("b", cl::location(ByteOffsetFlag), cl::desc("Show the byte offset within the file for each matching line."), cl::cat(Output_Options), cl::Grouping);
178static cl::alias ByteOffsetAlias("byte-offset", cl::desc("Alias for -b"), cl::aliasopt(ByteOffsetOption));
179
180bool UnixByteOffsetsFlag;
181static cl::opt<bool, true> UnixByteOffsetsOption("u", cl::location(UnixByteOffsetsFlag), cl::desc("If byte offsets are displayed, report offsets as if all lines are terminated with a single LF."), cl::cat(Output_Options), cl::Grouping);
182static cl::alias UnixByteOffsetsAlias("unix-byte-offsets", cl::desc("Alias for -u"), cl::aliasopt(UnixByteOffsetsOption));
183
184bool InitialTabFlag;
185static cl::opt<bool, true> InitialTabOption("T", cl::location(InitialTabFlag), cl::desc("Line up matched line content using an inital tab character."), cl::cat(Output_Options), cl::Grouping);
186static cl::alias InitialTabAlias("initial-tab", cl::desc("Alias for -T"), cl::aliasopt(InitialTabOption));
187
188bool OnlyMatchingFlag;
189static cl::opt<bool, true> OnlyMatchingOption("o", cl::location(OnlyMatchingFlag), cl::desc("Display only the exact strings that match the pattern, with possibly multiple matches per line."), cl::cat(Output_Options), cl::Grouping);
190static cl::alias OnlyMatchingAlias("only-matching", cl::desc("Alias for -o"), cl::aliasopt(OnlyMatchingOption));
191
192std::string LabelFlag;
193    static cl::opt<std::string, true> LabelOption("label", cl::location(LabelFlag), cl::init("(standard input)"),
194                                              cl::desc("Set a label for input lines matched from stdin."), cl::cat(Output_Options));
195
196bool LineBufferedFlag;
197static cl::opt<bool, true> LineBufferedOption("line-buffered", cl::location(LineBufferedFlag), cl::desc("Buffer lines to output."), cl::cat(Output_Options));
198
199int AfterContextFlag;
200static cl::opt<int, true> AfterContextOption("A", cl::location(AfterContextFlag), cl::desc("Print <num> lines of context after each matching line."), cl::cat(Output_Options), cl::Grouping);
201static cl::alias AfterContextAlias("after-context", cl::desc("Alias for -A"), cl::aliasopt(AfterContextOption));
202
203int BeforeContextFlag;
204static cl::opt<int, true> BeforeContextOption("B", cl::location(BeforeContextFlag), cl::desc("Print <num>lines of context before each matching line."), cl::cat(Output_Options), cl::Grouping);
205static cl::alias BeforeContextAlias("before-context", cl::desc("Alias for -B"), cl::aliasopt(BeforeContextOption));
206
207int ContextFlag;
208static cl::opt<int, true> ContextOption("C", cl::location(ContextFlag), cl::desc("Print <num> lines of context before and after each matching line."), cl::cat(Output_Options), cl::Grouping);
209static cl::alias ContextAlias("context", cl::desc("Alias for -C"), cl::aliasopt(ContextOption));
210
211int MaxCountFlag;
212static cl::opt<int, true> MaxCountOption("m", cl::location(MaxCountFlag),
213                                         cl::desc("Process only the first <num> matches per file3."),
214                                         cl::cat(Output_Options), cl::Grouping);
215static cl::alias MaxCountAlias("max-count", cl::desc("Alias for -m"), cl::aliasopt(MaxCountOption));
216   
217ColoringType ColorFlag;
218static cl::opt<ColoringType, true> Color("color", cl::desc("Set colorization of the output"), cl::location(ColorFlag), cl::cat(Output_Options), cl::init(neverColor),
219                                 cl::values(clEnumValN(alwaysColor, "always", "Enable colorization"),
220                                            clEnumValN(autoColor,   "auto", "Colorize output to stdout"),
221                                            clEnumValN(neverColor,  "never", "Disable colorization")
222                                            CL_ENUM_VAL_SENTINEL));
223static cl::alias ColorAlias("colour", cl::desc("Alias for -color"), cl::aliasopt(Color));
//
// Handler for errors reported through llvm::report_fatal_error.
//
// Builds without NDEBUG rethrow the message as a std::runtime_error.
// Builds with NDEBUG write "icgrep ERROR: <message>" plus a newline to file
// descriptor 2, run LLVM's interrupt handlers, and terminate the process with
// InternalFailureCode (declared outside this file section).
//
// UserData and GenCrashDiag are part of the handler signature but are not used.
//
static void icgrep_error_handler(void *UserData, const std::string &Message, bool GenCrashDiag) {
#ifdef NDEBUG
        // Modified from LLVM's internal report_fatal_error logic.
        SmallVector<char, 64> Storage;
        raw_svector_ostream Stream(Storage);
        Stream << "icgrep ERROR: " << Message << "\n";
        const StringRef Text = Stream.str();
        // The result of the write is deliberately ignored: if reporting the
        // error itself fails there is nothing further to be done.
        const ssize_t ignored = ::write(2, Text.data(), Text.size());
        (void)ignored;
        // Run the interrupt handlers so that special cleanups get done, in
        // particular removal of files registered with RemoveFileOnSignal.
        llvm::sys::RunInterruptHandlers();
        exit(InternalFailureCode);
#else
        throw std::runtime_error(Message);
#endif
}
245   
246
247void InitializeCommandLineInterface(int argc, char *argv[]) {
248    llvm::install_fatal_error_handler(&icgrep_error_handler);
249    codegen::ParseCommandLineOptions(argc, argv, {&RE_Options, &Input_Options, &Output_Options, re::re_toolchain_flags(), pablo::pablo_toolchain_flags(), codegen::codegen_flags()});
250    if (RecursiveFlag || DereferenceRecursiveFlag) {
251        DirectoriesFlag = Recurse;
252    }
253   
254    if (TextFlag) {
255        if (BinaryNonMatchingFlag || (BinaryFilesFlag == WithoutMatch)) {
256            llvm::report_fatal_error("Conflicting options for binary files.\n");
257        }
258        BinaryFilesFlag = Text;
259    }
260    if (BinaryNonMatchingFlag) {
261        if (BinaryFilesFlag == Text) {
262            llvm::report_fatal_error("Conflicting options for binary files.\n");
263        }
264        BinaryFilesFlag = WithoutMatch;
265    }
266    if (BinaryFlag) {
267        llvm::report_fatal_error("Sorry, -U is not yet supported.\n");
268    }
269    if (NullDataFlag) {
270        llvm::report_fatal_error("Sorry, -z is not yet supported.\n");
271    }
272    if (ExcludeFlag!="") {
273        llvm::report_fatal_error("Sorry, -exclude is not yet supported.\n");
274    }
275    if (ExcludeFromFlag!="") {
276        llvm::report_fatal_error("Sorry, -exclude-from is not yet supported.\n");
277    }
278    if (ExcludeDirFlag!="") {
279        llvm::report_fatal_error("Sorry, -exclude-dir is not yet supported.\n");
280    }
281    if (IncludeFlag!="") {
282        llvm::report_fatal_error("Sorry, -include is not yet supported.\n");
283    }   
284    if (ByteOffsetFlag) {
285        llvm::report_fatal_error("Sorry, -b is not yet supported.\n");
286    }
287    if (UnixByteOffsetsFlag) {
288        llvm::report_fatal_error("Sorry, -u is not yet supported.\n");
289    }
290    if (OnlyMatchingFlag) {
291        llvm::report_fatal_error("Sorry, -o is not yet supported.\n");
292    }
293    if (LineBufferedFlag) {
294        llvm::report_fatal_error("Sorry, -line-buffered is not yet supported.\n");
295    }
296    if (AfterContextFlag) {
297        llvm::report_fatal_error("Sorry, -A is not yet supported.\n");
298    }
299    if (BeforeContextFlag) {
300        llvm::report_fatal_error("Sorry, -B is not yet supported.\n");
301    }
302    if (ContextFlag) {
303        llvm::report_fatal_error("Sorry, -C is not yet supported.\n");
304    }
305    if (ColorFlag!=neverColor) {
306        llvm::report_fatal_error("Sorry, -color is not yet supported.\n");
307    }
308    if (Mode == QuietMode) {
309        NoMessagesFlag = true;
310    }
311    if ((Mode == QuietMode) | (Mode == FilesWithMatch) | (Mode == FilesWithoutMatch)) {
312        MaxCountFlag = 1;
313    }
314}
315}
Note: See TracBrowser for help on using the repository browser.