source: icGREP/icgrep-devel/icgrep/grep_interface.cpp @ 5940

Last change on this file since 5940 was 5940, checked in by cameron, 12 months ago

icgrep -GLOB option for GLOB syntax, utilities for file include/exclude command arguments

File size: 20.4 KB
Line 
1/*
2 *  Copyright (c) 2018 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <grep_interface.h>
8#include <llvm/Support/CommandLine.h>
9#include <llvm/Support/ErrorHandling.h>
10#include <llvm/Support/Signals.h>
11#include <llvm/Support/raw_ostream.h>
12#include <toolchain/toolchain.h>
13#include <re/parsers/parser.h>
14#include <re/re_alt.h>
15#include <re/re_toolchain.h>
16#include <fstream>
17#include <string>
18
19#include <pablo/pablo_toolchain.h>
20
21using namespace llvm;
22
23namespace grep {
24
25/*
26 *  A.  Regular expression syntax, interpretation and processing.
27 */
28
29static cl::OptionCategory RE_Options("A. Regular Expression Interpretation", "These options control regular expression parsing and interpretation");
30
31re::RE_Syntax RegexpSyntax;
32static cl::opt<re::RE_Syntax, true> RegexpSyntaxOption(cl::desc("Regular expression syntax: (default PCRE)"),
33    cl::values(
34        clEnumValN(re::RE_Syntax::ERE, "E", "Posix extended regular expression (ERE) syntax"),
35        clEnumValN(re::RE_Syntax::FixedStrings, "F", "Fixed strings, separated by newlines"),
36        clEnumValN(re::RE_Syntax::BRE, "G", "Posix basic regular expression (BRE) syntax"),
37        clEnumValN(re::RE_Syntax::PCRE, "P", "Perl-compatible regular expression (PCRE) syntax"),
38        clEnumValN(re::RE_Syntax::ERE, "extended-regexp", "Alias for -E"),
39        clEnumValN(re::RE_Syntax::FixedStrings, "fixed-strings", "Alias for -F"),
40        clEnumValN(re::RE_Syntax::BRE, "basic-regexp", "Alias for -G"),
41        clEnumValN(re::RE_Syntax::PCRE, "perl-regexp", "Alias for -P"),
42        clEnumValN(re::RE_Syntax::FileGLOB, "GLOB", "Posix GLOB syntax for file name patterns"),
43        clEnumValN(re::RE_Syntax::PROSITE, "PROSITE", "PROSITE protein patterns syntax")
44        CL_ENUM_VAL_SENTINEL), cl::cat(RE_Options), cl::Grouping, cl::location(RegexpSyntax), cl::init(re::RE_Syntax::PCRE));
45
46bool IgnoreCaseFlag;
47static cl::opt<bool, true> IgnoreCaseOption("i", cl::location(IgnoreCaseFlag), cl::desc("Ignore case distinctions in the pattern and the file."), cl::cat(RE_Options), cl::Grouping);
48static cl::alias IgnoreCaseAlias("ignore-case", cl::desc("Alias for -i"), cl::aliasopt(IgnoreCaseOption));
49
50bool InvertMatchFlag;
51static cl::opt<bool, true> InvertMatchOption("v", cl::location(InvertMatchFlag), cl::desc("Invert match results: select non-matching lines."), cl::cat(RE_Options), cl::Grouping);
52static cl::alias InvertMatchAlias("invert-match", cl::desc("Alias for -v"), cl::aliasopt(InvertMatchOption));
53
54bool LineRegexpFlag;
55static cl::opt<bool, true> LineRegexpOption("x", cl::location(LineRegexpFlag), cl::desc("Require that entire lines be matched."), cl::cat(RE_Options), cl::Grouping);
56static cl::alias LineRegexpAlias("line-regexp", cl::desc("Alias for -x"), cl::aliasopt(LineRegexpOption));
57
58bool WordRegexpFlag;
59static cl::opt<bool, true> WordRegexpOption("w", cl::location(WordRegexpFlag), cl::desc("Require that that whole words be matched."), cl::cat(RE_Options), cl::Grouping);
60static cl::alias WordRegexpAlias("word-regexp", cl::desc("Alias for -w"), cl::aliasopt(WordRegexpOption));
61
62std::vector<std::string> RegexpVector;
63static cl::list<std::string, std::vector<std::string>> RegexpOption("e", cl::location(RegexpVector), cl::desc("Regular expression"), cl::ZeroOrMore, cl::cat(RE_Options), cl::Grouping);
64static cl::alias RegexpAlias("regexp", cl::desc("Alias for -e"), cl::aliasopt(RegexpOption));
65
66std::string FileFlag;
67static cl::opt<std::string, true> FileOption("f", cl::location(FileFlag), cl::desc("Take regular expressions (one per line) from a file."), cl::cat(RE_Options), cl::Grouping);
68static cl::alias FileAlias("file", cl::desc("Alias for -f"), cl::aliasopt(FileOption));
69   
70/*
71 *  B.  Grep input sources and interpretation.
72 */
73   
74static cl::OptionCategory Input_Options("B. Input Options", "These options control the input.");
75
76bool RecursiveFlag;
77static cl::opt<bool, true> RecursiveOption("r", cl::location(RecursiveFlag), cl::desc("Recursively process files within directories, (but follow only top-level symlinks unless -R)."), cl::cat(Input_Options), cl::Grouping);
78static cl::alias RecursiveAlias("recursive", cl::desc("Alias for -r"), cl::aliasopt(RecursiveOption));
79
80bool DereferenceRecursiveFlag;
81static cl::opt<bool, true> DereferenceRecursiveOption("R", cl::location(DereferenceRecursiveFlag), cl::desc("Recursively process files within directories, following symlinks at all levels."), cl::cat(Input_Options), cl::Grouping);
82static cl::alias DereferenceRecursiveAlias("dereference-recursive", cl::desc("Alias for -R"), cl::aliasopt(DereferenceRecursiveOption));
83
84bool TextFlag;
85static cl::opt<bool, true> TextOption("a", cl::location(TextFlag), cl::desc("Treat each input file as text, even if it is a binary file."), cl::cat(Input_Options), cl::Grouping);
86static cl::alias TextAlias("text", cl::desc("Alias for -a"), cl::aliasopt(TextOption));
87
88bool BinaryNonMatchingFlag;
89static cl::opt<bool, true> BinaryNonMatchingOption("I", cl::location(BinaryNonMatchingFlag), cl::desc("Treat binary files as non-matching."), cl::cat(Input_Options), cl::Grouping);
90static cl::alias BinaryNonMatchingAlias("binary-non-matching", cl::desc("Alias for -I"), cl::aliasopt(BinaryNonMatchingOption));
91
92bool BinaryFlag;
93static cl::opt<bool, true> BinaryOption("U", cl::location(BinaryFlag), cl::desc("Treat each input file as a binary file, without CRLF normalization."), cl::cat(Input_Options), cl::Grouping);
94static cl::alias BinaryAlias("binary", cl::desc("Alias for -U"), cl::aliasopt(BinaryOption));
95
96bool NullDataFlag;
97static cl::opt<bool, true> NullDataOption("z", cl::location(NullDataFlag), cl::desc("Use the NUL character (codepoint 00) as the line-break character for input."), cl::cat(Input_Options), cl::Grouping);
98static cl::alias NullDataAlias("null-data", cl::desc("Alias for -z"), cl::aliasopt(NullDataOption));
99
100bool UnicodeLinesFlag;
101static cl::opt<bool, true> UnicodeLinesOption("Unicode-lines", cl::location(UnicodeLinesFlag), cl::desc("Enable Unicode line breaks (LF/VT/FF/CR/NEL/LS/PS/CRLF)"), cl::cat(Input_Options));
102
103bool MmapFlag;
104static cl::opt<bool, true> MmapOption("mmap", cl::location(MmapFlag),  cl::init(1), cl::desc("Use mmap for file input (default)."), cl::cat(Input_Options));
105
106std::string ExcludeFlag;
107static cl::opt<std::string, true> ExcludeOption("exclude", cl::location(ExcludeFlag), cl::desc("Exclude files matching the given filename GLOB pattern."), cl::cat(Input_Options));
108
109std::string ExcludeFromFlag;
110static cl::opt<std::string, true> ExcludeFromOption("exclude-from", cl::location(ExcludeFromFlag), cl::desc("Exclude files matching filename GLOB patterns from the given file."), cl::cat(Input_Options));
111
112std::string ExcludeDirFlag;
113static cl::opt<std::string, true> ExcludeDirOption("exclude-dir", cl::location(ExcludeDirFlag), cl::desc("Exclude directories matching the given pattern."), cl::cat(Input_Options));
114
115std::string IncludeFlag;
116static cl::opt<std::string, true> IncludeOption("include", cl::location(IncludeFlag), cl::desc("Include only files matching the given filename GLOB pattern."), cl::cat(Input_Options));
117
118DevDirAction DevicesFlag;
119static cl::opt<DevDirAction, true> DevicesOption("D", cl::desc("Processing mode for devices:"),
120                                                 cl::values(clEnumValN(Read, "read", "Treat devices as files to be searched."),
121                                                            clEnumValN(Skip, "skip", "Silently skip devices.")
122                                                            CL_ENUM_VAL_SENTINEL), cl::cat(Input_Options), cl::location(DevicesFlag), cl::init(Read));
123static cl::alias DevicesAlias("devices", cl::desc("Alias for -D"), cl::aliasopt(DevicesOption));
124
125DevDirAction DirectoriesFlag;
126static cl::opt<DevDirAction, true> DirectoriesOption("d", cl::desc("Processing mode for directories:"),
127                                                     cl::values(clEnumValN(Read, "read", "Print an error message for any listed directories."),
128                                                                clEnumValN(Skip, "skip", "Silently skip directories."),
129                                                                clEnumValN(Recurse, "recurse", "Recursive process directories, equivalent to -r.")
130                                                                CL_ENUM_VAL_SENTINEL), cl::cat(Input_Options), cl::location(DirectoriesFlag), cl::init(Read));
131static cl::alias DirectoriesAlias("directories", cl::desc("Alias for -d"), cl::aliasopt(DirectoriesOption));
132
133BinaryFilesMode BinaryFilesFlag;
134static cl::opt<BinaryFilesMode, true> BinaryFilesOption("binary-files", cl::desc("Processing mode for binary files:"),
135                                                     cl::values(clEnumValN(Binary, "binary", "Report match/non-match without printing matches."),
136                                                                clEnumValN(WithoutMatch, "without-match", "Always report as non-matching."),
137                                                                clEnumValN(Text, "text", "Treat binary files as text.")
138                                                                CL_ENUM_VAL_SENTINEL), cl::cat(Input_Options), cl::location(BinaryFilesFlag), cl::init(Binary));
139   
140
141   
142re::RE * getFileExcludePattern() {
143    std::vector<re::RE *> patterns;
144    if (grep::ExcludeFlag != "") {
145        re::RE * glob = re::RE_Parser::parse(grep::ExcludeFlag, re::DEFAULT_MODE, re::RE_Syntax::FileGLOB);
146        patterns.push_back(glob);
147    }
148   
149    if (grep::ExcludeFromFlag != "") {
150        std::ifstream globFile(grep::ExcludeFromFlag.c_str());
151        std::string r;
152        if (globFile.is_open()) {
153            while (std::getline(globFile, r)) {
154                re::RE * glob = re::RE_Parser::parse(r, re::DEFAULT_MODE, re::RE_Syntax::FileGLOB);
155                patterns.push_back(glob);
156            }
157            globFile.close();
158        }
159    }
160    if (patterns.empty()) return nullptr;
161    return re::makeAlt(patterns.begin(), patterns.end());
162}
163
164re::RE * getDirectoryExcludePattern() {
165    if (grep::ExcludeDirFlag != "") {
166        return re::RE_Parser::parse(grep::ExcludeDirFlag, re::DEFAULT_MODE, re::RE_Syntax::FileGLOB);
167    }
168    return nullptr;
169}
170
171re::RE * getFileIncludePattern() {
172    if (grep::IncludeFlag != "") {
173        return re::RE_Parser::parse(grep::IncludeFlag, re::DEFAULT_MODE, re::RE_Syntax::FileGLOB);
174    }
175    return nullptr;
176}
177
178// Include is the default unless a -include= option exists and is prior to any -exclude
179// or -exclude-dir option.
180bool includeIsDefault() {
181    if (IncludeFlag == "") return true;
182    if ((ExcludeFlag != "") && (ExcludeOption.getPosition() < IncludeOption.getPosition())) return true;
183    if ((ExcludeDirFlag != "") && (ExcludeDirOption.getPosition() < IncludeOption.getPosition())) return true;
184    return false;
185}
186   
187/*
188 *  C.  Grep output modes and options.
189 */
190   
191   
192static cl::OptionCategory Output_Options("C. Output Options",
193                                            "These options control the output.");
194   
195GrepModeType Mode;
196static cl::opt<GrepModeType, true> GrepModeOption(cl::desc("Abbreviated output mode options:"),
197    cl::values(
198        clEnumValN(CountOnly, "c", "Display only the count of matching lines per file."),
199        clEnumValN(FilesWithMatch, "l", "Display only the names of files that have at least one match to the pattern."),
200        clEnumValN(FilesWithoutMatch, "L", "Display only the names of files that do not match the pattern."),
201        clEnumValN(QuietMode, "q", "Do not generate any output and ignore errors; set the return to zero status if a match is found."),
202        clEnumValN(CountOnly, "count", "Alias for -c"),
203        clEnumValN(FilesWithMatch, "files-with-match", "Alias for -l"),
204        clEnumValN(FilesWithoutMatch, "files-without-match", "Alias for -L"),
205        clEnumValN(QuietMode, "quiet", "Alias for -q"),
206        clEnumValN(QuietMode, "silent", "Alias for -q")
207        CL_ENUM_VAL_SENTINEL), cl::cat(Output_Options), cl::Grouping, cl::location(Mode), cl::init(NormalMode));
208
209bool NoMessagesFlag;
210static cl::opt<bool, true> NoMessagesOption("s", cl::location(NoMessagesFlag), cl::desc("Suppress messages for file errors."), cl::cat(Output_Options), cl::Grouping);
211static cl::alias NoMessagesAlias("no-messages", cl::desc("Alias for -s"), cl::aliasopt(NoMessagesOption));
212
213bool WithFilenameFlag;
214static cl::opt<bool, true> WithFilenameOption("H", cl::location(WithFilenameFlag), cl::desc("Show the file name with each matching line."), cl::cat(Output_Options), cl::Grouping);
215static cl::alias WithFilenameAlias("with-filename", cl::desc("Alias for -H"), cl::aliasopt(WithFilenameOption));
216
217bool NoFilenameFlag;
218static cl::opt<bool, true> NoFilenameOption("h", cl::location(NoFilenameFlag), cl::desc("Do not show filenames with maches."), cl::cat(Output_Options), cl::Grouping);
219static cl::alias NoFilenameAlias("no-filename", cl::desc("Alias for -h"), cl::aliasopt(NoFilenameOption));
220
221bool NullFlag;
222static cl::opt<bool, true> NullOption("Z", cl::location(NullFlag), cl::desc("Write NUL characters after filenames generated to output."), cl::cat(Output_Options), cl::Grouping);
223static cl::alias NullAlias("null", cl::desc("Alias for -Z"), cl::aliasopt(NullOption));
224
225bool LineNumberFlag;
226static cl::opt<bool, true> LineNumberOption("n", cl::location(LineNumberFlag), cl::desc("Show the line number with each matching line."), cl::cat(Output_Options), cl::Grouping);
227static cl::alias LineNumberAlias("line-number", cl::desc("Alias for -n"), cl::aliasopt(LineNumberOption));
228
229bool ByteOffsetFlag;
230static cl::opt<bool, true> ByteOffsetOption("b", cl::location(ByteOffsetFlag), cl::desc("Show the byte offset within the file for each matching line."), cl::cat(Output_Options), cl::Grouping);
231static cl::alias ByteOffsetAlias("byte-offset", cl::desc("Alias for -b"), cl::aliasopt(ByteOffsetOption));
232
233bool UnixByteOffsetsFlag;
234static cl::opt<bool, true> UnixByteOffsetsOption("u", cl::location(UnixByteOffsetsFlag), cl::desc("If byte offsets are displayed, report offsets as if all lines are terminated with a single LF."), cl::cat(Output_Options), cl::Grouping);
235static cl::alias UnixByteOffsetsAlias("unix-byte-offsets", cl::desc("Alias for -u"), cl::aliasopt(UnixByteOffsetsOption));
236
237bool InitialTabFlag;
238static cl::opt<bool, true> InitialTabOption("T", cl::location(InitialTabFlag), cl::desc("Line up matched line content using an inital tab character."), cl::cat(Output_Options), cl::Grouping);
239static cl::alias InitialTabAlias("initial-tab", cl::desc("Alias for -T"), cl::aliasopt(InitialTabOption));
240
241bool OnlyMatchingFlag;
242static cl::opt<bool, true> OnlyMatchingOption("o", cl::location(OnlyMatchingFlag), cl::desc("Display only the exact strings that match the pattern, with possibly multiple matches per line."), cl::cat(Output_Options), cl::Grouping);
243static cl::alias OnlyMatchingAlias("only-matching", cl::desc("Alias for -o"), cl::aliasopt(OnlyMatchingOption));
244
245std::string LabelFlag;
246    static cl::opt<std::string, true> LabelOption("label", cl::location(LabelFlag), cl::init("(standard input)"),
247                                              cl::desc("Set a label for input lines matched from stdin."), cl::cat(Output_Options));
248
249bool LineBufferedFlag;
250static cl::opt<bool, true> LineBufferedOption("line-buffered", cl::location(LineBufferedFlag), cl::desc("Buffer lines to output."), cl::cat(Output_Options));
251
252int AfterContextFlag;
253static cl::opt<int, true> AfterContextOption("A", cl::location(AfterContextFlag), cl::desc("Print <num> lines of context after each matching line."), cl::cat(Output_Options), cl::Grouping);
254static cl::alias AfterContextAlias("after-context", cl::desc("Alias for -A"), cl::aliasopt(AfterContextOption));
255
256int BeforeContextFlag;
257static cl::opt<int, true> BeforeContextOption("B", cl::location(BeforeContextFlag), cl::desc("Print <num>lines of context before each matching line."), cl::cat(Output_Options), cl::Grouping);
258static cl::alias BeforeContextAlias("before-context", cl::desc("Alias for -B"), cl::aliasopt(BeforeContextOption));
259
260int ContextFlag;
261static cl::opt<int, true> ContextOption("C", cl::location(ContextFlag), cl::desc("Print <num> lines of context before and after each matching line."), cl::cat(Output_Options), cl::Grouping);
262static cl::alias ContextAlias("context", cl::desc("Alias for -C"), cl::aliasopt(ContextOption));
263
264int MaxCountFlag;
265static cl::opt<int, true> MaxCountOption("m", cl::location(MaxCountFlag),
266                                         cl::desc("Process only the first <num> matches per file3."),
267                                         cl::cat(Output_Options), cl::Grouping);
268static cl::alias MaxCountAlias("max-count", cl::desc("Alias for -m"), cl::aliasopt(MaxCountOption));
269   
270ColoringType ColorFlag;
271static cl::opt<ColoringType, true> Color("color", cl::desc("Set colorization of the output"), cl::location(ColorFlag), cl::cat(Output_Options), cl::init(neverColor),
272                                 cl::values(clEnumValN(alwaysColor, "always", "Enable colorization"),
273                                            clEnumValN(autoColor,   "auto", "Colorize output to stdout"),
274                                            clEnumValN(neverColor,  "never", "Disable colorization")
275                                            CL_ENUM_VAL_SENTINEL));
276static cl::alias ColorAlias("colour", cl::desc("Alias for -color"), cl::aliasopt(Color));
//
// Handler for errors reported through llvm::report_fatal_error.
//
// Debug builds (NDEBUG undefined) rethrow the message as a std::runtime_error.
// Release builds write "icgrep ERROR: <message>" to file descriptor 2, run
// LLVM's interrupt handlers, and exit with InternalFailureCode.
//
static void icgrep_error_handler(void *UserData, const std::string &Message, bool GenCrashDiag) {
#ifndef NDEBUG
        throw std::runtime_error(Message);
#else
        // Adapted from LLVM's internal report_fatal_error logic: format the
        // whole message into a buffer first, then emit it with one write().
        llvm::SmallVector<char, 64> storage;
        llvm::raw_svector_ostream formatted(storage);
        formatted << "icgrep ERROR: " << Message << "\n";
        const llvm::StringRef text = formatted.str();
        // A failed or short write is deliberately ignored; nothing more can be done here.
        const ssize_t bytesWritten = ::write(2, text.data(), text.size());
        (void)bytesWritten;
        // Give registered interrupt handlers a chance to perform their cleanups
        // (in particular removing files registered with RemoveFileOnSignal).
        llvm::sys::RunInterruptHandlers();
        exit(InternalFailureCode);
#endif
}
298   
299
300void InitializeCommandLineInterface(int argc, char *argv[]) {
301    llvm::install_fatal_error_handler(&icgrep_error_handler);
302    codegen::ParseCommandLineOptions(argc, argv, {&RE_Options, &Input_Options, &Output_Options, re::re_toolchain_flags(), pablo::pablo_toolchain_flags(), codegen::codegen_flags()});
303    if (RecursiveFlag || DereferenceRecursiveFlag) {
304        DirectoriesFlag = Recurse;
305    }
306   
307    if (TextFlag) {
308        if (BinaryNonMatchingFlag || (BinaryFilesFlag == WithoutMatch)) {
309            llvm::report_fatal_error("Conflicting options for binary files.\n");
310        }
311        BinaryFilesFlag = Text;
312    }
313    if (BinaryNonMatchingFlag) {
314        if (BinaryFilesFlag == Text) {
315            llvm::report_fatal_error("Conflicting options for binary files.\n");
316        }
317        BinaryFilesFlag = WithoutMatch;
318    }
319    if (BinaryFlag) {
320        llvm::report_fatal_error("Sorry, -U is not yet supported.\n");
321    }
322    if (ExcludeFlag!="") {
323        llvm::report_fatal_error("Sorry, -exclude is not yet supported.\n");
324    }
325    if (ExcludeFromFlag!="") {
326        llvm::report_fatal_error("Sorry, -exclude-from is not yet supported.\n");
327    }
328    if (ExcludeDirFlag!="") {
329        llvm::report_fatal_error("Sorry, -exclude-dir is not yet supported.\n");
330    }
331    if (IncludeFlag!="") {
332        llvm::report_fatal_error("Sorry, -include is not yet supported.\n");
333    }   
334    if (ByteOffsetFlag) {
335        llvm::report_fatal_error("Sorry, -b is not yet supported.\n");
336    }
337    if (UnixByteOffsetsFlag) {
338        llvm::report_fatal_error("Sorry, -u is not yet supported.\n");
339    }
340    if (OnlyMatchingFlag) {
341        llvm::report_fatal_error("Sorry, -o is not yet supported.\n");
342    }
343    if (LineBufferedFlag) {
344        llvm::report_fatal_error("Sorry, -line-buffered is not yet supported.\n");
345    }
346    if (AfterContextFlag) {
347        llvm::report_fatal_error("Sorry, -A is not yet supported.\n");
348    }
349    if (BeforeContextFlag) {
350        llvm::report_fatal_error("Sorry, -B is not yet supported.\n");
351    }
352    if (ContextFlag) {
353        llvm::report_fatal_error("Sorry, -C is not yet supported.\n");
354    }
355    if (ColorFlag!=neverColor) {
356        llvm::report_fatal_error("Sorry, -color is not yet supported.\n");
357    }
358    if (Mode == QuietMode) {
359        NoMessagesFlag = true;
360    }
361    if ((Mode == QuietMode) | (Mode == FilesWithMatch) | (Mode == FilesWithoutMatch)) {
362        MaxCountFlag = 1;
363    }
364}
365}
Note: See TracBrowser for help on using the repository browser.