source: icGREP/icgrep-devel/icgrep/grep_interface.cpp @ 6135

Last change on this file since 6135 was 5999, checked in by cameron, 13 months ago

Handling of file system errors

File size: 15.0 KB
Line 
1/*
2 *  Copyright (c) 2018 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <grep_interface.h>
8#include <llvm/Support/CommandLine.h>
9#include <llvm/Support/ErrorHandling.h>
10#include <llvm/Support/Signals.h>
11#include <llvm/Support/raw_ostream.h>
12#include <util/file_select.h>
13#include <toolchain/toolchain.h>
14#include <re/parsers/parser.h>
15#include <re/re_alt.h>
16#include <re/re_toolchain.h>
17#include <fstream>
18#include <string>
19
20#include <pablo/pablo_toolchain.h>
21
22using namespace llvm;
23
24namespace argv {
25
26/*
27 *  A.  Regular expression syntax, interpretation and processing.
28 */
29
30static cl::OptionCategory RE_Options("A. Regular Expression Interpretation", "These options control regular expression parsing and interpretation");
31
32re::RE_Syntax RegexpSyntax;
33static cl::opt<re::RE_Syntax, true> RegexpSyntaxOption(cl::desc("Regular expression syntax: (default PCRE)"),
34    cl::values(
35        clEnumValN(re::RE_Syntax::ERE, "E", "Posix extended regular expression (ERE) syntax"),
36        clEnumValN(re::RE_Syntax::FixedStrings, "F", "Fixed strings, separated by newlines"),
37        clEnumValN(re::RE_Syntax::BRE, "G", "Posix basic regular expression (BRE) syntax"),
38        clEnumValN(re::RE_Syntax::PCRE, "P", "Perl-compatible regular expression (PCRE) syntax"),
39        clEnumValN(re::RE_Syntax::ERE, "extended-regexp", "Alias for -E"),
40        clEnumValN(re::RE_Syntax::FixedStrings, "fixed-strings", "Alias for -F"),
41        clEnumValN(re::RE_Syntax::BRE, "basic-regexp", "Alias for -G"),
42        clEnumValN(re::RE_Syntax::PCRE, "perl-regexp", "Alias for -P"),
43        clEnumValN(re::RE_Syntax::FileGLOB, "GLOB", "Posix GLOB syntax for file name patterns"),
44        clEnumValN(re::RE_Syntax::PROSITE, "PROSITE", "PROSITE protein patterns syntax")
45        CL_ENUM_VAL_SENTINEL), cl::cat(RE_Options), cl::Grouping, cl::location(RegexpSyntax), cl::init(re::RE_Syntax::PCRE));
46
47bool IgnoreCaseFlag;
48static cl::opt<bool, true> IgnoreCaseOption("i", cl::location(IgnoreCaseFlag), cl::desc("Ignore case distinctions in the pattern and the file."), cl::cat(RE_Options), cl::Grouping);
49static cl::alias IgnoreCaseAlias("ignore-case", cl::desc("Alias for -i"), cl::aliasopt(IgnoreCaseOption));
50
51bool InvertMatchFlag;
52static cl::opt<bool, true> InvertMatchOption("v", cl::location(InvertMatchFlag), cl::desc("Invert match results: select non-matching lines."), cl::cat(RE_Options), cl::Grouping);
53static cl::alias InvertMatchAlias("invert-match", cl::desc("Alias for -v"), cl::aliasopt(InvertMatchOption));
54
55bool LineRegexpFlag;
56static cl::opt<bool, true> LineRegexpOption("x", cl::location(LineRegexpFlag), cl::desc("Require that entire lines be matched."), cl::cat(RE_Options), cl::Grouping);
57static cl::alias LineRegexpAlias("line-regexp", cl::desc("Alias for -x"), cl::aliasopt(LineRegexpOption));
58
59bool WordRegexpFlag;
60static cl::opt<bool, true> WordRegexpOption("w", cl::location(WordRegexpFlag), cl::desc("Require that that whole words be matched."), cl::cat(RE_Options), cl::Grouping);
61static cl::alias WordRegexpAlias("word-regexp", cl::desc("Alias for -w"), cl::aliasopt(WordRegexpOption));
62
63std::vector<std::string> RegexpVector;
64static cl::list<std::string, std::vector<std::string>> RegexpOption("e", cl::location(RegexpVector), cl::desc("Regular expression"), cl::ZeroOrMore, cl::cat(RE_Options), cl::Grouping);
65static cl::alias RegexpAlias("regexp", cl::desc("Alias for -e"), cl::aliasopt(RegexpOption));
66
67std::string FileFlag;
68static cl::opt<std::string, true> FileOption("f", cl::location(FileFlag), cl::desc("Take regular expressions (one per line) from a file."), cl::cat(RE_Options), cl::Grouping);
69static cl::alias FileAlias("file", cl::desc("Alias for -f"), cl::aliasopt(FileOption));
70   
71/*
72 *  B.  Grep input sources and interpretation.
73 */
74   
75static cl::OptionCategory Input_Options("B. Input Options", "These options control the input.");
76
77bool TextFlag;
78static cl::opt<bool, true> TextOption("a", cl::location(TextFlag), cl::desc("Treat each input file as text, even if it is a binary file."), cl::cat(Input_Options), cl::Grouping);
79static cl::alias TextAlias("text", cl::desc("Alias for -a"), cl::aliasopt(TextOption));
80
81bool BinaryNonMatchingFlag;
82static cl::opt<bool, true> BinaryNonMatchingOption("I", cl::location(BinaryNonMatchingFlag), cl::desc("Treat binary files as non-matching."), cl::cat(Input_Options), cl::Grouping);
83static cl::alias BinaryNonMatchingAlias("binary-non-matching", cl::desc("Alias for -I"), cl::aliasopt(BinaryNonMatchingOption));
84
85bool BinaryFlag;
86static cl::opt<bool, true> BinaryOption("U", cl::location(BinaryFlag), cl::desc("Treat each input file as a binary file, without CRLF normalization."), cl::cat(Input_Options), cl::Grouping);
87static cl::alias BinaryAlias("binary", cl::desc("Alias for -U"), cl::aliasopt(BinaryOption));
88
89bool NullDataFlag;
90static cl::opt<bool, true> NullDataOption("z", cl::location(NullDataFlag), cl::desc("Use the NUL character (codepoint 00) as the line-break character for input."), cl::cat(Input_Options), cl::Grouping);
91static cl::alias NullDataAlias("null-data", cl::desc("Alias for -z"), cl::aliasopt(NullDataOption));
92
93bool UnicodeLinesFlag;
94static cl::opt<bool, true> UnicodeLinesOption("Unicode-lines", cl::location(UnicodeLinesFlag), cl::desc("Enable Unicode line breaks (LF/VT/FF/CR/NEL/LS/PS/CRLF)"), cl::cat(Input_Options));
95
96BinaryFilesMode BinaryFilesFlag;
97static cl::opt<BinaryFilesMode, true> BinaryFilesOption("binary-files", cl::desc("Processing mode for binary files:"),
98                                                     cl::values(clEnumValN(Binary, "binary", "Report match/non-match without printing matches."),
99                                                                clEnumValN(WithoutMatch, "without-match", "Always report as non-matching."),
100                                                                clEnumValN(Text, "text", "Treat binary files as text.")
101                                                                CL_ENUM_VAL_SENTINEL), cl::cat(Input_Options), cl::location(BinaryFilesFlag), cl::init(Text));
102   
103
104   
105/*
106 *  C.  Grep output modes and options.
107 */
108   
109   
110static cl::OptionCategory Output_Options("C. Output Options",
111                                            "These options control the output.");
112   
113GrepModeType Mode;
114static cl::opt<GrepModeType, true> GrepModeOption(cl::desc("Abbreviated output mode options:"),
115    cl::values(
116        clEnumValN(CountOnly, "c", "Display only the count of matching lines per file."),
117        clEnumValN(FilesWithMatch, "l", "Display only the names of files that have at least one match to the pattern."),
118        clEnumValN(FilesWithoutMatch, "L", "Display only the names of files that do not match the pattern."),
119        clEnumValN(QuietMode, "q", "Do not generate any output and ignore errors; set the return to zero status if a match is found."),
120        clEnumValN(CountOnly, "count", "Alias for -c"),
121        clEnumValN(FilesWithMatch, "files-with-match", "Alias for -l"),
122        clEnumValN(FilesWithoutMatch, "files-without-match", "Alias for -L"),
123        clEnumValN(QuietMode, "quiet", "Alias for -q"),
124        clEnumValN(QuietMode, "silent", "Alias for -q")
125        CL_ENUM_VAL_SENTINEL), cl::cat(Output_Options), cl::Grouping, cl::location(Mode), cl::init(NormalMode));
126
127bool WithFilenameFlag;
128static cl::opt<bool, true> WithFilenameOption("H", cl::location(WithFilenameFlag), cl::desc("Show the file name with each matching line."), cl::cat(Output_Options), cl::Grouping);
129static cl::alias WithFilenameAlias("with-filename", cl::desc("Alias for -H"), cl::aliasopt(WithFilenameOption));
130
131bool NoFilenameFlag;
132static cl::opt<bool, true> NoFilenameOption("h", cl::location(NoFilenameFlag), cl::desc("Do not show filenames with maches."), cl::cat(Output_Options), cl::Grouping);
133static cl::alias NoFilenameAlias("no-filename", cl::desc("Alias for -h"), cl::aliasopt(NoFilenameOption));
134
135bool NullFlag;
136static cl::opt<bool, true> NullOption("Z", cl::location(NullFlag), cl::desc("Write NUL characters after filenames generated to output."), cl::cat(Output_Options), cl::Grouping);
137static cl::alias NullAlias("null", cl::desc("Alias for -Z"), cl::aliasopt(NullOption));
138
139bool LineNumberFlag;
140static cl::opt<bool, true> LineNumberOption("n", cl::location(LineNumberFlag), cl::desc("Show the line number with each matching line."), cl::cat(Output_Options), cl::Grouping);
141static cl::alias LineNumberAlias("line-number", cl::desc("Alias for -n"), cl::aliasopt(LineNumberOption));
142
143bool ByteOffsetFlag;
144static cl::opt<bool, true> ByteOffsetOption("b", cl::location(ByteOffsetFlag), cl::desc("Show the byte offset within the file for each matching line."), cl::cat(Output_Options), cl::Grouping);
145static cl::alias ByteOffsetAlias("byte-offset", cl::desc("Alias for -b"), cl::aliasopt(ByteOffsetOption));
146
147bool UnixByteOffsetsFlag;
148static cl::opt<bool, true> UnixByteOffsetsOption("u", cl::location(UnixByteOffsetsFlag), cl::desc("If byte offsets are displayed, report offsets as if all lines are terminated with a single LF."), cl::cat(Output_Options), cl::Grouping);
149static cl::alias UnixByteOffsetsAlias("unix-byte-offsets", cl::desc("Alias for -u"), cl::aliasopt(UnixByteOffsetsOption));
150
151bool InitialTabFlag;
152static cl::opt<bool, true> InitialTabOption("T", cl::location(InitialTabFlag), cl::desc("Line up matched line content using an inital tab character."), cl::cat(Output_Options), cl::Grouping);
153static cl::alias InitialTabAlias("initial-tab", cl::desc("Alias for -T"), cl::aliasopt(InitialTabOption));
154
155bool OnlyMatchingFlag;
156static cl::opt<bool, true> OnlyMatchingOption("o", cl::location(OnlyMatchingFlag), cl::desc("Display only the exact strings that match the pattern, with possibly multiple matches per line."), cl::cat(Output_Options), cl::Grouping);
157static cl::alias OnlyMatchingAlias("only-matching", cl::desc("Alias for -o"), cl::aliasopt(OnlyMatchingOption));
158
159std::string LabelFlag;
160    static cl::opt<std::string, true> LabelOption("label", cl::location(LabelFlag), cl::init("(standard input)"),
161                                              cl::desc("Set a label for input lines matched from stdin."), cl::cat(Output_Options));
162
163bool LineBufferedFlag;
164static cl::opt<bool, true> LineBufferedOption("line-buffered", cl::location(LineBufferedFlag), cl::desc("Buffer lines to output."), cl::cat(Output_Options));
165
166int AfterContextFlag;
167static cl::opt<int, true> AfterContextOption("A", cl::location(AfterContextFlag), cl::desc("Print <num> lines of context after each matching line."), cl::cat(Output_Options), cl::Grouping);
168static cl::alias AfterContextAlias("after-context", cl::desc("Alias for -A"), cl::aliasopt(AfterContextOption));
169
170int BeforeContextFlag;
171static cl::opt<int, true> BeforeContextOption("B", cl::location(BeforeContextFlag), cl::desc("Print <num>lines of context before each matching line."), cl::cat(Output_Options), cl::Grouping);
172static cl::alias BeforeContextAlias("before-context", cl::desc("Alias for -B"), cl::aliasopt(BeforeContextOption));
173
174int ContextFlag;
175static cl::opt<int, true> ContextOption("C", cl::location(ContextFlag), cl::desc("Print <num> lines of context before and after each matching line."), cl::cat(Output_Options), cl::Grouping);
176static cl::alias ContextAlias("context", cl::desc("Alias for -C"), cl::aliasopt(ContextOption));
177
178int MaxCountFlag;
179static cl::opt<int, true> MaxCountOption("m", cl::location(MaxCountFlag),
180                                         cl::desc("Process only the first <num> matches per file3."),
181                                         cl::cat(Output_Options), cl::Grouping);
182static cl::alias MaxCountAlias("max-count", cl::desc("Alias for -m"), cl::aliasopt(MaxCountOption));
183   
184ColoringType ColorFlag;
185static cl::opt<ColoringType, true> Color("color", cl::desc("Set colorization of the output"), cl::location(ColorFlag), cl::cat(Output_Options), cl::init(neverColor),
186                                 cl::values(clEnumValN(alwaysColor, "always", "Enable colorization"),
187                                            clEnumValN(autoColor,   "auto", "Colorize output to stdout"),
188                                            clEnumValN(neverColor,  "never", "Disable colorization")
189                                            CL_ENUM_VAL_SENTINEL));
190static cl::alias ColorAlias("colour", cl::desc("Alias for -color"), cl::aliasopt(Color));
//
// Handler for errors reported through llvm::report_fatal_error.
//
// Debug builds (NDEBUG not defined): the message is thrown as a
// std::runtime_error.
// Release builds: "icgrep ERROR: <message>" is written to file descriptor 2,
// LLVM's interrupt handlers are run, and the process exits with
// InternalFailureCode.
//
// UserData and GenCrashDiag are required by the handler signature but are
// not used.
//
static void icgrep_error_handler(void *UserData, const std::string &Message, bool GenCrashDiag) {
#ifndef NDEBUG
    throw std::runtime_error(Message);
#else
    // Modified from LLVM's internal report_fatal_error logic: format the
    // whole line into a small buffer, then emit it with one write() call.
    SmallVector<char, 64> LineStorage;
    raw_svector_ostream Formatter(LineStorage);
    Formatter << "icgrep ERROR: ";
    Formatter << Message;
    Formatter << "\n";
    const StringRef Line = Formatter.str();

    // Best effort only: if the write fails there is nothing more to be done,
    // so the result is deliberately discarded.
    const ssize_t Ignored = ::write(2, Line.data(), Line.size());
    (void)Ignored;

    // Run the interrupt handlers so that special cleanups happen, in
    // particular removal of files registered with RemoveFileOnSignal.
    llvm::sys::RunInterruptHandlers();
    exit(InternalFailureCode);
#endif
}
212   
213
214void InitializeCommandLineInterface(int argc, char *argv[]) {
215    llvm::install_fatal_error_handler(&icgrep_error_handler);
216    codegen::ParseCommandLineOptions(argc, argv, {&RE_Options, &Input_Options, &Output_Options, re::re_toolchain_flags(), pablo::pablo_toolchain_flags(), codegen::codegen_flags()});
217    if (argv::RecursiveFlag || argv::DereferenceRecursiveFlag) {
218        argv::DirectoriesFlag = argv::Recurse;
219    }
220   
221    if (TextFlag) {
222        if (BinaryNonMatchingFlag || (BinaryFilesFlag == WithoutMatch)) {
223            llvm::report_fatal_error("Conflicting options for binary files.\n");
224        }
225        BinaryFilesFlag = Text;
226    }
227    if (BinaryNonMatchingFlag) {
228        if (BinaryFilesFlag == Binary) {
229            llvm::report_fatal_error("Conflicting options for binary files.\n");
230        }
231        BinaryFilesFlag = WithoutMatch;
232    }
233    if (BinaryFlag) {
234        llvm::report_fatal_error("Sorry, -U is not yet supported.\n");
235    }
236    if (ByteOffsetFlag) {
237        llvm::report_fatal_error("Sorry, -b is not yet supported.\n");
238    }
239    if (UnixByteOffsetsFlag) {
240        llvm::report_fatal_error("Sorry, -u is not yet supported.\n");
241    }
242    if (OnlyMatchingFlag) {
243        llvm::report_fatal_error("Sorry, -o is not yet supported.\n");
244    }
245    if (LineBufferedFlag) {
246        llvm::report_fatal_error("Sorry, -line-buffered is not yet supported.\n");
247    }
248    if (AfterContextFlag) {
249        llvm::report_fatal_error("Sorry, -A is not yet supported.\n");
250    }
251    if (BeforeContextFlag) {
252        llvm::report_fatal_error("Sorry, -B is not yet supported.\n");
253    }
254    if (ContextFlag) {
255        llvm::report_fatal_error("Sorry, -C is not yet supported.\n");
256    }
257    if (ColorFlag!=neverColor) {
258        llvm::report_fatal_error("Sorry, -color is not yet supported.\n");
259    }
260    if ((Mode == QuietMode) | (Mode == FilesWithMatch) | (Mode == FilesWithoutMatch)) {
261        MaxCountFlag = 1;
262    }
263}
264}
Note: See TracBrowser for help on using the repository browser.