source: icGREP/icgrep-devel/icgrep/grep_interface.cpp @ 5992

Last change on this file since 5992 was 5992, checked in by cameron, 12 months ago

Setting BinaryFilesMode to Text (temporary); progress on the conversion to unique_ptr

File size: 15.3 KB
Line 
1/*
2 *  Copyright (c) 2018 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <grep_interface.h>
8#include <llvm/Support/CommandLine.h>
9#include <llvm/Support/ErrorHandling.h>
10#include <llvm/Support/Signals.h>
11#include <llvm/Support/raw_ostream.h>
12#include <util/file_select.h>
13#include <toolchain/toolchain.h>
14#include <re/parsers/parser.h>
15#include <re/re_alt.h>
16#include <re/re_toolchain.h>
17#include <fstream>
18#include <string>
19
20#include <pablo/pablo_toolchain.h>
21
22using namespace llvm;
23
24namespace argv {
25
26/*
27 *  A.  Regular expression syntax, interpretation and processing.
28 */
29
30static cl::OptionCategory RE_Options("A. Regular Expression Interpretation", "These options control regular expression parsing and interpretation");
31
32re::RE_Syntax RegexpSyntax;
33static cl::opt<re::RE_Syntax, true> RegexpSyntaxOption(cl::desc("Regular expression syntax: (default PCRE)"),
34    cl::values(
35        clEnumValN(re::RE_Syntax::ERE, "E", "Posix extended regular expression (ERE) syntax"),
36        clEnumValN(re::RE_Syntax::FixedStrings, "F", "Fixed strings, separated by newlines"),
37        clEnumValN(re::RE_Syntax::BRE, "G", "Posix basic regular expression (BRE) syntax"),
38        clEnumValN(re::RE_Syntax::PCRE, "P", "Perl-compatible regular expression (PCRE) syntax"),
39        clEnumValN(re::RE_Syntax::ERE, "extended-regexp", "Alias for -E"),
40        clEnumValN(re::RE_Syntax::FixedStrings, "fixed-strings", "Alias for -F"),
41        clEnumValN(re::RE_Syntax::BRE, "basic-regexp", "Alias for -G"),
42        clEnumValN(re::RE_Syntax::PCRE, "perl-regexp", "Alias for -P"),
43        clEnumValN(re::RE_Syntax::FileGLOB, "GLOB", "Posix GLOB syntax for file name patterns"),
44        clEnumValN(re::RE_Syntax::PROSITE, "PROSITE", "PROSITE protein patterns syntax")
45        CL_ENUM_VAL_SENTINEL), cl::cat(RE_Options), cl::Grouping, cl::location(RegexpSyntax), cl::init(re::RE_Syntax::PCRE));
46
47bool IgnoreCaseFlag;
48static cl::opt<bool, true> IgnoreCaseOption("i", cl::location(IgnoreCaseFlag), cl::desc("Ignore case distinctions in the pattern and the file."), cl::cat(RE_Options), cl::Grouping);
49static cl::alias IgnoreCaseAlias("ignore-case", cl::desc("Alias for -i"), cl::aliasopt(IgnoreCaseOption));
50
51bool InvertMatchFlag;
52static cl::opt<bool, true> InvertMatchOption("v", cl::location(InvertMatchFlag), cl::desc("Invert match results: select non-matching lines."), cl::cat(RE_Options), cl::Grouping);
53static cl::alias InvertMatchAlias("invert-match", cl::desc("Alias for -v"), cl::aliasopt(InvertMatchOption));
54
55bool LineRegexpFlag;
56static cl::opt<bool, true> LineRegexpOption("x", cl::location(LineRegexpFlag), cl::desc("Require that entire lines be matched."), cl::cat(RE_Options), cl::Grouping);
57static cl::alias LineRegexpAlias("line-regexp", cl::desc("Alias for -x"), cl::aliasopt(LineRegexpOption));
58
59bool WordRegexpFlag;
60static cl::opt<bool, true> WordRegexpOption("w", cl::location(WordRegexpFlag), cl::desc("Require that that whole words be matched."), cl::cat(RE_Options), cl::Grouping);
61static cl::alias WordRegexpAlias("word-regexp", cl::desc("Alias for -w"), cl::aliasopt(WordRegexpOption));
62
63std::vector<std::string> RegexpVector;
64static cl::list<std::string, std::vector<std::string>> RegexpOption("e", cl::location(RegexpVector), cl::desc("Regular expression"), cl::ZeroOrMore, cl::cat(RE_Options), cl::Grouping);
65static cl::alias RegexpAlias("regexp", cl::desc("Alias for -e"), cl::aliasopt(RegexpOption));
66
67std::string FileFlag;
68static cl::opt<std::string, true> FileOption("f", cl::location(FileFlag), cl::desc("Take regular expressions (one per line) from a file."), cl::cat(RE_Options), cl::Grouping);
69static cl::alias FileAlias("file", cl::desc("Alias for -f"), cl::aliasopt(FileOption));
70   
71/*
72 *  B.  Grep input sources and interpretation.
73 */
74   
75static cl::OptionCategory Input_Options("B. Input Options", "These options control the input.");
76
77bool TextFlag;
78static cl::opt<bool, true> TextOption("a", cl::location(TextFlag), cl::desc("Treat each input file as text, even if it is a binary file."), cl::cat(Input_Options), cl::Grouping);
79static cl::alias TextAlias("text", cl::desc("Alias for -a"), cl::aliasopt(TextOption));
80
81bool BinaryNonMatchingFlag;
82static cl::opt<bool, true> BinaryNonMatchingOption("I", cl::location(BinaryNonMatchingFlag), cl::desc("Treat binary files as non-matching."), cl::cat(Input_Options), cl::Grouping);
83static cl::alias BinaryNonMatchingAlias("binary-non-matching", cl::desc("Alias for -I"), cl::aliasopt(BinaryNonMatchingOption));
84
85bool BinaryFlag;
86static cl::opt<bool, true> BinaryOption("U", cl::location(BinaryFlag), cl::desc("Treat each input file as a binary file, without CRLF normalization."), cl::cat(Input_Options), cl::Grouping);
87static cl::alias BinaryAlias("binary", cl::desc("Alias for -U"), cl::aliasopt(BinaryOption));
88
89bool NullDataFlag;
90static cl::opt<bool, true> NullDataOption("z", cl::location(NullDataFlag), cl::desc("Use the NUL character (codepoint 00) as the line-break character for input."), cl::cat(Input_Options), cl::Grouping);
91static cl::alias NullDataAlias("null-data", cl::desc("Alias for -z"), cl::aliasopt(NullDataOption));
92
93bool UnicodeLinesFlag;
94static cl::opt<bool, true> UnicodeLinesOption("Unicode-lines", cl::location(UnicodeLinesFlag), cl::desc("Enable Unicode line breaks (LF/VT/FF/CR/NEL/LS/PS/CRLF)"), cl::cat(Input_Options));
95
96BinaryFilesMode BinaryFilesFlag;
97static cl::opt<BinaryFilesMode, true> BinaryFilesOption("binary-files", cl::desc("Processing mode for binary files:"),
98                                                     cl::values(clEnumValN(Binary, "binary", "Report match/non-match without printing matches."),
99                                                                clEnumValN(WithoutMatch, "without-match", "Always report as non-matching."),
100                                                                clEnumValN(Text, "text", "Treat binary files as text.")
101                                                                CL_ENUM_VAL_SENTINEL), cl::cat(Input_Options), cl::location(BinaryFilesFlag), cl::init(Text));
102   
103
104   
105/*
106 *  C.  Grep output modes and options.
107 */
108   
109   
110static cl::OptionCategory Output_Options("C. Output Options",
111                                            "These options control the output.");
112   
113GrepModeType Mode;
114static cl::opt<GrepModeType, true> GrepModeOption(cl::desc("Abbreviated output mode options:"),
115    cl::values(
116        clEnumValN(CountOnly, "c", "Display only the count of matching lines per file."),
117        clEnumValN(FilesWithMatch, "l", "Display only the names of files that have at least one match to the pattern."),
118        clEnumValN(FilesWithoutMatch, "L", "Display only the names of files that do not match the pattern."),
119        clEnumValN(QuietMode, "q", "Do not generate any output and ignore errors; set the return to zero status if a match is found."),
120        clEnumValN(CountOnly, "count", "Alias for -c"),
121        clEnumValN(FilesWithMatch, "files-with-match", "Alias for -l"),
122        clEnumValN(FilesWithoutMatch, "files-without-match", "Alias for -L"),
123        clEnumValN(QuietMode, "quiet", "Alias for -q"),
124        clEnumValN(QuietMode, "silent", "Alias for -q")
125        CL_ENUM_VAL_SENTINEL), cl::cat(Output_Options), cl::Grouping, cl::location(Mode), cl::init(NormalMode));
126
127bool NoMessagesFlag;
128static cl::opt<bool, true> NoMessagesOption("s", cl::location(NoMessagesFlag), cl::desc("Suppress messages for file errors."), cl::cat(Output_Options), cl::Grouping);
129static cl::alias NoMessagesAlias("no-messages", cl::desc("Alias for -s"), cl::aliasopt(NoMessagesOption));
130
131bool WithFilenameFlag;
132static cl::opt<bool, true> WithFilenameOption("H", cl::location(WithFilenameFlag), cl::desc("Show the file name with each matching line."), cl::cat(Output_Options), cl::Grouping);
133static cl::alias WithFilenameAlias("with-filename", cl::desc("Alias for -H"), cl::aliasopt(WithFilenameOption));
134
135bool NoFilenameFlag;
136static cl::opt<bool, true> NoFilenameOption("h", cl::location(NoFilenameFlag), cl::desc("Do not show filenames with maches."), cl::cat(Output_Options), cl::Grouping);
137static cl::alias NoFilenameAlias("no-filename", cl::desc("Alias for -h"), cl::aliasopt(NoFilenameOption));
138
139bool NullFlag;
140static cl::opt<bool, true> NullOption("Z", cl::location(NullFlag), cl::desc("Write NUL characters after filenames generated to output."), cl::cat(Output_Options), cl::Grouping);
141static cl::alias NullAlias("null", cl::desc("Alias for -Z"), cl::aliasopt(NullOption));
142
143bool LineNumberFlag;
144static cl::opt<bool, true> LineNumberOption("n", cl::location(LineNumberFlag), cl::desc("Show the line number with each matching line."), cl::cat(Output_Options), cl::Grouping);
145static cl::alias LineNumberAlias("line-number", cl::desc("Alias for -n"), cl::aliasopt(LineNumberOption));
146
147bool ByteOffsetFlag;
148static cl::opt<bool, true> ByteOffsetOption("b", cl::location(ByteOffsetFlag), cl::desc("Show the byte offset within the file for each matching line."), cl::cat(Output_Options), cl::Grouping);
149static cl::alias ByteOffsetAlias("byte-offset", cl::desc("Alias for -b"), cl::aliasopt(ByteOffsetOption));
150
151bool UnixByteOffsetsFlag;
152static cl::opt<bool, true> UnixByteOffsetsOption("u", cl::location(UnixByteOffsetsFlag), cl::desc("If byte offsets are displayed, report offsets as if all lines are terminated with a single LF."), cl::cat(Output_Options), cl::Grouping);
153static cl::alias UnixByteOffsetsAlias("unix-byte-offsets", cl::desc("Alias for -u"), cl::aliasopt(UnixByteOffsetsOption));
154
155bool InitialTabFlag;
156static cl::opt<bool, true> InitialTabOption("T", cl::location(InitialTabFlag), cl::desc("Line up matched line content using an inital tab character."), cl::cat(Output_Options), cl::Grouping);
157static cl::alias InitialTabAlias("initial-tab", cl::desc("Alias for -T"), cl::aliasopt(InitialTabOption));
158
159bool OnlyMatchingFlag;
160static cl::opt<bool, true> OnlyMatchingOption("o", cl::location(OnlyMatchingFlag), cl::desc("Display only the exact strings that match the pattern, with possibly multiple matches per line."), cl::cat(Output_Options), cl::Grouping);
161static cl::alias OnlyMatchingAlias("only-matching", cl::desc("Alias for -o"), cl::aliasopt(OnlyMatchingOption));
162
163std::string LabelFlag;
164    static cl::opt<std::string, true> LabelOption("label", cl::location(LabelFlag), cl::init("(standard input)"),
165                                              cl::desc("Set a label for input lines matched from stdin."), cl::cat(Output_Options));
166
167bool LineBufferedFlag;
168static cl::opt<bool, true> LineBufferedOption("line-buffered", cl::location(LineBufferedFlag), cl::desc("Buffer lines to output."), cl::cat(Output_Options));
169
170int AfterContextFlag;
171static cl::opt<int, true> AfterContextOption("A", cl::location(AfterContextFlag), cl::desc("Print <num> lines of context after each matching line."), cl::cat(Output_Options), cl::Grouping);
172static cl::alias AfterContextAlias("after-context", cl::desc("Alias for -A"), cl::aliasopt(AfterContextOption));
173
174int BeforeContextFlag;
175static cl::opt<int, true> BeforeContextOption("B", cl::location(BeforeContextFlag), cl::desc("Print <num>lines of context before each matching line."), cl::cat(Output_Options), cl::Grouping);
176static cl::alias BeforeContextAlias("before-context", cl::desc("Alias for -B"), cl::aliasopt(BeforeContextOption));
177
178int ContextFlag;
179static cl::opt<int, true> ContextOption("C", cl::location(ContextFlag), cl::desc("Print <num> lines of context before and after each matching line."), cl::cat(Output_Options), cl::Grouping);
180static cl::alias ContextAlias("context", cl::desc("Alias for -C"), cl::aliasopt(ContextOption));
181
182int MaxCountFlag;
183static cl::opt<int, true> MaxCountOption("m", cl::location(MaxCountFlag),
184                                         cl::desc("Process only the first <num> matches per file3."),
185                                         cl::cat(Output_Options), cl::Grouping);
186static cl::alias MaxCountAlias("max-count", cl::desc("Alias for -m"), cl::aliasopt(MaxCountOption));
187   
188ColoringType ColorFlag;
189static cl::opt<ColoringType, true> Color("color", cl::desc("Set colorization of the output"), cl::location(ColorFlag), cl::cat(Output_Options), cl::init(neverColor),
190                                 cl::values(clEnumValN(alwaysColor, "always", "Enable colorization"),
191                                            clEnumValN(autoColor,   "auto", "Colorize output to stdout"),
192                                            clEnumValN(neverColor,  "never", "Disable colorization")
193                                            CL_ENUM_VAL_SENTINEL));
194static cl::alias ColorAlias("colour", cl::desc("Alias for -color"), cl::aliasopt(Color));
/*
 *  Handler for errors raised through llvm::report_fatal_error.
 *
 *  When NDEBUG is not defined, the message is rethrown as a std::runtime_error.
 *  Otherwise "icgrep ERROR: <Message>" is written to file descriptor 2, LLVM's
 *  interrupt handlers are run, and the process exits with InternalFailureCode.
 *  UserData and GenCrashDiag are not used.
 */
static void icgrep_error_handler(void *UserData, const std::string &Message, bool GenCrashDiag) {
#ifdef NDEBUG
    // Adapted from LLVM's own report_fatal_error implementation.
    SmallVector<char, 64> storage;
    raw_svector_ostream stream(storage);
    stream << "icgrep ERROR: " << Message << "\n";
    const StringRef text = stream.str();
    // A failed write is deliberately ignored: nothing more can be done here.
    const ssize_t ignored = ::write(2, text.data(), text.size());
    (void)ignored;
    // Give registered cleanups a chance to run, in particular the removal of
    // files registered with RemoveFileOnSignal.
    llvm::sys::RunInterruptHandlers();
    exit(InternalFailureCode);
#else
    throw std::runtime_error(Message);
#endif
}
216   
217
218void InitializeCommandLineInterface(int argc, char *argv[]) {
219    llvm::install_fatal_error_handler(&icgrep_error_handler);
220    codegen::ParseCommandLineOptions(argc, argv, {&RE_Options, &Input_Options, &Output_Options, re::re_toolchain_flags(), pablo::pablo_toolchain_flags(), codegen::codegen_flags()});
221    if (argv::RecursiveFlag || argv::DereferenceRecursiveFlag) {
222        argv::DirectoriesFlag = argv::Recurse;
223    }
224   
225    if (TextFlag) {
226        if (BinaryNonMatchingFlag || (BinaryFilesFlag == WithoutMatch)) {
227            llvm::report_fatal_error("Conflicting options for binary files.\n");
228        }
229        BinaryFilesFlag = Text;
230    }
231    if (BinaryNonMatchingFlag) {
232        if (BinaryFilesFlag == Binary) {
233            llvm::report_fatal_error("Conflicting options for binary files.\n");
234        }
235        BinaryFilesFlag = WithoutMatch;
236    }
237    if (BinaryFlag) {
238        llvm::report_fatal_error("Sorry, -U is not yet supported.\n");
239    }
240    if (ByteOffsetFlag) {
241        llvm::report_fatal_error("Sorry, -b is not yet supported.\n");
242    }
243    if (UnixByteOffsetsFlag) {
244        llvm::report_fatal_error("Sorry, -u is not yet supported.\n");
245    }
246    if (OnlyMatchingFlag) {
247        llvm::report_fatal_error("Sorry, -o is not yet supported.\n");
248    }
249    if (LineBufferedFlag) {
250        llvm::report_fatal_error("Sorry, -line-buffered is not yet supported.\n");
251    }
252    if (AfterContextFlag) {
253        llvm::report_fatal_error("Sorry, -A is not yet supported.\n");
254    }
255    if (BeforeContextFlag) {
256        llvm::report_fatal_error("Sorry, -B is not yet supported.\n");
257    }
258    if (ContextFlag) {
259        llvm::report_fatal_error("Sorry, -C is not yet supported.\n");
260    }
261    if (ColorFlag!=neverColor) {
262        llvm::report_fatal_error("Sorry, -color is not yet supported.\n");
263    }
264    if (Mode == QuietMode) {
265        NoMessagesFlag = true;
266    }
267    if ((Mode == QuietMode) | (Mode == FilesWithMatch) | (Mode == FilesWithoutMatch)) {
268        MaxCountFlag = 1;
269    }
270}
271}
Note: See TracBrowser for help on using the repository browser.