source: icGREP/icgrep-devel/icgrep/grep_interface.cpp @ 5477

Last change on this file since 5477 was 5477, checked in by cameron, 2 years ago

Fix for max-count option

File size: 14.2 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
#include <grep_interface.h>
#include <llvm/Support/CommandLine.h>
#include <llvm/Support/ErrorHandling.h>
#include <llvm/Support/Signals.h>
#include <llvm/Support/raw_ostream.h>
#include <toolchain/toolchain.h>
#include <re/re_toolchain.h>
#include <pablo/pablo_toolchain.h>
#include <cstdlib>
#include <stdexcept>
#include <string>
#include <unistd.h>
15
16using namespace llvm;
17
18namespace grep {
19
20/*
21 *  A.  Regular expression syntax, interpretation and processing.
22 */
23
24static cl::OptionCategory RE_Options("A. Regular Expression Interpretation", "These options control regular expression parsing and interpretation");
25
26re::RE_Syntax RegexpSyntax;
27static cl::opt<re::RE_Syntax, true> RegexpSyntaxOption(cl::desc("Regular expression syntax: (default PCRE)"),
28    cl::values(
29        clEnumValN(re::RE_Syntax::ERE, "E", "Posix extended regular expression (ERE) syntax"),
30        clEnumValN(re::RE_Syntax::FixedStrings, "F", "Fixed strings, separated by newlines"),
31        clEnumValN(re::RE_Syntax::BRE, "G", "Posix basic regular expression (BRE) syntax"),
32        clEnumValN(re::RE_Syntax::PCRE, "P", "Perl-compatible regular expression (PCRE) syntax"),
33        clEnumValN(re::RE_Syntax::ERE, "extended-regexp", "Alias for -E"),
34        clEnumValN(re::RE_Syntax::FixedStrings, "fixed-strings", "Alias for -F"),
35        clEnumValN(re::RE_Syntax::BRE, "basic-regexp", "Alias for -G"),
36        clEnumValN(re::RE_Syntax::PCRE, "perl-regexp", "Alias for -P"),
37        clEnumValN(re::RE_Syntax::PROSITE, "PROSITE", "PROSITE protein patterns syntax"),
38        clEnumValEnd), cl::cat(RE_Options), cl::Grouping, cl::location(RegexpSyntax), cl::init(re::RE_Syntax::PCRE));
39
40bool IgnoreCaseFlag;
41static cl::opt<bool, true> IgnoreCase("i", cl::location(IgnoreCaseFlag), cl::desc("Ignore case distinctions in the pattern and the file."), cl::cat(RE_Options), cl::Grouping);
42static cl::alias IgnoreCaseAlias("ignore-case", cl::desc("Alias for -i"), cl::aliasopt(IgnoreCase));
43
44bool InvertMatchFlag;
45static cl::opt<bool, true> InvertMatch("v", cl::location(InvertMatchFlag), cl::desc("Invert match results: select non-matching lines."), cl::cat(RE_Options), cl::Grouping);
46static cl::alias InvertMatchAlias("invert-match", cl::desc("Alias for -v"), cl::aliasopt(InvertMatch));
47
48bool LineRegexpFlag;
49static cl::opt<bool, true> LineRegexp("x", cl::location(LineRegexpFlag), cl::desc("Require that entire lines be matched."), cl::cat(RE_Options), cl::Grouping);
50static cl::alias LineRegexpAlias("line-regexp", cl::desc("Alias for -x"), cl::aliasopt(LineRegexp));
51
52bool WordRegexpFlag;
53static cl::opt<bool, true> WordRegexp("w", cl::location(WordRegexpFlag), cl::desc("Require that that whole words be matched."), cl::cat(RE_Options), cl::Grouping);
54static cl::alias WordRegexpAlias("word-regexp", cl::desc("Alias for -w"), cl::aliasopt(WordRegexp));
55   
56   
57const cl::OptionCategory * grep_regexp_flags() {
58    return &RE_Options;
59}
60
61/*
62 *  B.  Grep input sources and interpretation.
63 */
64   
65static cl::OptionCategory Input_Options("B. Input Options", "These options control the input.");
66
67bool RecursiveFlag;
68static cl::opt<bool, true> Recursive("r", cl::location(RecursiveFlag), cl::desc("Recursively process files within directories, (but follow only top-level symlinks unless -R)."), cl::cat(Input_Options), cl::Grouping);
69static cl::alias RecursiveAlias("recursive", cl::desc("Alias for -r"), cl::aliasopt(Recursive));
70
71bool DereferenceRecursiveFlag;
72static cl::opt<bool, true> DereferenceRecursive("R", cl::location(DereferenceRecursiveFlag), cl::desc("Recursively process files within directories, following symlinks at all levels."), cl::cat(Input_Options), cl::Grouping);
73static cl::alias DereferenceRecursiveAlias("dereference-recursive", cl::desc("Alias for -R"), cl::aliasopt(DereferenceRecursive));
74
75bool TextFlag;
76static cl::opt<bool, true> Text("a", cl::location(TextFlag), cl::desc("Treat each input file as text, even if it is a binary file."), cl::cat(Input_Options), cl::Grouping);
77static cl::alias TextAlias("text", cl::desc("Alias for -a"), cl::aliasopt(Text));
78
79bool BinaryFlag;
80static cl::opt<bool, true> Binary("U", cl::location(BinaryFlag), cl::desc("Treat each input file as a binary file, without CRLF normalization."), cl::cat(Input_Options), cl::Grouping);
81static cl::alias BinaryAlias("binary", cl::desc("Alias for -U"), cl::aliasopt(Binary));
82
83bool NullDataFlag;
84static cl::opt<bool, true> NullData("z", cl::location(NullDataFlag), cl::desc("Use the NUL character (codepoint 00) as the line-break character for input."), cl::cat(Input_Options), cl::Grouping);
85static cl::alias NullDataAlias("null-data", cl::desc("Alias for -z"), cl::aliasopt(NullData));
86
87bool MmapFlag;
88static cl::opt<bool, true> Mmap("mmap", cl::location(MmapFlag), cl::desc("Use mmap for file input."), cl::cat(Input_Options));
89   
90   
91
92/*
93 *  C.  Grep output modes and options.
94 */
95   
96   
97static cl::OptionCategory Output_Options("C. Output Options",
98                                            "These options control the output.");
99   
100GrepModeType Mode;
101static cl::opt<GrepModeType, true> GrepModeOption(cl::desc("Abbreviated output mode options:"),
102    cl::values(
103        clEnumValN(CountOnly, "c", "Display only the count of matching lines per file."),
104        clEnumValN(FilesWithMatch, "l", "Display only the names of files that have at least one match to the pattern."),
105        clEnumValN(FilesWithoutMatch, "L", "Display only the names of files that do not match the pattern."),
106        clEnumValN(QuietMode, "q", "Do not generate any output and ignore errors; set the return to zero status if a match is found."),
107        clEnumValN(CountOnly, "count", "Alias for -c"),
108        clEnumValN(FilesWithMatch, "files-with-match", "Alias for -l"),
109        clEnumValN(FilesWithoutMatch, "files-without-match", "Alias for -L"),
110        clEnumValN(QuietMode, "quiet", "Alias for -q"),
111        clEnumValN(QuietMode, "silent", "Alias for -q"),
112        clEnumValEnd), cl::cat(Output_Options), cl::Grouping, cl::location(Mode), cl::init(NormalMode));
113
114bool NoMessagesFlag;
115static cl::opt<bool, true> NoMessages("s", cl::location(NoMessagesFlag), cl::desc("Suppress messages for file errors."), cl::cat(Output_Options), cl::Grouping);
116static cl::alias NoMessagesAlias("no-messages", cl::desc("Alias for -s"), cl::aliasopt(NoMessages));
117
118bool WithFilenameFlag;
119static cl::opt<bool, true> WithFilename("H", cl::location(WithFilenameFlag), cl::desc("Show the file name with each matching line."), cl::cat(Output_Options), cl::Grouping);
120static cl::alias WithFilenameAlias("with-filename", cl::desc("Alias for -H"), cl::aliasopt(WithFilename));
121
122bool NoFilenameFlag;
123static cl::opt<bool, true> NoFilename("h", cl::location(NoFilenameFlag), cl::desc("Do not show filenames with maches."), cl::cat(Output_Options), cl::Grouping);
124static cl::alias NoFilenameAlias("no-filename", cl::desc("Alias for -h"), cl::aliasopt(NoFilename));
125
126bool NullFlag;
127static cl::opt<bool, true> Null("Z", cl::location(NullFlag), cl::desc("Write NUL characters after filenames generated to output."), cl::cat(Output_Options), cl::Grouping);
128static cl::alias NullAlias("null", cl::desc("Alias for -Z"), cl::aliasopt(Null));
129
130bool LineNumberFlag;
131static cl::opt<bool, true> LineNumber("n", cl::location(LineNumberFlag), cl::desc("Show the line number with each matching line."), cl::cat(Output_Options), cl::Grouping);
132static cl::alias LineNumberAlias("line-number", cl::desc("Alias for -n"), cl::aliasopt(LineNumber));
133
134bool ByteOffsetFlag;
135static cl::opt<bool, true> ByteOffset("b", cl::location(ByteOffsetFlag), cl::desc("Show the byte offset within the file for each matching line."), cl::cat(Output_Options), cl::Grouping);
136static cl::alias ByteOffsetAlias("byte-offset", cl::desc("Alias for -b"), cl::aliasopt(ByteOffset));
137
138bool UnixByteOffsetsFlag;
139static cl::opt<bool, true> UnixByteOffsets("u", cl::location(UnixByteOffsetsFlag), cl::desc("If byte offsets are displayed, report offsets as if all lines are terminated with a single LF."), cl::cat(Output_Options), cl::Grouping);
140static cl::alias UnixByteOffsetsAlias("unix-byte-offsets", cl::desc("Alias for -u"), cl::aliasopt(UnixByteOffsets));
141
142bool InitialTabFlag;
143static cl::opt<bool, true> InitialTab("T", cl::location(InitialTabFlag), cl::desc("Line up matched line content using an inital tab character."), cl::cat(Output_Options), cl::Grouping);
144static cl::alias InitialTabAlias("initial-tab", cl::desc("Alias for -T"), cl::aliasopt(InitialTab));
145
146bool OnlyMatchingFlag;
147static cl::opt<bool, true> OnlyMatching("o", cl::location(OnlyMatchingFlag), cl::desc("Display only the exact strings that match the pattern, with possibly multiple matches per line."), cl::cat(Output_Options), cl::Grouping);
148static cl::alias OnlyMatchingAlias("only-matching", cl::desc("Alias for -o"), cl::aliasopt(OnlyMatching));
149
150bool LineBufferedFlag;
151static cl::opt<bool, true> LineBuffered("line-buffered", cl::location(LineBufferedFlag), cl::desc("Buffer lines to output."), cl::cat(Output_Options));
152
153bool NormalizeLineBreaksFlag;
154static cl::opt<bool, true> NormalizeLineBreaks("normalize-line-breaks", cl::location(NormalizeLineBreaksFlag), cl::desc("Normalize line breaks to LF."), cl::cat(Output_Options));
155
156int AfterContextFlag;
157static cl::opt<int, true> AfterContext("A", cl::location(AfterContextFlag), cl::desc("Print <num> lines of context after each matching line."), cl::cat(Output_Options), cl::Grouping, cl::Prefix);
158static cl::alias AfterContextAlias("after-context", cl::desc("Alias for -A"), cl::aliasopt(AfterContext));
159
160int BeforeContextFlag;
161static cl::opt<int, true> BeforeContext("B", cl::location(BeforeContextFlag), cl::desc("Print <num>lines of context before each matching line."), cl::cat(Output_Options), cl::Grouping, cl::Prefix);
162static cl::alias BeforeContextAlias("before-context", cl::desc("Alias for -B"), cl::aliasopt(BeforeContext));
163
164int ContextFlag;
165static cl::opt<int, true> Context("C", cl::location(ContextFlag), cl::desc("Print <num> lines of context before and after each matching line."), cl::cat(Output_Options), cl::Grouping, cl::Prefix);
166static cl::alias ContextAlias("context", cl::desc("Alias for -C"), cl::aliasopt(Context));
167
168int MaxCountFlag;
169static cl::opt<int, true> MaxCount("m", cl::location(MaxCountFlag), cl::desc("Process only the first <num> matches per file."), cl::cat(Output_Options), cl::Grouping, cl::Prefix);
170static cl::alias MaxCountAlias("max-count", cl::desc("Alias for -m"), cl::aliasopt(MaxCount));
171   
172std::string LabelFlag;
173static cl::opt<std::string, true> Label("label", cl::location(LabelFlag), cl::desc("Set a label for input lines matched from stdin."), cl::cat(Output_Options));
174   
175ColoringType ColorFlag;
176static cl::opt<ColoringType, true> Color("color", cl::desc("Set colorization of the output"), cl::location(ColorFlag), cl::cat(Output_Options), cl::init(neverColor),
177                                 cl::values(clEnumValN(alwaysColor, "always", "Enable colorization"),
178                                            clEnumValN(autoColor,   "auto", "Colorize output to stdout"),
179                                            clEnumValN(neverColor,  "never", "Disable colorization"),
180                                            clEnumValEnd));
181static cl::alias ColorAlias("colour", cl::desc("Alias for -color"), cl::aliasopt(Color));
//
// Handler for errors reported through llvm::report_fatal_error.
//
// Debug builds (NDEBUG undefined) throw the message as a std::runtime_error.
// Release builds write "icgrep ERROR: <message>" to standard error, run
// LLVM's interrupt handlers, and exit with the InternalFailureCode status.
//
// UserData and GenCrashDiag are part of the handler signature but are unused.
//
static void icgrep_error_handler(void *UserData, const std::string &Message, bool GenCrashDiag) {
    (void)UserData;
    (void)GenCrashDiag;
#ifndef NDEBUG
    throw std::runtime_error(Message);
#else
    // Modified from LLVM's internal report_fatal_error logic.
    SmallVector<char, 64> Buffer;
    raw_svector_ostream OS(Buffer);
    OS << "icgrep ERROR: " << Message << "\n";
    StringRef MessageStr = OS.str();
    // Write straight to the standard error descriptor with a single write().
    ssize_t written = ::write(STDERR_FILENO, MessageStr.data(), MessageStr.size());
    (void)written; // If something went wrong, we deliberately just give up.
    // Run the interrupt handlers to make sure any special cleanups get done, in
    // particular that we remove files registered with RemoveFileOnSignal.
    llvm::sys::RunInterruptHandlers();
    exit(InternalFailureCode);
#endif
}
203   
204
205void InitializeCommandLineInterface(int argc, char *argv[]) {
206    llvm::install_fatal_error_handler(&icgrep_error_handler);
207    AddParabixVersionPrinter();
208#ifndef USE_LLVM_3_6
209    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&RE_Options, &Input_Options, &Output_Options, re::re_toolchain_flags(), pablo::pablo_toolchain_flags(), codegen::codegen_flags()});
210#endif
211    cl::ParseCommandLineOptions(argc, argv);
212    if (RegexpSyntax == re::RE_Syntax::FixedStrings) {
213        llvm::report_fatal_error("Sorry, FixedStrings syntax is not fully supported.\n");
214    }
215    if (TextFlag) {
216        llvm::report_fatal_error("Sorry, -a is not yet supported.\n");
217    }
218    if (BinaryFlag) {
219        llvm::report_fatal_error("Sorry, -U is not yet supported.\n");
220    }
221    if (NullDataFlag) {
222        llvm::report_fatal_error("Sorry, -z is not yet supported.\n");
223    }
224    if (NoMessagesFlag) {
225        llvm::report_fatal_error("Sorry, -s is not yet supported.\n");
226    }
227    if (NullFlag) {
228        llvm::report_fatal_error("Sorry, -Z is not yet supported.\n");
229    }
230    if (ByteOffsetFlag) {
231        llvm::report_fatal_error("Sorry, -b is not yet supported.\n");
232    }
233    if (UnixByteOffsetsFlag) {
234        llvm::report_fatal_error("Sorry, -u is not yet supported.\n");
235    }
236    if (InitialTabFlag) {
237        llvm::report_fatal_error("Sorry, -T is not yet supported.\n");
238    }
239    if (OnlyMatchingFlag) {
240        llvm::report_fatal_error("Sorry, -o is not yet supported.\n");
241    }
242    if (LineBufferedFlag) {
243        llvm::report_fatal_error("Sorry, -line-buffered is not yet supported.\n");
244    }
245    if (AfterContextFlag) {
246        llvm::report_fatal_error("Sorry, -A is not yet supported.\n");
247    }
248    if (BeforeContextFlag) {
249        llvm::report_fatal_error("Sorry, -B is not yet supported.\n");
250    }
251    if (ContextFlag) {
252        llvm::report_fatal_error("Sorry, -C is not yet supported.\n");
253    }
254    if (LabelFlag!="") {
255        llvm::report_fatal_error("Sorry, -label is not yet supported.\n");
256    }
257    if (ColorFlag!=neverColor) {
258        llvm::report_fatal_error("Sorry, -color is not yet supported.\n");
259    }
260}
261}
Note: See TracBrowser for help on using the repository browser.