source: icGREP/icgrep-devel/icgrep/grep_interface.cpp

Last change on this file was 6184, checked in by nmedfort, 6 months ago

Initial version of PipelineKernel + revised StreamSet model.

File size: 14.9 KB
Line 
1/*
2 *  Copyright (c) 2018 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <grep_interface.h>
8#include <llvm/Support/CommandLine.h>
9#include <llvm/Support/ErrorHandling.h>
10#include <llvm/Support/Signals.h>
11#include <llvm/Support/raw_ostream.h>
12#include <util/file_select.h>
13#include <toolchain/toolchain.h>
14#include <re/parsers/parser.h>
15#include <re/re_alt.h>
16#include <re/re_toolchain.h>
17#include <fstream>
18#include <string>
19
20#include <pablo/pablo_toolchain.h>
21
22using namespace llvm;
23
24namespace argv {
25
26/*
27 *  A.  Regular expression syntax, interpretation and processing.
28 */
29
30static cl::OptionCategory RE_Options("A. Regular Expression Interpretation", "These options control regular expression parsing and interpretation");
31
32re::RE_Syntax RegexpSyntax;
33static cl::opt<re::RE_Syntax, true> RegexpSyntaxOption(cl::desc("Regular expression syntax: (default PCRE)"),
34    cl::values(
35        clEnumValN(re::RE_Syntax::ERE, "E", "Posix extended regular expression (ERE) syntax"),
36        clEnumValN(re::RE_Syntax::FixedStrings, "F", "Fixed strings, separated by newlines"),
37        clEnumValN(re::RE_Syntax::BRE, "G", "Posix basic regular expression (BRE) syntax"),
38        clEnumValN(re::RE_Syntax::PCRE, "P", "Perl-compatible regular expression (PCRE) syntax"),
39        clEnumValN(re::RE_Syntax::ERE, "extended-regexp", "Alias for -E"),
40        clEnumValN(re::RE_Syntax::FixedStrings, "fixed-strings", "Alias for -F"),
41        clEnumValN(re::RE_Syntax::BRE, "basic-regexp", "Alias for -G"),
42        clEnumValN(re::RE_Syntax::PCRE, "perl-regexp", "Alias for -P"),
43        clEnumValN(re::RE_Syntax::FileGLOB, "GLOB", "Posix GLOB syntax for file name patterns"),
44        clEnumValN(re::RE_Syntax::PROSITE, "PROSITE", "PROSITE protein patterns syntax")
45        CL_ENUM_VAL_SENTINEL), cl::cat(RE_Options), cl::Grouping, cl::location(RegexpSyntax), cl::init(re::RE_Syntax::PCRE));
46
47bool IgnoreCaseFlag;
48static cl::opt<bool, true> IgnoreCaseOption("i", cl::location(IgnoreCaseFlag), cl::desc("Ignore case distinctions in the pattern and the file."), cl::cat(RE_Options), cl::Grouping);
49static cl::alias IgnoreCaseAlias("ignore-case", cl::desc("Alias for -i"), cl::aliasopt(IgnoreCaseOption));
50
51bool InvertMatchFlag;
52static cl::opt<bool, true> InvertMatchOption("v", cl::location(InvertMatchFlag), cl::desc("Invert match results: select non-matching lines."), cl::cat(RE_Options), cl::Grouping);
53static cl::alias InvertMatchAlias("invert-match", cl::desc("Alias for -v"), cl::aliasopt(InvertMatchOption));
54
55bool LineRegexpFlag;
56static cl::opt<bool, true> LineRegexpOption("x", cl::location(LineRegexpFlag), cl::desc("Require that entire lines be matched."), cl::cat(RE_Options), cl::Grouping);
57static cl::alias LineRegexpAlias("line-regexp", cl::desc("Alias for -x"), cl::aliasopt(LineRegexpOption));
58
59bool WordRegexpFlag;
60static cl::opt<bool, true> WordRegexpOption("w", cl::location(WordRegexpFlag), cl::desc("Require that that whole words be matched."), cl::cat(RE_Options), cl::Grouping);
61static cl::alias WordRegexpAlias("word-regexp", cl::desc("Alias for -w"), cl::aliasopt(WordRegexpOption));
62
63std::vector<std::string> RegexpVector;
64static cl::list<std::string, std::vector<std::string>> RegexpOption("e", cl::location(RegexpVector), cl::desc("Regular expression"), cl::ZeroOrMore, cl::cat(RE_Options), cl::Grouping);
65static cl::alias RegexpAlias("regexp", cl::desc("Alias for -e"), cl::aliasopt(RegexpOption));
66
67std::string FileFlag;
68static cl::opt<std::string, true> FileOption("f", cl::location(FileFlag), cl::desc("Take regular expressions (one per line) from a file."), cl::cat(RE_Options), cl::Grouping);
69static cl::alias FileAlias("file", cl::desc("Alias for -f"), cl::aliasopt(FileOption));
70   
71/*
72 *  B.  Grep input sources and interpretation.
73 */
74   
75static cl::OptionCategory Input_Options("B. Input Options", "These options control the input.");
76
77bool TextFlag;
78static cl::opt<bool, true> TextOption("a", cl::location(TextFlag), cl::desc("Treat each input file as text, even if it is a binary file."), cl::cat(Input_Options), cl::Grouping);
79static cl::alias TextAlias("text", cl::desc("Alias for -a"), cl::aliasopt(TextOption));
80
81bool BinaryNonMatchingFlag;
82static cl::opt<bool, true> BinaryNonMatchingOption("I", cl::location(BinaryNonMatchingFlag), cl::desc("Treat binary files as non-matching."), cl::cat(Input_Options), cl::Grouping);
83static cl::alias BinaryNonMatchingAlias("binary-non-matching", cl::desc("Alias for -I"), cl::aliasopt(BinaryNonMatchingOption));
84
85bool BinaryFlag;
86static cl::opt<bool, true> BinaryOption("U", cl::location(BinaryFlag), cl::desc("Treat each input file as a binary file, without CRLF normalization."), cl::cat(Input_Options), cl::Grouping);
87static cl::alias BinaryAlias("binary", cl::desc("Alias for -U"), cl::aliasopt(BinaryOption));
88
89bool NullDataFlag;
90static cl::opt<bool, true> NullDataOption("z", cl::location(NullDataFlag), cl::desc("Use the NUL character (codepoint 00) as the line-break character for input."), cl::cat(Input_Options), cl::Grouping);
91static cl::alias NullDataAlias("null-data", cl::desc("Alias for -z"), cl::aliasopt(NullDataOption));
92
93bool UnicodeLinesFlag;
94static cl::opt<bool, true> UnicodeLinesOption("Unicode-lines", cl::location(UnicodeLinesFlag), cl::desc("Enable Unicode line breaks (LF/VT/FF/CR/NEL/LS/PS/CRLF)"), cl::cat(Input_Options));
95
96BinaryFilesMode BinaryFilesFlag;
97static cl::opt<BinaryFilesMode, true> BinaryFilesOption("binary-files", cl::desc("Processing mode for binary files:"),
98                                                     cl::values(clEnumValN(Binary, "binary", "Report match/non-match without printing matches."),
99                                                                clEnumValN(WithoutMatch, "without-match", "Always report as non-matching."),
100                                                                clEnumValN(Text, "text", "Treat binary files as text.")
101                                                                CL_ENUM_VAL_SENTINEL), cl::cat(Input_Options), cl::location(BinaryFilesFlag), cl::init(Text));
102   
103
104   
105/*
106 *  C.  Grep output modes and options.
107 */
108   
109   
110static cl::OptionCategory Output_Options("C. Output Options",
111                                            "These options control the output.");
112   
113GrepModeType Mode;
114static cl::opt<GrepModeType, true> GrepModeOption(cl::desc("Abbreviated output mode options:"),
115    cl::values(
116        clEnumValN(CountOnly, "c", "Display only the count of matching lines per file."),
117        clEnumValN(FilesWithMatch, "l", "Display only the names of files that have at least one match to the pattern."),
118        clEnumValN(FilesWithoutMatch, "L", "Display only the names of files that do not match the pattern."),
119        clEnumValN(QuietMode, "q", "Do not generate any output and ignore errors; set the return to zero status if a match is found."),
120        clEnumValN(CountOnly, "count", "Alias for -c"),
121        clEnumValN(FilesWithMatch, "files-with-match", "Alias for -l"),
122        clEnumValN(FilesWithoutMatch, "files-without-match", "Alias for -L"),
123        clEnumValN(QuietMode, "quiet", "Alias for -q"),
124        clEnumValN(QuietMode, "silent", "Alias for -q")
125        CL_ENUM_VAL_SENTINEL), cl::cat(Output_Options), cl::Grouping, cl::location(Mode), cl::init(NormalMode));
126
127bool WithFilenameFlag;
128static cl::opt<bool, true> WithFilenameOption("H", cl::location(WithFilenameFlag), cl::desc("Show the file name with each matching line."), cl::cat(Output_Options), cl::Grouping);
129static cl::alias WithFilenameAlias("with-filename", cl::desc("Alias for -H"), cl::aliasopt(WithFilenameOption));
130
131bool NoFilenameFlag;
132static cl::opt<bool, true> NoFilenameOption("h", cl::location(NoFilenameFlag), cl::desc("Do not show filenames with maches."), cl::cat(Output_Options), cl::Grouping);
133static cl::alias NoFilenameAlias("no-filename", cl::desc("Alias for -h"), cl::aliasopt(NoFilenameOption));
134
135bool NullFlag;
136static cl::opt<bool, true> NullOption("Z", cl::location(NullFlag), cl::desc("Write NUL characters after filenames generated to output."), cl::cat(Output_Options), cl::Grouping);
137static cl::alias NullAlias("null", cl::desc("Alias for -Z"), cl::aliasopt(NullOption));
138
139bool LineNumberFlag;
140static cl::opt<bool, true> LineNumberOption("n", cl::location(LineNumberFlag), cl::desc("Show the line number with each matching line."), cl::cat(Output_Options), cl::Grouping);
141static cl::alias LineNumberAlias("line-number", cl::desc("Alias for -n"), cl::aliasopt(LineNumberOption));
142
143bool ByteOffsetFlag;
144static cl::opt<bool, true> ByteOffsetOption("b", cl::location(ByteOffsetFlag), cl::desc("Show the byte offset within the file for each matching line."), cl::cat(Output_Options), cl::Grouping);
145static cl::alias ByteOffsetAlias("byte-offset", cl::desc("Alias for -b"), cl::aliasopt(ByteOffsetOption));
146
147bool UnixByteOffsetsFlag;
148static cl::opt<bool, true> UnixByteOffsetsOption("u", cl::location(UnixByteOffsetsFlag), cl::desc("If byte offsets are displayed, report offsets as if all lines are terminated with a single LF."), cl::cat(Output_Options), cl::Grouping);
149static cl::alias UnixByteOffsetsAlias("unix-byte-offsets", cl::desc("Alias for -u"), cl::aliasopt(UnixByteOffsetsOption));
150
151bool InitialTabFlag;
152static cl::opt<bool, true> InitialTabOption("T", cl::location(InitialTabFlag), cl::desc("Line up matched line content using an inital tab character."), cl::cat(Output_Options), cl::Grouping);
153static cl::alias InitialTabAlias("initial-tab", cl::desc("Alias for -T"), cl::aliasopt(InitialTabOption));
154
155bool OnlyMatchingFlag;
156static cl::opt<bool, true> OnlyMatchingOption("o", cl::location(OnlyMatchingFlag), cl::desc("Display only the exact strings that match the pattern, with possibly multiple matches per line."), cl::cat(Output_Options), cl::Grouping);
157static cl::alias OnlyMatchingAlias("only-matching", cl::desc("Alias for -o"), cl::aliasopt(OnlyMatchingOption));
158
159std::string LabelFlag;
160    static cl::opt<std::string, true> LabelOption("label", cl::location(LabelFlag), cl::init("(standard input)"),
161                                              cl::desc("Set a label for input lines matched from stdin."), cl::cat(Output_Options));
162
163bool LineBufferedFlag;
164static cl::opt<bool, true> LineBufferedOption("line-buffered", cl::location(LineBufferedFlag), cl::desc("Buffer lines to output."), cl::cat(Output_Options));
165
166int AfterContextFlag;
167static cl::opt<int, true> AfterContextOption("A", cl::location(AfterContextFlag), cl::desc("Print <num> lines of context after each matching line."), cl::cat(Output_Options), cl::Grouping);
168static cl::alias AfterContextAlias("after-context", cl::desc("Alias for -A"), cl::aliasopt(AfterContextOption));
169
170int BeforeContextFlag;
171static cl::opt<int, true> BeforeContextOption("B", cl::location(BeforeContextFlag), cl::desc("Print <num>lines of context before each matching line."), cl::cat(Output_Options), cl::Grouping);
172static cl::alias BeforeContextAlias("before-context", cl::desc("Alias for -B"), cl::aliasopt(BeforeContextOption));
173
174int ContextFlag;
175static cl::opt<int, true> ContextOption("C", cl::location(ContextFlag), cl::desc("Print <num> lines of context before and after each matching line."), cl::cat(Output_Options), cl::Grouping);
176static cl::alias ContextAlias("context", cl::desc("Alias for -C"), cl::aliasopt(ContextOption));
177
178int MaxCountFlag;
179static cl::opt<int, true> MaxCountOption("m", cl::location(MaxCountFlag),
180                                         cl::desc("Process only the first <num> matches per file3."),
181                                         cl::cat(Output_Options), cl::Grouping);
182static cl::alias MaxCountAlias("max-count", cl::desc("Alias for -m"), cl::aliasopt(MaxCountOption));
183   
184ColoringType ColorFlag;
185static cl::opt<ColoringType, true> Color("color", cl::desc("Set colorization of the output"), cl::location(ColorFlag), cl::cat(Output_Options), cl::init(neverColor),
186                                 cl::values(clEnumValN(alwaysColor, "always", "Enable colorization"),
187                                            clEnumValN(autoColor,   "auto", "Colorize output to stdout"),
188                                            clEnumValN(neverColor,  "never", "Disable colorization")
189                                            CL_ENUM_VAL_SENTINEL));
190static cl::alias ColorAlias("colour", cl::desc("Alias for -color"), cl::aliasopt(Color));
//
// Callback for errors raised through llvm::report_fatal_error; it is registered by
// InitializeCommandLineInterface via llvm::install_fatal_error_handler.
//
// Debug builds (NDEBUG not defined) turn the message into a std::runtime_error.
// Release builds write "icgrep ERROR: <message>" to stderr, run LLVM's interrupt
// handlers and terminate with the InternalFailureCode exit status.
//
// UserData and GenCrashDiag belong to the handler signature but are not used.
//
static void icgrep_error_handler(void *UserData, const std::string &Message, bool GenCrashDiag) {
#ifdef NDEBUG
    // Adapted from LLVM's internal report_fatal_error logic: the text is formatted into
    // a SmallVector-backed stream and handed to write() in a single call.
    SmallVector<char, 64> Storage;
    raw_svector_ostream Formatter(Storage);
    Formatter << "icgrep ERROR: " << Message << "\n";
    const auto Rendered = Formatter.str();
    // File descriptor 2 is stderr. The result is captured only to be discarded:
    // if the write fails there is nothing further to try, so we deliberately give up.
    const ssize_t Unchecked = ::write(2, Rendered.data(), Rendered.size());
    static_cast<void>(Unchecked);
    // Let the interrupt handlers perform their special cleanups first, in particular
    // removing any files registered with RemoveFileOnSignal.
    llvm::sys::RunInterruptHandlers();
    exit(InternalFailureCode);
#else
    throw std::runtime_error(Message);
#endif
}
212
213void InitializeCommandLineInterface(int argc, char *argv[]) {
214    llvm::install_fatal_error_handler(&icgrep_error_handler);
215    codegen::ParseCommandLineOptions(argc, argv, {&RE_Options, &Input_Options, &Output_Options, re::re_toolchain_flags(), pablo::pablo_toolchain_flags(), codegen::codegen_flags()});
216    if (argv::RecursiveFlag || argv::DereferenceRecursiveFlag) {
217        argv::DirectoriesFlag = argv::Recurse;
218    }
219   
220    if (TextFlag) {
221        if (BinaryNonMatchingFlag || (BinaryFilesFlag == WithoutMatch)) {
222            llvm::report_fatal_error("Conflicting options for binary files.\n");
223        }
224        BinaryFilesFlag = Text;
225    }
226    if (BinaryNonMatchingFlag) {
227        if (BinaryFilesFlag == Binary) {
228            llvm::report_fatal_error("Conflicting options for binary files.\n");
229        }
230        BinaryFilesFlag = WithoutMatch;
231    }
232    if (BinaryFlag) {
233        llvm::report_fatal_error("Sorry, -U is not yet supported.\n");
234    }
235    if (ByteOffsetFlag) {
236        llvm::report_fatal_error("Sorry, -b is not yet supported.\n");
237    }
238    if (UnixByteOffsetsFlag) {
239        llvm::report_fatal_error("Sorry, -u is not yet supported.\n");
240    }
241    if (OnlyMatchingFlag) {
242        llvm::report_fatal_error("Sorry, -o is not yet supported.\n");
243    }
244    if (LineBufferedFlag) {
245        llvm::report_fatal_error("Sorry, -line-buffered is not yet supported.\n");
246    }
247    if (AfterContextFlag) {
248        llvm::report_fatal_error("Sorry, -A is not yet supported.\n");
249    }
250    if (BeforeContextFlag) {
251        llvm::report_fatal_error("Sorry, -B is not yet supported.\n");
252    }
253    if (ContextFlag) {
254        llvm::report_fatal_error("Sorry, -C is not yet supported.\n");
255    }
256    if (ColorFlag!=neverColor) {
257        llvm::report_fatal_error("Sorry, -color is not yet supported.\n");
258    }
259    if ((Mode == QuietMode) | (Mode == FilesWithMatch) | (Mode == FilesWithoutMatch)) {
260        MaxCountFlag = 1;
261    }
262}
263}
Note: See TracBrowser for help on using the repository browser.