Changeset 5476 for icGREP


Ignore:
Timestamp:
May 25, 2017, 11:08:15 AM (2 years ago)
Author:
cameron
Message:

Command line interface - systematic support for legacy flags

Location:
icGREP/icgrep-devel/icgrep
Files:
2 added
4 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r5474 r5476  
    9797target_link_libraries (RegExpCompiler RegExpADT)
    9898
    99 add_executable(icgrep icgrep.cpp grep_engine.cpp kernels/scanmatchgen.cpp kernels/cc_kernel.cpp kernels/cc_scan_kernel.cpp kernels/linebreak_kernel.cpp kernels/streams_merge.cpp kernels/match_count.cpp kernels/grep_kernel.cpp kernels/until_n.cpp)
     99add_executable(icgrep icgrep.cpp grep_interface.cpp grep_engine.cpp kernels/scanmatchgen.cpp kernels/cc_kernel.cpp kernels/cc_scan_kernel.cpp kernels/linebreak_kernel.cpp kernels/streams_merge.cpp kernels/match_count.cpp kernels/grep_kernel.cpp kernels/until_n.cpp)
    100100add_executable(u8u16 u8u16.cpp)
    101101add_executable(base64 base64.cpp kernels/radix64.cpp)
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5475 r5476  
    11/*
    2  *  Copyright (c) 2016 International Characters.
     2 *  Copyright (c) 2017 International Characters.
    33 *  This software is licensed to the public under the Open Software License 3.0.
    44 *  icgrep is a trademark of International Characters.
     
    66
    77#include "grep_engine.h"
     8#include "grep_interface.h"
    89#include <llvm/IR/Module.h>
    9 #include <llvm/Support/CommandLine.h>
    1010#include <boost/filesystem.hpp>
    1111#include <UCD/UnicodeNameData.h>
     
    4646namespace grep {
    4747
    48 static cl::OptionCategory RE_Options("A. Regular Expression Interpretation", "These options control regular expression parsing and interpretation");
    49 
    50 re::RE_Syntax RegexpSyntax;
    51 static cl::opt<re::RE_Syntax, true> RegexpSyntaxOption(cl::desc("Regular expression syntax: (default PCRE)"),
    52     cl::values(
    53         clEnumValN(re::RE_Syntax::ERE, "E", "Posix extended regular expression (ERE) syntax"),
    54         clEnumValN(re::RE_Syntax::FixedStrings, "F", "Fixed strings, separated by newlines"),
    55         clEnumValN(re::RE_Syntax::BRE, "G", "Posix basic regular expression (BRE) syntax"),
    56         clEnumValN(re::RE_Syntax::PCRE, "P", "Perl-compatible regular expression (PCRE) syntax"),
    57         clEnumValN(re::RE_Syntax::ERE, "extended-regexp", "Alias for -E"),
    58         clEnumValN(re::RE_Syntax::FixedStrings, "fixed-strings", "Alias for -F"),
    59         clEnumValN(re::RE_Syntax::BRE, "basic-regexp", "Alias for -G"),
    60         clEnumValN(re::RE_Syntax::PCRE, "perl-regexp", "Alias for -P"),
    61         clEnumValN(re::RE_Syntax::PROSITE, "PROSITE", "PROSITE protein patterns syntax"),
    62         clEnumValEnd), cl::cat(RE_Options), cl::Grouping, cl::location(RegexpSyntax), cl::init(re::RE_Syntax::PCRE));
    63 
    64 bool IgnoreCaseFlag;
    65 static cl::opt<bool, true> IgnoreCase("i", cl::desc("Ignore case distinctions in the pattern and the file (alias: -ignore-case)."),
    66                                       cl::cat(RE_Options), cl::location(IgnoreCaseFlag), cl::Grouping);
    67 static cl::alias IgnoreCaseAlias("ignore-case", cl::desc("Alias for -i"), cl::aliasopt(IgnoreCase), cl::NotHidden);
    68 
    69 bool InvertMatchFlag;
    70 static cl::opt<bool, true> InvertMatch("v", cl::desc("Invert match results: select non-matching lines (alias: -invert-match)."),
    71                                        cl::cat(RE_Options), cl::location(InvertMatchFlag), cl::Grouping);
    72 static cl::alias InvertMatchAlias("invert-match", cl::desc("Alias for -v"), cl::aliasopt(InvertMatch), cl::NotHidden);
    73 
    74 bool LineRegexpFlag;
    75 static cl::opt<bool, true> LineRegexp("x", cl::desc("Require that entire lines be matched (alias: -line-regexp)."), cl::cat(RE_Options),
    76                                       cl::location(LineRegexpFlag), cl::Grouping);
    77 static cl::alias LineRegexpAlias("line-regexp", cl::desc("Alias for -x"), cl::aliasopt(LineRegexp), cl::NotHidden);
    78 
    79 bool WordRegexpFlag;
    80 static cl::opt<bool, true> WordRegexp("w", cl::desc("Require that that whole words be matched (alias: -word-regexp)."), cl::cat(RE_Options),
    81                                       cl::location(WordRegexpFlag), cl::Grouping);
    82 static cl::alias WordRegexpAlias("word-regexp", cl::desc("Alias for -w"), cl::aliasopt(WordRegexp), cl::NotHidden);
    83 
    84 const cl::OptionCategory * grep_regexp_flags() {
    85     return &RE_Options;
    86 }
    87 
    88 static cl::OptionCategory GrepInputOptions("B. Input Options",
    89                                              "These options control the input.");
    90 
    91 static cl::opt<bool> NullData("z", cl::desc("Use the NUL character (codepoint 00) as the line-break character for input."), cl::cat(GrepInputOptions), cl::Grouping);
    92 static cl::alias NullDataAlias("null-data", cl::desc("Alias for -z"), cl::aliasopt(NullData));
    93 
    94 bool RecursiveFlag;
    95 static cl::opt<bool, true> Recursive("r", cl::desc("Recursively process files within directories, (but follow only top-level symlinks unless -R)."),
    96                                cl::location(RecursiveFlag), cl::cat(GrepInputOptions), cl::Grouping);
    97 static cl::alias RecursiveAlias("recursive", cl::desc("Alias for -r"), cl::aliasopt(Recursive));
    98 
    99 bool DereferenceRecursiveFlag;
    100 static cl::opt<bool, true> DereferenceRecursive("R", cl::desc("Recursively process files within directories, following symlinks at all levels."),
    101                                           cl::location(DereferenceRecursiveFlag), cl::cat(GrepInputOptions), cl::Grouping);
    102 static cl::alias DereferenceRecursiveAlias("dereference-recursive", cl::desc("Alias for -R"), cl::aliasopt(DereferenceRecursive));
    103 
    104 
    105 
    106 
    107 static cl::OptionCategory bGrepOutputOptions("C. Output Options",
    108                                              "These options control the output.");
    109 
    110 GrepModeType Mode;
    111 static cl::opt<GrepModeType, true> GrepModeOption(cl::desc("Abbreviated output mode options:"),
    112     cl::values(
    113         clEnumValN(CountOnly, "c", "Display only the count of matching lines per file."),
    114         clEnumValN(FilesWithMatch, "l", "Display only the names of files that have at least one match to the pattern."),
    115         clEnumValN(FilesWithoutMatch, "L", "Display only the names of files that do not match the pattern."),
    116         clEnumValN(QuietMode, "q", "Do not generate any output and ignore errors; set the return to zero status if a match is found."),
    117         clEnumValN(CountOnly, "count", "Alias for -c"),
    118         clEnumValN(FilesWithMatch, "files-with-match", "Alias for -l"),
    119         clEnumValN(FilesWithoutMatch, "files-without-match", "Alias for -L"),
    120         clEnumValN(QuietMode, "quiet", "Alias for -q"),
    121         clEnumValN(QuietMode, "silent", "Alias for -q"),
    122         clEnumValEnd), cl::cat(bGrepOutputOptions), cl::Grouping, cl::location(Mode), cl::init(NormalMode));
    123 
    124 
    125 static cl::opt<bool> SilenceFileErrors("s", cl::desc("Suppress messages for file errors."), cl::init(false),  cl::cat(bGrepOutputOptions));
    126 
    127 static cl::opt<bool> NormalizeLineBreaks("normalize-line-breaks", cl::desc("Normalize line breaks to std::endl."), cl::init(false),  cl::cat(bGrepOutputOptions));
    128 
    129 static cl::opt<bool> ShowFileNames("H", cl::desc("Show the file name with each matching line."), cl::cat(bGrepOutputOptions));
    130 static cl::alias ShowFileNamesLong("with-filename", cl::desc("Alias for -H"), cl::aliasopt(ShowFileNames));
    131 
    132 static cl::opt<bool> ShowLineNumbers("n", cl::desc("Show the line number with each matching line."), cl::cat(bGrepOutputOptions));
    133 static cl::alias ShowLineNumbersLong("line-number", cl::desc("Alias for -n"), cl::aliasopt(ShowLineNumbers));
    134 
    135 static cl::opt<int> MaxCount("m", cl::desc("Limit the number of matches per file."), cl::cat(bGrepOutputOptions), cl::init((size_t) 0), cl::Prefix);
    136 static cl::alias MaxCountLong("max-count", cl::desc("Alias for -m"), cl::aliasopt(MaxCount));
    137 
    138 static cl::opt<int> AfterContext("A", cl::desc("Print <num> lines of context after each matching line."), cl::cat(bGrepOutputOptions), cl::Prefix);
    139 static cl::alias AfterContextAlias("after-context", cl::desc("Alias for -A"), cl::aliasopt(AfterContext));
    140 
    141 static cl::opt<int> BeforeContext("B", cl::desc("Print <num>lines of context before each matching line."), cl::cat(bGrepOutputOptions), cl::Prefix);
    142 static cl::alias BeforeContextAlias("before-context", cl::desc("Alias for -B"), cl::aliasopt(BeforeContext));
    143 
    144 static cl::opt<int> Context("C", cl::desc("Print <num> lines of context before and after each matching line."), cl::cat(bGrepOutputOptions), cl::Prefix);
    145 static cl::alias ContextAlias("context", cl::desc("Alias for -C"), cl::aliasopt(Context));
    146 
    147 static cl::opt<bool> OnlyMatching("o", cl::desc("Display only the exact strings that match the pattern, with possibly multiple matches per line."), cl::cat(bGrepOutputOptions), cl::Grouping);
    148 static cl::alias OnlyMatchingAlias("only-matching", cl::desc("Alias for -o"), cl::aliasopt(OnlyMatching));
    149 
    150 static cl::opt<bool> Null("Z", cl::desc("Write NUL characters after filenames generated to output."), cl::cat(bGrepOutputOptions), cl::Grouping);
    151 static cl::alias NullAlias("null", cl::desc("Alias for -Z"), cl::aliasopt(Null));
    152 
    153 static cl::opt<bool> ByteOffset("b", cl::desc("Show the byte offset within the file for each matching line."), cl::cat(bGrepOutputOptions), cl::Grouping);
    154 static cl::alias ByteOffsetAlias("byte-offset", cl::desc("Alias for -b"), cl::aliasopt(ByteOffset));
    155 
    156 static cl::opt<bool> UnixByteOffsets("u", cl::desc("If byte offsets are displayed, report offsets as if all lines are terminated with a single LF."), cl::cat(bGrepOutputOptions), cl::Grouping);
    157 static cl::alias UnixByteOffsetsAlias("unix-byte-offsets", cl::desc("Alias for -u"), cl::aliasopt(UnixByteOffsets));
    158 
    159 static cl::opt<bool> InitialTab("T", cl::desc("Line up matched line content using an inital tab character."), cl::cat(bGrepOutputOptions), cl::Grouping);
    160 static cl::alias InitialTabAlias("initial-tab", cl::desc("Alias for -T"), cl::aliasopt(InitialTab));
    161 
    162 
    163 const cl::OptionCategory * grep_output_flags() {
    164     return &bGrepOutputOptions;
    165 }
    166 
    167 const cl::OptionCategory * grep_input_flags() {
    168     return &GrepInputOptions;
    169 }
    170 
    17148
    17249static re::CC * parsedCodePointSet = nullptr;
     
    254131void initFileResult(std::vector<std::string> filenames){
    255132    const int n = filenames.size();
    256     if (n > 1) {
    257         ShowFileNames = true;
     133    if ((n > 1) && !NoFilenameFlag) {
     134        WithFilenameFlag = true;
    258135    }
    259136    inputFiles = filenames;
     
    275152    assert (line_end <= filesize);
    276153
    277     if (ShowFileNames) {
     154    if (WithFilenameFlag) {
    278155        resultStrs[fileIdx] << inputFiles[fileIdx] << ':';
    279156    }
    280     if (ShowLineNumbers) {
     157    if (LineNumberFlag) {
    281158        // Internally line numbers are counted from 0.  For display, adjust
    282159        // the line number so that lines are numbered from 1.
     
    292169        // The match position is at end-of-file.   We have a final unterminated line.
    293170        resultStrs[fileIdx].write((char *)&buffer[line_start], (line_end - line_start) * sizeof(CodeUnit));
    294         if (NormalizeLineBreaks) {
     171        if (NormalizeLineBreaksFlag) {
    295172            resultStrs[fileIdx] << '\n';  // terminate it
    296173        }
    297174    } else {
    298175        const auto end_byte = buffer[line_end];
    299         if (NormalizeLineBreaks) {
     176        if (grep::NormalizeLineBreaksFlag) {
    300177            if (LLVM_UNLIKELY(end_byte == 0x85)) {
    301178                // Line terminated with NEL, on the second byte.  Back up 1.
     
    335212    if (grepMode == CountOnly) {
    336213        size_t total = 0;
    337         if (!ShowFileNames) {
     214        if (!WithFilenameFlag) {
    338215            for (unsigned i = 0; i < inputFiles.size(); ++i) {
    339216                std::cout << total_CountOnly[i] << std::endl;
     
    500377    kernel::Kernel * sourceK = nullptr;
    501378   
    502     size_t MatchLimit = ((grepMode == QuietMode) | (grepMode == FilesWithMatch) | (grepMode == FilesWithoutMatch)) ? 1 : MaxCount;
     379    size_t MatchLimit = ((grepMode == QuietMode) | (grepMode == FilesWithMatch) | (grepMode == FilesWithoutMatch)) ? 1 : MaxCountFlag;
    503380
    504381    if (grepSource == GrepSource::Internal) {
     
    574451    }
    575452   
    576     if (InvertMatch) {
     453    if (InvertMatchFlag) {
    577454        kernel::Kernel * invertK = mGrepDriver->addKernelInstance(make_unique<kernel::InvertMatchesKernel>(idb));
    578455        StreamSetBuffer * OriginalMatches = MergedResults;
  • icGREP/icgrep-devel/icgrep/grep_engine.h

    r5474 r5476  
    11/*
    2  *  Copyright (c) 2016 International Characters.
     2 *  Copyright (c) 2017 International Characters.
    33 *  This software is licensed to the public under the Open Software License 3.0.
    44 *  icgrep is a trademark of International Characters.
    55 */
    6 #ifndef DO_GREP_H
    7 #define DO_GREP_H
     6#ifndef GREP_ENGINE_H
     7#define GREP_ENGINE_H
     8#include <grep_interface.h>
    89#include <grep_type.h>  // for GrepType, GrepType::Normal
    910#include <string>       // for string
    1011#include <vector>
    11 #include <re/re_parser.h>  // for
     12#include <re/re_parser.h> 
    1213
    1314namespace re { class CC; }
     
    1819
    1920namespace grep {
    20 
    21 // Regular expression syntax, interpretation and processing.
    22 extern re::RE_Syntax RegexpSyntax;
    23 extern bool IgnoreCaseFlag;
    24 extern bool InvertMatchFlag;
    25 extern bool LineRegexpFlag;
    26 extern bool WordRegexpFlag;
    27 
    28 // Grep input sources and interpretation
    29 extern bool RecursiveFlag;
    30 extern bool DereferenceRecursiveFlag;
    31 
    32 // Grep output modes and flags.
    33 enum GrepModeType {QuietMode, FilesWithMatch, FilesWithoutMatch, CountOnly, NormalMode};
    34 extern GrepModeType Mode;
    35 
    36 
    37 const llvm::cl::OptionCategory * grep_regexp_flags();
    38 const llvm::cl::OptionCategory * grep_input_flags();
    39 const llvm::cl::OptionCategory * grep_output_flags();
    4021
    4122class GrepEngine {
  • icGREP/icgrep-devel/icgrep/icgrep.cpp

    r5474 r5476  
    1818#include <re/re_utility.h>
    1919#include <grep_engine.h>
     20#include <grep_interface.h>
    2021#include <fstream>
    2122#include <string>
     
    5455static cl::opt<int> REsPerGroup("re-num", cl::desc("Number of regular expressions processed by each kernel."), cl::init(1));
    5556static std::vector<std::string> allFiles;
    56 //
    57 // Handler for errors reported through llvm::report_fatal_error.  Report
    58 // and signal error code 2 (grep convention).
    59 //
    60 static void icgrep_error_handler(void *UserData, const std::string &Message, bool GenCrashDiag) {
    61     #ifndef NDEBUG
    62     throw std::runtime_error(Message);
    63     #else
    64     // Modified from LLVM's internal report_fatal_error logic.
    65     SmallVector<char, 64> Buffer;
    66     raw_svector_ostream OS(Buffer);
    67     OS << "icgrep ERROR: " << Message << "\n";
    68     StringRef MessageStr = OS.str();
    69     ssize_t written = ::write(2, MessageStr.data(), MessageStr.size());
    70     (void)written; // If something went wrong, we deliberately just give up.
    71     // Run the interrupt handlers to make sure any special cleanups get done, in
    72     // particular that we remove files registered with RemoveFileOnSignal.
    73     llvm::sys::RunInterruptHandlers();
    74     exit(2);
    75     #endif
    76 }
    7757
    7858static re::ModeFlagSet globalFlags = 0;
     
    214194
    215195int main(int argc, char *argv[]) {
    216    
    217     llvm::install_fatal_error_handler(&icgrep_error_handler);
    218     AddParabixVersionPrinter();
    219 #ifndef USE_LLVM_3_6
    220     cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&LegacyGrepOptions, &EnhancedGrepOptions, grep::grep_regexp_flags(), grep::grep_output_flags(), re::re_toolchain_flags(), pablo::pablo_toolchain_flags(), codegen::codegen_flags()});
    221 #endif
    222     cl::ParseCommandLineOptions(argc, argv);
    223     if (grep::RegexpSyntax == re::RE_Syntax::FixedStrings) {
    224         llvm::report_fatal_error("Sorry, FixedStrings syntax is not fully supported\n.");
    225     }
    226 
     196
     197    grep::InitializeCommandLineInterface(argc, argv);
     198   
    227199    const auto REs = readExpressions();
    228200
Note: See TracChangeset for help on using the changeset viewer.