Changeset 5030


Ignore:
Timestamp:
May 5, 2016, 11:08:09 AM (17 months ago)
Author:
cameron
Message:

Restructure regular expression command-line flags

Location:
icGREP/icgrep-devel/icgrep
Files:
7 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5025 r5030  
    117117    Encoding encoding(Encoding::Type::UTF_8, 8);
    118118    mIsNameExpression = isNameExpression;
    119     re_ast = regular_expression_passes(encoding, re_ast);   
    120     pablo::PabloFunction * function = re2pablo_compiler(encoding, re_ast);
     119    re_ast = re::regular_expression_passes(encoding, re_ast);   
     120    pablo::PabloFunction * function = re::re2pablo_compiler(encoding, re_ast);
    121121   
    122122
  • icGREP/icgrep-devel/icgrep/icgrep.cpp

    r5028 r5030  
    1616#include <boost/uuid/sha1.hpp>
    1717#include <toolchain.h>
     18#include <re/re_toolchain.h>
    1819#include <mutex>
    1920
     
    203204
    204205int main(int argc, char *argv[]) {
    205     cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&LegacyGrepOptions, &EnhancedGrepOptions});
     206    cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *>{&LegacyGrepOptions, &EnhancedGrepOptions, re::re_toolchain_flags()});
    206207    cl::ParseCommandLineOptions(argc, argv);
    207208   
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r4980 r5030  
    55 */
    66#include <re/re_compiler.h>
     7#include <re/re_toolchain.h>
    78//Regular Expressions
    89#include <re/re_name.h>
     
    3536#include <unordered_set>
    3637
    37 static cl::OptionCategory fREcompilationOptions("Regex Compilation Options", "These options control the compilation of regular expressions to Pablo.");
    38 static cl::opt<bool> InvertMatches("v", cl::init(false),
    39                      cl::desc("select non-matching lines"), cl::cat(fREcompilationOptions));
    40 static cl::alias InvertMatchesLong("invert-matches", cl::desc("Alias for -v"), cl::aliasopt(InvertMatches));
    41 
    42 static cl::opt<bool> DisableLog2BoundedRepetition("disable-log2-bounded-repetition", cl::init(false),
    43                      cl::desc("disable log2 optimizations for bounded repetition of bytes"), cl::cat(fREcompilationOptions));
    44 static cl::opt<bool> DisableIfHierarchy("disable-if-hierarchy-strategy", cl::init(false),
    45                      cl::desc("disable nested if hierarchy for generated Unicode classes (not recommended)"), cl::cat(fREcompilationOptions));
    46 static cl::opt<int> IfInsertionGap("if-insertion-gap", cl::init(3), cl::desc("minimum number of nonempty elements between inserted if short-circuit tests"), cl::cat(fREcompilationOptions));
    47 static cl::opt<bool> DisableMatchStar("disable-matchstar", cl::init(false),
    48                      cl::desc("disable MatchStar optimization"), cl::cat(fREcompilationOptions));
    49 static cl::opt<bool> DisableUnicodeMatchStar("disable-unicode-matchstar", cl::init(false),
    50                      cl::desc("disable Unicode MatchStar optimization"), cl::cat(fREcompilationOptions));
    51 static cl::opt<bool> DisableUnicodeLineBreak("disable-unicode-linebreak", cl::init(false),
    52                      cl::desc("disable Unicode line breaks - use LF only"), cl::cat(fREcompilationOptions));
    53 
    54 #ifndef DISABLE_PREGENERATED_UCD_FUNCTIONS
    55 static cl::opt<bool> UsePregeneratedUnicode("use-pregenerated-unicode", cl::init(false),
    56                      cl::desc("use fixed pregenerated Unicode character class sets instead"), cl::cat(fREcompilationOptions));
    57 #endif
    58 
    59 #define UNICODE_LINE_BREAK (!DisableUnicodeLineBreak)
     38
     39#define UNICODE_LINE_BREAK (!AlgorithmOptionIsSet(DisableUnicodeLineBreak))
    6040
    6141using namespace pablo;
     
    173153                    } else {
    174154                        #ifndef DISABLE_PREGENERATED_UCD_FUNCTIONS
    175                         if (UsePregeneratedUnicode) {
     155                        if (AlgorithmOptionIsSet(UsePregeneratedUnicode)) {
    176156                            const std::string functionName = UCD::resolvePropertyFunction(name);
    177157                            const UCD::ExternalProperty & ep = UCD::resolveExternalProperty(functionName);
     
    307287    if (LLVM_LIKELY(nameMap.size() > 0)) {
    308288        UCD::UCDCompiler ucdCompiler(mCCCompiler);
    309         if (LLVM_UNLIKELY(DisableIfHierarchy)) {
     289        if (LLVM_UNLIKELY(AlgorithmOptionIsSet(DisableIfHierarchy))) {
    310290            ucdCompiler.generateWithoutIfHierarchy(nameMap, mPB);
    311291        } else {
     
    377357}
    378358
    379 void RE_Compiler::finalizeMatchResult(MarkerType match_result) {
     359void RE_Compiler::finalizeMatchResult(MarkerType match_result, bool InvertMatches) {
    380360    PabloAST * match_follow = mPB.createMatchStar(markerVar(match_result), mAny);
    381361    if (InvertMatches) {
     
    613593
    614594MarkerType RE_Compiler::processLowerBound(RE * repeated, int lb, MarkerType marker, PabloBuilder & pb) {
    615     if (!mGraphemeBoundaryRule && isByteLength(repeated) && !DisableLog2BoundedRepetition) {
     595    if (!mGraphemeBoundaryRule && isByteLength(repeated) && !AlgorithmOptionIsSet(DisableLog2BoundedRepetition)) {
    616596        PabloAST * cc = markerVar(compile(repeated, pb));
    617597        PabloAST * cc_lb = consecutive_matches(cc, 1, lb, pb);
     
    630610
    631611MarkerType RE_Compiler::processBoundedRep(RE * repeated, int ub, MarkerType marker, PabloBuilder & pb) {
    632     if (!mGraphemeBoundaryRule && isByteLength(repeated) && ub > 1 && !DisableLog2BoundedRepetition) {
     612    if (!mGraphemeBoundaryRule && isByteLength(repeated) && ub > 1 && !AlgorithmOptionIsSet(DisableLog2BoundedRepetition)) {
    633613        // log2 upper bound for fixed length (=1) class
    634614        // Create a mask of positions reachable within ub from current marker.
     
    656636    // always use PostPosition markers for unbounded repetition.
    657637    PabloAST * base = markerVar(AdvanceMarker(marker, MarkerPosition::InitialPostPositionByte, pb));
    658     if (!mGraphemeBoundaryRule && isByteLength(repeated)  && !DisableMatchStar) {
     638    if (!mGraphemeBoundaryRule && isByteLength(repeated)  && !AlgorithmOptionIsSet(DisableMatchStar)) {
    659639        PabloAST * cc = markerVar(compile(repeated, pb));
    660640        PabloAST * mstar = nullptr;
    661641        mstar = pb.createMatchStar(base, cc, "unbounded");
    662642        return makeMarker(MarkerPosition::InitialPostPositionByte, mstar);
    663     } else if (isUnicodeUnitLength(repeated) && !DisableMatchStar && !DisableUnicodeMatchStar) {
     643    } else if (isUnicodeUnitLength(repeated) && !AlgorithmOptionIsSet(DisableMatchStar) && !AlgorithmOptionIsSet(DisableUnicodeMatchStar)) {
    664644        PabloAST * cc = markerVar(compile(repeated, pb));
    665645        PabloAST * mstar = nullptr;
  • icGREP/icgrep-devel/icgrep/re/re_compiler.h

    r4841 r5030  
    5555    void initializeRequiredStreams();
    5656    void compileUnicodeNames(RE *& re);
    57     void finalizeMatchResult(MarkerType match_result);
     57    void finalizeMatchResult(MarkerType match_result, bool InvertMatches = false);
    5858    MarkerType compile(RE * re) {
    5959        return compile(re, mPB);
  • icGREP/icgrep-devel/icgrep/re/re_toolchain.cpp

    r4984 r5030  
    2525
    2626using namespace pablo;
     27namespace re {
     28
     29static cl::OptionCategory RegexOptions("Regex Toolchain Options",
     30                                              "These options control the regular expression transformation and compilation.");
     31const cl::OptionCategory * re_toolchain_flags() {return &RegexOptions;};
     32
     33static cl::bits<RE_PrintFlags>
     34    PrintOptions(cl::values(clEnumVal(PrintAllREs, "print regular expression passes"),
     35                            clEnumVal(PrintParsedREs, "print out parsed regular expressions"),
     36                            clEnumVal(PrintStrippedREs, "print out REs with nullable prefixes/suffixes removed"),
     37                            clEnumVal(PrintSimplifiedREs, "print out final simplified REs"),
     38                            clEnumValEnd), cl::cat(RegexOptions));
     39
     40static cl::bits<RE_AlgorithmFlags>
     41    AlgorithmOptions(cl::values(clEnumVal(DisableLog2BoundedRepetition, "disable log2 optimizations for bounded repetition of bytes"),
     42                              clEnumVal(DisableIfHierarchy, "disable nested if hierarchy for generated Unicode classes (not recommended)"),
     43                              clEnumVal(DisableMatchStar, "disable MatchStar optimization"),
     44                              clEnumVal(DisableUnicodeMatchStar, "disable Unicode MatchStar optimization"),
     45                              clEnumVal(DisableUnicodeLineBreak, "disable Unicode line breaks - use LF only"),
     46                              clEnumVal(InvertMatches, "select non-matching lines"),
     47#ifndef DISABLE_PREGENERATED_UCD_FUNCTIONS
     48                              clEnumVal(UsePregeneratedUnicode, "use fixed pregenerated Unicode character class sets instead"),
     49#endif
     50                              clEnumValEnd),
     51                   cl::cat(RegexOptions));
     52   
     53bool AlgorithmOptionIsSet(RE_AlgorithmFlags flag) {
     54    return AlgorithmOptions.isSet(flag);
     55}
     56
     57int IfInsertionGap;
     58static cl::opt<int, true>
     59    IfInsertionGapOption("if-insertion-gap",  cl::location(IfInsertionGap), cl::init(3),
     60                         cl::desc("minimum number of nonempty elements between inserted if short-circuit tests"),
     61                         cl::cat(RegexOptions));
    2762
    2863
    29 static cl::OptionCategory cRegexOutputOptions("Regex Dump Options",
    30                                               "These options control printing of intermediate regular expression structures.");
    31 static cl::opt<bool> PrintAllREs("print-REs", cl::init(false), cl::desc("print regular expression passes"), cl::cat(cRegexOutputOptions));
    32 static cl::opt<bool> PrintParsedREs("print-parsed-REs", cl::init(false), cl::desc("print out parsed regular expressions"), cl::cat(cRegexOutputOptions));
    33 static cl::opt<bool> PrintStrippedREs("print-stripped-REs", cl::init(false), cl::desc("print out REs with nullable prefixes/suffixes removed"), cl::cat(cRegexOutputOptions));
    34 static cl::opt<bool> PrintNamedREs("print-named-REs", cl::init(false), cl::desc("print out named REs"), cl::cat(cRegexOutputOptions));
    35 static cl::opt<bool> PrintUTF8REs("print-utf8-REs", cl::init(false), cl::desc("print out UTF-8 REs"), cl::cat(cRegexOutputOptions));
    36 static cl::opt<bool> PrintSimplifiedREs("print-simplified-REs", cl::init(false), cl::desc("print out final simplified REs"), cl::cat(cRegexOutputOptions));
    3764
    38 re::RE * regular_expression_passes(const Encoding encoding, re::RE * re_ast)  {
    39     if (PrintAllREs || PrintParsedREs) {
     65RE * regular_expression_passes(const Encoding encoding, RE * re_ast)  {
     66    if (PrintOptions.isSet(PrintAllREs) || PrintOptions.isSet(PrintParsedREs)) {
    4067        std::cerr << "Parser:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
    4168    }
     
    4370    //Optimization passes to simplify the AST.
    4471    re_ast = re::RE_Nullable::removeNullablePrefix(re_ast);
    45     if (PrintAllREs || PrintStrippedREs) {
     72    if (PrintOptions.isSet(PrintAllREs) || PrintOptions.isSet(PrintStrippedREs)) {
    4673        std::cerr << "RemoveNullablePrefix:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
    4774    }
    4875    re_ast = re::RE_Nullable::removeNullableSuffix(re_ast);
    49     if (PrintAllREs || PrintStrippedREs) {
     76    if (PrintOptions.isSet(PrintAllREs) || PrintOptions.isSet(PrintStrippedREs)) {
    5077        std::cerr << "RemoveNullableSuffix:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
    5178    }
    5279
    5380    re_ast = re::RE_Simplifier::simplify(re_ast);
    54     if (PrintAllREs || PrintSimplifiedREs) {
     81    if (PrintOptions.isSet(PrintAllREs) || PrintOptions.isSet(PrintSimplifiedREs)) {
    5582        //Print to the terminal the AST that was generated by the simplifier.
    5683        std::cerr << "Simplifier:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
     
    5986}
    6087   
    61 PabloFunction * re2pablo_compiler(const Encoding encoding, re::RE * re_ast) {
     88PabloFunction * re2pablo_compiler(const Encoding encoding, RE * re_ast) {
    6289    PabloFunction * function = PabloFunction::Create("process_block", 8, 2);
    6390    cc::CC_Compiler cc_compiler(*function, encoding);
     
    6592    re_compiler.initializeRequiredStreams();
    6693    re_compiler.compileUnicodeNames(re_ast);
    67     re_compiler.finalizeMatchResult(re_compiler.compile(re_ast));
     94    re_compiler.finalizeMatchResult(re_compiler.compile(re_ast), AlgorithmOptions.isSet(InvertMatches));
    6895    return function;
    6996}
    70 
     97}
  • icGREP/icgrep-devel/icgrep/re/re_toolchain.h

    r4984 r5030  
    11/*
    2  *  Copyright (c) 2015 International Characters.
     2 *  Copyright (c) 2016 International Characters.
    33 *  This software is licensed to the public under the Open Software License 3.0.
    44 *  icgrep is a trademark of International Characters.
     
    1212#include <re/re_re.h>
    1313#include <pablo/function.h>
     14#include <llvm/Support/CommandLine.h>
    1415
    15 re::RE * regular_expression_passes(const Encoding encoding, re::RE * re_ast);
     16namespace re {
    1617
    17 pablo::PabloFunction * re2pablo_compiler(const Encoding encoding, re::RE * re_ast);
     18enum RE_PrintFlags {
     19    PrintAllREs, PrintParsedREs, PrintStrippedREs, PrintSimplifiedREs
     20};
     21   
     22enum RE_AlgorithmFlags {
     23    DisableLog2BoundedRepetition, DisableIfHierarchy, DisableMatchStar, DisableUnicodeMatchStar,
     24    DisableUnicodeLineBreak, InvertMatches, UsePregeneratedUnicode
     25};
     26   
     27bool AlgorithmOptionIsSet(RE_AlgorithmFlags flag);
     28   
     29extern int IfInsertionGap;
    1830
     31const cl::OptionCategory * re_toolchain_flags();
     32
     33RE * regular_expression_passes(const Encoding encoding, RE * re_ast);
     34
     35pablo::PabloFunction * re2pablo_compiler(const Encoding encoding, RE * re_ast);
     36   
     37}
    1938#endif
  • icGREP/icgrep-devel/icgrep/wc.cpp

    r5029 r5030  
    6060};
    6161
    62 static cl::list<CountOptions> wcOptions(cl::desc("Counting options."),
     62static cl::list<CountOptions> wcOptions(
    6363  cl::values(clEnumValN(LineOption, "l", "Report the number of lines in each input file."),
    6464             clEnumValN(WordOption, "w", "Report the number of words in each input file."),
Note: See TracChangeset for help on using the changeset viewer.