source: icGREP/icgrep-devel/icgrep/re/re_toolchain.cpp

Last change on this file was 5951, checked in by cameron, 2 months ago

Back reference analysis in support of future back reference compilation mode

File size: 5.5 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <toolchain/toolchain.h>
8#include <grep_interface.h>
9#include <re/re_toolchain.h>
10#include <cc/cc_compiler.h>            // for CC_Compiler
11#include <llvm/Support/CommandLine.h>  // for clEnumVal, clEnumValEnd, Optio...
12#include <re/re_compiler.h>            // for RE_Compiler
13#include <re/re_nullable.h>            // for RE_Nullable
14#include <re/re_star_normal.h>         // for RE_Star_Normal
15#include <re/re_simplifier.h>          // for RE_Simplifier
16#include <re/re_minimizer.h>
17#include <re/re_local.h>
18#include <re/printer_re.h>
19#include <re/re_analysis.h>
20#include <re/re_cc.h>
21#include <re/casing.h>
22#include <re/exclude_CC.h>
23#include <re/re_name_resolve.h>
24#include <re/grapheme_clusters.h>
25#include <llvm/Support/raw_ostream.h>
26#include <llvm/Support/ErrorHandling.h>
27#include <toolchain/toolchain.h>
28
29using namespace pablo;
30using namespace llvm;
31
32namespace re {
33
34static cl::OptionCategory RegexOptions("Regex Toolchain Options",
35                                              "These options control the regular expression transformation and compilation.");
36const cl::OptionCategory * LLVM_READONLY re_toolchain_flags() {
37    return &RegexOptions;
38}
39
40static cl::bits<RE_PrintFlags> 
41    PrintOptions(cl::values(clEnumVal(ShowREs, "Print parsed or generated regular expressions"),
42                            clEnumVal(ShowAllREs, "Print all regular expression passes"),
43                            clEnumVal(ShowStrippedREs, "Print REs with nullable prefixes/suffixes removed"),
44                            clEnumVal(ShowSimplifiedREs, "Print final simplified REs")
45                            CL_ENUM_VAL_SENTINEL), cl::cat(RegexOptions));
46
47static cl::bits<RE_AlgorithmFlags>
48    AlgorithmOptions(cl::values(clEnumVal(DisableLog2BoundedRepetition, "disable log2 optimizations for bounded repetition of bytes"),
49                              clEnumVal(DisableIfHierarchy, "disable nested if hierarchy for generated Unicode classes (not recommended)"), 
50                              clEnumVal(DisableMatchStar, "disable MatchStar optimization"), 
51                              clEnumVal(DisableUnicodeMatchStar, "disable Unicode MatchStar optimization"),
52                              clEnumVal(DisableUnicodeLineBreak, "disable Unicode line breaks - use LF only")
53                              CL_ENUM_VAL_SENTINEL), cl::cat(RegexOptions));
54
55bool LLVM_READONLY AlgorithmOptionIsSet(RE_AlgorithmFlags flag) {
56    return AlgorithmOptions.isSet(flag);
57}
58
59int IfInsertionGap;
60static cl::opt<int, true> 
61    IfInsertionGapOption("if-insertion-gap",  cl::location(IfInsertionGap), cl::init(3),
62                         cl::desc("minimum number of nonempty elements between inserted if short-circuit tests"), 
63                         cl::cat(RegexOptions));
64
65RE * resolveModesAndExternalSymbols(RE * r, bool globallyCaseInsensitive) {
66    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowREs)) {
67        errs() << "Parser:\n" << Printer_RE::PrintRE(r) << '\n';
68    }
69    r = resolveGraphemeMode(r, false /* not in grapheme mode at top level*/);
70    if (PrintOptions.isSet(ShowAllREs)) {
71        errs() << "resolveGraphemeMode:\n" << Printer_RE::PrintRE(r) << '\n';
72    }
73    r = re::resolveUnicodeProperties(r);
74    if (PrintOptions.isSet(ShowAllREs)) {
75        errs() << "resolveUnicodeProperties:\n" << Printer_RE::PrintRE(r) << '\n';
76    }
77    r = resolveCaseInsensitiveMode(r, globallyCaseInsensitive);
78    if (PrintOptions.isSet(ShowAllREs)) {
79        errs() << "resolveCaseInsensitiveMode:\n" << Printer_RE::PrintRE(r) << '\n';
80    }
81    return r;
82}
83
84RE * excludeUnicodeLineBreak(RE * r) {
85    r = exclude_CC(r, re::makeCC(re::makeCC(0x0A, 0x0D), re::makeCC(re::makeCC(0x85), re::makeCC(0x2028, 0x2029))));
86    if (PrintOptions.isSet(ShowAllREs)) {
87        errs() << "excludeUnicodeLineBreak:\n" << Printer_RE::PrintRE(r) << '\n';
88    }
89    return r;
90}
91
92RE * regular_expression_passes(RE * r) {
93
94    //Optimization passes to simplify the AST.
95    r = RE_Nullable::removeNullablePrefix(r);
96    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
97        errs() << "RemoveNullablePrefix:\n" << Printer_RE::PrintRE(r) << '\n';
98    }
99    r = RE_Nullable::removeNullableSuffix(r);
100    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
101        errs() << "RemoveNullableSuffix:\n" << Printer_RE::PrintRE(r) << '\n';
102    }
103    r = RE_Star_Normal::star_normal(r);
104    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowSimplifiedREs)) {
105        //Print to the terminal the AST that was transformed to the star normal form.
106        errs() << "Star_Normal_Form:\n" << Printer_RE::PrintRE(r) << '\n';
107    }
108    r = re::resolveNames(r);
109    if (PrintOptions.isSet(ShowAllREs)) {
110        errs() << "Resolve Names:\n" << Printer_RE::PrintRE(r) << '\n';
111    }
112    if (codegen::OptLevel > 1) {
113        r = RE_Minimizer::minimize(r);
114    } else {
115        r = RE_Simplifier::simplify(r);
116    }
117    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowSimplifiedREs)) {
118        //Print to the terminal the AST that was generated by the simplifier.
119        errs() << "Simplifier:\n" << Printer_RE::PrintRE(r) << '\n';
120    }
121
122    if (!DefiniteLengthBackReferencesOnly(r)) {
123        llvm::report_fatal_error("Future back reference support: references must be within a fixed distance from a fixed-length capture.");
124    }
125    return r;
126}
127
128}
Note: See TracBrowser for help on using the repository browser.