source: icGREP/icgrep-devel/icgrep/re/re_toolchain.cpp @ 5795

Last change on this file since 5795 was 5795, checked in by cameron, 15 months ago

Adding Alphabet to CCs: initial check-in

File size: 7.5 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <toolchain/toolchain.h>
8#include <grep_interface.h>
9#include <re/re_toolchain.h>
10#include <cc/cc_compiler.h>            // for CC_Compiler
11#include <llvm/Support/CommandLine.h>  // for clEnumVal, clEnumValEnd, Optio...
12#include <re/re_compiler.h>            // for RE_Compiler
13#include <re/re_nullable.h>            // for RE_Nullable
14#include <re/re_star_normal.h>         // for RE_Star_Normal
15#include <re/re_simplifier.h>          // for RE_Simplifier
16#include <re/re_minimizer.h>
17#include <re/re_local.h>
18#include <re/printer_re.h>
19#include <re/re_analysis.h>
20#include <re/re_cc.h>
21#include <re/casing.h>
22#include <re/exclude_CC.h>
23#include <re/re_name_resolve.h>
24#include <re/grapheme_clusters.h>
25#include <llvm/Support/raw_ostream.h>
26
27using namespace pablo;
28using namespace llvm;
29
30namespace re {
31
32static cl::OptionCategory RegexOptions("Regex Toolchain Options",
33                                              "These options control the regular expression transformation and compilation.");
34const cl::OptionCategory * re_toolchain_flags() {
35    return &RegexOptions;
36}
37
38static cl::bits<RE_PrintFlags> 
39    PrintOptions(cl::values(clEnumVal(ShowREs, "Print parsed or generated regular expressions"),
40                            clEnumVal(ShowAllREs, "Print all regular expression passes"),
41                            clEnumVal(ShowStrippedREs, "Print REs with nullable prefixes/suffixes removed"),
42                            clEnumVal(ShowSimplifiedREs, "Print final simplified REs")
43                            CL_ENUM_VAL_SENTINEL), cl::cat(RegexOptions));
44
45static cl::bits<RE_AlgorithmFlags>
46    AlgorithmOptions(cl::values(clEnumVal(DisableLog2BoundedRepetition, "disable log2 optimizations for bounded repetition of bytes"),
47                              clEnumVal(DisableIfHierarchy, "disable nested if hierarchy for generated Unicode classes (not recommended)"), 
48                              clEnumVal(DisableMatchStar, "disable MatchStar optimization"), 
49                              clEnumVal(DisableUnicodeMatchStar, "disable Unicode MatchStar optimization"),
50                              clEnumVal(DisableUnicodeLineBreak, "disable Unicode line breaks - use LF only")
51                              CL_ENUM_VAL_SENTINEL), cl::cat(RegexOptions));
52
53bool AlgorithmOptionIsSet(RE_AlgorithmFlags flag) {
54    return AlgorithmOptions.isSet(flag);
55}
56
57int IfInsertionGap;
58static cl::opt<int, true> 
59    IfInsertionGapOption("if-insertion-gap",  cl::location(IfInsertionGap), cl::init(3),
60                         cl::desc("minimum number of nonempty elements between inserted if short-circuit tests"), 
61                         cl::cat(RegexOptions));
62
63
64RE * multiplexing_prepasses(RE * r) {
65    std::vector<re::CC *> charclasses;
66    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowREs)) {
67        errs() << "Parser:\n" << Printer_RE::PrintRE(r) << '\n';
68    }
69    //Optimization passes to simplify the AST.
70    r = RE_Nullable::removeNullablePrefix(r);
71    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
72        errs() << "RemoveNullablePrefix:\n" << Printer_RE::PrintRE(r) << '\n';
73    }
74    r = RE_Nullable::removeNullableSuffix(r);
75    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
76        errs() << "RemoveNullableSuffix:\n" << Printer_RE::PrintRE(r) << '\n';
77    }
78    r = RE_Nullable::removeNullableAssertion(r);
79    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
80        errs() << "RemoveNullableAssertion:\n" << Printer_RE::PrintRE(r) << '\n';
81    }
82    r = RE_Star_Normal::star_normal(r);
83
84    r = resolveGraphemeMode(r, false /* not in grapheme mode at top level*/);
85    if (PrintOptions.isSet(ShowAllREs)) {
86        errs() << "resolveGraphemeMode:\n" << Printer_RE::PrintRE(r) << '\n';
87    }
88    r = re::resolveUnicodeProperties(r);
89    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
90        errs() << "resolveUnicodeProperties:\n" << Printer_RE::PrintRE(r) << '\n';
91    }
92
93    r = RE_Simplifier::simplify(r);
94   
95    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowSimplifiedREs)) {
96        //Print to the terminal the AST that was generated by the simplifier.
97        errs() << "Simplifier:\n" << Printer_RE::PrintRE(r) << '\n';
98    }
99    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowSimplifiedREs)) {
100        //Print to the terminal the AST that was transformed to the star normal form.
101        errs() << "Star_Normal_Form:\n" << Printer_RE::PrintRE(r) << '\n';
102    }
103    r = resolveCaseInsensitiveMode(r, grep::IgnoreCaseFlag);
104    if (PrintOptions.isSet(ShowAllREs)) {
105        errs() << "resolveCaseInsensitiveMode:\n" << Printer_RE::PrintRE(r) << '\n';
106    }
107    r = re::resolveNames(r);
108    if (PrintOptions.isSet(ShowAllREs)) {
109        errs() << "resolveNames:\n" << Printer_RE::PrintRE(r) << '\n';
110    }
111    r = exclude_CC(r, re::makeCC(re::makeCC(0x0A, 0x0D), re::makeCC(re::makeCC(0x85), re::makeCC(0x2028, 0x2029))));
112    if (PrintOptions.isSet(ShowAllREs)) {
113        errs() << "exclude_CC:\n" << Printer_RE::PrintRE(r) << '\n';
114    }
115    return r;
116}
117
118RE * regular_expression_passes(RE * r)  {
119
120    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowREs)) {
121        errs() << "Parser:\n" << Printer_RE::PrintRE(r) << '\n';
122    }
123
124    //Optimization passes to simplify the AST.
125    r = RE_Nullable::removeNullablePrefix(r);
126    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
127        errs() << "RemoveNullablePrefix:\n" << Printer_RE::PrintRE(r) << '\n';
128    }
129    r = RE_Nullable::removeNullableSuffix(r);
130    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
131        errs() << "RemoveNullableSuffix:\n" << Printer_RE::PrintRE(r) << '\n';
132    }
133    r = RE_Nullable::removeNullableAssertion(r);
134    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
135        errs() << "RemoveNullableAssertion:\n" << Printer_RE::PrintRE(r) << '\n';
136    }
137    //r = RE_Nullable::removeNullableAfterAssertion(r);
138    //if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
139    //    errs() << "RemoveNullableAfterAssertion\n" << Printer_RE::PrintRE(r) << '\n';
140    //}
141
142    r = RE_Simplifier::simplify(r);
143
144    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowSimplifiedREs)) {
145        //Print to the terminal the AST that was generated by the simplifier.
146        errs() << "Simplifier:\n" << Printer_RE::PrintRE(r) << '\n';
147    }
148   
149//    r = RE_Minimizer::minimize(r);
150
151//    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowSimplifiedREs)) {
152//        //Print to the terminal the AST that was generated by the simplifier.
153//        errs() << "Minimizer:\n" << Printer_RE::PrintRE(r) << '\n';
154//    }
155
156    r = RE_Star_Normal::star_normal(r);
157
158    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowSimplifiedREs)) {
159        //Print to the terminal the AST that was transformed to the star normal form.
160        errs() << "Star_Normal_Form:\n" << Printer_RE::PrintRE(r) << '\n';
161    }
162
163    return r;
164}
165   
166PabloAST * re2pablo_compiler(PabloKernel * kernel, RE * re_ast) {
167    Var * const basis = kernel->getInputStreamVar("basis");
168    cc::CC_Compiler cc_compiler(kernel, basis);
169    // compile Unicode names
170    RE_Compiler re_compiler(kernel, cc_compiler);
171    return re_compiler.compile(re_ast);
172}
173
174}
Note: See TracBrowser for help on using the repository browser.