source: icGREP/icgrep-devel/icgrep/re/re_toolchain.cpp @ 5803

Last change on this file since 5803 was 5803, checked in by cameron, 15 months ago

Regular expression toolchain progress

File size: 7.5 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <toolchain/toolchain.h>
8#include <grep_interface.h>
9#include <re/re_toolchain.h>
10#include <cc/cc_compiler.h>            // for CC_Compiler
11#include <llvm/Support/CommandLine.h>  // for clEnumVal, clEnumValEnd, Optio...
12#include <re/re_compiler.h>            // for RE_Compiler
13#include <re/re_nullable.h>            // for RE_Nullable
14#include <re/re_star_normal.h>         // for RE_Star_Normal
15#include <re/re_simplifier.h>          // for RE_Simplifier
16#include <re/re_minimizer.h>
17#include <re/re_local.h>
18#include <re/printer_re.h>
19#include <re/re_analysis.h>
20#include <re/re_cc.h>
21#include <re/casing.h>
22#include <re/exclude_CC.h>
23#include <re/re_name_resolve.h>
24#include <re/grapheme_clusters.h>
25#include <llvm/Support/raw_ostream.h>
26
27using namespace pablo;
28using namespace llvm;
29
30namespace re {
31
32static cl::OptionCategory RegexOptions("Regex Toolchain Options",
33                                              "These options control the regular expression transformation and compilation.");
34const cl::OptionCategory * re_toolchain_flags() {
35    return &RegexOptions;
36}
37
38static cl::bits<RE_PrintFlags> 
39    PrintOptions(cl::values(clEnumVal(ShowREs, "Print parsed or generated regular expressions"),
40                            clEnumVal(ShowAllREs, "Print all regular expression passes"),
41                            clEnumVal(ShowStrippedREs, "Print REs with nullable prefixes/suffixes removed"),
42                            clEnumVal(ShowSimplifiedREs, "Print final simplified REs")
43                            CL_ENUM_VAL_SENTINEL), cl::cat(RegexOptions));
44
45static cl::bits<RE_AlgorithmFlags>
46    AlgorithmOptions(cl::values(clEnumVal(DisableLog2BoundedRepetition, "disable log2 optimizations for bounded repetition of bytes"),
47                              clEnumVal(DisableIfHierarchy, "disable nested if hierarchy for generated Unicode classes (not recommended)"), 
48                              clEnumVal(DisableMatchStar, "disable MatchStar optimization"), 
49                              clEnumVal(DisableUnicodeMatchStar, "disable Unicode MatchStar optimization"),
50                              clEnumVal(DisableUnicodeLineBreak, "disable Unicode line breaks - use LF only")
51                              CL_ENUM_VAL_SENTINEL), cl::cat(RegexOptions));
52
53bool AlgorithmOptionIsSet(RE_AlgorithmFlags flag) {
54    return AlgorithmOptions.isSet(flag);
55}
56
57int IfInsertionGap;
58static cl::opt<int, true> 
59    IfInsertionGapOption("if-insertion-gap",  cl::location(IfInsertionGap), cl::init(3),
60                         cl::desc("minimum number of nonempty elements between inserted if short-circuit tests"), 
61                         cl::cat(RegexOptions));
62
63RE * resolveModesAndExternalSymbols(RE * r) {
64    r = resolveGraphemeMode(r, false /* not in grapheme mode at top level*/);
65    if (PrintOptions.isSet(ShowAllREs)) {
66        errs() << "resolveGraphemeMode:\n" << Printer_RE::PrintRE(r) << '\n';
67    }
68    r = re::resolveUnicodeProperties(r);
69    if (PrintOptions.isSet(ShowAllREs)) {
70        errs() << "resolveUnicodeProperties:\n" << Printer_RE::PrintRE(r) << '\n';
71    }
72    r = resolveCaseInsensitiveMode(r, grep::IgnoreCaseFlag);
73    if (PrintOptions.isSet(ShowAllREs)) {
74        errs() << "resolveCaseInsensitiveMode:\n" << Printer_RE::PrintRE(r) << '\n';
75    }
76}
77
78RE * excludeUnicodeLineBreak(RE * r) {
79    r = exclude_CC(r, re::makeCC(re::makeCC(0x0A, 0x0D), re::makeCC(re::makeCC(0x85), re::makeCC(0x2028, 0x2029))));
80    if (PrintOptions.isSet(ShowAllREs)) {
81        errs() << "excludeUnicodeLineBreak:\n" << Printer_RE::PrintRE(r) << '\n';
82    }
83}
84
85RE * multiplexing_prepasses(RE * r) {
86    std::vector<re::CC *> charclasses;
87    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowREs)) {
88        errs() << "Parser:\n" << Printer_RE::PrintRE(r) << '\n';
89    }
90    //Optimization passes to simplify the AST.
91    r = RE_Nullable::removeNullablePrefix(r);
92    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
93        errs() << "RemoveNullablePrefix:\n" << Printer_RE::PrintRE(r) << '\n';
94    }
95    r = RE_Nullable::removeNullableSuffix(r);
96    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
97        errs() << "RemoveNullableSuffix:\n" << Printer_RE::PrintRE(r) << '\n';
98    }
99    r = RE_Nullable::removeNullableAssertion(r);
100    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
101        errs() << "RemoveNullableAssertion:\n" << Printer_RE::PrintRE(r) << '\n';
102    }
103    r = RE_Star_Normal::star_normal(r);
104    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowSimplifiedREs)) {
105        //Print to the terminal the AST that was transformed to the star normal form.
106        errs() << "Star_Normal_Form:\n" << Printer_RE::PrintRE(r) << '\n';
107    }
108    r = re::resolveNames(r);
109    if (PrintOptions.isSet(ShowAllREs)) {
110        errs() << "resolveNames:\n" << Printer_RE::PrintRE(r) << '\n';
111    }
112
113    r = RE_Simplifier::simplify(r);
114    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowSimplifiedREs)) {
115        //Print to the terminal the AST that was generated by the simplifier.
116        errs() << "Simplifier:\n" << Printer_RE::PrintRE(r) << '\n';
117    }
118    return r;
119}
120
121RE * regular_expression_passes(RE * r)  {
122
123    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowREs)) {
124        errs() << "Parser:\n" << Printer_RE::PrintRE(r) << '\n';
125    }
126
127    //Optimization passes to simplify the AST.
128    r = RE_Nullable::removeNullablePrefix(r);
129    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
130        errs() << "RemoveNullablePrefix:\n" << Printer_RE::PrintRE(r) << '\n';
131    }
132    r = RE_Nullable::removeNullableSuffix(r);
133    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
134        errs() << "RemoveNullableSuffix:\n" << Printer_RE::PrintRE(r) << '\n';
135    }
136    r = RE_Nullable::removeNullableAssertion(r);
137    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
138        errs() << "RemoveNullableAssertion:\n" << Printer_RE::PrintRE(r) << '\n';
139    }
140    //r = RE_Nullable::removeNullableAfterAssertion(r);
141    //if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
142    //    errs() << "RemoveNullableAfterAssertion\n" << Printer_RE::PrintRE(r) << '\n';
143    //}
144
145    r = RE_Simplifier::simplify(r);
146
147    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowSimplifiedREs)) {
148        //Print to the terminal the AST that was generated by the simplifier.
149        errs() << "Simplifier:\n" << Printer_RE::PrintRE(r) << '\n';
150    }
151   
152//    r = RE_Minimizer::minimize(r);
153
154//    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowSimplifiedREs)) {
155//        //Print to the terminal the AST that was generated by the simplifier.
156//        errs() << "Minimizer:\n" << Printer_RE::PrintRE(r) << '\n';
157//    }
158
159    r = RE_Star_Normal::star_normal(r);
160
161    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowSimplifiedREs)) {
162        //Print to the terminal the AST that was transformed to the star normal form.
163        errs() << "Star_Normal_Form:\n" << Printer_RE::PrintRE(r) << '\n';
164    }
165
166    return r;
167}
168   
169PabloAST * re2pablo_compiler(PabloKernel * kernel, RE * re_ast) {
170    Var * const basis = kernel->getInputStreamVar("basis");
171    cc::CC_Compiler cc_compiler(kernel, basis);
172    // compile Unicode names
173    RE_Compiler re_compiler(kernel, cc_compiler);
174    return re_compiler.compile(re_ast);
175}
176
177}
Note: See TracBrowser for help on using the repository browser.