source: icGREP/icgrep-devel/icgrep/re/re_toolchain.cpp @ 5785

Last change on this file since 5785 was 5785, checked in by cameron, 13 months ago

Small fix

File size: 7.8 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <toolchain/toolchain.h>
8#include <grep_interface.h>
9#include <re/re_toolchain.h>
10#include <cc/cc_compiler.h>            // for CC_Compiler
11#include <llvm/Support/CommandLine.h>  // for clEnumVal, clEnumValEnd, Optio...
12#include <re/re_compiler.h>            // for RE_Compiler
13#include <re/re_nullable.h>            // for RE_Nullable
14#include <re/re_star_normal.h>         // for RE_Star_Normal
15#include <re/re_simplifier.h>          // for RE_Simplifier
16#include <re/re_minimizer.h>
17#include <re/re_local.h>
18#include <re/printer_re.h>
19#include <re/re_analysis.h>
20#include <re/re_cc.h>
21#include <re/casing.h>
22#include <re/exclude_CC.h>
23#include <re/re_name_resolve.h>
24#include <re/re_collect_unicodesets.h>
25#include <re/re_multiplex.h>
26#include <re/grapheme_clusters.h>
27#include <cc/multiplex_CCs.h>
28#include <llvm/Support/raw_ostream.h>
29
30using namespace pablo;
31using namespace llvm;
32
33namespace re {
34
35static cl::OptionCategory RegexOptions("Regex Toolchain Options",
36                                              "These options control the regular expression transformation and compilation.");
37const cl::OptionCategory * re_toolchain_flags() {
38    return &RegexOptions;
39}
40
41static cl::bits<RE_PrintFlags> 
42    PrintOptions(cl::values(clEnumVal(ShowREs, "Print parsed or generated regular expressions"),
43                            clEnumVal(ShowAllREs, "Print all regular expression passes"),
44                            clEnumVal(ShowStrippedREs, "Print REs with nullable prefixes/suffixes removed"),
45                            clEnumVal(ShowSimplifiedREs, "Print final simplified REs")
46                            CL_ENUM_VAL_SENTINEL), cl::cat(RegexOptions));
47
48static cl::bits<RE_AlgorithmFlags>
49    AlgorithmOptions(cl::values(clEnumVal(DisableLog2BoundedRepetition, "disable log2 optimizations for bounded repetition of bytes"),
50                              clEnumVal(DisableIfHierarchy, "disable nested if hierarchy for generated Unicode classes (not recommended)"), 
51                              clEnumVal(DisableMatchStar, "disable MatchStar optimization"), 
52                              clEnumVal(DisableUnicodeMatchStar, "disable Unicode MatchStar optimization"),
53                              clEnumVal(DisableUnicodeLineBreak, "disable Unicode line breaks - use LF only")
54                              CL_ENUM_VAL_SENTINEL), cl::cat(RegexOptions));
55
56bool AlgorithmOptionIsSet(RE_AlgorithmFlags flag) {
57    return AlgorithmOptions.isSet(flag);
58}
59
60int IfInsertionGap;
61static cl::opt<int, true> 
62    IfInsertionGapOption("if-insertion-gap",  cl::location(IfInsertionGap), cl::init(3),
63                         cl::desc("minimum number of nonempty elements between inserted if short-circuit tests"), 
64                         cl::cat(RegexOptions));
65
66
67std::pair<RE *, std::vector<re::CC *>> multiplexing_passes(RE * r) {
68    std::vector<re::CC *> charclasses;
69    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowREs)) {
70        errs() << "Parser:\n" << Printer_RE::PrintRE(r) << '\n';
71    }
72    //Optimization passes to simplify the AST.
73    r = RE_Nullable::removeNullablePrefix(r);
74    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
75        errs() << "RemoveNullablePrefix:\n" << Printer_RE::PrintRE(r) << '\n';
76    }
77    r = RE_Nullable::removeNullableSuffix(r);
78    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
79        errs() << "RemoveNullableSuffix:\n" << Printer_RE::PrintRE(r) << '\n';
80    }
81    r = RE_Nullable::removeNullableAssertion(r);
82    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
83        errs() << "RemoveNullableAssertion:\n" << Printer_RE::PrintRE(r) << '\n';
84    }
85    r = RE_Star_Normal::star_normal(r);
86   
87    r = RE_Simplifier::simplify(r);
88   
89    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowSimplifiedREs)) {
90        //Print to the terminal the AST that was generated by the simplifier.
91        errs() << "Simplifier:\n" << Printer_RE::PrintRE(r) << '\n';
92    }
93    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowSimplifiedREs)) {
94        //Print to the terminal the AST that was transformed to the star normal form.
95        errs() << "Star_Normal_Form:\n" << Printer_RE::PrintRE(r) << '\n';
96    }
97    r = resolveCaseInsensitiveMode(r, grep::IgnoreCaseFlag);
98    if (PrintOptions.isSet(ShowAllREs)) {
99        errs() << "resolveCaseInsensitiveMode:\n" << Printer_RE::PrintRE(r) << '\n';
100    }
101    r = resolveGraphemeMode(r, false /* not in grapheme mode at top level*/);
102    if (PrintOptions.isSet(ShowAllREs)) {
103        errs() << "resolveGraphemeMode:\n" << Printer_RE::PrintRE(r) << '\n';
104    }
105    r = re::resolveNames(r);
106    if (PrintOptions.isSet(ShowAllREs)) {
107        errs() << "resolveNames:\n" << Printer_RE::PrintRE(r) << '\n';
108    }
109    r = exclude_CC(r, re::makeCC(re::makeCC(0x0A, 0x0D), re::makeCC(re::makeCC(0x85), re::makeCC(0x2028, 0x2029))));
110    if (PrintOptions.isSet(ShowAllREs)) {
111        errs() << "exclude_CC:\n" << Printer_RE::PrintRE(r) << '\n';
112    }
113    const auto UnicodeSets = re::collectUnicodeSets(r);
114    std::vector<std::vector<unsigned>> exclusiveSetIDs;
115    doMultiplexCCs(UnicodeSets, exclusiveSetIDs, charclasses);
116    r = multiplex(r, UnicodeSets, exclusiveSetIDs);
117    if (PrintOptions.isSet(ShowAllREs)) {
118        errs() << "multiplex:\n" << Printer_RE::PrintRE(r) << '\n';
119    }
120    return std::pair<RE *, std::vector<re::CC *>>(r, charclasses);
121}
122
123RE * regular_expression_passes(RE * r)  {
124
125    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowREs)) {
126        errs() << "Parser:\n" << Printer_RE::PrintRE(r) << '\n';
127    }
128
129    //Optimization passes to simplify the AST.
130    r = RE_Nullable::removeNullablePrefix(r);
131    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
132        errs() << "RemoveNullablePrefix:\n" << Printer_RE::PrintRE(r) << '\n';
133    }
134    r = RE_Nullable::removeNullableSuffix(r);
135    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
136        errs() << "RemoveNullableSuffix:\n" << Printer_RE::PrintRE(r) << '\n';
137    }
138    r = RE_Nullable::removeNullableAssertion(r);
139    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
140        errs() << "RemoveNullableAssertion:\n" << Printer_RE::PrintRE(r) << '\n';
141    }
142    //r = RE_Nullable::removeNullableAfterAssertion(r);
143    //if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
144    //    errs() << "RemoveNullableAfterAssertion\n" << Printer_RE::PrintRE(r) << '\n';
145    //}
146
147    r = RE_Simplifier::simplify(r);
148
149    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowSimplifiedREs)) {
150        //Print to the terminal the AST that was generated by the simplifier.
151        errs() << "Simplifier:\n" << Printer_RE::PrintRE(r) << '\n';
152    }
153   
154//    r = RE_Minimizer::minimize(r);
155
156//    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowSimplifiedREs)) {
157//        //Print to the terminal the AST that was generated by the simplifier.
158//        errs() << "Minimizer:\n" << Printer_RE::PrintRE(r) << '\n';
159//    }
160
161    r = RE_Star_Normal::star_normal(r);
162
163    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowSimplifiedREs)) {
164        //Print to the terminal the AST that was transformed to the star normal form.
165        errs() << "Star_Normal_Form:\n" << Printer_RE::PrintRE(r) << '\n';
166    }
167
168    return r;
169}
170   
171PabloAST * re2pablo_compiler(PabloKernel * kernel, RE * re_ast) {
172    Var * const basis = kernel->getInputStreamVar("basis");
173    cc::CC_Compiler cc_compiler(kernel, basis);
174    RE_Compiler re_compiler(kernel, cc_compiler);
175    re_ast = re_compiler.compileUnicodeNames(re_ast);
176    return re_compiler.compile(re_ast);
177}
178
179}
Note: See TracBrowser for help on using the repository browser.