source: icGREP/icgrep-devel/icgrep/re/re_toolchain.cpp @ 5149

Last change on this file since 5149 was 5147, checked in by xuedongx, 3 years ago

remove nullable assertion

File size: 4.9 KB
Line 
1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <string>
8#include <iostream>
9#include <fstream>
10
11#include <re/re_toolchain.h>
12#include <re/re_cc.h>
13#include <re/re_nullable.h>
14#include <re/re_simplifier.h>
15#include <re/re_alt.h>
16#include <re/parsefailure.h>
17#include <re/re_parser.h>
18#include <re/re_compiler.h>
19#include <utf8_encoder.h>
20#include <cc/cc_compiler.h>
21#include <pablo/function.h>
22#include <re/printer_re.h>
23#include <llvm/Support/CommandLine.h>
24
25
26using namespace pablo;
27namespace re {
28
29static cl::OptionCategory RegexOptions("Regex Toolchain Options",
30                                              "These options control the regular expression transformation and compilation.");
31const cl::OptionCategory * re_toolchain_flags() {return &RegexOptions;};
32
33static cl::bits<RE_PrintFlags> 
34    PrintOptions(cl::values(clEnumVal(PrintAllREs, "print regular expression passes"),
35                            clEnumVal(PrintParsedREs, "print out parsed regular expressions"),
36                            clEnumVal(PrintStrippedREs, "print out REs with nullable prefixes/suffixes removed"),
37                            clEnumVal(PrintSimplifiedREs, "print out final simplified REs"),
38                            clEnumValEnd), cl::cat(RegexOptions));
39
40static cl::bits<RE_AlgorithmFlags>
41    AlgorithmOptions(cl::values(clEnumVal(DisableLog2BoundedRepetition, "disable log2 optimizations for bounded repetition of bytes"),
42                              clEnumVal(DisableIfHierarchy, "disable nested if hierarchy for generated Unicode classes (not recommended)"), 
43                              clEnumVal(DisableMatchStar, "disable MatchStar optimization"), 
44                              clEnumVal(DisableUnicodeMatchStar, "disable Unicode MatchStar optimization"),
45                              clEnumVal(DisableUnicodeLineBreak, "disable Unicode line breaks - use LF only"),
46                              clEnumValN(InvertMatches, "v", "select non-matching lines"),
47#ifndef DISABLE_PREGENERATED_UCD_FUNCTIONS
48                              clEnumVal(UsePregeneratedUnicode, "use fixed pregenerated Unicode character class sets instead"),
49#endif
50                              clEnumValEnd), 
51                   cl::cat(RegexOptions));
52
53bool AlgorithmOptionIsSet(RE_AlgorithmFlags flag) {
54    return AlgorithmOptions.isSet(flag);
55}
56
57int IfInsertionGap;
58static cl::opt<int, true> 
59    IfInsertionGapOption("if-insertion-gap",  cl::location(IfInsertionGap), cl::init(3),
60                         cl::desc("minimum number of nonempty elements between inserted if short-circuit tests"), 
61                         cl::cat(RegexOptions));
62
63
64
65RE * regular_expression_passes(RE * re_ast)  {
66    if (PrintOptions.isSet(PrintAllREs) || PrintOptions.isSet(PrintParsedREs)) {
67        std::cerr << "Parser:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
68    }
69
70    //Optimization passes to simplify the AST.
71    re_ast = re::RE_Nullable::removeNullablePrefix(re_ast);
72    if (PrintOptions.isSet(PrintAllREs) || PrintOptions.isSet(PrintStrippedREs)) {
73        std::cerr << "RemoveNullablePrefix:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
74    }
75    re_ast = re::RE_Nullable::removeNullableSuffix(re_ast);
76    if (PrintOptions.isSet(PrintAllREs) || PrintOptions.isSet(PrintStrippedREs)) {
77        std::cerr << "RemoveNullableSuffix:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
78    }
79    re_ast = re::RE_Nullable::removeNullableAssertion(re_ast);
80    if (PrintOptions.isSet(PrintAllREs) || PrintOptions.isSet(PrintStrippedREs)) {
81        std::cerr << "RemoveNullableAssertion:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
82    }
83    re_ast = re::RE_Nullable::removeNullableAfterAssertion(re_ast);
84    if (PrintOptions.isSet(PrintAllREs) || PrintOptions.isSet(PrintStrippedREs)) {
85        std::cerr << "RemoveNullableAfterAssertion" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
86    }
87   
88    re_ast = re::RE_Simplifier::simplify(re_ast);
89    if (PrintOptions.isSet(PrintAllREs) || PrintOptions.isSet(PrintSimplifiedREs)) {
90        //Print to the terminal the AST that was generated by the simplifier.
91        std::cerr << "Simplifier:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
92    }
93    return re_ast;
94}
95   
96PabloFunction * re2pablo_compiler(const unsigned encodingBits, RE * re_ast, bool CountOnly) {
97    PabloFunction * function = PabloFunction::Create("process_block", encodingBits, CountOnly ? 0 : 2);
98    cc::CC_Compiler cc_compiler(*function, encodingBits);
99    re::RE_Compiler re_compiler(*function, cc_compiler, CountOnly);
100    re_compiler.initializeRequiredStreams(encodingBits);
101    re_compiler.compileUnicodeNames(re_ast);
102    re_compiler.finalizeMatchResult(re_compiler.compile(re_ast), AlgorithmOptions.isSet(InvertMatches));
103    return function;
104}
105}
Note: See TracBrowser for help on using the repository browser.