source: icGREP/icgrep-devel/icgrep/re/re_toolchain.cpp @ 5493

Last change on this file since 5493 was 5493, checked in by cameron, 2 years ago

Restore check-ins from the last several days

File size: 4.9 KB
RevLine 
[4984]1/*
2 *  Copyright (c) 2015 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7#include <re/re_toolchain.h>
[5267]8#include <cc/cc_compiler.h>            // for CC_Compiler
9#include <llvm/Support/CommandLine.h>  // for clEnumVal, clEnumValEnd, Optio...
10#include <re/re_compiler.h>            // for RE_Compiler
11#include <re/re_nullable.h>            // for RE_Nullable
[5493]12#include <re/re_star_normal.h>         // for RE_Star_Normal
[5267]13#include <re/re_simplifier.h>          // for RE_Simplifier
[4984]14#include <re/printer_re.h>
[5267]15#include <iostream>
[4984]16
[5267]17using namespace pablo;
18using namespace llvm;
[4984]19
[5030]20namespace re {
[4984]21
[5030]22static cl::OptionCategory RegexOptions("Regex Toolchain Options",
23                                              "These options control the regular expression transformation and compilation.");
[5202]24const cl::OptionCategory * re_toolchain_flags() {
25    return &RegexOptions;
26}
[4984]27
[5030]28static cl::bits<RE_PrintFlags> 
[5295]29    PrintOptions(cl::values(clEnumVal(ShowREs, "Print parsed or generated regular expressions"),
30                            clEnumVal(ShowAllREs, "Print all regular expression passes"),
31                            clEnumVal(ShowStrippedREs, "Print REs with nullable prefixes/suffixes removed"),
32                            clEnumVal(ShowSimplifiedREs, "Print final simplified REs"),
[5030]33                            clEnumValEnd), cl::cat(RegexOptions));
[4984]34
[5030]35static cl::bits<RE_AlgorithmFlags>
36    AlgorithmOptions(cl::values(clEnumVal(DisableLog2BoundedRepetition, "disable log2 optimizations for bounded repetition of bytes"),
37                              clEnumVal(DisableIfHierarchy, "disable nested if hierarchy for generated Unicode classes (not recommended)"), 
38                              clEnumVal(DisableMatchStar, "disable MatchStar optimization"), 
39                              clEnumVal(DisableUnicodeMatchStar, "disable Unicode MatchStar optimization"),
40                              clEnumVal(DisableUnicodeLineBreak, "disable Unicode line breaks - use LF only"),
41                              clEnumValEnd), 
42                   cl::cat(RegexOptions));
[5033]43
[5030]44bool AlgorithmOptionIsSet(RE_AlgorithmFlags flag) {
45    return AlgorithmOptions.isSet(flag);
46}
47
48int IfInsertionGap;
49static cl::opt<int, true> 
50    IfInsertionGapOption("if-insertion-gap",  cl::location(IfInsertionGap), cl::init(3),
51                         cl::desc("minimum number of nonempty elements between inserted if short-circuit tests"), 
52                         cl::cat(RegexOptions));
53
54
55
[5137]56RE * regular_expression_passes(RE * re_ast)  {
[5295]57    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowREs)) {
[4984]58        std::cerr << "Parser:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
59    }
60
61    //Optimization passes to simplify the AST.
62    re_ast = re::RE_Nullable::removeNullablePrefix(re_ast);
[5295]63    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
[4984]64        std::cerr << "RemoveNullablePrefix:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
65    }
66    re_ast = re::RE_Nullable::removeNullableSuffix(re_ast);
[5295]67    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
[4984]68        std::cerr << "RemoveNullableSuffix:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
69    }
[5147]70    re_ast = re::RE_Nullable::removeNullableAssertion(re_ast);
[5295]71    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
[5147]72        std::cerr << "RemoveNullableAssertion:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
73    }
[5308]74    //re_ast = re::RE_Nullable::removeNullableAfterAssertion(re_ast);
75    //if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
76    //    std::cerr << "RemoveNullableAfterAssertion" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
77    //}
[5147]78   
[4984]79    re_ast = re::RE_Simplifier::simplify(re_ast);
[5295]80    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowSimplifiedREs)) {
[4984]81        //Print to the terminal the AST that was generated by the simplifier.
82        std::cerr << "Simplifier:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
83    }
[5493]84
85    re_ast = re::RE_Star_Normal::star_normal(re_ast);
86    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowSimplifiedREs)) {
87        //Print to the terminal the AST that was transformed to the star normal form.
88        std::cerr << "Star_Normal_Form:" << std::endl << Printer_RE::PrintRE(re_ast) << std::endl;
89    }   
[4984]90    return re_ast;
91}
92   
[5413]93void re2pablo_compiler(PabloKernel * kernel, RE * re_ast) {
[5310]94    Var * const basis = kernel->getInputStreamVar("basis");
[5357]95    Var * const linebreak = kernel->getInputStreamVar("linebreak");
[5310]96    cc::CC_Compiler cc_compiler(kernel, basis);
[5413]97    re::RE_Compiler re_compiler(kernel, cc_compiler);
[5357]98    re_compiler.initializeRequiredStreams(basis->getType()->getArrayNumElements(), linebreak);
[4984]99    re_compiler.compileUnicodeNames(re_ast);
[5413]100    re_compiler.finalizeMatchResult(re_compiler.compile(re_ast));
[4984]101}
[5202]102
[5033]103}
Note: See TracBrowser for help on using the repository browser.