Changeset 6160


Ignore:
Timestamp:
Sep 15, 2018, 1:28:02 PM (8 days ago)
Author:
cameron
Message:

Generic RE_Transformer

Location:
icGREP/icgrep-devel/icgrep/re
Files:
14 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/re/re_analysis.cpp

    r6154 r6160  
    501501    if (const CC * cc = dyn_cast<CC>(re)) {
    502502        if (cc->getAlphabet() == &cc::Unicode) {
    503             gatherTests(toUTF8(re));
     503            gatherTests(UTF8_Transformer().transform(re));
    504504        } else {
    505505            for (const auto range : *cc) {
  • icGREP/icgrep-devel/icgrep/re/re_analysis.h

    r6153 r6160  
    4343bool DefiniteLengthBackReferencesOnly(const RE * re);
    4444   
    45 void UndefinedNameError (const Name * n);
    4645}
    4746
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r6133 r6160  
    129129        return makeMarker(FinalMatchUnit, pb.createAnd(nextPos, pb.createInFile(mCCCompiler.compileCC(cc, pb))));
    130130    } else if (a == &cc::Unicode) {
    131         MarkerType m = compile(toUTF8(cc), pb);
     131        MarkerType m = compile(UTF8_Transformer().transform(cc), pb);
    132132        if (isByteLength(cc)) {
    133133            if (marker.pos == FinalMatchUnit) {
  • icGREP/icgrep-devel/icgrep/re/re_local.cpp

    r5812 r6160  
    1010#include <re/re_analysis.h>
    1111#include <re/re_nullable.h>
     12#include <re/re_utility.h>
    1213#include <boost/container/flat_map.hpp>
    1314#include <boost/range/adaptor/reversed.hpp>
  • icGREP/icgrep-devel/icgrep/re/re_multiplex.cpp

    r5897 r6160  
    142142};
    143143
     144    RE * CC_multiplexer::transformCC(CC * cc) {
     145        if (cc->getAlphabet() == mMultiplexedAlphabet->getSourceAlphabet()) {
     146            return mMultiplexedAlphabet->transformCC(cc);
     147        }
     148        return cc;
     149    }
     150   
     151    RE * CC_multiplexer::transformName(Name * name) {
     152        if (LLVM_LIKELY(name->getDefinition() != nullptr)) {
     153            RE * xfrm = transform(name->getDefinition());
     154            if (name->getType() == Name::Type::ZeroWidth)
     155                return makeZeroWidth(name->getName(), xfrm);
     156            else if (name->getType() == Name::Type::Capture)
     157                return makeCapture(name->getName(), xfrm);
     158            else
     159                return makeName(name->getName(), xfrm);
     160        } else {
     161            UndefinedNameError(name);
     162        }
     163    }
     164   
    144165
    145166}
  • icGREP/icgrep-devel/icgrep/re/re_multiplex.h

    r5801 r6160  
    44#include <UCD/ucd_compiler.hpp>
    55#include <cc/multiplex_CCs.h>
     6#include <re/re_utility.h>
    67
    78namespace re {
     
    1718    RE * transformCCs(cc::MultiplexedAlphabet * mpx, RE * r);
    1819
     20   
     21    class CC_multiplexer : public RE_Transformer {
     22    public:
     23        CC_multiplexer(cc::MultiplexedAlphabet * mpx) : RE_Transformer(), mMultiplexedAlphabet(mpx) {}
     24        RE * transformCC(CC *) override;
     25        RE * transformName(Name *) override;
     26    private:
     27        cc::MultiplexedAlphabet * mMultiplexedAlphabet;
     28    };
    1929
    2030}
  • icGREP/icgrep-devel/icgrep/re/re_name_resolve.cpp

    r5911 r6160  
    1515#include <re/re_end.h>
    1616#include <re/re_any.h>
     17#include <re/re_utility.h>
    1718#include <re/re_memoizer.hpp>
    1819#include <UCD/resolve_properties.h>
  • icGREP/icgrep-devel/icgrep/re/re_star_normal.cpp

    r5866 r6160  
    1818namespace re {
    1919
    20 RE * RE_Star_Normal::star_rule(RE * re) {
     20RE * star_rule(RE * re) {
    2121    if (Seq * seq = dyn_cast<Seq>(re)) {
    2222        if (RE_Nullable::isNullable(re)) {
     
    3838}
    3939
    40 RE * RE_Star_Normal::star_normal(RE * re) {
    41     if (Alt * alt = dyn_cast<Alt>(re)) {
    42         std::vector<RE *> list;
    43         list.reserve(alt->size());
    44         for (RE * re : *alt) {
    45             list.push_back(star_normal(re));
    46         }
    47         re = makeAlt(list.begin(), list.end());
    48     } else if (Seq * seq = dyn_cast<Seq>(re)) {
    49         std::vector<RE *> list;
    50         list.reserve(seq->size());
    51         for (RE * re : *seq) {
    52             list.push_back(star_normal(re));
    53         }
    54         re = makeSeq(list.begin(), list.end());
    55     } else if (Assertion * a = dyn_cast<Assertion>(re)) {
    56         re = makeAssertion(star_normal(a->getAsserted()), a->getKind(), a->getSense());
    57     } else if (Rep * rep = dyn_cast<Rep>(re)) {
    58         RE * expr = star_normal(rep->getRE());
    59         if (rep->getLB() == 0 && rep->getUB() == Rep::UNBOUNDED_REP) {
    60             re = makeRep(star_rule(expr), 0, rep->getUB());
    61         } else {
    62             re = makeRep(expr, rep->getLB(), rep->getUB());
    63         }
    64     } else if (Diff * diff = dyn_cast<Diff>(re)) {
    65         re = makeDiff(star_normal(diff->getLH()), star_normal(diff->getRH()));
    66     } else if (Intersect * e = dyn_cast<Intersect>(re)) {
    67         re = makeIntersect(star_normal(e->getLH()), star_normal(e->getRH()));
    68     } else if (Name * name = dyn_cast<Name>(re)) {
    69         if (name->getDefinition()) {
    70             name->setDefinition(star_normal(name->getDefinition()));
    71         }
     40RE * RE_Star_Normal::transformRep(Rep * rep) {
     41    RE * e0 = rep->getRE();
     42    RE * e = transform(e0);
     43    if (rep->getLB() == 0 && rep->getUB() == Rep::UNBOUNDED_REP) {
     44        e = star_rule(e);
    7245    }
    73     return re;
     46    if (e == e0) return rep;
     47    return makeRep(e, rep->getLB(), rep->getUB());
    7448}
    7549
  • icGREP/icgrep-devel/icgrep/re/re_star_normal.h

    r5866 r6160  
    22#define RE_STAR_NORMAL_H
    33
     4#include <re/re_utility.h>
     5
    46namespace re {
    57
    6 class RE;
     8class RE; class Rep;
    79
    810//A regular expression E is in star normal form if, for each starred
     
    1517//
    1618
     19// Usage:  RE_Star_Normal().transform(regexp);
    1720
    18 class RE_Star_Normal {
     21class RE_Star_Normal : public RE_Transformer {
     22   
    1923public:
    20         static RE * star_normal(RE * re);
    21 private:
    22     static RE * star_rule(RE * re);
     24    RE * transformRep(Rep * rep) override;
    2325};
    2426
  • icGREP/icgrep-devel/icgrep/re/re_toolchain.cpp

    r5951 r6160  
    101101        errs() << "RemoveNullableSuffix:\n" << Printer_RE::PrintRE(r) << '\n';
    102102    }
    103     r = RE_Star_Normal::star_normal(r);
     103    r = RE_Star_Normal().transform(r);
    104104    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowSimplifiedREs)) {
    105105        //Print to the terminal the AST that was transformed to the star normal form.
  • icGREP/icgrep-devel/icgrep/re/re_utility.cpp

    r5998 r6160  
    88#include <re/re_any.h>
    99#include <re/re_name.h>
     10#include <re/re_cc.h>
     11#include <re/re_start.h>
     12#include <re/re_end.h>
    1013#include <re/re_alt.h>
    1114#include <re/re_seq.h>
    1215#include <re/re_diff.h>
     16#include <re/re_intersect.h>
     17#include <re/re_group.h>
     18#include <re/re_range.h>
    1319#include <re/re_assertion.h>
     20#include <re/printer_re.h>
     21#include <llvm/Support/Casting.h>
     22#include <llvm/Support/raw_ostream.h>
     23#include <llvm/Support/ErrorHandling.h>
    1424
    1525namespace re {
     
    6272}
    6373   
    64 }
     74   
     75RE * RE_Transformer::transform(RE * re) {
     76    if (llvm::isa<CC>(re)) return transformCC(llvm::cast<CC>(re));
     77    else if (llvm::isa<Start>(re)) return transformStart(llvm::cast<Start>(re));
     78    else if (llvm::isa<End>(re)) return transformEnd(llvm::cast<End>(re));
     79    else if (llvm::isa<Name>(re)) return transformName(llvm::cast<Name>(re));
     80    else if (llvm::isa<Seq>(re)) return transformSeq(llvm::cast<Seq>(re));
     81    else if (llvm::isa<Alt>(re)) return transformAlt(llvm::cast<Alt>(re));
     82    else if (llvm::isa<Rep>(re)) return transformRep(llvm::cast<Rep>(re));
     83    else if (llvm::isa<Intersect>(re)) return transformIntersect(llvm::cast<Intersect>(re));
     84    else if (llvm::isa<Diff>(re)) return transformDiff(llvm::cast<Diff>(re));
     85    else if (llvm::isa<Range>(re)) return transformRange(llvm::cast<Range>(re));
     86    else if (llvm::isa<Group>(re)) return transformGroup(llvm::cast<Group>(re));
     87    else if (llvm::isa<Assertion>(re)) return transformAssertion(llvm::cast<Assertion>(re));
     88    else {
     89        llvm_unreachable("Unknown RE type");
     90        return nullptr;
     91    }
     92}
     93   
     94RE * RE_Transformer::transformName(Name * nm) {
     95    if (mNameTransform == NameTransformationMode::None) return nm;
     96    RE * d = nm->getDefinition();
     97    if (d) return transform(d);
     98    UndefinedNameError(nm);
     99    return nullptr;
     100}
     101 
     102RE * RE_Transformer::transformCC(CC * cc) {
     103    return cc;
     104}
     105
     106RE * RE_Transformer::transformStart(Start * s) {
     107    return s;
     108}
     109
     110RE * RE_Transformer::transformEnd(End * e) {
     111    return e;
     112}
     113
     114RE * RE_Transformer::transformSeq(Seq * seq) {
     115    std::vector<RE *> elems;
     116    elems.reserve(seq->size());
     117    bool any_changed = false;
     118    for (RE * e : *seq) {
     119        RE * e1 = transform(e);
     120        if (e1 != e) any_changed = true;
     121        elems.push_back(e1);
     122    }
     123    if (!any_changed) return seq;
     124    return makeSeq(elems.begin(), elems.end());
     125}
     126
     127RE * RE_Transformer::transformAlt(Alt * alt) {
     128    std::vector<RE *> elems;
     129    elems.reserve(alt->size());
     130    bool any_changed = false;
     131    for (RE * e : *alt) {
     132        RE * e1 = transform(e);
     133        if (e1 != e) any_changed = true;
     134        elems.push_back(e1);
     135    }
     136    if (!any_changed) return alt;
     137    return makeAlt(elems.begin(), elems.end());
     138}
     139   
     140RE * RE_Transformer::transformRep(Rep * r) {
     141    RE * x0 = r->getRE();
     142    RE * x = transform(x0);
     143    if (x == x0) {
     144        return r;
     145    } else {
     146        return makeRep(x, r->getLB(), r->getUB());
     147    }
     148}
     149
     150RE * RE_Transformer::transformIntersect(Intersect * ix) {
     151    RE * x0 = ix->getLH();
     152    RE * y0 = ix->getRH();
     153    RE * x = transform(x0);
     154    RE * y = transform(y0);
     155    if ((x == x0) && (y == y0)) {
     156        return ix;
     157    } else {
     158        return makeIntersect(x, y);
     159    }
     160}
     161
     162RE * RE_Transformer::transformDiff(Diff * d) {
     163    RE * x0 = d->getLH();
     164    RE * y0 = d->getRH();
     165    RE * x = transform(x0);
     166    RE * y = transform(y0);
     167    if ((x == x0) && (y == y0)) {
     168        return d;
     169    } else {
     170        return makeDiff(x, y);
     171    }
     172}
     173
     174RE * RE_Transformer::transformRange(Range * rg) {
     175    RE * x0 = rg->getLo();
     176    RE * y0 = rg->getHi();
     177    RE * x = transform(x0);
     178    RE * y = transform(y0);
     179    if ((x == x0) && (y == y0)) {
     180        return rg;
     181    } else {
     182        return makeRange(x, y);
     183    }
     184}
     185
     186RE * RE_Transformer::transformGroup(Group * g) {
     187    RE * x0 = g->getRE();
     188    RE * x = transform(x0);
     189    if (x == x0) {
     190        return g;
     191    } else {
     192        return makeGroup(g->getMode(), x, g->getSense());
     193    }
     194}
     195
     196RE * RE_Transformer::transformAssertion(Assertion * a) {
     197    RE * x0 = a->getAsserted();
     198    RE * x = transform(x0);
     199    if (x == x0) {
     200        return a;
     201    } else {
     202        return makeAssertion(x, a->getKind(), a->getSense());
     203    }
     204}
     205
     206}
  • icGREP/icgrep-devel/icgrep/re/re_utility.h

    r5998 r6160  
    99
    1010namespace re { class RE; }
    11 namespace re { class Name; }
     11namespace re { class Name; class Start; class End; class CC; class Seq; class Alt;
     12               class Rep; class Intersect; class Diff; class Range; class Group;
     13               class Assertion;
     14}
    1215
    1316namespace re {
     
    2427RE * makeUnicodeBreak();
    2528
     29void UndefinedNameError (const Name * n);
     30
     31enum class NameTransformationMode {None, TransformDefinition};
     32
     33class RE_Transformer {
     34public:
     35    RE_Transformer(NameTransformationMode m = NameTransformationMode::None) : mNameTransform(m) {}
     36    RE * transform(RE * r);
     37    virtual RE * transformName(Name * n);
     38    virtual RE * transformStart(Start * s);
     39    virtual RE * transformEnd(End * e);
     40    virtual RE * transformCC(CC * cc);
     41    virtual RE * transformSeq(Seq * s);
     42    virtual RE * transformAlt(Alt * a);
     43    virtual RE * transformRep(Rep * rep);
     44    virtual RE * transformIntersect(Intersect * e);
     45    virtual RE * transformDiff(Diff * d);
     46    virtual RE * transformRange(Range * rg);
     47    virtual RE * transformGroup(Group * g);
     48    virtual RE * transformAssertion(Assertion * a);
     49protected:
     50    NameTransformationMode mNameTransform;
     51};
     52
    2653}
    27 
    2854#endif // RE_UTILITY_H
  • icGREP/icgrep-devel/icgrep/re/to_utf8.cpp

    r6140 r6160  
    6363}
    6464
    65 RE * toUTF8(RE * r, bool convertName) {
    66     if (const Name*  n = dyn_cast<Name>(r)) {
    67         if (convertName) {
    68             return toUTF8(n->getDefinition(), convertName);
    69         } else {
    70             return r;
    71         }
     65RE * UTF8_Transformer::transformCC(CC * cc) {
     66    if (cc->getAlphabet() != &cc::Unicode) return cc;
     67    std::vector<RE *> alt;
     68    for (const interval_t & i : *cc) {
     69        alt.push_back(rangeToUTF8(lo_codepoint(i), hi_codepoint(i)));
    7270    }
    73     else if (isa<Name>(r) || isa<Start>(r) || isa<End>(r)) {
    74         return r;
    75     } else if (const CC * cc = dyn_cast<CC>(r)) {
    76         if (cc->getAlphabet() != &cc::Unicode) return r;
    77         std::vector<RE *> alt;
    78         for (const interval_t & i : *cc) {
    79             alt.push_back(rangeToUTF8(lo_codepoint(i), hi_codepoint(i)));
    80         }
    81         return makeAlt(alt.begin(), alt.end());
    82     } else if (Alt * alt = dyn_cast<Alt>(r)) {
    83         std::vector<RE *> list;
    84         list.reserve(alt->size());
    85         for (RE * a : *alt) {
    86             list.push_back(toUTF8(a, convertName));
    87         }
    88         return makeAlt(list.begin(), list.end());
    89     } else if (Seq * seq = dyn_cast<Seq>(r)) {
    90         std::vector<RE *> list;
    91         list.reserve(seq->size());
    92         for (RE * s : *seq) {
    93             list.push_back(toUTF8(s, convertName));
    94         }
    95         return makeSeq(list.begin(), list.end());
    96     } else if (Assertion * a = dyn_cast<Assertion>(r)) {
    97         return makeAssertion(toUTF8(a->getAsserted(), convertName), a->getKind(), a->getSense());
    98     } else if (Rep * rep = dyn_cast<Rep>(r)) {
    99         RE * expr = toUTF8(rep->getRE(), convertName);
    100         return makeRep(expr, rep->getLB(), rep->getUB());
    101     } else if (Diff * diff = dyn_cast<Diff>(r)) {
    102         return makeDiff(toUTF8(diff->getLH(), convertName), toUTF8(diff->getRH(), convertName));
    103     } else if (Intersect * e = dyn_cast<Intersect>(r)) {
    104         return makeIntersect(toUTF8(e->getLH(), convertName), toUTF8(e->getRH(), convertName));
    105     }
    106     llvm_unreachable("unexpected RE type given to toUTF8");
    107     return nullptr;
     71    return makeAlt(alt.begin(), alt.end());
    10872}
    10973
  • icGREP/icgrep-devel/icgrep/re/to_utf8.h

    r6140 r6160  
    11/*
    2  *  Copyright (c) 2017 International Characters.
     2 *  Copyright (c) 2018 International Characters.
    33 *  This software is licensed to the public under the Open Software License 3.0.
    44 *  icgrep is a trademark of International Characters.
     
    99
    1010#include <re/re_re.h>
     11#include <re/re_utility.h>
    1112
    1213namespace re {
     14class CC;
    1315
    14 RE * toUTF8(RE * ast, bool convertName = false);
     16class UTF8_Transformer : public RE_Transformer {
     17public:
     18    UTF8_Transformer(NameTransformationMode m = NameTransformationMode::None) : RE_Transformer(m) {}
     19    RE * transformCC(CC * cc) override;
     20};
     21
     22inline RE * toUTF8(RE * r, bool convertName = false) {
     23    return UTF8_Transformer(convertName ? NameTransformationMode::TransformDefinition : NameTransformationMode::None ).transform(r);}
    1524}
    1625#endif // TO_UTF8_H
Note: See TracChangeset for help on using the changeset viewer.