Changeset 5784


Ignore:
Timestamp:
Dec 16, 2017, 9:06:44 AM (9 months ago)
Author:
cameron
Message:

Restructuring step/tidy-up for re_passes

Location:
icGREP/icgrep-devel/icgrep
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5782 r5784  
    120120    const auto n = REs.size();
    121121    std::vector<std::vector<re::CC *>> charclasses(n);
    122     for (unsigned i = 0; i < n; i++) {
    123         REs[i] = resolveCaseInsensitiveMode(REs[i], grep::IgnoreCaseFlag);
    124         REs[i] = resolveGraphemeMode(REs[i], false /* not in grapheme mode at top level*/);
    125         REs[i] = re::resolveNames(REs[i]);
    126         REs[i] = exclude_CC(REs[i], re::makeCC(re::makeCC(0x0A, 0x0D), re::makeCC(re::makeCC(0x85), re::makeCC(0x2028, 0x2029))));
    127 
    128         const auto UnicodeSets = re::collectUnicodeSets(REs[i]);
    129         std::vector<std::vector<unsigned>> exclusiveSetIDs;
    130         doMultiplexCCs(UnicodeSets, exclusiveSetIDs, charclasses[i]);
    131         REs[i] = multiplex(REs[i], UnicodeSets, exclusiveSetIDs);
    132         REs[i] = regular_expression_passes(REs[i]);
    133   }
    134 
    135122    std::vector<StreamSetBuffer *> MatchResultsBufs(n);
    136123
    137124    for(unsigned i = 0; i < n; ++i){
     125        std::tie<re::RE*, std::vector<re::CC *>>(REs[i], charclasses[i]) = multiplexing_passes(REs[i]);
    138126        const auto numOfCharacterClasses = charclasses[i].size();
    139127        StreamSetBuffer * CharClasses = mGrepDriver->addBuffer<CircularBuffer>(idb, idb->getStreamSetTy(numOfCharacterClasses), segmentSize * bufferSegments);
  • icGREP/icgrep-devel/icgrep/re/casing.cpp

    r5782 r5784  
    2525    if (isa<CC>(re)) {
    2626        if (inCaseInsensitiveMode) {
    27             return makeCC(std::move(caseInsensitize(*cast<CC>(re))));
     27            return makeCC(caseInsensitize(*cast<CC>(re)));
    2828        }
    2929        return re;
  • icGREP/icgrep-devel/icgrep/re/re_alt.h

    r5782 r5784  
    7575        newAlt->push_back(unionCC);
    7676    }
     77    if (newAlt->size() == 1) return newAlt->front();
    7778    return newAlt;
    7879}
  • icGREP/icgrep-devel/icgrep/re/re_multiplex.cpp

    r5781 r5784  
    2323namespace re {
    2424 
    25 static inline CC * extractCC(RE * re) {
    26     if (isa<CC>(re)) {
    27         return cast<CC>(re);
    28     } else if (isa<Name>(re)) {
    29         return extractCC(cast<Name>(re)->getDefinition());
    30     }
    31     return nullptr;
    32 }
    33 
    3425RE * multiplex(RE * const re,
    3526               const std::vector<const CC *> & UnicodeSets,
     
    6354                    }
    6455                } else {
    65                     throw std::runtime_error("All non-unicode-property Name objects should have been defined prior to Unicode property resolution.");
     56                    UndefinedNameError(name);
    6657                }
    6758                return memoizer.memoize(name);
     
    7465            }
    7566        } else if (Alt * alt = dyn_cast<Alt>(re)) {
    76             CC * unionCC = nullptr;
    77             std::stringstream name;
    78             for (auto ai = alt->begin(); ai != alt->end(); ) {
    79                 RE * re = multiplex(*ai);
    80                 if (CC * cc = extractCC(re)) {
    81                     if (unionCC == nullptr) {
    82                         unionCC = cc;
    83                     } else {
    84                         unionCC = makeCC(unionCC, cc);
    85                         name << '+';
    86                     }
    87                     if (LLVM_LIKELY(isa<Name>(re))) {
    88                         Name * n = cast<Name>(re);
    89                         if (n->hasNamespace()) {
    90                             name << n->getNamespace() << ':';
    91                         }
    92                         name << n->getName();
    93                     } else if (isa<CC>(re)) {
    94                         name << cast<CC>(re)->canonicalName(CC_type::UnicodeClass);
    95                     }
    96                     ai = alt->erase(ai);
    97                 } else {
    98                     *ai++ = re;
    99                 }
    100             }
    101             if (unionCC) {
    102                 alt->push_back(multiplex(makeName(name.str(), unionCC)));
    103             }
    104             if (alt->size() == 1) {
    105                 return alt->front();
     67            for (auto ai = alt->begin(); ai != alt->end(); ++ai) {
     68                *ai = multiplex(*ai);
    10669            }
    10770        } else if (Rep * rep = dyn_cast<Rep>(re)) {
  • icGREP/icgrep-devel/icgrep/re/re_toolchain.cpp

    r5780 r5784  
    66
    77#include <toolchain/toolchain.h>
     8#include <grep_interface.h>
    89#include <re/re_toolchain.h>
    910#include <cc/cc_compiler.h>            // for CC_Compiler
     
    1718#include <re/printer_re.h>
    1819#include <re/re_analysis.h>
     20#include <re/re_cc.h>
     21#include <re/casing.h>
     22#include <re/exclude_CC.h>
     23#include <re/re_name_resolve.h>
     24#include <re/re_collect_unicodesets.h>
     25#include <re/re_multiplex.h>
     26#include <re/grapheme_clusters.h>
     27#include <cc/multiplex_CCs.h>
    1928#include <llvm/Support/raw_ostream.h>
    2029
     
    5665
    5766
     67std::pair<RE *, std::vector<re::CC *>> multiplexing_passes(RE * r) {
     68    std::vector<re::CC *> charclasses;
     69    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowREs)) {
     70        errs() << "Parser:\n" << Printer_RE::PrintRE(r) << '\n';
     71    }
     72    //Optimization passes to simplify the AST.
     73    r = RE_Nullable::removeNullablePrefix(r);
     74    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
     75        errs() << "RemoveNullablePrefix:\n" << Printer_RE::PrintRE(r) << '\n';
     76    }
     77    r = RE_Nullable::removeNullableSuffix(r);
     78    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
     79        errs() << "RemoveNullableSuffix:\n" << Printer_RE::PrintRE(r) << '\n';
     80    }
     81    r = RE_Nullable::removeNullableAssertion(r);
     82    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
     83        errs() << "RemoveNullableAssertion:\n" << Printer_RE::PrintRE(r) << '\n';
     84    }
     85    r = RE_Star_Normal::star_normal(r);
     86   
     87    r = RE_Simplifier::simplify(r);
     88   
     89    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowSimplifiedREs)) {
     90        //Print to the terminal the AST that was generated by the simplifier.
     91        errs() << "Simplifier:\n" << Printer_RE::PrintRE(r) << '\n';
     92    }
     93    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowSimplifiedREs)) {
     94        //Print to the terminal the AST that was transformed to the star normal form.
     95        errs() << "Star_Normal_Form:\n" << Printer_RE::PrintRE(r) << '\n';
     96    }
     97    r = resolveCaseInsensitiveMode(r, grep::IgnoreCaseFlag);
     98    if (PrintOptions.isSet(ShowAllREs)) {
     99        errs() << "resolveCaseInsensitiveMode:\n" << Printer_RE::PrintRE(r) << '\n';
     100    }
     101    r = resolveGraphemeMode(r, false /* not in grapheme mode at top level*/);
     102    if (PrintOptions.isSet(ShowAllREs)) {
     103        errs() << "resolveGraphemeMode:\n" << Printer_RE::PrintRE(r) << '\n';
     104    }
     105    r = re::resolveNames(r);
     106    if (PrintOptions.isSet(ShowAllREs)) {
     107        errs() << "resolveNames:\n" << Printer_RE::PrintRE(r) << '\n';
     108    }
     109    r = exclude_CC(r, re::makeCC(re::makeCC(0x0A, 0x0D), re::makeCC(re::makeCC(0x85), re::makeCC(0x2028, 0x2029))));
     110    if (PrintOptions.isSet(ShowAllREs)) {
     111        errs() << "exclude_CC:\n" << Printer_RE::PrintRE(r) << '\n';
     112    }
     113    const auto UnicodeSets = re::collectUnicodeSets(r);
     114    std::vector<std::vector<unsigned>> exclusiveSetIDs;
     115    doMultiplexCCs(UnicodeSets, exclusiveSetIDs, charclasses);
     116    r = multiplex(r, UnicodeSets, exclusiveSetIDs);
     117    if (PrintOptions.isSet(ShowAllREs)) {
     118        errs() << "multiplex:\n" << Printer_RE::PrintRE(r) << '\n';
     119    }
     120    return std::tie<RE *, std::vector<re::CC *>>(r, charclasses);
     121}
    58122
    59 RE * regular_expression_passes(RE * re)  {
     123RE * regular_expression_passes(RE * r)  {
    60124
    61125    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowREs)) {
    62         errs() << "Parser:\n" << Printer_RE::PrintRE(re) << '\n';
     126        errs() << "Parser:\n" << Printer_RE::PrintRE(r) << '\n';
    63127    }
    64128
    65129    //Optimization passes to simplify the AST.
    66     re = RE_Nullable::removeNullablePrefix(re);
     130    r = RE_Nullable::removeNullablePrefix(r);
    67131    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
    68         errs() << "RemoveNullablePrefix:\n" << Printer_RE::PrintRE(re) << '\n';
     132        errs() << "RemoveNullablePrefix:\n" << Printer_RE::PrintRE(r) << '\n';
    69133    }
    70     re = RE_Nullable::removeNullableSuffix(re);
     134    r = RE_Nullable::removeNullableSuffix(r);
    71135    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
    72         errs() << "RemoveNullableSuffix:\n" << Printer_RE::PrintRE(re) << '\n';
     136        errs() << "RemoveNullableSuffix:\n" << Printer_RE::PrintRE(r) << '\n';
    73137    }
    74     re = RE_Nullable::removeNullableAssertion(re);
     138    r = RE_Nullable::removeNullableAssertion(r);
    75139    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
    76         errs() << "RemoveNullableAssertion:\n" << Printer_RE::PrintRE(re) << '\n';
     140        errs() << "RemoveNullableAssertion:\n" << Printer_RE::PrintRE(r) << '\n';
    77141    }
    78     //re = RE_Nullable::removeNullableAfterAssertion(re);
     142    //r = RE_Nullable::removeNullableAfterAssertion(r);
    79143    //if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowStrippedREs)) {
    80     //    errs() << "RemoveNullableAfterAssertion\n" << Printer_RE::PrintRE(re) << '\n';
     144    //    errs() << "RemoveNullableAfterAssertion\n" << Printer_RE::PrintRE(r) << '\n';
    81145    //}
    82146
    83     re = RE_Simplifier::simplify(re);
     147    r = RE_Simplifier::simplify(r);
    84148
    85149    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowSimplifiedREs)) {
    86150        //Print to the terminal the AST that was generated by the simplifier.
    87         errs() << "Simplifier:\n" << Printer_RE::PrintRE(re) << '\n';
     151        errs() << "Simplifier:\n" << Printer_RE::PrintRE(r) << '\n';
    88152    }
    89153   
    90 //    re = RE_Minimizer::minimize(re);
     154//    r = RE_Minimizer::minimize(r);
    91155
    92156//    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowSimplifiedREs)) {
    93157//        //Print to the terminal the AST that was generated by the simplifier.
    94 //        errs() << "Minimizer:\n" << Printer_RE::PrintRE(re) << '\n';
     158//        errs() << "Minimizer:\n" << Printer_RE::PrintRE(r) << '\n';
    95159//    }
    96160
    97     re = RE_Star_Normal::star_normal(re);
     161    r = RE_Star_Normal::star_normal(r);
    98162
    99163    if (PrintOptions.isSet(ShowAllREs) || PrintOptions.isSet(ShowSimplifiedREs)) {
    100164        //Print to the terminal the AST that was transformed to the star normal form.
    101         errs() << "Star_Normal_Form:\n" << Printer_RE::PrintRE(re) << '\n';
     165        errs() << "Star_Normal_Form:\n" << Printer_RE::PrintRE(r) << '\n';
    102166    }
    103167
    104     return re;
     168    return r;
    105169}
    106170   
  • icGREP/icgrep-devel/icgrep/re/re_toolchain.h

    r5732 r5784  
    1010namespace llvm { namespace cl { class OptionCategory; } }
    1111namespace pablo { class PabloKernel; class PabloAST; }
    12 namespace re { class RE; }
     12namespace re { class RE; class CC;}
     13#include <vector>
    1314
    1415namespace re {
     
    3132RE * regular_expression_passes(RE * re_ast);
    3233
     34std::pair<RE *, std::vector<re::CC *>> multiplexing_passes(RE * r);
     35
    3336pablo::PabloAST * re2pablo_compiler(pablo::PabloKernel * kernel, RE * re_ast);
    3437   
Note: See TracChangeset for help on using the changeset viewer.