Changeset 5765


Ignore:
Timestamp:
Dec 8, 2017, 8:20:54 PM (17 months ago)
Author:
cameron
Message:

Regular expression group nodes; case-insensitive logic

Location:
icGREP/icgrep-devel/icgrep
Files:
5 edited

Legend:

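Unmodified (both the old and the new line number are shown)
Added (only the new line number is shown)
Removed (only the old line number is shown)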
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r5763 r5765  
     95  95 add_library(PabloADT ${PABLO_SRC})
     96  96 add_library(RegExpADT re/re_re.cpp re/re_cc.cpp re/re_rep.cpp re/re_diff.cpp re/re_intersect.cpp re/re_range.cpp re/re_assertion.cpp re/printer_re.cpp)
     97     add_library(RegExpCompiler re/to_utf8.cpp re/re_parser.cpp re/re_memoizer.cpp re/re_nullable.cpp re/re_simplifier.cpp re/re_star_normal.cpp re/re_minimizer.cpp re/re_local.cpp re/re_compiler.cpp re/re_analysis.cpp re/re_toolchain.cpp re/re_name_resolve.cpp re/re_name_gather.cpp re/re_collect_unicodesets.cpp re/re_multiplex.cpp re/re_parser_pcre.cpp re/re_parser_ere.cpp re/re_parser_bre.cpp re/re_parser_prosite.cpp re/parse_fixed_strings.cpp re/re_utility.cpp ${GREP_CORE_SRC})
         97 add_library(RegExpCompiler re/casing.cpp re/to_utf8.cpp re/re_parser.cpp re/re_memoizer.cpp re/re_nullable.cpp re/re_simplifier.cpp re/re_star_normal.cpp re/re_minimizer.cpp re/re_local.cpp re/re_compiler.cpp re/re_analysis.cpp re/re_toolchain.cpp re/re_name_resolve.cpp re/re_name_gather.cpp re/re_collect_unicodesets.cpp re/re_multiplex.cpp re/re_parser_pcre.cpp re/re_parser_ere.cpp re/re_parser_bre.cpp re/re_parser_prosite.cpp re/parse_fixed_strings.cpp re/re_utility.cpp ${GREP_CORE_SRC})
     98  98 add_library(CCADT cc/cc_compiler.cpp utf8_encoder.cpp utf16_encoder.cpp UCD/CaseFolding.cpp cc/alphabet.cpp cc/multiplex_CCs.cpp)
     99  99 add_library(UCDlib UCD/unicode_set.cpp UCD/ucd_compiler.cpp UCD/PropertyObjects.cpp UCD/resolve_properties.cpp)
  • icGREP/icgrep-devel/icgrep/re/printer_re.cpp

    r5663 r5765  
     17  17 #include <re/re_seq.h>
     18  18 #include <re/re_start.h>
         19 #include <re/re_range.h>
     19  20 #include <re/re_diff.h>
     20  21 #include <re/re_intersect.h>
     21  22 #include <re/re_assertion.h>
         23 #include <re/re_group.h>
     22  24
     23  25 using namespace re;
     
     47  49         for (const auto & i : *re_cc) {
     48  50             retVal += "[";
     49                 retVal += std::to_string(lo_codepoint(i)) + ",";
     50                 retVal += std::to_string(hi_codepoint(i));
         51             retVal += std::to_string(lo_codepoint(i));
         52             if (hi_codepoint(i) != lo_codepoint(i))
         53                 retVal += "-" + std::to_string(hi_codepoint(i));
     51  54             retVal += "]";
     52  55         }
     
     62  65             retVal += "=(" + PrintRE(re_name->getDefinition()) + ")";
     63  66         }
         67     } else if (const Range* rg = dyn_cast<const Range>(re)) {
         68         retVal = "Range (";
         69         retVal += PrintRE(rg->getLo());
         70         retVal += " , ";
         71         retVal += PrintRE(rg->getHi());
         72         retVal += ") ";
     64  73     } else if (const Assertion * a = dyn_cast<const Assertion>(re)) {
     65  74         retVal = (a->getSense() == Assertion::Sense::Positive) ? "" : "Negative";
     
    116 125         }
    117 126         retVal.append("])");
        127     } else if (const Group * g = dyn_cast<const Group>(re)) {
        128         retVal = "Group(";
        129         if (g->getMode() == Group::Mode::GraphemeMode) {
        130             retVal.append((g->getSense() == Group::Sense::On) ? "+g:" : "-g:");
        131         }
        132         else if (g->getMode() == Group::Mode::CaseInsensitiveMode) {
        133             retVal.append((g->getSense() == Group::Sense::On) ? "+i:" : "-i:");
        134         }
        135         retVal.append(PrintRE(g->getRE()));
        136         retVal.append(")");
    118 137     } else if (isa<const Start>(re)) {
    119 138         retVal = "Start";
  • icGREP/icgrep-devel/icgrep/re/re_memoizer.cpp

    r5763 r5765  
      4   4 #include <re/re_seq.h>
      5   5 #include <re/re_rep.h>
          6 #include <re/re_group.h>
      6   7 #include <re/re_range.h>
      7   8 #include <re/re_diff.h>
     
     63  64 }
     64  65
         66 inline bool lessThan(const Group * const lh, const Group * const rh) {
         67     if (lh->getMode() != rh->getMode()) {
         68         return lh->getMode() < rh->getMode();
         69     }
         70     if (lh->getSense() != rh->getSense()) {
         71         return lh->getSense() < rh->getSense();
         72     }
         73     return compare(lh->getRE(), rh->getRE());
         74 }
         75
     65  76 inline bool compare(const RE * const lh, const RE * const rh) {
     66  77     using Type = RE::ClassTypeId;
     
     90 101         case Type::Name:
     91 102             return *cast<Name>(lh) < *cast<Name>(rh);
        103         case Type::Group:
        104             return lessThan(cast<Group>(lh), cast<Group>(rh));
     92 105         case Type::Range:
     93 106             return lessThan(cast<Range>(lh), cast<Range>(rh));
  • icGREP/icgrep-devel/icgrep/re/re_multiplex.cpp

    r5748 r5765  
     39  39
     40  40     std::function<RE *(RE *)> multiplex = [&](RE * const re) -> RE * {
     41             if (Name * name = dyn_cast<Name>(re)) {
         41         if (CC * cc = dyn_cast<CC>(re)) {
         42             const auto index = find(UnicodeSets.begin(), UnicodeSets.end(), cc) - UnicodeSets.begin();
         43             const auto exclusive_IDs = exclusiveSetIDs[index];
         44             CC * CC_union = makeCC();
         45             for (auto i : exclusive_IDs) {
         46                 CC_union = makeCC(CC_union, makeCC(i));
         47             }
         48             return CC_union;
         49         } else if (Name * name = dyn_cast<Name>(re)) {
     42  50             auto f = memoizer.find(name);
     43  51             if (f == memoizer.end()) {
  • icGREP/icgrep-devel/icgrep/re/re_re.h

    r5763 r5765  
     27  27         , Intersect
     28  28         , Name
         29         , Group
     29  30         , Rep
     30  31         , Seq
Note: See TracChangeset for help on using the changeset viewer.