Changeset 5772 for icGREP


Ignore:
Timestamp:
Dec 9, 2017, 5:14:40 PM (16 months ago)
Author:
cameron
Message:

resolveGraphemeMode

Location:
icGREP/icgrep-devel/icgrep
Files:
2 added
5 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r5770 r5772  
    9898# RegExpADT is the core library for representing, parsing and printing regular expressions
    9999add_library(RegExpADT re/re_rep.cpp re/re_diff.cpp re/re_intersect.cpp re/re_range.cpp re/re_assertion.cpp re/printer_re.cpp re/re_parser_pcre.cpp re/re_parser_ere.cpp re/re_parser_bre.cpp re/re_parser_prosite.cpp re/parse_fixed_strings.cpp)
    100 add_library(RegExpCompiler re/casing.cpp re/to_utf8.cpp re/re_parser.cpp re/re_memoizer.cpp re/re_nullable.cpp re/re_simplifier.cpp re/re_star_normal.cpp re/re_minimizer.cpp re/re_local.cpp re/re_compiler.cpp re/re_analysis.cpp re/re_toolchain.cpp re/re_name_resolve.cpp re/re_name_gather.cpp re/re_collect_unicodesets.cpp re/re_multiplex.cpp re/re_utility.cpp ${GREP_CORE_SRC})
     100add_library(RegExpCompiler re/casing.cpp re/to_utf8.cpp re/re_parser.cpp re/re_memoizer.cpp re/re_nullable.cpp re/re_simplifier.cpp re/re_star_normal.cpp re/re_minimizer.cpp re/re_local.cpp re/re_compiler.cpp re/re_analysis.cpp re/re_toolchain.cpp re/re_name_resolve.cpp re/re_name_gather.cpp re/re_collect_unicodesets.cpp re/re_multiplex.cpp re/re_utility.cpp re/grapheme_clusters.cpp ${GREP_CORE_SRC})
    101101add_library(UCDlib UCD/CaseFolding.cpp utf8_encoder.cpp utf16_encoder.cpp UCD/ucd_compiler.cpp UCD/PropertyObjects.cpp UCD/resolve_properties.cpp)
    102102
  • icGREP/icgrep-devel/icgrep/grep_engine.cpp

    r5771 r5772  
    2929#include <re/re_collect_unicodesets.h>
    3030#include <re/re_multiplex.h>
     31#include <re/grapheme_clusters.h>
    3132#include <toolchain/toolchain.h>
    3233#include <toolchain/cpudriver.h>
     
    115116    for (unsigned i = 0; i < n; i++) {
    116117        REs[i] = resolveCaseInsensitiveMode(REs[i], grep::IgnoreCaseFlag);
     118        REs[i] = resolveGraphemeMode(REs[i], false /* not in grapheme mode at top level*/);
    117119        REs[i] = re::resolveNames(REs[i]);
    118120        const auto UnicodeSets = re::collectUnicodeSets(REs[i]);
  • icGREP/icgrep-devel/icgrep/re/re_any.h

    r5747 r5772  
    3030
    3131inline RE * makeAny() {
    32     return makeName(".", Name::Type::UnicodeProperty);
     32    Name * dot = makeName(".", Name::Type::UnicodeProperty);
     33    dot->setDefinition(makeCC(0, 0x10FFFF));
     34    return dot;
    3335}
    3436
  • icGREP/icgrep-devel/icgrep/re/re_collect_unicodesets.cpp

    r5748 r5772  
    66#include <re/re_seq.h>
    77#include <re/re_rep.h>
     8#include <re/re_range.h>
    89#include <re/re_diff.h>
    910#include <re/re_intersect.h>
     
    2930            UnicodeSets.push_back(cc);
    3031        } else if (isa<Name>(re)) {
    31             collect(cast<Name>(re)->getDefinition());
     32            auto def = cast<Name>(re)->getDefinition();
     33            if (def != nullptr)
     34                collect(def);
    3235        } else if (isa<Seq>(re)) {
    3336            for (auto item : *cast<Seq>(re)) {
     
    4851            collect(cast<Intersect>(re)->getLH());
    4952            collect(cast<Intersect>(re)->getRH());
    50         } else if (isa<Any>(re)) {
    51             UnicodeSets.push_back(makeCC(0x00, 0x10FFFF));
    5253        }
    5354    }
  • icGREP/icgrep-devel/icgrep/re/re_parser.cpp

    r5770 r5772  
    2525#include <re/re_assertion.h>
    2626#include <re/printer_re.h>
    27 #include <UCD/resolve_properties.h>
    28 #include <UCD/CaseFolding.h>
    2927#include <sstream>
    3028#include <string>
     
    123121        RE * re = parse_next_item();
    124122        if (re == nullptr) {
    125             if (fGraphemeBoundaryPending == true) {
    126                 seq.push_back(makeZeroWidth("GCB"));
    127                 fGraphemeBoundaryPending = false;
    128             }
    129123            break;
    130124        }
     
    184178                mCursor++;
    185179                re = parse_charset();
    186                 if ((fModeFlagSet & ModeFlagType::GRAPHEME_CLUSTER_MODE) != 0) {
    187                     re = makeSeq({re, makeZeroWidth("GCB")});
    188                 }
    189180                break;
    190181            case '.': // the 'any' metacharacter
     
    196187            default:
    197188                re = createCC(parse_literal_codepoint());
    198                 if ((fModeFlagSet & ModeFlagType::GRAPHEME_CLUSTER_MODE) != 0) {
    199                     fGraphemeBoundaryPending = true;
    200                 }
    201189        }
    202190    }
     
    283271                                               (fModeFlagSet & CASE_INSENSITIVE_MODE_FLAG) == 0 ? Group::Sense::Off : Group::Sense::On);
    284272                    }
     273                    if ((changed & GRAPHEME_CLUSTER_MODE) != 0) {
     274                        group_expr = makeGroup(Group::Mode::GraphemeMode, group_expr,
     275                                               (fModeFlagSet & GRAPHEME_CLUSTER_MODE) == 0 ? Group::Sense::Off : Group::Sense::On);
     276                    }
    285277                    fModeFlagSet = savedModeFlagSet;
    286278                    break;
     
    288280                    ++mCursor;
    289281                    auto changed = fModeFlagSet ^ savedModeFlagSet;
    290                     if ((changed & CASE_INSENSITIVE_MODE_FLAG) != 0) {
     282                    if ((changed & (CASE_INSENSITIVE_MODE_FLAG|GRAPHEME_CLUSTER_MODE)) != 0) {
    291283                        group_expr = parse_seq();
    292                         return makeGroup(Group::Mode::CaseInsensitiveMode, group_expr,
    293                                                (fModeFlagSet & CASE_INSENSITIVE_MODE_FLAG) == 0 ? Group::Sense::Off : Group::Sense::On);
     284                        if ((changed & CASE_INSENSITIVE_MODE_FLAG) != 0) {
     285                            group_expr = makeGroup(Group::Mode::CaseInsensitiveMode, group_expr,
     286                                                   (fModeFlagSet & CASE_INSENSITIVE_MODE_FLAG) == 0 ? Group::Sense::Off : Group::Sense::On);
     287                        }
     288                        if ((changed & GRAPHEME_CLUSTER_MODE) != 0) {
     289                            group_expr = makeGroup(Group::Mode::GraphemeMode, group_expr,
     290                                                   (fModeFlagSet & GRAPHEME_CLUSTER_MODE) == 0 ? Group::Sense::Off : Group::Sense::On);
     291                        }
     292                        return group_expr;
    294293                    }
    295294                    else return parse_next_item();
Note: See TracChangeset for help on using the changeset viewer.