Changeset 5648


Ignore:
Timestamp:
Sep 24, 2017, 3:08:06 PM (4 weeks ago)
Author:
cameron
Message:

Regular expressions for property values: allow aliases and do not canonicalize value names (Unicode TR 18, RL2.6 "Wildcards in Property Values")

Location:
icGREP/icgrep-devel
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/QA/greptest.xml

    r5563 r5648  
    838 838 <grepcase regexp="\b{script=hira}a" datafile="hira_border" grepcount="2"/>
    839 839 <grepcase regexp="\b{script=hira}" datafile="hira_border" grepcount="3"/>
    840 <grepcase regexp="\p{script=/hir./}" datafile="hira_border" grepcount="3"/>
    841 <grepcase regexp="\p{script=/.*hir.*/}" datafile="hira_border" grepcount="3"/>
    842 <grepcase regexp="\p{script=/hir.gana/}" datafile="hiragana_and_katakana" grepcount="3"/>
    843 <grepcase regexp="\p{script=/kat.kana/}" datafile="hiragana_and_katakana" grepcount="2"/>
    844 <grepcase regexp="\p{script=/(kata|Hira).ana/}" datafile="hiragana_and_katakana" grepcount="5"/>
    845 
     840<grepcase regexp="\p{script=/Hir./}" datafile="hira_border" grepcount="3"/>
     841<grepcase regexp="\p{script=/.*Hir.*/}" datafile="hira_border" grepcount="3"/>
     842<grepcase regexp="\p{script=/Hir.gana/}" datafile="hiragana_and_katakana" grepcount="3"/>
     843<grepcase regexp="\p{script=/Kat.kana/}" datafile="hiragana_and_katakana" grepcount="2"/>
     844<grepcase regexp="\p{script=/(Kata|Hira).ana/}" datafile="hiragana_and_katakana" grepcount="5"/>
     845<grepcase regexp="\p{script=/(kata|Hira).ana/}" datafile="hiragana_and_katakana" grepcount="3"/>
    846 846 <grepcase regexp="(?:\p{greek}\p{greek}\p{greek})" datafile="upper_lower_greek" grepcount="3"/>
    847 847 <grepcase regexp="(?\p{upper}:\p{greek}\p{greek}\p{greek})" datafile="upper_lower_greek" grepcount="1"/>
  • icGREP/icgrep-devel/icgrep/UCD/PropertyObjects.cpp

    r5236 r5648  
    83 83        std::stringstream buffer;
    84 84        for (unsigned i = 0; i != property_value_full_names.size(); i++) {
    85             buffer << canonicalize_value_name(property_value_full_names[i]) + "\n";
     85            buffer << property_value_full_names[i] + "\n";
    86 86        }
    87 87        for (unsigned i = 0; i != property_value_enum_names.size(); i++) {
    88             buffer << canonicalize_value_name(property_value_enum_names[i]) + "\n";
     88            if (property_value_enum_names[i] == property_value_full_names[i]) continue;
     89            buffer << property_value_enum_names[i] + "\n";
     90        }
     91        for (auto & a : property_value_aliases) {
     92            buffer << a.first + "\n";
    89 93        }
    90 94        mPropertyValueGrepString = buffer.str();
  • icGREP/icgrep-devel/icgrep/re/re_parser.cpp

    r5630 r5648  
    676 676            }
    677 677            ++mCursor;
    678             return parseRegexPropertyValue(canonicalize(start, prop_end), canonicalize(val_start, current));
     678            return parseRegexPropertyValue(canonicalize(start, prop_end), std::string(val_start, current));
    679 679        }
    680 680    }
Note: See TracChangeset for help on using the changeset viewer.