Ignore:
Timestamp:
Nov 7, 2016, 3:54:09 PM (3 years ago)
Author:
xwa163
Message:
  1. Extend the regex syntax to include: (a) RL2.6 of UTS #18: support for regular expressions in property values, e.g. \p{script=/.*hir.*/}; (b) property-expression syntax when parsing boundaries, e.g. \b{greek}; (c) property expressions in non-capturing groups, e.g. (?\p{upper}:\p{greek}\p{script=hira}).
  2. Add related test cases.
Location:
icGREP/icgrep-devel/icgrep/UCD
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/UCD/PropertyObjects.cpp

    r5159 r5206  
    // Base-class fallback: unconditionally throws std::runtime_error; the
    // message embeds value_spec between "Property " and " unsupported.".
    3030int PropertyObject::GetPropertyValueEnumCode(const std::string & value_spec) {
    3131    throw std::runtime_error("Property " + value_spec + " unsupported.");
     32}
     // Base-class fallback: unconditionally throws std::runtime_error and never
     // returns a string. The enumerated, extension and binary property classes
     // in this file supply their own definitions of this member.
     33const std::string& PropertyObject::GetPropertyValueGrepString() {
     34    throw std::runtime_error("Property Value Grep String unsupported.");
    3235}
    3336
     
    7376};
    7477
    75    
     // Returns a reference to the member string property_value_grep_string,
     // filling it on first use. The string holds one entry per line ("\n" after
     // every entry): each full value name, then each enum value name, all passed
     // through canonicalize_value_name.
     78const std::string& EnumeratedPropertyObject::GetPropertyValueGrepString() {
           // Build only while the cached string is empty. If both name lists are
           // empty the string stays empty and these loops are re-entered on every call.
     79    if (!property_value_grep_string.size()) {
     80        for (unsigned i = 0; i != property_value_full_names.size(); i++) {
     81            property_value_grep_string += canonicalize_value_name(property_value_full_names[i]) + "\n";
     82        }
     83        for (unsigned i = 0; i != property_value_enum_names.size(); i++) {
     84            property_value_grep_string += canonicalize_value_name(property_value_enum_names[i]) + "\n";
     85        }
     86    }
     87    return property_value_grep_string;
     88}
     89
    7690int EnumeratedPropertyObject::GetPropertyValueEnumCode(const std::string & value_spec) {
    7791    // The canonical full names are not stored in the precomputed alias map,
     
    123137}
    124138
     // Forwards to the property object stored at property_object_table[base_property]
     // and returns whatever reference that object's GetPropertyValueGrepString yields
     // (or propagates whatever it throws).
     139const std::string& ExtensionPropertyObject::GetPropertyValueGrepString() {
     140    return property_object_table[base_property]->GetPropertyValueGrepString();
     141}
     142
    125143const UnicodeSet & BinaryPropertyObject::GetCodepointSet(const std::string & value_spec) {
    126144    int property_enum_val = Binary_ns::Y;
     
    146164}
    147165
     // Returns a reference to the member string property_value_grep_string,
     // filling it on first use with every key of Binary_ns::aliases_only_map,
     // each followed by "\n". Entries appear in that map's iteration order.
     166const std::string& BinaryPropertyObject::GetPropertyValueGrepString() {
            // Populate only while the cached string is still empty.
     167    if (!property_value_grep_string.size()) {
     168        for (auto iter = Binary_ns::aliases_only_map.begin(), end = Binary_ns::aliases_only_map.end(); iter != end; ++iter) {
     169            property_value_grep_string += iter->first + "\n";
     170        }
     171    }
     172    return property_value_grep_string;
    148173}
     174
     175}
  • icGREP/icgrep-devel/icgrep/UCD/PropertyObjects.h

    r5159 r5206  
    4141    PropertyObject(property_t p, ClassTypeId k) : the_property(p), the_kind(k) {}
    4242    virtual int GetPropertyValueEnumCode(const std::string & value_spec);
     43    virtual const std::string& GetPropertyValueGrepString();
    4344    property_t the_property;
    4445    ClassTypeId the_kind;
     
    8889
    8990    virtual int GetPropertyValueEnumCode(const std::string & value_spec);
     91    virtual const std::string& GetPropertyValueGrepString();
    9092    const UnicodeSet & GetCodepointSet(const std::string & value_spec);
    9193    const UnicodeSet & GetCodepointSet(const int property_enum_val) const;
     
    107109    const std::vector<std::string> & property_value_full_names;  // never changes
    108110    std::unordered_map<std::string, int> & property_value_aliases;
     111    std::string property_value_grep_string;
    109112    bool uninitialized; // full names must be added dynamically.
    110113    const std::vector<const UnicodeSet *> property_value_sets;
     
    136139
    137140    virtual int GetPropertyValueEnumCode(const std::string & value_spec);
     141    virtual const std::string& GetPropertyValueGrepString();
    138142    const UnicodeSet & GetCodepointSet(const std::string & value_spec);
    139143    const UnicodeSet & GetCodepointSet(const int property_enum_val) const;
     
    161165    const UnicodeSet & GetCodepointSet(const std::string & value_spec);
    162166    const UnicodeSet & GetCodepointSet(const int property_enum_val);
     167    virtual const std::string& GetPropertyValueGrepString();
    163168private:
    164169    bool mNoUninitialized;
    165170    UnicodeSet mY;
    166171    UnicodeSet mN;
     172    std::string property_value_grep_string;
    167173};
    168174
  • icGREP/icgrep-devel/icgrep/UCD/resolve_properties.cpp

    r5091 r5206  
    220220}
    221221
     // Looks up the property named by prop and returns its newline-separated
     // value-name string.
     //   prop: property name as written by the user; it is run through
     //         canonicalize_value_name before the alias_map lookup.
     //   Throws UnicodePropertyExpressionError when the canonicalized name is not
     //   in alias_map, or when the matching property object is neither an
     //   EnumeratedPropertyObject nor a BinaryPropertyObject.
     222const std::string& getPropertyValueGrepString(const std::string & prop) {
     223    auto propName = canonicalize_value_name(prop);
     224    auto propit = alias_map.find(propName);
     225    if (propit == alias_map.end()) {
     226        throw UnicodePropertyExpressionError("Expected a property name, but '" + prop + "' found instead");
     227    }
     228    auto theprop = propit->second;
            // Only these two object kinds are dispatched here; anything for which
            // both dyn_cast calls yield null falls through to the throw below.
     229    if (EnumeratedPropertyObject * p = dyn_cast<EnumeratedPropertyObject>(property_object_table[theprop])){
     230        return p->GetPropertyValueGrepString();
     231    } else if (BinaryPropertyObject * p = dyn_cast<BinaryPropertyObject>(property_object_table[theprop])) {
     232        return p->GetPropertyValueGrepString();
     233    }
     234
     235    throw UnicodePropertyExpressionError("Property " + property_full_name[theprop] + " recognized but not supported in icgrep 1.0");
     236}
     237
    222238UnicodeSet resolveUnicodeSet(Name * const name) {
    223239    if (name->getType() == Name::Type::UnicodeProperty) {
  • icGREP/icgrep-devel/icgrep/UCD/resolve_properties.h

    r5091 r5206  
    2525std::string resolvePropertyFunction(re::Name * const property);
    2626UCD::UnicodeSet resolveUnicodeSet(re::Name * const name);
     27const std::string& getPropertyValueGrepString(const std::string & prop);
    2728
    2829}
  • icGREP/icgrep-devel/icgrep/UCD/ucd_compiler.cpp

    r5202 r5206  
    66#include <utf16_encoder.h>
    77#include <iostream>
     8#include <array>
    89
    910using namespace cc;
Note: See TracChangeset for help on using the changeset viewer.