Ignore:
Timestamp:
Sep 30, 2015, 3:18:09 PM (4 years ago)
Author:
nmedfort
Message:

Refactored UCD property resolution.

Location:
icGREP/icgrep-devel/icgrep/UCD
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/UCD/resolve_properties.cpp

    r4737 r4809  
    2828namespace UCD {
    2929
    30 Name * resolveProperty(const std::string prop, const std::string value, re::RE_Parser * parser) {
    31     auto propit = alias_map.find(prop);
    32     if (propit == alias_map.end()) {
    33         throw UnicodePropertyExpressionError("Expected a property name but '" + prop + "' was found instead");
     30RE * resolvePropertyDefinition(Name * const property) {
     31    if (property->hasNamespace()) {
     32        auto propit = alias_map.find(property->getNamespace());
     33        if (propit == alias_map.end()) {
     34            throw UnicodePropertyExpressionError("Expected a property name but '" + property->getNamespace() + "' was found instead");
     35        }
     36        auto theprop = propit->second;
     37        if (isa<BinaryPropertyObject>(property_object_table[theprop])){
     38            auto valit = Binary_ns::aliases_only_map.find(property->getName());
     39            if (valit != Binary_ns::aliases_only_map.end()) {
     40                if (valit->second == Binary_ns::N) {
     41                    Name * binprop = makeName(property_enum_name[theprop], Name::Type::UnicodeProperty);
     42                    property->setDefinition(makeDiff(makeAny(), binprop));
     43                }
     44            }
     45        }
     46    } else {
     47        const std::string value = property->getName();
     48        // Try special cases of Unicode TR #18
     49        if (value == "any") {
     50            property->setDefinition(makeAny());
     51        }
     52        else if (value == "ascii") {
     53            property->setDefinition(makeName("blk", "ascii", Name::Type::UnicodeProperty));
     54        }
     55        else if (value == "assigned") {
     56            Name * unassigned = makeName("cn", Name::Type::UnicodeProperty);
     57            property->setDefinition(makeDiff(makeAny(), unassigned));
     58        }
     59        // Now compatibility properties of UTR #18 Annex C
     60        else if (value == "xdigit") {
     61            Name * digit = makeName("nd", Name::Type::UnicodeProperty);
     62            Name * hexdigit = makeName("hexdigit", Name::Type::UnicodeProperty);
     63            property->setDefinition(makeAlt({digit, hexdigit}));
     64        }
     65        else if (value == "alnum") {
     66            Name * digit = makeName("nd", Name::Type::UnicodeProperty);
     67            Name * alpha = makeName("alphabetic", Name::Type::UnicodeProperty);
     68            property->setDefinition(makeAlt({digit, alpha}));
     69        }
     70        else if (value == "blank") {
     71            Name * space_sep = makeName("space_separator", Name::Type::UnicodeProperty);
     72            CC * tab = makeCC(0x09);
     73            property->setDefinition(makeAlt({space_sep, tab}));
     74        }
     75        else if (value == "graph") {
     76            Name * space = makeName("space", Name::Type::UnicodeProperty);
     77            Name * ctrl = makeName("control", Name::Type::UnicodeProperty);
     78            Name * surr = makeName("surrogate", Name::Type::UnicodeProperty);
     79            Name * unassigned = makeName("cn", Name::Type::UnicodeProperty);
     80            property->setDefinition(makeDiff(makeAny(), makeAlt({space, ctrl, surr, unassigned})));
     81        }
     82        else if (value == "print") {
     83            Name * graph = makeName("graph", Name::Type::UnicodeProperty);
     84            Name * space_sep = makeName("space_separator", Name::Type::UnicodeProperty);
     85            property->setDefinition(makeAlt({graph, space_sep}));
     86        }
     87        else if (value == "word") {
     88            Name * alnum = makeName("alnum", Name::Type::UnicodeProperty);
     89            Name * mark = makeName("mark", Name::Type::UnicodeProperty);
     90            Name * conn = makeName("connectorpunctuation", Name::Type::UnicodeProperty);
     91            Name * join = makeName("joincontrol", Name::Type::UnicodeProperty);
     92            property->setDefinition(makeAlt({alnum, mark, conn, join}));
     93        }
    3494    }
    35 
    36     Name * property = makeName(prop, value, Name::Type::UnicodeProperty);
    37 
    38     auto theprop = propit->second;
    39    
    40    
    41     if (EnumeratedPropertyObject * p = dyn_cast<EnumeratedPropertyObject>(property_object_table[theprop])){
    42         int valcode = p->GetPropertyValueEnumCode(value);
    43         if (valcode < 0) {
    44             throw UnicodePropertyExpressionError("Erroneous property value '" + value + "' for " + property_full_name[theprop] + " property");
    45         }
    46         property->setFunctionName("__get_" + property_enum_name[theprop] + "_" + p->GetValueEnumName(valcode));
    47     }
    48     else if (theprop == scx) {
    49         // Script extension property identified
    50         int valcode = GetPropertyValueEnumCode(sc, value);
    51         if (valcode < 0) {
    52             throw UnicodePropertyExpressionError("Erroneous property value for script_extension property");
    53         }
    54         property->setFunctionName("__get_scx_" + SC_ns::enum_names[valcode]);
    55     }
    56     else if (isa<BinaryPropertyObject>(property_object_table[theprop])){
    57         auto valit = Binary_ns::aliases_only_map.find(value);
    58         if (valit == Binary_ns::aliases_only_map.end()) {
    59             throw UnicodePropertyExpressionError("Erroneous property value for binary property " + property_full_name[theprop]);
    60         }
    61         if (valit->second == Binary_ns::Y) {
    62             property->setFunctionName("__get_" + property_enum_name[theprop] + "_Y");
     95    return property->getDefinition();
     96}
     97
     98std::string resolvePropertyFunction(Name * const property) {
     99    const std::string value = property->getName();
     100    std::string functionName;
     101    if (property->hasNamespace()) {
     102        auto propit = alias_map.find(property->getNamespace());
     103        if (propit == alias_map.end()) {
     104            throw UnicodePropertyExpressionError("Expected a property name but '" + property->getNamespace() + "' was found instead");
     105        }
     106        auto theprop = propit->second;
     107        if (EnumeratedPropertyObject * p = dyn_cast<EnumeratedPropertyObject>(property_object_table[theprop])){
     108            int valcode = p->GetPropertyValueEnumCode(value);
     109            if (valcode < 0) {
     110                throw UnicodePropertyExpressionError("Erroneous property value '" + value + "' for " + property_full_name[theprop] + " property");
     111            }
     112            functionName = "__get_" + property_enum_name[theprop] + "_" + p->GetValueEnumName(valcode);
     113        }
     114        else if (theprop == scx) {
     115            // Script extension property identified
     116            int valcode = GetPropertyValueEnumCode(sc, value);
     117            if (valcode < 0) {
     118                throw UnicodePropertyExpressionError("Erroneous property value for script_extension property");
     119            }
     120            functionName = "__get_scx_" + SC_ns::enum_names[valcode];
     121        }
     122        else if (isa<BinaryPropertyObject>(property_object_table[theprop])){
     123            auto valit = Binary_ns::aliases_only_map.find(value);
     124            if (valit == Binary_ns::aliases_only_map.end()) {
     125                throw UnicodePropertyExpressionError("Erroneous property value for binary property " + property_full_name[theprop]);
     126            }
     127            if (valit->second == Binary_ns::Y) {
     128                functionName = "__get_" + property_enum_name[theprop] + "_Y";
     129            } else {
     130                throw UnicodePropertyExpressionError("Unexpected property value for binary property " + property_full_name[theprop]);
     131            }
    63132        }
    64133        else {
    65             Name * binprop = parser->createName("__get_" + property_enum_name[theprop] + "_Y");
    66             property->setDefinition(makeDiff(makeAny(), binprop));
    67         }
    68     }
    69     else {
    70         throw UnicodePropertyExpressionError("Property " + property_full_name[theprop] + " recognized but not supported in icgrep 1.0");
    71     }
    72     return property;
    73 }
    74 
    75 Name * resolveProperty(const std::string value, re::RE_Parser * parser) {
    76 
    77     // No namespace (property) name.
    78 
    79     Name * property = makeName(value, Name::Type::UnicodeProperty);
    80 
    81     // Try special cases of Unicode TR #18
    82     if (value == "any") {
    83         property->setDefinition(makeAny());
    84     }
    85     else if (value == "ascii") {
    86         property->setDefinition(parser->createName("blk", "ascii"));
    87     }
    88     else if (value == "assigned") {
    89         Name * unassigned = parser->createName("cn");
    90         property->setDefinition(makeDiff(makeAny(), unassigned));
    91     }
    92     // Now compatibility properties of UTR #18 Annex C
    93     else if (value == "xdigit") {
    94         Name * digit = parser->createName("nd");
    95         Name * hexdigit = parser->createName("hexdigit");
    96         property->setDefinition(makeAlt({digit, hexdigit}));
    97     }
    98     else if (value == "alnum") {
    99         Name * digit = parser->createName("nd");
    100         Name * alpha = parser->createName("alphabetic");
    101         property->setDefinition(makeAlt({digit, alpha}));
    102     }
    103     else if (value == "blank") {
    104         Name * space_sep = parser->createName("space_separator");
    105         CC * tab = makeCC(0x09);
    106         property->setDefinition(makeAlt({space_sep, tab}));
    107     }
    108     else if (value == "graph") {
    109         Name * space = parser->createName("space");
    110         Name * ctrl = parser->createName("control");
    111         Name * surr = parser->createName("surrogate");
    112         Name * unassigned = parser->createName("cn");
    113         property->setDefinition(makeDiff(makeAny(), makeAlt({space, ctrl, surr, unassigned})));
    114     }
    115     else if (value == "print") {
    116         Name * graph = parser->createName("graph");
    117         Name * space_sep = parser->createName("space_separator");
    118         property->setDefinition(makeAlt({graph, space_sep}));
    119     }
    120     else if (value == "word") {
    121         Name * alnum = parser->createName("alnum");
    122         Name * mark = parser->createName("mark");
    123         Name * conn = parser->createName("connectorpunctuation");
    124         Name * join = parser->createName("joincontrol");
    125         property->setDefinition(makeAlt({alnum, mark, conn, join}));
    126     }
    127     else { // Try as a general category, script or binary property.
     134            throw UnicodePropertyExpressionError("Property " + property_full_name[theprop] + " recognized but not supported in icgrep 1.0");
     135        }
     136    } else { // No namespace (property) name.
     137        // Try as a general category, script or binary property.
    128138        int valcode;
    129139        if ((valcode = GetPropertyValueEnumCode(gc, value)) >= 0) {
    130             property->setFunctionName("__get_gc_" + GC_ns::enum_names[valcode]);
     140            functionName = "__get_gc_" + GC_ns::enum_names[valcode];
    131141        }
    132142        else if ((valcode = GetPropertyValueEnumCode(sc, value)) >= 0) {
    133             property->setFunctionName("__get_sc_" + SC_ns::enum_names[valcode]);
     143            functionName = "__get_sc_" + SC_ns::enum_names[valcode];
    134144        }
    135145        else { // Try as a binary property.
     
    138148                auto theprop = propit->second;
    139149                if (isa<BinaryPropertyObject>(property_object_table[theprop])) {
    140                     property->setFunctionName("__get_" + property_enum_name[theprop] + "_Y");
     150                    functionName = "__get_" + property_enum_name[theprop] + "_Y";
    141151                }
    142152                else {
     
    149159        }
    150160    }
    151     return property;
     161    assert (functionName.length() > 0);
     162    return std::move(functionName);
    152163}
    153164
  • icGREP/icgrep-devel/icgrep/UCD/resolve_properties.h

    r4737 r4809  
    55
    66namespace re {
    7     class Name;
    8     class RE_Parser;
     7    class RE;
     8    class Name;   
    99}
    1010
     
    1919    const std::string _msg;
    2020};
    21 re::Name * resolveProperty(const std::string value, re::RE_Parser * parser);
    22 re::Name * resolveProperty(const std::string prop, const std::string value, re::RE_Parser * parser);
     21
     22re::RE * resolvePropertyDefinition(re::Name * const property);
     23std::string resolvePropertyFunction(re::Name * const property);
    2324UCD::UnicodeSet resolveUnicodeSet(re::Name * const name);
    2425
Note: See TracChangeset for help on using the changeset viewer.