Ignore:
Timestamp:
Jul 15, 2015, 12:30:25 PM (4 years ago)
Author:
nmedfort
Message:

Moved the resolveProperty responsibilities out of RE_Parser, but kept the expansion of Name objects with definitions in it.

Location:
icGREP/icgrep-devel/icgrep
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/UCD/resolve_properties.cpp

    r4671 r4673  
    1717#include <re/re_start.h>
    1818#include <re/re_end.h>
     19#include <re/re_parser.h>
    1920#include <cc/cc_namemap.hpp>
    2021#include "UCD/PropertyAliases.h"
     
    4748}
    4849
    49 void resolveProperty(Name * name) {
    50     const std::string prop = canonicalize_value_name(name->getNamespace());
    51     const std::string value = canonicalize_value_name(name->getName());
    52     if (prop.length() != 0) {
    53         auto propit = alias_map.find(prop);
    54         if (propit == alias_map.end()) {
    55             throw UnicodePropertyExpressionError("Expected a property name, but '" + name->getNamespace() + "' found instead");
    56         }
    57         auto theprop = propit->second;
    58         if (theprop == gc) {
    59             // General Category
    60             int valcode = GetPropertyValueEnumCode(gc, value);
    61             if (valcode < 0) {
    62                 throw UnicodePropertyExpressionError("Erroneous property value for general_category property");
    63             }
    64             name->setFunctionName("__get_gc_" + GC_ns::enum_names[valcode]);
    65         }
    66         else if (theprop == sc) {
    67             // Script property identified
    68             int valcode = GetPropertyValueEnumCode(sc, value);
    69             if (valcode < 0) {
    70                 throw UnicodePropertyExpressionError("Erroneous property value for script property");
    71             }
    72             name->setFunctionName("__get_sc_" + SC_ns::enum_names[valcode]);
    73         }
    74         else if (theprop == scx) {
    75             // Script extension property identified
    76             int valcode = GetPropertyValueEnumCode(sc, value);
    77             if (valcode < 0) {
    78                 throw UnicodePropertyExpressionError("Erroneous property value for script_extension property");
    79             }
    80             name->setFunctionName("__get_scx_" + SC_ns::enum_names[valcode]);
    81         }
    82         else if (theprop == blk) {
    83             // Block property identified
    84             int valcode = GetPropertyValueEnumCode(blk, value);
    85             if (valcode < 0) {
    86                  throw UnicodePropertyExpressionError("Erroneous property value for block property");
    87             }
    88             name->setFunctionName("__get_blk_" + BLK_ns::enum_names[valcode]);
    89         }
    90         else if (isa<BinaryPropertyObject>(property_object_table[theprop])){
    91             auto valit = Binary_ns::aliases_only_map.find(value);
    92             if (valit == Binary_ns::aliases_only_map.end()) {
    93                 throw UnicodePropertyExpressionError("Erroneous property value for binary property " + property_full_name[theprop]);
    94             }
    95             if (valit->second == Binary_ns::Y) {
    96                 name->setFunctionName("__get_" + lowercase(property_enum_name[theprop]) + "_Y");
     50namespace UCD {
     51
     52Name * resolveProperty(const std::string prop, const std::string value, re::RE_Parser * parser) {
     53    auto propit = alias_map.find(prop);
     54    if (propit == alias_map.end()) {
     55        throw UnicodePropertyExpressionError("Expected a property name but '" + prop + "' was found instead");
     56    }
     57
     58    Name * property = makeName(prop, value, Name::Type::UnicodeProperty);
     59
     60    auto theprop = propit->second;
     61    if (theprop == gc) {
     62        // General Category
     63        int valcode = GetPropertyValueEnumCode(gc, value);
     64        if (valcode < 0) {
     65            throw UnicodePropertyExpressionError("Erroneous property value for general_category property");
     66        }
     67        property->setFunctionName("__get_gc_" + GC_ns::enum_names[valcode]);
     68    }
     69    else if (theprop == sc) {
     70        // Script property identified
     71        int valcode = GetPropertyValueEnumCode(sc, value);
     72        if (valcode < 0) {
     73            throw UnicodePropertyExpressionError("Erroneous property value for script property");
     74        }
     75        property->setFunctionName("__get_sc_" + SC_ns::enum_names[valcode]);
     76    }
     77    else if (theprop == scx) {
     78        // Script extension property identified
     79        int valcode = GetPropertyValueEnumCode(sc, value);
     80        if (valcode < 0) {
     81            throw UnicodePropertyExpressionError("Erroneous property value for script_extension property");
     82        }
     83        property->setFunctionName("__get_scx_" + SC_ns::enum_names[valcode]);
     84    }
     85    else if (theprop == blk) {
     86        // Block property identified
     87        int valcode = GetPropertyValueEnumCode(blk, value);
     88        if (valcode < 0) {
     89             throw UnicodePropertyExpressionError("Erroneous property value for block property");
     90        }
     91        property->setFunctionName("__get_blk_" + BLK_ns::enum_names[valcode]);
     92    }
     93    else if (isa<BinaryPropertyObject>(property_object_table[theprop])){
     94        auto valit = Binary_ns::aliases_only_map.find(value);
     95        if (valit == Binary_ns::aliases_only_map.end()) {
     96            throw UnicodePropertyExpressionError("Erroneous property value for binary property " + property_full_name[theprop]);
     97        }
     98        if (valit->second == Binary_ns::Y) {
     99            property->setFunctionName("__get_" + lowercase(property_enum_name[theprop]) + "_Y");
     100        }
     101        else {
     102            Name * binprop = parser->createName("__get_" + lowercase(property_enum_name[theprop]) + "_Y");
     103            property->setDefinition(makeDiff(makeAny(), binprop));
     104        }
     105    }
     106    else {
     107        throw UnicodePropertyExpressionError("Property " + property_full_name[theprop] + " recognized but not supported in icgrep 1.0");
     108    }
     109
     110    return property;
     111}
     112
     113Name * resolveProperty(const std::string value, re::RE_Parser * parser) {
     114
     115    // No namespace (property) name.
     116
     117    Name * property = makeName(value, Name::Type::UnicodeProperty);
     118
     119    // Try special cases of Unicode TR #18
     120    if (value == "any") {
     121        property->setDefinition(makeAny());
     122    }
     123    else if (value == "ascii") {
     124        property->setDefinition(parser->createName("blk", "ascii"));
     125    }
     126    else if (value == "assigned") {
     127        Name * unassigned = parser->createName("cn");
     128        property->setDefinition(makeDiff(makeAny(), unassigned));
     129    }
     130    // Now compatibility properties of UTR #18 Annex C
     131    else if (value == "xdigit") {
     132        Name * digit = parser->createName("nd");
     133        Name * hexdigit = parser->createName("hexdigit");
     134        property->setDefinition(makeAlt({digit, hexdigit}));
     135    }
     136    else if (value == "alnum") {
     137        Name * digit = parser->createName("nd");
     138        Name * alpha = parser->createName("alphabetic");
     139        property->setDefinition(makeAlt({digit, alpha}));
     140    }
     141    else if (value == "blank") {
     142        Name * space_sep = parser->createName("space_separator");
     143        CC * tab = makeCC(0x09);
     144        property->setDefinition(makeAlt({space_sep, tab}));
     145    }
     146    else if (value == "graph") {
     147        Name * space = parser->createName("space");
     148        Name * ctrl = parser->createName("control");
     149        Name * surr = parser->createName("surrogate");
     150        Name * unassigned = parser->createName("cn");
     151        property->setDefinition(makeDiff(makeAny(), makeAlt({space, ctrl, surr, unassigned})));
     152    }
     153    else if (value == "print") {
     154        Name * graph = parser->createName("graph");
     155        Name * space_sep = parser->createName("space_separator");
     156        property->setDefinition(makeAlt({graph, space_sep}));
     157    }
     158    else if (value == "word") {
     159        Name * alnum = parser->createName("alnum");
     160        Name * mark = parser->createName("mark");
     161        Name * conn = parser->createName("connectorpunctuation");
     162        Name * join = parser->createName("joincontrol");
     163        property->setDefinition(makeAlt({alnum, mark, conn, join}));
     164    }
     165    else { // Try as a general category, script or binary property.
     166        int valcode;
     167        if ((valcode = GetPropertyValueEnumCode(gc, value)) >= 0) {
     168            property->setFunctionName("__get_gc_" + GC_ns::enum_names[valcode]);
     169        }
     170        else if ((valcode = GetPropertyValueEnumCode(sc, value)) >= 0) {
     171            property->setFunctionName("__get_sc_" + SC_ns::enum_names[valcode]);
     172        }
     173        else { // Try as a binary property.
     174            auto propit = alias_map.find(value);
     175            if (propit != alias_map.end()) {
     176                auto theprop = propit->second;
     177                if (isa<BinaryPropertyObject>(property_object_table[theprop])) {
     178                    property->setFunctionName("__get_" + lowercase(property_enum_name[theprop]) + "_Y");
     179                }
     180                else {
     181                    throw UnicodePropertyExpressionError("Error: property " + property_full_name[theprop] + " specified without a value");
     182                }
    97183            }
    98184            else {
    99                 Name * binprop = makeName("__get_" + lowercase(property_enum_name[theprop]) + "_Y", Name::Type::UnicodeProperty);
    100                 name->setDefinition(makeDiff(makeAny(), binprop));
    101             }
    102         }
    103         else {
    104             throw UnicodePropertyExpressionError("Property " + property_full_name[theprop] + " recognized, but not supported in icgrep 1.0");
    105         }
    106     }
    107     else {
    108 
    109         // No namespace (property) name.   Try as a general category.
    110 
    111         int valcode;
    112 
    113         if ((valcode = GetPropertyValueEnumCode(gc, value)) >= 0) {
    114             name->setFunctionName("__get_gc_" + GC_ns::enum_names[valcode]);
    115             return;
    116         }
    117 
    118         if ((valcode = GetPropertyValueEnumCode(sc, value)) >= 0) {
    119             name->setFunctionName("__get_sc_" + SC_ns::enum_names[valcode]);
    120             return;
    121         }
    122 
    123         // Try as a binary property.
    124         auto propit = alias_map.find(value);
    125         if (propit != alias_map.end()) {
    126             auto theprop = propit->second;
    127             if (isa<BinaryPropertyObject>(property_object_table[theprop])) {
    128                 name->setFunctionName("__get_" + lowercase(property_enum_name[theprop]) + "_Y");
    129             }
    130             else {
    131                 throw UnicodePropertyExpressionError("Error: property " + property_full_name[theprop] + " specified without a value");
    132             }
    133         }
    134         else {
    135             throw UnicodePropertyExpressionError("Expected a general category, script or binary property name, but '" + name->getName() + "' found instead");
    136         }
    137     }
     185                throw UnicodePropertyExpressionError("Expected a general category, script or binary property name but '" + value + "' was found instead");
     186            }
     187        }
     188    }
     189    return property;
    138190}
    139191
     
    199251    throw UnicodePropertyExpressionError("Expected a general category, script or binary property name, but '" + name->getName() + "' found instead");
    200252}
     253
     254}
  • icGREP/icgrep-devel/icgrep/UCD/resolve_properties.h

    r4660 r4673  
    66namespace re {
    77    class Name;
     8    class RE_Parser;
    89}
    910
    10 void resolveProperty(re::Name * const name);
     11namespace UCD {
     12
     13re::Name * resolveProperty(const std::string value, re::RE_Parser * parser);
     14re::Name * resolveProperty(const std::string prop, const std::string value, re::RE_Parser * parser);
    1115UCD::UnicodeSet resolveUnicodeSet(re::Name * const name);
    1216
     17}
     18
    1319#endif // RESOLVE_PROPERTIES_H
  • icGREP/icgrep-devel/icgrep/cc/cc_namemap.cpp

    r4671 r4673  
    4747        }
    4848        else {
    49 
    5049            std::string classname = name->getName();
    5150            auto f = mNameMap.find(classname);
    5251            if (f != mNameMap.end()) {
    5352                return f->second;
    54             }
    55             insert(std::move(classname), name);
    56             if (name->getType() == Name::Type::UnicodeProperty) {
    57                 resolveProperty(name);
    58                 RE * def = name->getDefinition();
    59                 if (def) {
    60                     name->setDefinition(process(def, CC_type::UnicodeClass));
    61                 }
    62             }
    63 
    64             return name;
     53            }           
     54            return insert(std::move(classname), name);
    6555        }
    6656    }
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r4660 r4673  
    279279        }
    280280        else {
    281             var = mUCDCompiler.generateWithDefaultIfHierarchy(resolveUnicodeSet(name), pb);
     281            var = mUCDCompiler.generateWithDefaultIfHierarchy(UCD::resolveUnicodeSet(name), pb);
    282282        }
    283283    }
  • icGREP/icgrep-devel/icgrep/re/re_parser.cpp

    r4671 r4673  
    1616#include <re/re_assertion.h>
    1717#include <re/parsefailure.h>
     18#include <UCD/resolve_properties.h>
    1819#include <UCD/CaseFolding_txt.h>
    1920#include <sstream>
     
    486487        }
    487488        // We have a property-name = value expression
    488         return resolvePropertyExpression(canonicalize(start, prop_end), canonicalize(val_start, _cursor));
    489     }
    490     return resolvePropertyExpression(canonicalize(start, _cursor));
    491 }
    492 
    493 Name * RE_Parser::resolvePropertyExpression(std::string value) {
     489        return createName(canonicalize(start, prop_end), canonicalize(val_start, _cursor));
     490    }
     491    return createName(canonicalize(start, _cursor));
     492}
     493
     494Name * RE_Parser::createName(const std::string value) {
    494495
    495496    auto key = std::make_pair("", value);
     
    499500    }
    500501
    501     Name * property = makeName(value, Name::Type::UnicodeProperty);
    502 
    503     // Try special cases of Unicode TR #18
    504     if (value == "any") {
    505         property->setDefinition(makeAny());
    506     }
    507     else if (value == "ascii") {
    508         property->setDefinition(resolvePropertyExpression("blk", "ascii"));
    509     }
    510     else if (value == "assigned") {
    511         Name * unassigned = resolvePropertyExpression("cn");
    512         property->setDefinition(makeDiff(makeAny(), unassigned));
    513     }
    514     // Now compatibility properties of UTR #18 Annex C
    515     else if (value == "xdigit") {
    516         Name * digit = resolvePropertyExpression("nd");
    517         Name * hexdigit = resolvePropertyExpression("hexdigit");
    518         property->setDefinition(makeAlt({digit, hexdigit}));
    519     }
    520     else if (value == "alnum") {
    521         Name * digit = resolvePropertyExpression("nd");
    522         Name * alpha = resolvePropertyExpression("alphabetic");
    523         property->setDefinition(makeAlt({digit, alpha}));
    524     }
    525     else if (value == "blank") {
    526         Name * space_sep = resolvePropertyExpression("space_separator");
    527         CC * tab = makeCC(0x09);
    528         property->setDefinition(makeAlt({space_sep, tab}));
    529     }
    530     else if (value == "graph") {
    531         Name * space = resolvePropertyExpression("space");
    532         Name * ctrl = resolvePropertyExpression("control");
    533         Name * surr = resolvePropertyExpression("surrogate");
    534         Name * unassigned = resolvePropertyExpression("cn");
    535         property->setDefinition(makeDiff(makeAny(), makeAlt({space, ctrl, surr, unassigned})));
    536     }
    537     else if (value == "print") {
    538         Name * graph = resolvePropertyExpression("graph");
    539         Name * space_sep = resolvePropertyExpression("space_separator");
    540         property->setDefinition(makeAlt({graph, space_sep}));
    541     }
    542     else if (value == "word") {
    543         Name * alnum = resolvePropertyExpression("alnum");
    544         Name * mark = resolvePropertyExpression("mark");
    545         Name * conn = resolvePropertyExpression("connectorpunctuation");
    546         Name * join = resolvePropertyExpression("joincontrol");
    547         property->setDefinition(makeAlt({alnum, mark, conn, join}));
    548     }
    549 
    550     mNameMap.emplace(std::move(key), property);
     502    Name * property = UCD::resolveProperty(value, this);
     503
     504    mNameMap.insert(std::make_pair(std::move(key), property));
    551505
    552506    return property;
    553507}
    554508
    555 Name * RE_Parser::resolvePropertyExpression(std::string namespaceValue, std::string nameValue) {
    556 
    557     auto key = std::make_pair(namespaceValue, nameValue);
     509Name * RE_Parser::createName(const std::string prop, const std::string value) {
     510
     511    auto key = std::make_pair(prop, value);
    558512
    559513    auto f = mNameMap.find(key);
     
    562516    }
    563517
    564 
    565 
    566     Name * property = makeName(namespaceValue, nameValue, Name::Type::UnicodeProperty);
    567 
    568     mNameMap.emplace(std::move(key), property);
     518    Name * property = UCD::resolveProperty(prop, value, this);
     519
     520    mNameMap.insert(std::make_pair(std::move(key), property));
    569521
    570522    return property;
     
    969921
    970922inline Name * RE_Parser::makeDigitSet() {
    971     return resolvePropertyExpression("nd");
     923    return createName("nd");
    972924}
    973925
    974926inline Name * RE_Parser::makeAlphaNumeric() {
    975     return resolvePropertyExpression("alnum");
     927    return createName("alnum");
    976928}
    977929
    978930inline Name * RE_Parser::makeWhitespaceSet() {
    979     return resolvePropertyExpression("whitespace");
     931    return createName("whitespace");
    980932}
    981933
    982934inline Name * RE_Parser::makeWordSet() {
    983     return resolvePropertyExpression("word");
    984 }
    985 
    986 }
     935    return createName("word");
     936}
     937
     938}
  • icGREP/icgrep-devel/icgrep/re/re_parser.h

    r4671 r4673  
    88#define RE_PARSER_H
    99
    10 #include "re_re.h"
    11 #include "re_any.h"
    12 #include "re_name.h"
    13 
     10#include <re/re_re.h>
     11#include <re/re_any.h>
     12#include <re/re_name.h>
     13#include <UCD/resolve_properties.h>
    1414#include <string>
    1515#include <list>
     
    1717#include <map>
    1818
     19
    1920namespace re {
    20        
     21
    2122enum CharsetOperatorKind
    2223        {intersectOp, setDiffOp, ampChar, hyphenChar, rangeHyphen, posixPropertyOpener, setOpener, setCloser, backSlash, emptyOperator};
     
    3738{
    3839public:
     40
     41    friend Name * UCD::resolveProperty(const std::string, RE_Parser *);
     42    friend Name * UCD::resolveProperty(const std::string, const std::string, RE_Parser *);
    3943
    4044    static RE * parse(const std::string &input_string, ModeFlagSet initialFlags);
     
    7579       
    7680    RE * makeComplement(RE * s);
    77     RE * makeWordBoundary ();
    78     RE * makeWordNonBoundary ();
     81    RE * makeWordBoundary();
     82    RE * makeWordNonBoundary();
    7983    Name * makeDigitSet();
    8084    Name * makeAlphaNumeric();
    8185    Name * makeWhitespaceSet();
    8286    Name * makeWordSet();
    83     Name * resolvePropertyExpression(std::string nameValue);
    8487
    85     Name * resolvePropertyExpression(std::string namespaceValue, std::string nameValue);
     88    Name * createName(const std::string value);
     89    Name * createName(const std::string prop, const std::string value);
    8690
    8791        CharsetOperatorKind getCharsetOperator();
Note: See TracChangeset for help on using the changeset viewer.