Changeset 5667


Ignore:
Timestamp:
Oct 5, 2017, 7:56:16 AM (22 months ago)
Author:
cameron
Message:

Fix Mac OS compile bugs and some property object issues

Location:
icGREP/icgrep-devel/icgrep/UCD
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/UCD/PropertyObjects.cpp

    r5663 r5667  
    11/*
    2  *  Copyright (c) 2014 International Characters, Inc.
     2 *  Copyright (c) 2017 International Characters, Inc.
    33 *  This software is licensed to the public under the Open Software License 3.0.
    44 *  icgrep is a trademark of International Characters, Inc.
     
    1414#include <llvm/Support/raw_ostream.h>
    1515#include <llvm/Support/ErrorHandling.h>
    16 #include <uchar.h>
    17 
    1816using namespace llvm;
    1917
     
    3230}
    3331
    34 int PropertyObject::GetPropertyValueEnumCode(const std::string & value_spec) {
    35     llvm::report_fatal_error("Property " + value_spec + " unsupported.");
    36 }
    3732const std::string & PropertyObject::GetPropertyValueGrepString() {
    3833    llvm::report_fatal_error("Property Value Grep String unsupported.");
    3934}
    4035
    41 const UnicodeSet UnsupportedPropertyObject::GetCodepointSet(const std::string &) {
    42     llvm::report_fatal_error("Property " + UCD::property_full_name[the_property] + " unsupported.");
    43 }
    44 
    45 UnicodeSet UnsupportedPropertyObject::GetCodepointSet(const int) {
     36const UnicodeSet PropertyObject::GetCodepointSet(const std::string &) {
    4637    llvm::report_fatal_error("Property " + UCD::property_full_name[the_property] + " unsupported.");
    4738}
     
    146137
    147138int ExtensionPropertyObject::GetPropertyValueEnumCode(const std::string & value_spec) {
    148     return property_object_table[base_property]->GetPropertyValueEnumCode(value_spec);
     139    return cast<EnumeratedPropertyObject>(property_object_table[base_property])->GetPropertyValueEnumCode(value_spec);
    149140}
    150141
     
    187178}
    188179   
    189 
    190 
     180const unsigned firstCodepointLengthAndVal(const std::string & s, codepoint_t & cp) {
     181    size_t lgth = s.length();
     182    if (lgth == 0) return 0;
     183    unsigned char s0 = s[0];
     184    cp = static_cast<codepoint_t>(s0);
     185    if (s0 < 0x80) return 1;
     186    if (lgth == 1) return 0;  // invalid UTF-8
     187    cp = ((cp & 0x1F) << 6) | (s[1] & 0x3F);
     188    if ((s0 >= 0xC2) && (s0 <= 0xDF)) return 2;
     189    if (lgth == 2) return 0;  // invalid UTF-8
     190    cp = ((cp & 0x3FFF) << 6) | (s[2] & 0x3F);
     191    if ((s0 >= 0xE0) && (s0 <= 0xEF)) return 3;
     192    if (lgth == 3) return 0;  // invalid UTF-8
     193    cp = ((cp & 0x7FFF) << 6) | (s[3] & 0x3F);
     194    if ((s0 >= 0xF0) && (s0 <= 0xF4)) return 4;
     195    return 0;
     196}
     197   
    191198const UnicodeSet StringPropertyObject::GetCodepointSet(const std::string & value_spec) {
    192199    if (value_spec == "") return mNullCodepointSet;
     
    194201        UnicodeSet result_set;
    195202        unsigned val_bytes = value_spec.length();
     203        codepoint_t cp;
     204        if (val_bytes == firstCodepointLengthAndVal(value_spec, cp)) {
     205            if (mSelfCodepointSet.contains(cp)) {
     206                result_set.insert(cp);
     207            }
     208        }
    196209        const char * value_str = value_spec.c_str();
    197         std::mbstate_t state{};
    198         char32_t c32;
    199         size_t cvtcode = mbrtoc32(&c32, value_str, val_bytes, &state);
    200         if (cvtcode == val_bytes) {
    201             // A single Unicode character.  Check the reflexive set.
    202             if (mSelfCodepointSet.contains(static_cast<codepoint_t>(c32))) {
    203                 result_set.insert(static_cast<codepoint_t>(c32));
    204             }
    205         }
    206210        const char * search_str = mStringBuffer;
    207211        unsigned buffer_line = 0;
  • icGREP/icgrep-devel/icgrep/UCD/PropertyObjects.h

    r5663 r5667  
    4242    PropertyObject(property_t p, ClassTypeId k) : the_property(p), the_kind(k) {}
    4343    virtual const UnicodeSet GetCodepointSet(const std::string &);
    44     virtual int GetPropertyValueEnumCode(const std::string & value_spec);
    4544    virtual const std::string & GetPropertyValueGrepString();
    4645    property_t the_property;
     
    6160
    6261    }
    63     const UnicodeSet GetCodepointSet(const std::string &) override;
    64     UnicodeSet GetCodepointSet(const int);
    6562};
    6663
     
    9188
    9289    virtual int GetPropertyValueEnumCode(const std::string & value_spec);
    93     virtual const std::string & GetPropertyValueGrepString();
     90    const std::string & GetPropertyValueGrepString() override;
    9491    const UnicodeSet GetCodepointSet(const std::string & value_spec) override;
    9592    const UnicodeSet & GetCodepointSet(const int property_enum_val) const;
     
    140137    iterator end() const;
    141138
    142     virtual int GetPropertyValueEnumCode(const std::string & value_spec);
    143     virtual const std::string & GetPropertyValueGrepString();
     139    int GetPropertyValueEnumCode(const std::string & value_spec);
     140    const std::string & GetPropertyValueGrepString() override;
    144141    const UnicodeSet GetCodepointSet(const std::string & value_spec) override;
    145142    const UnicodeSet & GetCodepointSet(const int property_enum_val) const;
     
    167164    const UnicodeSet GetCodepointSet(const std::string & value_spec) override;
    168165    const UnicodeSet & GetCodepointSet(const int property_enum_val);
    169     virtual const std::string & GetPropertyValueGrepString();
     166    const std::string & GetPropertyValueGrepString() override;
    170167private:
    171168    bool mNoUninitialized;
  • icGREP/icgrep-devel/icgrep/UCD/resolve_properties.cpp

    r5663 r5667  
    2424using namespace llvm;
    2525
    26 inline int GetPropertyValueEnumCode(const UCD::property_t type, const std::string & value) {
    27     return property_object_table[type]->GetPropertyValueEnumCode(value);
    28 }
    29 
    3026namespace UCD {
    3127   
     
    119115}
    120116
    121 std::string resolvePropertyFunction(Name * const property) {
    122     const std::string value = property->getName();
    123     std::string functionName;
    124     if (property->hasNamespace()) {
    125         auto propit = alias_map.find(property->getNamespace());
    126         if (propit == alias_map.end()) {
    127             UnicodePropertyExpressionError("Expected a property name but '" + property->getNamespace() + "' was found instead");
    128         }
    129         auto theprop = propit->second;
    130         if (EnumeratedPropertyObject * p = dyn_cast<EnumeratedPropertyObject>(property_object_table[theprop])){
    131             int valcode = p->GetPropertyValueEnumCode(value);
    132             if (valcode < 0) {
    133                 UnicodePropertyExpressionError("Erroneous property value '" + value + "' for " + property_full_name[theprop] + " property");
    134             }
    135             functionName = "__get_" + property_enum_name[theprop] + "_" + p->GetValueEnumName(valcode);
    136         }
    137         else if (theprop == scx) {
    138             // Script extension property identified
    139             int valcode = GetPropertyValueEnumCode(sc, value);
    140             if (valcode < 0) {
    141                 UnicodePropertyExpressionError("Erroneous property value for script_extension property");
    142             }
    143             functionName = "__get_scx_" + SC_ns::enum_names[valcode];
    144         }
    145         else if (isa<BinaryPropertyObject>(property_object_table[theprop])){
    146             auto valit = Binary_ns::aliases_only_map.find(value);
    147             if (valit == Binary_ns::aliases_only_map.end()) {
    148                 UnicodePropertyExpressionError("Erroneous property value for binary property " + property_full_name[theprop]);
    149             }
    150             if (valit->second == Binary_ns::Y) {
    151                 functionName = "__get_" + property_enum_name[theprop] + "_Y";
    152             } else {
    153                 UnicodePropertyExpressionError("Unexpected property value for binary property " + property_full_name[theprop]);
    154             }
    155         }
    156         else {
    157             UnicodePropertyExpressionError("Property " + property_full_name[theprop] + " recognized but not supported in icgrep 1.0");
    158         }
    159     } else { // No namespace (property) name.
    160         // Try as a general category, script or binary property.
    161         int valcode;
    162         if ((valcode = GetPropertyValueEnumCode(gc, value)) >= 0) {
    163             functionName = "__get_gc_" + GC_ns::enum_names[valcode];
    164         }
    165         else if ((valcode = GetPropertyValueEnumCode(sc, value)) >= 0) {
    166             functionName = "__get_sc_" + SC_ns::enum_names[valcode];
    167         }
    168         else { // Try as a binary property.
    169             auto propit = alias_map.find(value);
    170             if (propit != alias_map.end()) {
    171                 auto theprop = propit->second;
    172                 if (isa<BinaryPropertyObject>(property_object_table[theprop])) {
    173                     functionName = "__get_" + property_enum_name[theprop] + "_Y";
    174                 }
    175                 else {
    176                     UnicodePropertyExpressionError("Error: property " + property_full_name[theprop] + " specified without a value");
    177                 }
    178             }
    179             else {
    180                 UnicodePropertyExpressionError("Expected a general category, script or binary property name but '" + value + "' was found instead");
    181             }
    182         }
    183     }
    184     assert (functionName.length() > 0);
    185     return functionName;
    186 }
    187 
    188117const std::string & getPropertyValueGrepString(const std::string & prop) {
    189118    auto propit = alias_map.find(canonicalize_value_name(prop));
     
    216145        else {
    217146            // No namespace (property) name.   Try as a general category.
    218             int valcode = GetPropertyValueEnumCode(gc, value);
     147            const auto & gcobj = cast<EnumeratedPropertyObject>(property_object_table[gc]);
     148            int valcode = gcobj->GetPropertyValueEnumCode(value);
    219149            if (valcode >= 0) {
    220                 return cast<EnumeratedPropertyObject>(property_object_table[gc])->GetCodepointSet(valcode);
    221             }
    222             valcode = GetPropertyValueEnumCode(sc, value);
     150                return gcobj->GetCodepointSet(valcode);
     151            }
     152            const auto & scObj = cast<EnumeratedPropertyObject>(property_object_table[sc]);
     153            valcode = scObj->GetPropertyValueEnumCode(value);
    223154            if (valcode >= 0) {
    224                 return cast<EnumeratedPropertyObject>(property_object_table[sc])->GetCodepointSet(valcode);
     155                return scObj->GetCodepointSet(valcode);
    225156            }
    226157            // Try as a binary property.
Note: See TracChangeset for help on using the changeset viewer.