Ignore:
Timestamp:
Oct 5, 2017, 7:56:16 AM (2 years ago)
Author:
cameron
Message:

Fix Mac OS compile bugs and some property object issues

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/UCD/PropertyObjects.cpp

    r5663 r5667  
    11/*
    2  *  Copyright (c) 2014 International Characters, Inc.
     2 *  Copyright (c) 2017 International Characters, Inc.
    33 *  This software is licensed to the public under the Open Software License 3.0.
    44 *  icgrep is a trademark of International Characters, Inc.
     
    1414#include <llvm/Support/raw_ostream.h>
    1515#include <llvm/Support/ErrorHandling.h>
    16 #include <uchar.h>
    17 
    1816using namespace llvm;
    1917
     
    3230}
    3331
    34 int PropertyObject::GetPropertyValueEnumCode(const std::string & value_spec) {
    35     llvm::report_fatal_error("Property " + value_spec + " unsupported.");
    36 }
    // PropertyObject's own implementation of GetPropertyValueGrepString:
    // it unconditionally calls llvm::report_fatal_error with a fixed message.
    // The body contains no return statement, so no string reference is ever
    // produced on this path.
    3732const std::string & PropertyObject::GetPropertyValueGrepString() {
    3833    llvm::report_fatal_error("Property Value Grep String unsupported.");
    3934}
    4035
    41 const UnicodeSet UnsupportedPropertyObject::GetCodepointSet(const std::string &) {
    42     llvm::report_fatal_error("Property " + UCD::property_full_name[the_property] + " unsupported.");
    43 }
    44 
    45 UnicodeSet UnsupportedPropertyObject::GetCodepointSet(const int) {
     36const UnicodeSet PropertyObject::GetCodepointSet(const std::string &) {
    4637    llvm::report_fatal_error("Property " + UCD::property_full_name[the_property] + " unsupported.");
    4738}
     
    146137
    147138int ExtensionPropertyObject::GetPropertyValueEnumCode(const std::string & value_spec) {
    148     return property_object_table[base_property]->GetPropertyValueEnumCode(value_spec);
     139    return cast<EnumeratedPropertyObject>(property_object_table[base_property])->GetPropertyValueEnumCode(value_spec);
    149140}
    150141
     
    187178}
    188179   
    189 
    190 
     180const unsigned firstCodepointLengthAndVal(const std::string & s, codepoint_t & cp) {
     181    size_t lgth = s.length();
     182    if (lgth == 0) return 0;
     183    unsigned char s0 = s[0];
     184    cp = static_cast<codepoint_t>(s0);
     185    if (s0 < 0x80) return 1;
     186    if (lgth == 1) return 0;  // invalid UTF-8
     187    cp = ((cp & 0x1F) << 6) | (s[1] & 0x3F);
     188    if ((s0 >= 0xC2) && (s0 <= 0xDF)) return 2;
     189    if (lgth == 2) return 0;  // invalid UTF-8
     190    cp = ((cp & 0x3FFF) << 6) | (s[2] & 0x3F);
     191    if ((s0 >= 0xE0) && (s0 <= 0xEF)) return 3;
     192    if (lgth == 3) return 0;  // invalid UTF-8
     193    cp = ((cp & 0x7FFF) << 6) | (s[3] & 0x3F);
     194    if ((s0 >= 0xF0) && (s0 <= 0xF4)) return 4;
     195    return 0;
     196}
     197   
    191198const UnicodeSet StringPropertyObject::GetCodepointSet(const std::string & value_spec) {
    192199    if (value_spec == "") return mNullCodepointSet;
     
    194201        UnicodeSet result_set;
    195202        unsigned val_bytes = value_spec.length();
     203        codepoint_t cp;
     204        if (val_bytes == firstCodepointLengthAndVal(value_spec, cp)) {
     205            if (mSelfCodepointSet.contains(cp)) {
     206                result_set.insert(cp);
     207            }
     208        }
    196209        const char * value_str = value_spec.c_str();
    197         std::mbstate_t state{};
    198         char32_t c32;
    199         size_t cvtcode = mbrtoc32(&c32, value_str, val_bytes, &state);
    200         if (cvtcode == val_bytes) {
    201             // A single Unicode character.  Check the reflexive set.
    202             if (mSelfCodepointSet.contains(static_cast<codepoint_t>(c32))) {
    203                 result_set.insert(static_cast<codepoint_t>(c32));
    204             }
    205         }
    206210        const char * search_str = mStringBuffer;
    207211        unsigned buffer_line = 0;
Note: See TracChangeset for help on using the changeset viewer.