- Timestamp: Oct 5, 2017, 7:56:16 AM (17 months ago)
- Location: icGREP/icgrep-devel/icgrep/UCD
- Files: 3 edited
Legend:
- Unmodified
- Added
- Removed
icGREP/icgrep-devel/icgrep/UCD/PropertyObjects.cpp
r5663 r5667 1 1 /* 2 * Copyright (c) 201 4International Characters, Inc.2 * Copyright (c) 2017 International Characters, Inc. 3 3 * This software is licensed to the public under the Open Software License 3.0. 4 4 * icgrep is a trademark of International Characters, Inc. … … 14 14 #include <llvm/Support/raw_ostream.h> 15 15 #include <llvm/Support/ErrorHandling.h> 16 #include <uchar.h>17 18 16 using namespace llvm; 19 17 … … 32 30 } 33 31 34 int PropertyObject::GetPropertyValueEnumCode(const std::string & value_spec) {35 llvm::report_fatal_error("Property " + value_spec + " unsupported.");36 }37 32 const std::string & PropertyObject::GetPropertyValueGrepString() { 38 33 llvm::report_fatal_error("Property Value Grep String unsupported."); 39 34 } 40 35 41 const UnicodeSet UnsupportedPropertyObject::GetCodepointSet(const std::string &) { 42 llvm::report_fatal_error("Property " + UCD::property_full_name[the_property] + " unsupported."); 43 } 44 45 UnicodeSet UnsupportedPropertyObject::GetCodepointSet(const int) { 36 const UnicodeSet PropertyObject::GetCodepointSet(const std::string &) { 46 37 llvm::report_fatal_error("Property " + UCD::property_full_name[the_property] + " unsupported."); 47 38 } … … 146 137 147 138 int ExtensionPropertyObject::GetPropertyValueEnumCode(const std::string & value_spec) { 148 return property_object_table[base_property]->GetPropertyValueEnumCode(value_spec);139 return cast<EnumeratedPropertyObject>(property_object_table[base_property])->GetPropertyValueEnumCode(value_spec); 149 140 } 150 141 … … 187 178 } 188 179 189 190 180 const unsigned firstCodepointLengthAndVal(const std::string & s, codepoint_t & cp) { 181 size_t lgth = s.length(); 182 if (lgth == 0) return 0; 183 unsigned char s0 = s[0]; 184 cp = static_cast<codepoint_t>(s0); 185 if (s0 < 0x80) return 1; 186 if (lgth == 1) return 0; // invalid UTF-8 187 cp = ((cp & 0x1F) << 6) | (s[1] & 0x3F); 188 if ((s0 >= 0xC2) && (s0 <= 0xDF)) return 2; 189 if (lgth == 2) return 0; // invalid 
UTF-8 190 cp = ((cp & 0x3FFF) << 6) | (s[2] & 0x3F); 191 if ((s0 >= 0xE0) && (s0 <= 0xEF)) return 3; 192 if (lgth == 3) return 0; // invalid UTF-8 193 cp = ((cp & 0x7FFF) << 6) | (s[3] & 0x3F); 194 if ((s0 >= 0xF0) && (s0 <= 0xF4)) return 4; 195 return 0; 196 } 197 191 198 const UnicodeSet StringPropertyObject::GetCodepointSet(const std::string & value_spec) { 192 199 if (value_spec == "") return mNullCodepointSet; … … 194 201 UnicodeSet result_set; 195 202 unsigned val_bytes = value_spec.length(); 203 codepoint_t cp; 204 if (val_bytes == firstCodepointLengthAndVal(value_spec, cp)) { 205 if (mSelfCodepointSet.contains(cp)) { 206 result_set.insert(cp); 207 } 208 } 196 209 const char * value_str = value_spec.c_str(); 197 std::mbstate_t state{};198 char32_t c32;199 size_t cvtcode = mbrtoc32(&c32, value_str, val_bytes, &state);200 if (cvtcode == val_bytes) {201 // A single Unicode character. Check the reflexive set.202 if (mSelfCodepointSet.contains(static_cast<codepoint_t>(c32))) {203 result_set.insert(static_cast<codepoint_t>(c32));204 }205 }206 210 const char * search_str = mStringBuffer; 207 211 unsigned buffer_line = 0; -
icGREP/icgrep-devel/icgrep/UCD/PropertyObjects.h
r5663 r5667 42 42 PropertyObject(property_t p, ClassTypeId k) : the_property(p), the_kind(k) {} 43 43 virtual const UnicodeSet GetCodepointSet(const std::string &); 44 virtual int GetPropertyValueEnumCode(const std::string & value_spec);45 44 virtual const std::string & GetPropertyValueGrepString(); 46 45 property_t the_property; … … 61 60 62 61 } 63 const UnicodeSet GetCodepointSet(const std::string &) override;64 UnicodeSet GetCodepointSet(const int);65 62 }; 66 63 … … 91 88 92 89 virtual int GetPropertyValueEnumCode(const std::string & value_spec); 93 virtual const std::string & GetPropertyValueGrepString();90 const std::string & GetPropertyValueGrepString() override; 94 91 const UnicodeSet GetCodepointSet(const std::string & value_spec) override; 95 92 const UnicodeSet & GetCodepointSet(const int property_enum_val) const; … … 140 137 iterator end() const; 141 138 142 virtualint GetPropertyValueEnumCode(const std::string & value_spec);143 virtual const std::string & GetPropertyValueGrepString();139 int GetPropertyValueEnumCode(const std::string & value_spec); 140 const std::string & GetPropertyValueGrepString() override; 144 141 const UnicodeSet GetCodepointSet(const std::string & value_spec) override; 145 142 const UnicodeSet & GetCodepointSet(const int property_enum_val) const; … … 167 164 const UnicodeSet GetCodepointSet(const std::string & value_spec) override; 168 165 const UnicodeSet & GetCodepointSet(const int property_enum_val); 169 virtual const std::string & GetPropertyValueGrepString();166 const std::string & GetPropertyValueGrepString() override; 170 167 private: 171 168 bool mNoUninitialized; -
icGREP/icgrep-devel/icgrep/UCD/resolve_properties.cpp
r5663 r5667 24 24 using namespace llvm; 25 25 26 inline int GetPropertyValueEnumCode(const UCD::property_t type, const std::string & value) {27 return property_object_table[type]->GetPropertyValueEnumCode(value);28 }29 30 26 namespace UCD { 31 27 … … 119 115 } 120 116 121 std::string resolvePropertyFunction(Name * const property) {122 const std::string value = property->getName();123 std::string functionName;124 if (property->hasNamespace()) {125 auto propit = alias_map.find(property->getNamespace());126 if (propit == alias_map.end()) {127 UnicodePropertyExpressionError("Expected a property name but '" + property->getNamespace() + "' was found instead");128 }129 auto theprop = propit->second;130 if (EnumeratedPropertyObject * p = dyn_cast<EnumeratedPropertyObject>(property_object_table[theprop])){131 int valcode = p->GetPropertyValueEnumCode(value);132 if (valcode < 0) {133 UnicodePropertyExpressionError("Erroneous property value '" + value + "' for " + property_full_name[theprop] + " property");134 }135 functionName = "__get_" + property_enum_name[theprop] + "_" + p->GetValueEnumName(valcode);136 }137 else if (theprop == scx) {138 // Script extension property identified139 int valcode = GetPropertyValueEnumCode(sc, value);140 if (valcode < 0) {141 UnicodePropertyExpressionError("Erroneous property value for script_extension property");142 }143 functionName = "__get_scx_" + SC_ns::enum_names[valcode];144 }145 else if (isa<BinaryPropertyObject>(property_object_table[theprop])){146 auto valit = Binary_ns::aliases_only_map.find(value);147 if (valit == Binary_ns::aliases_only_map.end()) {148 UnicodePropertyExpressionError("Erroneous property value for binary property " + property_full_name[theprop]);149 }150 if (valit->second == Binary_ns::Y) {151 functionName = "__get_" + property_enum_name[theprop] + "_Y";152 } else {153 UnicodePropertyExpressionError("Unexpected property value for binary property " + property_full_name[theprop]);154 }155 }156 else {157 
UnicodePropertyExpressionError("Property " + property_full_name[theprop] + " recognized but not supported in icgrep 1.0");158 }159 } else { // No namespace (property) name.160 // Try as a general category, script or binary property.161 int valcode;162 if ((valcode = GetPropertyValueEnumCode(gc, value)) >= 0) {163 functionName = "__get_gc_" + GC_ns::enum_names[valcode];164 }165 else if ((valcode = GetPropertyValueEnumCode(sc, value)) >= 0) {166 functionName = "__get_sc_" + SC_ns::enum_names[valcode];167 }168 else { // Try as a binary property.169 auto propit = alias_map.find(value);170 if (propit != alias_map.end()) {171 auto theprop = propit->second;172 if (isa<BinaryPropertyObject>(property_object_table[theprop])) {173 functionName = "__get_" + property_enum_name[theprop] + "_Y";174 }175 else {176 UnicodePropertyExpressionError("Error: property " + property_full_name[theprop] + " specified without a value");177 }178 }179 else {180 UnicodePropertyExpressionError("Expected a general category, script or binary property name but '" + value + "' was found instead");181 }182 }183 }184 assert (functionName.length() > 0);185 return functionName;186 }187 188 117 const std::string & getPropertyValueGrepString(const std::string & prop) { 189 118 auto propit = alias_map.find(canonicalize_value_name(prop)); … … 216 145 else { 217 146 // No namespace (property) name. Try as a general category. 
218 int valcode = GetPropertyValueEnumCode(gc, value); 147 const auto & gcobj = cast<EnumeratedPropertyObject>(property_object_table[gc]); 148 int valcode = gcobj->GetPropertyValueEnumCode(value); 219 149 if (valcode >= 0) { 220 return cast<EnumeratedPropertyObject>(property_object_table[gc])->GetCodepointSet(valcode); 221 } 222 valcode = GetPropertyValueEnumCode(sc, value); 150 return gcobj->GetCodepointSet(valcode); 151 } 152 const auto & scObj = cast<EnumeratedPropertyObject>(property_object_table[sc]); 153 valcode = scObj->GetPropertyValueEnumCode(value); 223 154 if (valcode >= 0) { 224 return cast<EnumeratedPropertyObject>(property_object_table[sc])->GetCodepointSet(valcode);155 return scObj->GetCodepointSet(valcode); 225 156 } 226 157 // Try as a binary property.
Note: See TracChangeset for help on using the changeset viewer.