source: icGREP/icgrep-devel/icgrep/UCD/PropertyObjects.cpp @ 5233

Last change on this file since 5233 was 5233, checked in by nmedfort, 2 years ago

Bug fixes for Carry Manager and issues reported by Fahad

File size: 6.7 KB
Line 
1/*
2 *  Copyright (c) 2014 International Characters, Inc.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters, Inc.
5 *
6 */
7
#include "PropertyObjects.h"
#include "PropertyObjectTable.h"
#include <assert.h>
#include <algorithm>
#include <locale>
#include <sstream>
#include <stdexcept>
#include <string>
#include <llvm/Support/Casting.h>
14
15using namespace llvm;
16
17namespace UCD {
18
// Reduce a property or property-value name to the canonical form used as a
// lookup key: every '_', ' ' and '-' is dropped and each remaining character
// is lowercased.
//
// Lowercasing is done with the classic "C" locale rather than a copy of the
// global locale, so the result does not change if the host program installs
// a different global locale.
//
// @param prop_or_val  the name as written by the user or stored in a table.
// @return             the canonical form; empty if the input contains only
//                     separator characters (or is itself empty).
std::string canonicalize_value_name(const std::string & prop_or_val) {
    const std::locale & c_locale = std::locale::classic();
    std::string canonical;
    // The result can never be longer than the input.
    canonical.reserve(prop_or_val.size());
    for (const char c : prop_or_val) {
        if ((c == '_') || (c == ' ') || (c == '-')) {
            continue;   // separators carry no meaning in loose matching
        }
        canonical.push_back(std::tolower(c, c_locale));
    }
    return canonical;
}
29
30int PropertyObject::GetPropertyValueEnumCode(const std::string & value_spec) {
31    throw std::runtime_error("Property " + value_spec + " unsupported.");
32}
33const std::string& PropertyObject::GetPropertyValueGrepString() {
34    throw std::runtime_error("Property Value Grep String unsupported.");
35}
36
37UnicodeSet UnsupportedPropertyObject::GetCodepointSet(const std::string &) {
38    throw std::runtime_error("Property " + UCD::property_full_name[the_property] + " unsupported.");
39}
40
41UnicodeSet UnsupportedPropertyObject::GetCodepointSet(const int) {
42    throw std::runtime_error("Property " + UCD::property_full_name[the_property] + " unsupported.");
43}
44
45const UnicodeSet & EnumeratedPropertyObject::GetCodepointSet(const std::string & value_spec) {
46    int property_enum_val = GetPropertyValueEnumCode(value_spec);
47    if (property_enum_val == -1) {
48        throw std::runtime_error("Enumerated Property " + UCD::property_full_name[the_property] +  ": unknown value: " + value_spec);
49    }
50    return GetCodepointSet(property_enum_val);
51}
52
53const UnicodeSet & EnumeratedPropertyObject::GetCodepointSet(const int property_enum_val) const {
54    assert (property_enum_val >= 0);
55    return *(property_value_sets[property_enum_val]);
56}
57
58std::vector<UnicodeSet> & EnumeratedPropertyObject::GetEnumerationBasisSets() {
59    // Return the previously computed vector of basis sets, if it exists.
60    if (LLVM_UNLIKELY(enumeration_basis_sets.empty())) {
61        // Otherwise compute and return.
62        // Basis set i is the set of all codepoints whose numerical enumeration code e
63        // has bit i set, i.e., (e >> i) & 1 == 1.
64        unsigned basis_count = 1;
65        while ((1 << basis_count) < independent_enum_count) basis_count++;
66        for (unsigned i = 0; i < basis_count; i++) {
67            enumeration_basis_sets.push_back(UnicodeSet());
68            for (unsigned e = 0; e < independent_enum_count; e++) {
69                if (((e >> i) & 1) == 0) {
70                    enumeration_basis_sets[i] = enumeration_basis_sets[i] + *property_value_sets[e];
71                }
72            }
73        }
74    }
75    return enumeration_basis_sets;
76};
77
78const std::string& EnumeratedPropertyObject::GetPropertyValueGrepString() {
79    if (!property_value_grep_string.size()) {
80        for (unsigned i = 0; i != property_value_full_names.size(); i++) {
81            property_value_grep_string += canonicalize_value_name(property_value_full_names[i]) + "\n";
82        }
83        for (unsigned i = 0; i != property_value_enum_names.size(); i++) {
84            property_value_grep_string += canonicalize_value_name(property_value_enum_names[i]) + "\n";
85        }
86    }
87    return property_value_grep_string;
88}
89
90int EnumeratedPropertyObject::GetPropertyValueEnumCode(const std::string & value_spec) {
91    // The canonical full names are not stored in the precomputed alias map,
92    // to save space in the executable.   Add them if the property is used.
93    if (uninitialized) {
94        for (unsigned i = 0; i != property_value_full_names.size(); i++) {
95            property_value_aliases.insert({canonicalize_value_name(property_value_full_names[i]), i});
96        }
97        for (unsigned i = 0; i != property_value_enum_names.size(); i++) {
98            property_value_aliases.insert({canonicalize_value_name(property_value_enum_names[i]), i});
99        }
100        uninitialized = false;
101    }
102    const auto valit = property_value_aliases.find(value_spec);
103    if (valit == property_value_aliases.end())
104        return -1;
105    return valit->second;
106}
107
108PropertyObject::iterator ExtensionPropertyObject::begin() const {
109    if (const auto * obj = dyn_cast<EnumeratedPropertyObject>(property_object_table[base_property])) {
110        return obj->begin();
111    }
112    throw std::runtime_error("Iterators unsupported for this type of PropertyObject.");
113}
114
115PropertyObject::iterator ExtensionPropertyObject::end() const {
116    if (const auto * obj = dyn_cast<EnumeratedPropertyObject>(property_object_table[base_property])) {
117        return obj->end();
118    }
119    throw std::runtime_error("Iterators unsupported for this type of PropertyObject.");
120}
121
122const UnicodeSet & ExtensionPropertyObject::GetCodepointSet(const std::string & value_spec) {
123    int property_enum_val = GetPropertyValueEnumCode(value_spec);
124    if (property_enum_val == -1) {
125        throw std::runtime_error("Extension Property " + UCD::property_full_name[the_property] +  ": unknown value: " + value_spec);
126    }
127    return GetCodepointSet(property_enum_val);
128}
129
130const UnicodeSet & ExtensionPropertyObject::GetCodepointSet(const int property_enum_val) const {
131    assert (property_enum_val >= 0);
132    return *(property_value_sets[property_enum_val]);
133}
134
135int ExtensionPropertyObject::GetPropertyValueEnumCode(const std::string & value_spec) {
136    return property_object_table[base_property]->GetPropertyValueEnumCode(value_spec);
137}
138
139const std::string& ExtensionPropertyObject::GetPropertyValueGrepString() {
140    return property_object_table[base_property]->GetPropertyValueGrepString();
141}
142
143const UnicodeSet & BinaryPropertyObject::GetCodepointSet(const std::string & value_spec) {
144    int property_enum_val = Binary_ns::Y;
145    if (value_spec.length() != 0) {
146        auto valit = Binary_ns::aliases_only_map.find(value_spec);
147        if (valit == Binary_ns::aliases_only_map.end()) {
148            throw std::runtime_error("Binary Property " + UCD::property_full_name[the_property] +  ": bad value: " + value_spec);
149        }
150        property_enum_val = valit->second;
151    }
152    return GetCodepointSet(property_enum_val);
153}
154
155const UnicodeSet & BinaryPropertyObject::GetCodepointSet(const int property_enum_val) {
156    if (property_enum_val == Binary_ns::Y) {
157        return mY;
158    }
159    if (mNoUninitialized) {
160        mN = ~mY;
161        mNoUninitialized = false;
162    }
163    return mN;
164}
165
166const std::string& BinaryPropertyObject::GetPropertyValueGrepString() {
167    if (!property_value_grep_string.size()) {
168        for (auto iter = Binary_ns::aliases_only_map.begin(), end = Binary_ns::aliases_only_map.end(); iter != end; ++iter) {
169            property_value_grep_string += iter->first + "\n";
170        }
171    }
172    return property_value_grep_string;
173}
174
175}
Note: See TracBrowser for help on using the repository browser.