source: icGREP/icgrep-devel/icgrep/UCD/PropertyObjects.cpp @ 5236

Last change on this file since 5236 was 5236, checked in by nmedfort, 3 years ago

Reverted last change to support gcc 4.9 prior to changing UCD property table structure.

File size: 6.8 KB
Line 
1/*
2 *  Copyright (c) 2014 International Characters, Inc.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters, Inc.
5 *
6 */
7
8#include "PropertyObjects.h"
9#include "PropertyObjectTable.h"
10#include <llvm/Support/Casting.h>
11#include <algorithm>
12#include <assert.h>
13#include <sstream>
14
15using namespace llvm;
16
17namespace UCD {
18
// Produce the canonical spelling of a property or property-value name:
// every '_', ' ' and '-' is removed and each remaining character is
// lower-cased according to the current global locale.
//
// prop_or_val : the name to canonicalize (may be empty).
// returns     : the canonical name; empty when the input is empty or
//               consists only of the removed separator characters.
std::string canonicalize_value_name(const std::string & prop_or_val) {
    // A default-constructed std::locale is a copy of the current global locale.
    const std::locale loc;
    // Resolve the character-classification facet once instead of once per character.
    const auto & facet = std::use_facet<std::ctype<char>>(loc);

    std::string canonical;
    // The result can never be longer than the input.
    canonical.reserve(prop_or_val.size());
    for (const char c : prop_or_val) {
        if ((c != '_') && (c != ' ') && (c != '-')) {
            canonical.push_back(facet.tolower(c));
        }
    }
    return canonical;
}
30
31int PropertyObject::GetPropertyValueEnumCode(const std::string & value_spec) {
32    throw std::runtime_error("Property " + value_spec + " unsupported.");
33}
34const std::string & PropertyObject::GetPropertyValueGrepString() {
35    throw std::runtime_error("Property Value Grep String unsupported.");
36}
37
38UnicodeSet UnsupportedPropertyObject::GetCodepointSet(const std::string &) {
39    throw std::runtime_error("Property " + UCD::property_full_name[the_property] + " unsupported.");
40}
41
42UnicodeSet UnsupportedPropertyObject::GetCodepointSet(const int) {
43    throw std::runtime_error("Property " + UCD::property_full_name[the_property] + " unsupported.");
44}
45
46const UnicodeSet & EnumeratedPropertyObject::GetCodepointSet(const std::string & value_spec) {
47    const int property_enum_val = GetPropertyValueEnumCode(value_spec);
48    if (property_enum_val < 0) {
49        throw std::runtime_error("Enumerated Property " + UCD::property_full_name[the_property] + ": unknown value: " + value_spec);
50    }
51    return GetCodepointSet(property_enum_val);
52}
53
54const UnicodeSet & EnumeratedPropertyObject::GetCodepointSet(const int property_enum_val) const {
55    assert (property_enum_val >= 0);
56    return *(property_value_sets[property_enum_val]);
57}
58
59std::vector<UnicodeSet> & EnumeratedPropertyObject::GetEnumerationBasisSets() {
60    // Return the previously computed vector of basis sets, if it exists.
61    if (LLVM_UNLIKELY(enumeration_basis_sets.empty())) {
62        // Otherwise compute and return.
63        // Basis set i is the set of all codepoints whose numerical enumeration code e
64        // has bit i set, i.e., (e >> i) & 1 == 1.
65        unsigned basis_count = 1;
66        while ((1UL << basis_count) < independent_enum_count) {
67            basis_count++;
68        }
69        for (unsigned i = 0; i < basis_count; i++) {
70            enumeration_basis_sets.push_back(UnicodeSet());
71            for (unsigned e = 0; e < independent_enum_count; e++) {
72                if (((e >> i) & 1UL) == 0) {
73                    enumeration_basis_sets[i] = enumeration_basis_sets[i] + *property_value_sets[e];
74                }
75            }
76        }
77    }
78    return enumeration_basis_sets;
79}
80
81const std::string & EnumeratedPropertyObject::GetPropertyValueGrepString() {
82    if (LLVM_LIKELY(mPropertyValueGrepString.empty())) {
83        std::stringstream buffer;
84        for (unsigned i = 0; i != property_value_full_names.size(); i++) {
85            buffer << canonicalize_value_name(property_value_full_names[i]) + "\n";
86        }
87        for (unsigned i = 0; i != property_value_enum_names.size(); i++) {
88            buffer << canonicalize_value_name(property_value_enum_names[i]) + "\n";
89        }
90        mPropertyValueGrepString = buffer.str();
91    }
92    return mPropertyValueGrepString;
93}
94
95int EnumeratedPropertyObject::GetPropertyValueEnumCode(const std::string & value_spec) {
96    // The canonical full names are not stored in the precomputed alias map,
97    // to save space in the executable.   Add them if the property is used.
98    if (uninitialized) {
99        for (unsigned i = 0; i != property_value_full_names.size(); i++) {
100            property_value_aliases.insert({canonicalize_value_name(property_value_full_names[i]), i});
101        }
102        for (unsigned i = 0; i != property_value_enum_names.size(); i++) {
103            property_value_aliases.insert({canonicalize_value_name(property_value_enum_names[i]), i});
104        }
105        uninitialized = false;
106    }
107    const auto valit = property_value_aliases.find(value_spec);
108    if (valit == property_value_aliases.end())
109        return -1;
110    return valit->second;
111}
112
113PropertyObject::iterator ExtensionPropertyObject::begin() const {
114    if (const auto * obj = dyn_cast<EnumeratedPropertyObject>(property_object_table[base_property])) {
115        return obj->begin();
116    }
117    throw std::runtime_error("Iterators unsupported for this type of PropertyObject.");
118}
119
120PropertyObject::iterator ExtensionPropertyObject::end() const {
121    if (const auto * obj = dyn_cast<EnumeratedPropertyObject>(property_object_table[base_property])) {
122        return obj->end();
123    }
124    throw std::runtime_error("Iterators unsupported for this type of PropertyObject.");
125}
126
127const UnicodeSet & ExtensionPropertyObject::GetCodepointSet(const std::string & value_spec) {
128    int property_enum_val = GetPropertyValueEnumCode(value_spec);
129    if (property_enum_val == -1) {
130        throw std::runtime_error("Extension Property " + UCD::property_full_name[the_property] +  ": unknown value: " + value_spec);
131    }
132    return GetCodepointSet(property_enum_val);
133}
134
135const UnicodeSet & ExtensionPropertyObject::GetCodepointSet(const int property_enum_val) const {
136    assert (property_enum_val >= 0);
137    return *(property_value_sets[property_enum_val]);
138}
139
140int ExtensionPropertyObject::GetPropertyValueEnumCode(const std::string & value_spec) {
141    return property_object_table[base_property]->GetPropertyValueEnumCode(value_spec);
142}
143
144const std::string & ExtensionPropertyObject::GetPropertyValueGrepString() {
145    return property_object_table[base_property]->GetPropertyValueGrepString();
146}
147
148const UnicodeSet & BinaryPropertyObject::GetCodepointSet(const std::string & value_spec) {
149    int property_enum_val = Binary_ns::Y;
150    if (value_spec.length() != 0) {
151        auto valit = Binary_ns::aliases_only_map.find(value_spec);
152        if (valit == Binary_ns::aliases_only_map.end()) {
153            throw std::runtime_error("Binary Property " + UCD::property_full_name[the_property] +  ": bad value: " + value_spec);
154        }
155        property_enum_val = valit->second;
156    }
157    return GetCodepointSet(property_enum_val);
158}
159
160const UnicodeSet & BinaryPropertyObject::GetCodepointSet(const int property_enum_val) {
161    if (property_enum_val == Binary_ns::Y) {
162        return mY;
163    }
164    if (mNoUninitialized) {
165        mN = ~mY;
166        mNoUninitialized = false;
167    }
168    return mN;
169}
170
171const std::string & BinaryPropertyObject::GetPropertyValueGrepString() {
172    if (mPropertyValueGrepString.empty()) {
173        std::stringstream buffer;
174        for (const auto & prop : Binary_ns::aliases_only_map) {
175            buffer << std::get<0>(prop) + "\n";
176        }
177        mPropertyValueGrepString = buffer.str();
178    }
179    return mPropertyValueGrepString;
180}
181
182}
Note: See TracBrowser for help on using the repository browser.