source: icGREP/icgrep-devel/icgrep/UCD/PropertyObjects.cpp @ 5158

Last change on this file since 5158 was 5158, checked in by cameron, 3 years ago

Support for enumeration basis sets

File size: 5.3 KB
Line 
1/*
2 *  Copyright (c) 2014 International Characters, Inc.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters, Inc.
5 *
6 */
7
8#include "PropertyObjects.h"
9#include "PropertyObjectTable.h"
10#include <sstream>
11#include <algorithm>
12#include <assert.h>
13#include <llvm/Support/Casting.h>
14
15using namespace llvm;
16
17namespace UCD {
18
// Produce the canonical lookup form of a property or value name: every
// underscore, space and hyphen is dropped, and each remaining character is
// lower-cased using a default-constructed std::locale.
std::string canonicalize_value_name(const std::string & prop_or_val) {
    const std::locale loc;
    std::string canonical;
    // The result can never be longer than the input.
    canonical.reserve(prop_or_val.size());
    for (const char ch : prop_or_val) {
        switch (ch) {
            case '_':
            case ' ':
            case '-':
                // Separator characters do not contribute to the canonical name.
                break;
            default:
                canonical.push_back(std::tolower(ch, loc));
                break;
        }
    }
    return canonical;
}
29
30int PropertyObject::GetPropertyValueEnumCode(const std::string & value_spec) {
31    throw std::runtime_error("Property " + value_spec + " unsupported.");
32}
33
34UnicodeSet UnsupportedPropertyObject::GetCodepointSet(const std::string &) {
35    throw std::runtime_error("Property " + UCD::property_full_name[the_property] + " unsupported.");
36}
37
38UnicodeSet UnsupportedPropertyObject::GetCodepointSet(const int) {
39    throw std::runtime_error("Property " + UCD::property_full_name[the_property] + " unsupported.");
40}
41
42const UnicodeSet & EnumeratedPropertyObject::GetCodepointSet(const std::string & value_spec) {
43    int property_enum_val = GetPropertyValueEnumCode(value_spec);
44    if (property_enum_val == -1) {
45        throw std::runtime_error("Enumerated Property " + UCD::property_full_name[the_property] +  ": unknown value: " + value_spec);
46    }
47    return GetCodepointSet(property_enum_val);
48}
49
50const UnicodeSet & EnumeratedPropertyObject::GetCodepointSet(const int property_enum_val) const {
51    assert (property_enum_val >= 0);
52    return *(property_value_sets[property_enum_val]);
53}
54
55int EnumeratedPropertyObject::GetPropertyValueEnumCode(const std::string & value_spec) {
56    // The canonical full names are not stored in the precomputed alias map,
57    // to save space in the executable.   Add them if the property is used.
58    if (uninitialized) {
59        for (unsigned i = 0; i != property_value_full_names.size(); i++) {
60            property_value_aliases.insert({canonicalize_value_name(property_value_full_names[i]), i});
61        }
62        for (unsigned i = 0; i != property_value_enum_names.size(); i++) {
63            property_value_aliases.insert({canonicalize_value_name(property_value_enum_names[i]), i});
64        }
65        uninitialized = false;
66    }
67    const auto valit = property_value_aliases.find(value_spec);
68    if (valit == property_value_aliases.end())
69        return -1;
70    return valit->second;
71}
72
73std::vector<UnicodeSet *> EnumeratedPropertyObject::GetEnumerationBasisSets() {
74    // Basis set i is the set of all codepoints whose numerical enumeration code e
75    // has bit i set, i.e., (e >> i) & 1 == 1.
76    unsigned basis_count = 1;
77    while ((1 << basis_count) < independent_enum_limit) basis_count++;
78    std::vector<UnicodeSet *> basis_set;
79    for (unsigned i == 0; i < basis_count; i++) {
80        basis_set.push_back(new UnicodeSet());
81        for (unsigned e = 0; e < independent_enum_limit; e++) {
82            if (((e >> i) & 1) == 0) {
83                basis_set[i] = basis_set[i] + property_value_sets[e];
84            }
85        }
86    }
87};
88
89   
90   
91PropertyObject::iterator ExtensionPropertyObject::begin() const {
92    if (const auto * obj = dyn_cast<EnumeratedPropertyObject>(property_object_table[base_property])) {
93        return obj->begin();
94    }
95    throw std::runtime_error("Iterators unsupported for this type of PropertyObject.");
96}
97
98PropertyObject::iterator ExtensionPropertyObject::end() const {
99    if (const auto * obj = dyn_cast<EnumeratedPropertyObject>(property_object_table[base_property])) {
100        return obj->end();
101    }
102    throw std::runtime_error("Iterators unsupported for this type of PropertyObject.");
103}
104
105const UnicodeSet & ExtensionPropertyObject::GetCodepointSet(const std::string & value_spec) {
106    int property_enum_val = GetPropertyValueEnumCode(value_spec);
107    if (property_enum_val == -1) {
108        throw std::runtime_error("Extension Property " + UCD::property_full_name[the_property] +  ": unknown value: " + value_spec);
109    }
110    return GetCodepointSet(property_enum_val);
111}
112
113const UnicodeSet & ExtensionPropertyObject::GetCodepointSet(const int property_enum_val) const {
114    assert (property_enum_val >= 0);
115    return *(property_value_sets[property_enum_val]);
116}
117
118int ExtensionPropertyObject::GetPropertyValueEnumCode(const std::string & value_spec) {
119    return property_object_table[base_property]->GetPropertyValueEnumCode(value_spec);
120}
121
122const UnicodeSet & BinaryPropertyObject::GetCodepointSet(const std::string & value_spec) {
123    int property_enum_val = Binary_ns::Y;
124    if (value_spec.length() != 0) {
125        auto valit = Binary_ns::aliases_only_map.find(value_spec);
126        if (valit == Binary_ns::aliases_only_map.end()) {
127            throw std::runtime_error("Binary Property " + UCD::property_full_name[the_property] +  ": bad value: " + value_spec);
128        }
129        property_enum_val = valit->second;
130    }
131    return GetCodepointSet(property_enum_val);
132}
133
134const UnicodeSet & BinaryPropertyObject::GetCodepointSet(const int property_enum_val) {
135    if (property_enum_val == Binary_ns::Y) {
136        return mY;
137    }
138    if (mNoUninitialized) {
139        mN = ~mY;
140        mNoUninitialized = false;
141    }
142    return mN;
143}
144
145}
Note: See TracBrowser for help on using the repository browser.