[5279] | 1 | /* |
---|

| 2 | * Copyright (c) 2017 International Characters. |
---|

| 3 | * This software is licensed to the public under the Open Software License 3.0. |
---|

| 4 | * icgrep is a trademark of International Characters. |
---|

| 5 | */ |
---|

| 6 | |
---|

| 7 | |
---|

| 8 | #include "alphabet.h" |
---|

| 9 | |
---|

| 10 | |
---|

| 11 | // Default implementation for simple Unicode subsets. The codepoint value |
---|

| 12 | // of the nth character is just the given value n, if it is in range. |
---|

| 13 | |
---|

[5620] | 14 | UCD::codepoint_t Alphabet::toUnicode(const unsigned n) const { |
---|

[5279] | 15 | UCD::codepoint_t cp = n; |
---|

| 16 | if (mCharSet.contains(cp)) return cp; |
---|

| 17 | throw std::runtime_error("toUnicode: n too large."); |
---|

| 18 | } |
---|

| 19 | |
---|

| 20 | // Default implementation for simple Unicode subsets. The ord value |
---|

| 21 | // of a Unicode codepoint is just the given codepoint, if it is in range. |
---|

| 22 | |
---|

[5620] | 23 | unsigned Alphabet::fromUnicode(const UCD::codepoint_t codepoint) const { |
---|

[5279] | 24 | if (mCharSet.contains(codepoint)) return codepoint; |
---|

| 25 | throw std::runtime_error("fromUnicode: codepoint not found in alphabet."); |
---|

| 26 | } |
---|

| 27 | |
---|

| 28 | |
---|

| 29 | template <class uint_t> ExtendedASCII<uint_t>::ExtendedASCII(std::string alphabetName, const uint_t (& extendedTable)[128]) { |
---|

| 30 | mAlphabetName = alphabetName; |
---|

| 31 | mExtendedCharacterTable = extendedTable; |
---|

| 32 | mCharSet = UCD::UnicodeSet(0, 127); |
---|

| 33 | for (unsigned i = 0; i < 128; i++) { |
---|

| 34 | mCharSet.insert(extendedTable[i]); |
---|

| 35 | } |
---|

| 36 | } |
---|

| 37 | |
---|

[5620] | 38 | template <class uint_t> UCD::codepoint_t ExtendedASCII<uint_t>::toUnicode(const unsigned n) const { |
---|

[5279] | 39 | // The first 128 characters are just ASCII. |
---|

| 40 | if (n < 128) return n; |
---|

| 41 | if (n < 256) return mExtendedCharacterTable[n-128]; |
---|

| 42 | throw std::runtime_error("toUnicode: n too large."); |
---|

| 43 | } |
---|

| 44 | |
---|

[5620] | 45 | template <class uint_t> unsigned ExtendedASCII<uint_t>::fromUnicode(const UCD::codepoint_t codepoint) const { |
---|

[5279] | 46 | if (codepoint < 128) return codepoint; |
---|

| 47 | for (unsigned i = 0; i < 128; i++) { |
---|

| 48 | if (mExtendedCharacterTable[i] == codepoint) return i + 128; |
---|

| 49 | } |
---|

| 50 | throw std::runtime_error("fromUnicode: codepoint not found in alphabet."); |
---|

| 51 | } |
---|

| 52 | |
---|