source: icGREP/icgrep-devel/icgrep/cc/alphabet.cpp @ 5630

Last change on this file since 5630 was 5620, checked in by nmedfort, 22 months ago

Bug fixes for multigrep mode. Optional PabloKernel branch hit counter added. Minor optimizations.

File size: 1.9 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7
8#include "alphabet.h"
9
10
11// Default implementation for simple Unicode subsets.  The codepoint value
12// of the nth character is just the given value n, if it is in range.
13
14UCD::codepoint_t Alphabet::toUnicode(const unsigned n) const {
15    UCD::codepoint_t cp = n;
16    if (mCharSet.contains(cp)) return cp;
17    throw std::runtime_error("toUnicode: n too large.");
18}   
19 
20// Default implementation for simple Unicode subsets.  The ord value
21// of a Unicode codepoint is just the given codepoint, if it is in range.
22
23unsigned Alphabet::fromUnicode(const UCD::codepoint_t codepoint) const {
24    if (mCharSet.contains(codepoint)) return codepoint;
25    throw std::runtime_error("fromUnicode: codepoint not found in alphabet.");
26}
27
28
29template <class uint_t> ExtendedASCII<uint_t>::ExtendedASCII(std::string alphabetName, const uint_t (& extendedTable)[128]) {
30    mAlphabetName = alphabetName;
31    mExtendedCharacterTable = extendedTable;
32    mCharSet = UCD::UnicodeSet(0, 127);
33    for (unsigned i = 0; i < 128; i++) {
34        mCharSet.insert(extendedTable[i]);
35    }
36}   
37
38template <class uint_t> UCD::codepoint_t ExtendedASCII<uint_t>::toUnicode(const unsigned n) const {
39    //  The first 128 characters are just ASCII.
40    if (n < 128) return n;
41    if (n < 256) return mExtendedCharacterTable[n-128];
42    throw std::runtime_error("toUnicode: n too large.");
43}   
44
45template <class uint_t> unsigned ExtendedASCII<uint_t>::fromUnicode(const UCD::codepoint_t codepoint) const {
46    if (codepoint < 128) return codepoint;
47    for (unsigned i = 0; i < 128; i++) {
48        if (mExtendedCharacterTable[i] == codepoint) return i + 128;
49    }
50    throw std::runtime_error("fromUnicode: codepoint not found in alphabet.");
51}
52
Note: See TracBrowser for help on using the repository browser.