source: icGREP/icgrep-devel/icgrep/cc/alphabet.cpp @ 5796

Last change on this file since 5796 was 5796, checked in by cameron, 22 months ago

Alphabet fixes

File size: 1.6 KB
RevLine 
[5279]1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7
#include "alphabet.h"

#include <utility>

#include <llvm/Support/ErrorHandling.h>
[5279]10
[5795]11namespace cc {
12   
13UnicodeMappableAlphabet::UnicodeMappableAlphabet(std::string alphabetName,
14                                                 unsigned unicodeCommon,
15                                                 std::vector <UCD::codepoint_t> aboveCommon) :
16    Alphabet(alphabetName), mUnicodeCommon(unicodeCommon), mAboveCommon(aboveCommon) {}
[5279]17
[5795]18UCD::codepoint_t UnicodeMappableAlphabet::toUnicode(const unsigned n) const {
[5279]19    UCD::codepoint_t cp = n;
[5795]20    if (n < mUnicodeCommon) return cp;
21    assert(n < mUnicodeCommon + mAboveCommon.size());
22    return mAboveCommon[n - mUnicodeCommon];
23}
[5279]24 
[5795]25unsigned UnicodeMappableAlphabet::fromUnicode(const UCD::codepoint_t codepoint) const {
26    unsigned n = codepoint;
27    if (n < mUnicodeCommon) return n;
28    for (unsigned i = 0; i < mAboveCommon.size(); i++) {
29        if (mAboveCommon[i] == codepoint) return mUnicodeCommon + i;
30    }
31    llvm::report_fatal_error("fromUnicode: codepoint not found in alphabet.");
[5279]32}
33
[5795]34CodeUnitAlphabet::CodeUnitAlphabet(std::string alphabetName, uint8_t bits) :
35    Alphabet(alphabetName), mCodeUnitBits(bits) {}
[5279]36
[5796]37const UnicodeMappableAlphabet Unicode("Unicode", UCD::UNICODE_MAX, {});
38
39const UnicodeMappableAlphabet ASCII("ASCII", 0x7F, {});
40
41const UnicodeMappableAlphabet ISO_Latin1("ISO_Latin1", 0xFF, {});
42
43const CodeUnitAlphabet Byte("Byte", 8);
44
[5279]45}
Note: See TracBrowser for help on using the repository browser.