source: icGREP/icgrep-devel/icgrep/cc/alphabet.cpp

Last change on this file was 6297, checked in by cameron, 8 months ago

Merge branch 'master' of https://cs-git-research.cs.surrey.sfu.ca/cameron/parabix-devel

File size: 1.8 KB
Line 
1/*
2 *  Copyright (c) 2017 International Characters.
3 *  This software is licensed to the public under the Open Software License 3.0.
4 *  icgrep is a trademark of International Characters.
5 */
6
7
8#include "alphabet.h"
9#include <llvm/Support/ErrorHandling.h>
10
11namespace cc {
12   
13UnicodeMappableAlphabet::UnicodeMappableAlphabet(const std::string alphabetName,
14                                                 unsigned unicodeCommon,
15                                                 std::vector <UCD::codepoint_t> aboveCommon)
16: Alphabet(std::move(alphabetName), ClassTypeId::UnicodeMappableAlphabet),
17mUnicodeCommon(unicodeCommon),
18mAboveCommon(std::move(aboveCommon)) {
19
20}
21
22UCD::codepoint_t UnicodeMappableAlphabet::toUnicode(const unsigned n) const {
23    UCD::codepoint_t cp = n;
24    if (n < mUnicodeCommon) return cp;
25    assert(n < mUnicodeCommon + mAboveCommon.size());
26    return mAboveCommon[n - mUnicodeCommon];
27}
28 
29unsigned UnicodeMappableAlphabet::fromUnicode(const UCD::codepoint_t codepoint) const {
30    unsigned n = codepoint;
31    if (n < mUnicodeCommon) return n;
32    for (unsigned i = 0; i < mAboveCommon.size(); i++) {
33        if (mAboveCommon[i] == codepoint) return mUnicodeCommon + i;
34    }
35    llvm::report_fatal_error("fromUnicode: codepoint not found in alphabet.");
36}
37
38CodeUnitAlphabet::CodeUnitAlphabet(const std::string alphabetName, uint8_t bits) :
39Alphabet(std::move(alphabetName), ClassTypeId::CodeUnitAlphabet)
40, mCodeUnitBits(bits) {
41
42}
43
44const UnicodeMappableAlphabet Unicode("Unicode", UCD::UNICODE_MAX, {});
45
46const UnicodeMappableAlphabet ASCII("ASCII", 0x7F, {});
47
48const UnicodeMappableAlphabet ISO_Latin1("ISO_Latin1", 0xFF, {});
49
50const CodeUnitAlphabet Byte("Byte", 8);
51   
52const CodeUnitAlphabet UTF8("UTF8", 8);
53
54const CodeUnitAlphabet UTF16("UTF16", 16);
55   
56}
Note: See TracBrowser for help on using the repository browser.