Ignore:
Timestamp:
Dec 21, 2017, 3:22:41 PM (13 months ago)
Author:
cameron
Message:

Adding Alphabet to CCs: initial check-in

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/cc/alphabet.cpp

    r5620 r5795  
    7 7
    8 8#include "alphabet.h"
     9#include <llvm/Support/ErrorHandling.h>
    9 10
     11namespace cc {
     12   
     13UnicodeMappableAlphabet::UnicodeMappableAlphabet(std::string alphabetName,
     14                                                 unsigned unicodeCommon,
     15                                                 std::vector <UCD::codepoint_t> aboveCommon) :
     16    Alphabet(alphabetName), mUnicodeCommon(unicodeCommon), mAboveCommon(aboveCommon) {}
    10 17
    11 // Default implementation for simple Unicode subsets.  The codepoint value
    12 // of the nth character is just the given value n, if it is in range.
    13 
    14 UCD::codepoint_t Alphabet::toUnicode(const unsigned n) const {
     18UCD::codepoint_t UnicodeMappableAlphabet::toUnicode(const unsigned n) const {
    15 19    UCD::codepoint_t cp = n;
    16     if (mCharSet.contains(cp)) return cp;
    17     throw std::runtime_error("toUnicode: n too large.");
    18 }   
     20    if (n < mUnicodeCommon) return cp;
     21    assert(n < mUnicodeCommon + mAboveCommon.size());
     22    return mAboveCommon[n - mUnicodeCommon];
     23}
    19 24
    20 // Default implementation for simple Unicode subsets.  The ord value
    21 // of a Unicode codepoint is just the given codepoint, if it is in range.
    22 
    23 unsigned Alphabet::fromUnicode(const UCD::codepoint_t codepoint) const {
    24     if (mCharSet.contains(codepoint)) return codepoint;
    25     throw std::runtime_error("fromUnicode: codepoint not found in alphabet.");
     25unsigned UnicodeMappableAlphabet::fromUnicode(const UCD::codepoint_t codepoint) const {
     26    unsigned n = codepoint;
     27    if (n < mUnicodeCommon) return n;
     28    for (unsigned i = 0; i < mAboveCommon.size(); i++) {
     29        if (mAboveCommon[i] == codepoint) return mUnicodeCommon + i;
     30    }
     31    llvm::report_fatal_error("fromUnicode: codepoint not found in alphabet.");
    26 32}
    27 33
     34CodeUnitAlphabet::CodeUnitAlphabet(std::string alphabetName, uint8_t bits) :
     35    Alphabet(alphabetName), mCodeUnitBits(bits) {}
    28 36
    29 template <class uint_t> ExtendedASCII<uint_t>::ExtendedASCII(std::string alphabetName, const uint_t (& extendedTable)[128]) {
    30     mAlphabetName = alphabetName;
    31     mExtendedCharacterTable = extendedTable;
    32     mCharSet = UCD::UnicodeSet(0, 127);
    33     for (unsigned i = 0; i < 128; i++) {
    34         mCharSet.insert(extendedTable[i]);
    35     }
    36 }   
    37 
    38 template <class uint_t> UCD::codepoint_t ExtendedASCII<uint_t>::toUnicode(const unsigned n) const {
    39     //  The first 128 characters are just ASCII.
    40     if (n < 128) return n;
    41     if (n < 256) return mExtendedCharacterTable[n-128];
    42     throw std::runtime_error("toUnicode: n too large.");
    43 }   
    44 
    45 template <class uint_t> unsigned ExtendedASCII<uint_t>::fromUnicode(const UCD::codepoint_t codepoint) const {
    46     if (codepoint < 128) return codepoint;
    47     for (unsigned i = 0; i < 128; i++) {
    48         if (mExtendedCharacterTable[i] == codepoint) return i + 128;
    49     }
    50     throw std::runtime_error("fromUnicode: codepoint not found in alphabet.");
    51 37}
    52 
Note: See TracChangeset for help on using the changeset viewer.