Ignore:
Timestamp:
Jun 23, 2015, 9:29:39 AM (4 years ago)
Author:
nmedfort
Message:

Replaced CharSetItem with a std::pair.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/utf8_encoder.cpp

    r4612 r4614  
    2727            else if (cc->size() > 1) {
    2828                std::vector<RE *> alt;
    29                 for (const CharSetItem & item : *cc) {
    30                     alt.push_back(rangeToUTF8(item));
     29                for (const interval_t & i : *cc) {
     30                    alt.push_back(rangeToUTF8(i));
    3131                }
    3232                name->setDefinition(makeAlt(alt.begin(), alt.end()));
     
    3838}
    3939
    40 RE * UTF8_Encoder::rangeToUTF8(const CharSetItem & item) {
    41     const auto min = lenUTF8(item.lo_codepoint);
    42     const auto max = lenUTF8(item.hi_codepoint);
     40RE * UTF8_Encoder::rangeToUTF8(const interval_t & item) {
     41    const auto min = lenUTF8(lo_codepoint(item));
     42    const auto max = lenUTF8(hi_codepoint(item));
    4343    if (min < max) {
    4444        const auto m = maxCodePoint(min);
    45         return makeAlt({rangeToUTF8(CharSetItem(item.lo_codepoint, m)), rangeToUTF8(CharSetItem(m + 1, item.hi_codepoint))});
     45        return makeAlt({rangeToUTF8(interval_t(lo_codepoint(item), m)), rangeToUTF8(interval_t(m + 1, hi_codepoint(item)))});
    4646    }
    4747    else {
    48         return rangeToUTF8(item.lo_codepoint, item.hi_codepoint, 1, max);
     48        return rangeToUTF8(lo_codepoint(item), hi_codepoint(item), 1, max);
    4949    }
    5050}
     
    7676}
    7777
    78 inline bool UTF8_Encoder::isUTF8Prefix(const unsigned cp) {
     78inline bool UTF8_Encoder::isUTF8Prefix(const codepoint_t cp) {
    7979    return (cp >= 0xC2) && (cp <= 0xF4);
    8080}
    8181
    82 inline codepoint_t UTF8_Encoder::u8byte(const codepoint_t codepoint, const unsigned n)
    83 {
     82inline codepoint_t UTF8_Encoder::u8byte(const codepoint_t cp, const unsigned n) {
    8483    codepoint_t retVal = 0;
    85 
    86     const unsigned len = lenUTF8(codepoint);
    87 
     84    const unsigned len = lenUTF8(cp);
    8885    if (n == 1) {
    8986        switch (len) {
    90             case 1: retVal = codepoint; break;
    91             case 2: retVal = 0xC0 | (codepoint >> 6); break;
    92             case 3: retVal = 0xE0 | (codepoint >> 12); break;
    93             case 4: retVal = 0xF0 | (codepoint >> 18); break;
     87            case 1: retVal = cp; break;
     88            case 2: retVal = 0xC0 | (cp >> 6); break;
     89            case 3: retVal = 0xE0 | (cp >> 12); break;
     90            case 4: retVal = 0xF0 | (cp >> 18); break;
    9491        }
    9592    }
    9693    else {
    97         retVal = 0x80 | ((codepoint >> (6 * (len - n))) & 0x3F);
     94        retVal = 0x80 | ((cp >> (6 * (len - n))) & 0x3F);
    9895    }
    99 
    10096    return retVal;
    10197}
    10298
    103 inline unsigned UTF8_Encoder::lenUTF8(const unsigned cp) {
     99inline unsigned UTF8_Encoder::lenUTF8(const codepoint_t cp) {
    104100    if (cp <= 0x7F) {
    105101        return 1;
     
    116112}
    117113
    118 inline unsigned UTF8_Encoder::maxCodePoint(const unsigned length) {
     114inline codepoint_t UTF8_Encoder::maxCodePoint(const unsigned length) {
    119115    if (length == 1) {
    120116        return 0x7F;
     
    132128}
    133129
     130inline bool UTF8_Encoder::isLowCodePointAfterByte(const codepoint_t cp, const unsigned index) {
     131    const auto l = lenUTF8(cp);
     132    for (auto i = index; i != l; ++i) {
     133        if (u8byte(cp, i + 1) != 0x80) {
     134            return false;
     135        }
     136    }
     137    return true;
     138}
     139
     140inline bool UTF8_Encoder::isHighCodePointAfterByte(const codepoint_t cp, const unsigned index) {
     141    const auto l = lenUTF8(cp);
     142    for (auto i = index; i != l; ++i) {
     143        if (u8byte(cp, i + 1) != 0xBF) {
     144            return false;
     145        }
     146    }
     147    return true;
     148}
     149
     150
    134151inline CC * UTF8_Encoder::makeByteRange(const codepoint_t lo, const codepoint_t hi) {
    135152    return makeCC(lo, hi);
Note: See TracChangeset for help on using the changeset viewer.