Changeset 4614


Ignore:
Timestamp:
Jun 23, 2015, 9:29:39 AM (4 years ago)
Author:
nmedfort
Message:

Replaced CharSetItem with a std::pair.

Location:
icGREP/icgrep-devel/icgrep
Files:
8 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/cc/cc_compiler.cpp

    r4612 r4614  
    9393    if (cc->size() > 2) {
    9494        bool combine = true;
    95         for (const CharSetItem & item : *cc) {
    96             if (item.lo_codepoint != item.hi_codepoint) {
     95        for (const interval_t & i : *cc) {
     96            if (lo_codepoint(i) != hi_codepoint(i)) {
    9797                combine = false;
    9898                break;
     
    102102            auto i = cc->cbegin();
    103103            for (auto j = i; ++j != cc->cend(); i = j) {
    104                 const CharSetItem & curr_item = *i;
    105                 const CharSetItem & next_item = *j;
    106                 if ((curr_item.lo_codepoint + 2) != next_item.lo_codepoint) {
     104                if ((lo_codepoint(i) + 2) != lo_codepoint(j)) {
    107105                    combine  = false;
    108106                    break;
     
    110108            }
    111109            if (combine) {
    112                 codepoint_t lo = cc->front().lo_codepoint;
    113                 codepoint_t hi = cc->back().lo_codepoint;
     110                codepoint_t lo = lo_codepoint(cc->front());
     111                codepoint_t hi = lo_codepoint(cc->back());
    114112                const codepoint_t mask = mEncoding.getMask();
    115113                lo &= (mask - 1);
     
    125123    }
    126124    PabloAST * expr = nullptr;
    127     for (const CharSetItem & item : *cc) {
    128         PabloAST * temp = char_or_range_expr(item.lo_codepoint, item.hi_codepoint, pb);
     125    for (const interval_t & i : *cc) {
     126        PabloAST * temp = char_or_range_expr(lo_codepoint(i), hi_codepoint(i), pb);
    129127        expr = (expr == nullptr) ? temp : pb.createOr(expr, temp);
    130128    }
     
    140138
    141139    std::vector<PabloAST*> bit_terms;
    142     unsigned i = 0;
    143 
    144     while (selected_bits)
    145     {
    146         unsigned test_bit = 1 << i;
    147         if (selected_bits & test_bit)
    148         {
    149             if ((pattern & test_bit) == 0)
    150             {
     140    for (unsigned i = 0; selected_bits; ++i) {
     141        unsigned test_bit = static_cast<unsigned>(1) << i;
     142        if ((selected_bits & test_bit) != 0) {
     143            if ((pattern & test_bit) == 0) {
    151144                bit_terms.push_back(pb.createNot(getBasisVar(i)));
    152145            }
    153             else
    154             {
     146            else {
    155147                bit_terms.push_back(getBasisVar(i));
    156148            }
    157149        }
    158         else
    159         {
     150        else {
    160151            bit_terms.push_back(pb.createOnes());
    161152        }
    162153        selected_bits &= ~test_bit;
    163         i++;
    164154    }
    165155
  • icGREP/icgrep-devel/icgrep/icgrep-devel.files

    r4613 r4614  
    321321../ucd/ucdcompiler.cpp
    322322../ucd/ucdcompiler.hpp
    323 ../ucd/utf8-util.hpp
  • icGREP/icgrep-devel/icgrep/re/printer_re.cpp

    r4405 r4614  
    4949        retVal += "\" ";
    5050
    51         for (const CharSetItem & item : *re_cc)
    52         {
     51        for (const auto & i : *re_cc) {
    5352            retVal += "[";
    54             retVal += std::to_string(item.lo_codepoint) + ",";
    55             retVal += std::to_string(item.hi_codepoint);
     53            retVal += std::to_string(lo_codepoint(i)) + ",";
     54            retVal += std::to_string(hi_codepoint(i));
    5655            retVal += "]";
    5756        }
  • icGREP/icgrep-devel/icgrep/re/re_cc.cpp

    r4613 r4614  
    99#include <UCD/CaseFolding_txt.h>
    1010#include <sstream>
    11 #include <iostream>
    1211
    1312namespace re {
    14 CC::CharSetAllocator CC::mCharSetAllocator;
     13CC::IntervalAllocator CC::mCharSetAllocator;
    1514
    1615CC::CC(const CC * cc1, const CC * cc2)
    1716: RE(ClassTypeId::CC)
    1817, mSparseCharSet(cc1->cbegin(), cc1->cend(), mCharSetAllocator) {
    19     for (const CharSetItem & i : cc2->mSparseCharSet) {
    20         insert_range(i.lo_codepoint, i.hi_codepoint);
     18    for (const interval_t & i : cc2->mSparseCharSet) {
     19        insert_range(lo_codepoint(i), hi_codepoint(i));
    2120    }
    2221}
     
    3029std::string CC::canonicalName(const CC_type type) const {
    3130    std::stringstream name;
    32     // name << std::hex;
     31    name << std::hex;
    3332    if ((type == ByteClass) && (max_codepoint() >= 0x80)) {
    3433        name << "BC";
     
    3837    }
    3938    char separator = '_';
    40     for (const CharSetItem & i : mSparseCharSet) {
     39    for (const interval_t & i : mSparseCharSet) {
    4140        name << separator;
    42         if (i.lo_codepoint == i.hi_codepoint) {
    43             name << i.lo_codepoint;
    44         }
    45         else {
    46             name << i.lo_codepoint << '_' << i.hi_codepoint;
     41        if (lo_codepoint(i) == hi_codepoint(i)) {
     42            name << lo_codepoint(i);
     43        }
     44        else {
     45            name << lo_codepoint(i) << '_' << hi_codepoint(i);
    4746        }
    4847        separator = ',';
     
    5150}
    5251
    53 void CC::insert_range(const codepoint_t lo_codepoint, const codepoint_t hi_codepoint) {
     52void CC::insert_range(const codepoint_t lo, const codepoint_t hi) {
    5453    for (auto i = mSparseCharSet.begin(); i != mSparseCharSet.end(); ) {
    55         CharSetItem & range = *i;
    56         if (hi_codepoint < range.lo_codepoint - 1) {
    57             mSparseCharSet.emplace(i, lo_codepoint, hi_codepoint);
     54        if (hi < lo_codepoint(i) - 1) {
     55            mSparseCharSet.emplace(i, lo, hi);
    5856            return;
    5957        }
    60         else if (lo_codepoint > range.hi_codepoint + 1) {
     58        else if (lo > hi_codepoint(i) + 1) {
    6159            ++i;
    6260        }
     
    6462            // ranges overlap; expand the range to include the prior one and
    6563            // remove the old one from the list
    66             range.lo_codepoint = std::min(range.lo_codepoint, lo_codepoint);
    67             range.hi_codepoint = std::max(range.hi_codepoint, hi_codepoint);
     64            lo_codepoint(i) = std::min(lo_codepoint(i), lo);
     65            hi_codepoint(i) = std::max(hi_codepoint(i), hi);
    6866            return;
    6967        }
    7068    }
    71     mSparseCharSet.emplace_back(lo_codepoint, hi_codepoint);
    72 }
    73 
    74 void CC::remove_range(const codepoint_t lo_codepoint, const codepoint_t hi_codepoint) {
     69    mSparseCharSet.emplace_back(lo, hi);
     70}
     71
     72void CC::remove_range(const codepoint_t lo, const codepoint_t hi) {
    7573    for (auto i = mSparseCharSet.begin(); i != mSparseCharSet.end(); ) {
    76         CharSetItem & range = *i;
    77         if (lo_codepoint > range.hi_codepoint + 1) {
    78             ++i;
    79         }
    80         else if (hi_codepoint < range.lo_codepoint - 1) {
     74        if (lo > hi_codepoint(i) + 1) {
     75            ++i;
     76        }
     77        else if (hi < lo_codepoint(i) - 1) {
    8178            break;
    8279        }
    83         else if (lo_codepoint <= range.lo_codepoint && hi_codepoint >= range.hi_codepoint) {
     80        else if (lo <= lo_codepoint(i) && hi >= hi_codepoint(i)) {
    8481            i = mSparseCharSet.erase(i);
    8582        }
    86         else if (lo_codepoint <= range.lo_codepoint) {
    87             range.lo_codepoint = hi_codepoint + 1;
     83        else if (lo <= lo_codepoint(i)) {
     84            lo_codepoint(i) = hi + 1;
    8885            break;
    8986        }
    90         else if (hi_codepoint >= range.hi_codepoint) {
    91             range.hi_codepoint = lo_codepoint - 1;
    92             ++i;
    93         }
    94         else {
    95             CharSetItem item(hi_codepoint + 1, range.hi_codepoint);
    96             range.hi_codepoint = lo_codepoint - 1;
    97             mSparseCharSet.insert(++i, std::move(item));
     87        else if (hi >= hi_codepoint(i)) {
     88            hi_codepoint(i) = lo - 1;
     89            ++i;
     90        }
     91        else {         
     92            mSparseCharSet.emplace(++i, hi + 1, hi_codepoint(i));
     93            hi_codepoint(i) = lo - 1;
    9894            break;
    9995        }
     
    10197}
    10298
    103 CC * subtractCC(const CC * cc1, const CC * cc2) {
     99CC * subtractCC(const CC * a, const CC * b) {
    104100    CC * diff = makeCC();
    105     auto ai = cc1->cbegin();
    106     const auto ai_end = cc1->cend();
    107     auto bi = cc2->cbegin();
    108     const auto bi_end = cc2->cend();
    109     while (ai != ai_end && bi != bi_end) {
    110         const CharSetItem & ra = *ai;
    111         const CharSetItem & rb = *bi;
    112         if (rb.hi_codepoint < ra.lo_codepoint) {
    113             ++bi;
     101    auto i = a->cbegin();
     102    const auto i_end = a->cend();
     103    auto j = b->cbegin();
     104    const auto j_end = b->cend();
     105    while (i != i_end && j != j_end) {
     106        if (hi_codepoint(j) < lo_codepoint(i)) {
     107            ++j;
    114108        }
    115109        else { // test whether the intervals overlap
    116             if (ra.lo_codepoint < rb.lo_codepoint) {
    117                 diff->insert_range(ra.lo_codepoint, std::min(rb.lo_codepoint - 1, ra.hi_codepoint));
    118             }
    119             if (ra.hi_codepoint > rb.hi_codepoint) {
    120                 diff->insert_range(std::max(rb.hi_codepoint + 1, ra.lo_codepoint), ra.hi_codepoint);
    121             }
    122             ++ai;
    123         }
    124     }
    125     for (; ai != ai_end; ++ai) {
    126         const CharSetItem & ra = *ai;
    127         diff->insert_range(ra.lo_codepoint, ra.hi_codepoint);
     110            if (lo_codepoint(i) < lo_codepoint(j)) {
     111                diff->insert_range(lo_codepoint(i), std::min(lo_codepoint(j) - 1, hi_codepoint(i)));
     112            }
     113            if (hi_codepoint(i) > hi_codepoint(j)) {
     114                diff->insert_range(std::max(hi_codepoint(j) + 1, lo_codepoint(i)), hi_codepoint(i));
     115            }
     116            ++i;
     117        }
     118    }
     119    for (; i != i_end; ++i) {
     120        diff->insert_range(lo_codepoint(i), hi_codepoint(i));
    128121    }
    129122    return diff;
     
    137130    const auto bi_end = b->cend();
    138131    while (ai != ai_end && bi != bi_end) {
    139         const CharSetItem & ra = *ai;
    140         const CharSetItem & rb = *bi;
    141         if (ra.hi_codepoint < rb.lo_codepoint) {
     132        if (hi_codepoint(ai) < lo_codepoint(bi)) {
    142133            ++ai;
    143134        }
    144         else if (rb.hi_codepoint < ra.lo_codepoint) {
     135        else if (hi_codepoint(bi) < lo_codepoint(ai)) {
    145136            ++bi;
    146137        }
    147138        else {
    148             isect->insert_range(std::max(ra.lo_codepoint, rb.lo_codepoint), std::min(ra.hi_codepoint, rb.hi_codepoint));
    149             if (ra.hi_codepoint < rb.hi_codepoint) {
     139            isect->insert_range(std::max(lo_codepoint(ai), lo_codepoint(bi)), std::min(hi_codepoint(ai), hi_codepoint(bi)));
     140            if (hi_codepoint(ai) < hi_codepoint(bi)) {
    150141                ++ai;
    151142            }
     
    160151CC * caseInsensitize(const CC * cc) {
    161152    CC * cci = makeCC();
    162     for (const CharSetItem & i : *cc) {
    163         caseInsensitiveInsertRange(cci, i.lo_codepoint, i.hi_codepoint);
     153    for (const interval_t & i : *cc) {
     154        caseInsensitiveInsertRange(cci, lo_codepoint(i), hi_codepoint(i));
    164155    }
    165156    return cci;
     
    175166    assert ("cc cannot be null" && cc);
    176167    CC * intersect = makeCC();
    177     for (const auto & p : *cc) {
    178         if ((p.lo_codepoint <= hi) && (p.hi_codepoint >= lo)) {
    179             intersect->insert_range(std::max(lo, p.lo_codepoint), std::min(hi, p.hi_codepoint));
     168    for (const auto & i : *cc) {
     169        if ((lo_codepoint(i) <= hi) && (hi_codepoint(i) >= lo)) {
     170            intersect->insert_range(std::max(lo, lo_codepoint(i)), std::min(hi, hi_codepoint(i)));
    180171        }
    181172    }
     
    196187        auto i = cc->cbegin(), end = cc->cend();
    197188        for (; i != end && cp < hi; ++i) {
    198             if (i->hi_codepoint < cp) {
     189            if (hi_codepoint(i) < cp) {
    199190                continue;
    200191            }
    201             else if (i->lo_codepoint > cp) {
    202                 gaps->insert_range(cp, i->lo_codepoint - 1);
    203             }
    204             cp = i->hi_codepoint + 1;
     192            else if (lo_codepoint(i) > cp) {
     193                gaps->insert_range(cp, lo_codepoint(i) - 1);
     194            }
     195            cp = hi_codepoint(i) + 1;
    205196        }
    206197        if (cp < hi) {
     
    221212    const auto end = cc->cend();
    222213    for (auto j = i; ++j != end; ) {
    223         if (j->hi_codepoint > i->hi_codepoint) {
    224             ranges->insert_range(i->lo_codepoint, i->hi_codepoint);
     214        if (hi_codepoint(j) > hi_codepoint(i)) {
     215            ranges->insert_range(lo_codepoint(i), hi_codepoint(i));
    225216            i = j;
    226217        }
     
    239230    const auto end = cc->cend();
    240231    for (auto j = i; ++j != end; ) {
    241         if (j->hi_codepoint <= i->hi_codepoint) {
    242             ranges->insert_range(j->lo_codepoint, j->hi_codepoint);
     232        if (hi_codepoint(j) <= hi_codepoint(i)) {
     233            ranges->insert_range(lo_codepoint(j), hi_codepoint(j));
    243234        }
    244235        else {
  • icGREP/icgrep-devel/icgrep/re/re_cc.h

    r4612 r4614  
    1616namespace re {
    1717
    18 typedef unsigned codepoint_t;
    19 
    20 struct CharSetItem {
    21     constexpr CharSetItem() : lo_codepoint(0), hi_codepoint(0) {}
    22     constexpr CharSetItem(const codepoint_t lo, const codepoint_t hi) : lo_codepoint(lo), hi_codepoint(hi) {}
    23     constexpr codepoint_t operator [](const unsigned i) const {
    24         return (i == 0) ? lo_codepoint : (i == 1) ? hi_codepoint : throw std::runtime_error("CharSetItem[] can only accept 0 or 1.");
    25     }
    26     codepoint_t lo_codepoint;
    27     codepoint_t hi_codepoint;
    28 };
     18using codepoint_t = unsigned;
     19using interval_t = std::pair<codepoint_t, codepoint_t>;
    2920
    3021enum CC_type {UnicodeClass, ByteClass};
     
    4031    }
    4132
    42     using CharSetAllocator = SlabAllocator<CharSetItem>;
    43     using CharSetVector = std::vector<CharSetItem, CharSetAllocator>;
    44 
    45     typedef CharSetVector::iterator                 iterator;
    46     typedef CharSetVector::const_iterator           const_iterator;
    47     typedef CharSetVector::size_type                size_type;
    48     typedef CharSetVector::reference                reference;
    49     typedef CharSetVector::const_reference          const_reference;
     33    using IntervalAllocator = SlabAllocator<interval_t>;
     34    using IntervalVector = std::vector<interval_t, IntervalAllocator>;
     35
     36    using iterator = IntervalVector::iterator;
     37    using const_iterator = IntervalVector::const_iterator;
     38    using size_type = IntervalVector::size_type;
     39    using reference = IntervalVector::reference;
     40    using const_reference = IntervalVector::const_reference;
    5041
    5142    static const codepoint_t UNICODE_MAX = 0x10FFFF;
     
    5344    std::string canonicalName(const CC_type type) const;
    5445
    55     CharSetItem & operator [](unsigned i) {
     46    interval_t & operator [](unsigned i) {
    5647        return mSparseCharSet[i];
    5748    }
    5849
    59     const CharSetItem & operator [](unsigned i) const {
     50    const interval_t & operator [](unsigned i) const {
    6051        return mSparseCharSet[i];
    6152    }
    6253
    6354    inline codepoint_t min_codepoint() const {
    64         return mSparseCharSet.size() == 0 ? 0 : mSparseCharSet.front().lo_codepoint;
     55        return empty() ? 0 : std::get<0>(front());
    6556    }
    6657
    6758    inline codepoint_t max_codepoint() const {
    68         return mSparseCharSet.size() == 0 ? 0 : mSparseCharSet.back().hi_codepoint;
    69     }
    70 
    71     void insert_range(const codepoint_t lo_codepoint, const codepoint_t hi_codepoint);
    72 
    73     void remove_range(const codepoint_t lo_codepoint, const codepoint_t hi_codepoint);
     59        return empty() ? 0 : std::get<1>(back());
     60    }
     61
     62    void insert_range(const codepoint_t lo, const codepoint_t hi);
     63
     64    void remove_range(const codepoint_t lo, const codepoint_t hi);
    7465
    7566    inline void insert(const codepoint_t codepoint) {
     
    154145    }
    155146    CC(const CC * cc1, const CC * cc2);
     147
    156148private:   
    157     CharSetVector mSparseCharSet;
    158     static CharSetAllocator mCharSetAllocator;
     149    IntervalVector mSparseCharSet;
     150    static IntervalAllocator mCharSetAllocator;
    159151};
    160152
     
    175167}
    176168
     169inline codepoint_t & lo_codepoint(CC::reference i) {
     170    return std::get<0>(i);
     171}
     172inline codepoint_t lo_codepoint(CC::const_reference i) {
     173    return std::get<0>(i);
     174}
     175inline codepoint_t & lo_codepoint(const CC::iterator i) {
     176    return lo_codepoint(*i);
     177}
     178inline codepoint_t lo_codepoint(const CC::const_iterator i) {
     179    return lo_codepoint(*i);
     180}
     181
     182inline codepoint_t & hi_codepoint(CC::reference i) {
     183    return std::get<1>(i);
     184}
     185inline codepoint_t hi_codepoint(CC::const_reference i) {
     186    return std::get<1>(i);
     187}
     188inline codepoint_t & hi_codepoint(const CC::iterator i) {
     189    return hi_codepoint(*i);
     190}
     191inline codepoint_t hi_codepoint(const CC::const_iterator i) {
     192    return hi_codepoint(*i);
     193}
    177194
    178195
     
    201218}
    202219
    203 CC * subtractCC(const CC * cc1, const CC * cc2);
     220CC * subtractCC(const CC * a, const CC * b);
    204221   
    205222CC * intersectCC(const CC * cc1, const CC * cc2);
  • icGREP/icgrep-devel/icgrep/re/re_parser.h

    r4612 r4614  
    5959    RE * extend_item(RE * re);
    6060
    61     void parse_range_bound(int & lower_bound, int & upper_bound);
     61    void parse_range_bound(int & lo_codepoint, int & hi_codepoint);
    6262
    6363    unsigned parse_int();
  • icGREP/icgrep-devel/icgrep/utf8_encoder.cpp

    r4612 r4614  
    2727            else if (cc->size() > 1) {
    2828                std::vector<RE *> alt;
    29                 for (const CharSetItem & item : *cc) {
    30                     alt.push_back(rangeToUTF8(item));
     29                for (const interval_t & i : *cc) {
     30                    alt.push_back(rangeToUTF8(i));
    3131                }
    3232                name->setDefinition(makeAlt(alt.begin(), alt.end()));
     
    3838}
    3939
    40 RE * UTF8_Encoder::rangeToUTF8(const CharSetItem & item) {
    41     const auto min = lenUTF8(item.lo_codepoint);
    42     const auto max = lenUTF8(item.hi_codepoint);
     40RE * UTF8_Encoder::rangeToUTF8(const interval_t & item) {
     41    const auto min = lenUTF8(lo_codepoint(item));
     42    const auto max = lenUTF8(hi_codepoint(item));
    4343    if (min < max) {
    4444        const auto m = maxCodePoint(min);
    45         return makeAlt({rangeToUTF8(CharSetItem(item.lo_codepoint, m)), rangeToUTF8(CharSetItem(m + 1, item.hi_codepoint))});
     45        return makeAlt({rangeToUTF8(interval_t(lo_codepoint(item), m)), rangeToUTF8(interval_t(m + 1, hi_codepoint(item)))});
    4646    }
    4747    else {
    48         return rangeToUTF8(item.lo_codepoint, item.hi_codepoint, 1, max);
     48        return rangeToUTF8(lo_codepoint(item), hi_codepoint(item), 1, max);
    4949    }
    5050}
     
    7676}
    7777
    78 inline bool UTF8_Encoder::isUTF8Prefix(const unsigned cp) {
     78inline bool UTF8_Encoder::isUTF8Prefix(const codepoint_t cp) {
    7979    return (cp >= 0xC2) && (cp <= 0xF4);
    8080}
    8181
    82 inline codepoint_t UTF8_Encoder::u8byte(const codepoint_t codepoint, const unsigned n)
    83 {
     82inline codepoint_t UTF8_Encoder::u8byte(const codepoint_t cp, const unsigned n) {
    8483    codepoint_t retVal = 0;
    85 
    86     const unsigned len = lenUTF8(codepoint);
    87 
     84    const unsigned len = lenUTF8(cp);
    8885    if (n == 1) {
    8986        switch (len) {
    90             case 1: retVal = codepoint; break;
    91             case 2: retVal = 0xC0 | (codepoint >> 6); break;
    92             case 3: retVal = 0xE0 | (codepoint >> 12); break;
    93             case 4: retVal = 0xF0 | (codepoint >> 18); break;
     87            case 1: retVal = cp; break;
     88            case 2: retVal = 0xC0 | (cp >> 6); break;
     89            case 3: retVal = 0xE0 | (cp >> 12); break;
     90            case 4: retVal = 0xF0 | (cp >> 18); break;
    9491        }
    9592    }
    9693    else {
    97         retVal = 0x80 | ((codepoint >> (6 * (len - n))) & 0x3F);
     94        retVal = 0x80 | ((cp >> (6 * (len - n))) & 0x3F);
    9895    }
    99 
    10096    return retVal;
    10197}
    10298
    103 inline unsigned UTF8_Encoder::lenUTF8(const unsigned cp) {
     99inline unsigned UTF8_Encoder::lenUTF8(const codepoint_t cp) {
    104100    if (cp <= 0x7F) {
    105101        return 1;
     
    116112}
    117113
    118 inline unsigned UTF8_Encoder::maxCodePoint(const unsigned length) {
     114inline codepoint_t UTF8_Encoder::maxCodePoint(const unsigned length) {
    119115    if (length == 1) {
    120116        return 0x7F;
     
    132128}
    133129
     130inline bool UTF8_Encoder::isLowCodePointAfterByte(const codepoint_t cp, const unsigned index) {
     131    const auto l = lenUTF8(cp);
     132    for (auto i = index; i != l; ++i) {
     133        if (u8byte(cp, i + 1) != 0x80) {
     134            return false;
     135        }
     136    }
     137    return true;
     138}
     139
     140inline bool UTF8_Encoder::isHighCodePointAfterByte(const codepoint_t cp, const unsigned index) {
     141    const auto l = lenUTF8(cp);
     142    for (auto i = index; i != l; ++i) {
     143        if (u8byte(cp, i + 1) != 0xBF) {
     144            return false;
     145        }
     146    }
     147    return true;
     148}
     149
     150
    134151inline CC * UTF8_Encoder::makeByteRange(const codepoint_t lo, const codepoint_t hi) {
    135152    return makeCC(lo, hi);
  • icGREP/icgrep-devel/icgrep/utf8_encoder.h

    r4612 r4614  
    1616class CC_NameMap;
    1717
    18 class UTF8_Encoder
    19 {
     18class UTF8_Encoder {
    2019public:
    2120    static re::RE * toUTF8(CC_NameMap & nameMap, re::RE * ast);
     21
     22    static bool isUTF8Prefix(const re::codepoint_t cp);
     23    static unsigned lenUTF8(const re::codepoint_t cp);
     24    static re::codepoint_t maxCodePoint(const unsigned length);
     25    static re::codepoint_t u8byte(const re::codepoint_t cp, const unsigned n);
     26    static bool isLowCodePointAfterByte(const re::codepoint_t cp, const unsigned index);
     27    static bool isHighCodePointAfterByte(const re::codepoint_t cp, const unsigned index);
    2228private:
    23     static re::RE * rangeToUTF8(const re::CharSetItem & item);
     29    static re::RE * rangeToUTF8(const re::interval_t & item);
    2430    static re::RE * rangeToUTF8(const re::codepoint_t lo, const re::codepoint_t hi, const unsigned index, const unsigned max);
    2531    static re::CC * makeByteClass(const re::codepoint_t cp);
    2632    static re::CC * makeByteRange(const re::codepoint_t lo, const re::codepoint_t hi);
    27     static bool isUTF8Prefix(const unsigned cp);
    28     static unsigned lenUTF8(const unsigned cp);
    29     static unsigned maxCodePoint(const unsigned length);
    30     static re::codepoint_t u8byte(const re::codepoint_t codepoint, const unsigned n);
    3133};
    3234
Note: See TracChangeset for help on using the changeset viewer.