Changeset 4613 for icGREP/icgrep-devel


Ignore:
Timestamp: Jun 22, 2015, 12:11:25 AM (4 years ago)
Author: nmedfort
Message:

Minor bug fix for CC::canonicalName(...) and a slightly more efficient subtractCC function.

Location: icGREP/icgrep-devel/icgrep
Files: 3 edited

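Legend:

Unmodified — the line carries both the old and the new line number, run together (e.g. `1112` = old line 11, new line 12)
Added — the line carries only the new-revision line number (second column)
Removed — the line carries only the old-revision line number (first column)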
  • icGREP/icgrep-devel/icgrep/icgrep-devel.files

    r4586 r4613  
    319319UCD/unicode_set.cpp
    320320UCD/unicode_set.h
     321../ucd/ucdcompiler.cpp
     322../ucd/ucdcompiler.hpp
     323../ucd/utf8-util.hpp
  • icGREP/icgrep-devel/icgrep/icgrep-devel.includes

    r4592 r4613  
    88../cudd-2.5.1/util
    99UCD
     10../ucd
  • icGREP/icgrep-devel/icgrep/re/re_cc.cpp

    r4612 r4613  
    99#include <UCD/CaseFolding_txt.h>
    1010#include <sstream>
     11#include <iostream>
    1112
    1213namespace re {
     
    2930std::string CC::canonicalName(const CC_type type) const {
    3031    std::stringstream name;
    31     name << std::hex;
    32     if ((type == ByteClass) && (mSparseCharSet.back().hi_codepoint >= 0x80)) {
    33       name << "BC";
     32    // name << std::hex;
     33    if ((type == ByteClass) && (max_codepoint() >= 0x80)) {
     34        name << "BC";
    3435    }
    3536    else {
     
    5152
    5253void CC::insert_range(const codepoint_t lo_codepoint, const codepoint_t hi_codepoint) {
    53     CharSetItem item(lo_codepoint, hi_codepoint);
    5454    for (auto i = mSparseCharSet.begin(); i != mSparseCharSet.end(); ) {
    5555        CharSetItem & range = *i;
    56         if (item.hi_codepoint < range.lo_codepoint - 1) {
    57             mSparseCharSet.insert(i, item);
     56        if (hi_codepoint < range.lo_codepoint - 1) {
     57            mSparseCharSet.emplace(i, lo_codepoint, hi_codepoint);
    5858            return;
    5959        }
    60         else if (item.lo_codepoint > range.hi_codepoint + 1) {
     60        else if (lo_codepoint > range.hi_codepoint + 1) {
    6161            ++i;
    6262        }
     
    6464            // ranges overlap; expand the range to include the prior one and
    6565            // remove the old one from the list
    66             range.lo_codepoint = std::min(range.lo_codepoint, item.lo_codepoint);
    67             range.hi_codepoint = std::max(range.hi_codepoint, item.hi_codepoint);
     66            range.lo_codepoint = std::min(range.lo_codepoint, lo_codepoint);
     67            range.hi_codepoint = std::max(range.hi_codepoint, hi_codepoint);
    6868            return;
    6969        }
    7070    }
    71     mSparseCharSet.push_back(item);
     71    mSparseCharSet.emplace_back(lo_codepoint, hi_codepoint);
    7272}
    7373
     
    103103CC * subtractCC(const CC * cc1, const CC * cc2) {
    104104    CC * diff = makeCC();
    105     for (const CharSetItem & i : cc1->mSparseCharSet) {
    106         diff->insert_range(i.lo_codepoint, i.hi_codepoint);
    107     }
    108     for (const CharSetItem & i : cc2->mSparseCharSet) {
    109         diff->remove_range(i.lo_codepoint, i.hi_codepoint);
     105    auto ai = cc1->cbegin();
     106    const auto ai_end = cc1->cend();
     107    auto bi = cc2->cbegin();
     108    const auto bi_end = cc2->cend();
     109    while (ai != ai_end && bi != bi_end) {
     110        const CharSetItem & ra = *ai;
     111        const CharSetItem & rb = *bi;
     112        if (rb.hi_codepoint < ra.lo_codepoint) {
     113            ++bi;
     114        }
     115        else { // test whether the intervals overlap
     116            if (ra.lo_codepoint < rb.lo_codepoint) {
     117                diff->insert_range(ra.lo_codepoint, std::min(rb.lo_codepoint - 1, ra.hi_codepoint));
     118            }
     119            if (ra.hi_codepoint > rb.hi_codepoint) {
     120                diff->insert_range(std::max(rb.hi_codepoint + 1, ra.lo_codepoint), ra.hi_codepoint);
     121            }
     122            ++ai;
     123        }
     124    }
     125    for (; ai != ai_end; ++ai) {
     126        const CharSetItem & ra = *ai;
     127        diff->insert_range(ra.lo_codepoint, ra.hi_codepoint);
    110128    }
    111129    return diff;
     
    123141        if (ra.hi_codepoint < rb.lo_codepoint) {
    124142            ++ai;
    125             continue;
    126143        }
    127144        else if (rb.hi_codepoint < ra.lo_codepoint) {
    128145            ++bi;
    129             continue;
    130         }
    131         isect->insert_range(std::max(ra.lo_codepoint, rb.lo_codepoint), std::min(ra.hi_codepoint, rb.hi_codepoint));
    132         if (ra.hi_codepoint < rb.hi_codepoint) ++ai;
    133         else ++bi;
     146        }
     147        else {
     148            isect->insert_range(std::max(ra.lo_codepoint, rb.lo_codepoint), std::min(ra.hi_codepoint, rb.hi_codepoint));
     149            if (ra.hi_codepoint < rb.hi_codepoint) {
     150                ++ai;
     151            }
     152            else {
     153                ++bi;
     154            }
     155        }
    134156    }
    135157    return isect;
Note: See TracChangeset for help on using the changeset viewer.