Changeset 4320 for icGREP


Ignore:
Timestamp:
Dec 4, 2014, 9:41:21 AM (5 years ago)
Author:
cameron
Message:

Defer case-insensitization until character class is complete.

Location:
icGREP/icgrep-devel/icgrep/re
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/re/re_cc.cpp

    r4319 r4320  
    7 7 #include "re_cc.h"
    8 8 #include <llvm/Support/Compiler.h>
     9 #include <UCD/CaseFolding_txt.h>
    9 10
    10 11 namespace re {
     
    230 231 }
    231 232
    232 }
     233 CC * caseInsensitize(const CC * cc) {
     234     CC * cci = makeCC();
     235     for (auto i = cc->cbegin(); i != cc->cend(); i++) {
     236         caseInsensitiveInsertRange(cci, i->lo_codepoint, i->hi_codepoint);
     237     }
     238     return cci;
     239 }
     240
     241 }
  • icGREP/icgrep-devel/icgrep/re/re_cc.h

    r4319 r4320  
    186 186
    187 187 CC * subtractCC(const CC * cc1, const CC * cc2);
     188
     189 CC * intersectCC(const CC * cc1, const CC * cc2);
    188 190
    189 CC * intersectCC(const CC * cc1, const CC * cc2);
     191 CC * caseInsensitize(const CC * cc);
    190 192 }
    191 193
  • icGREP/icgrep-devel/icgrep/re/re_parser.cpp

    r4319 r4320  
    580 580             case setCloser: {
    581 581                 if (lastItemKind == NoItem) throw ParseFailure("Set operator has no right operand.");
    582                 if (cc->begin() != cc->end()) subexprs.push_back(cc);
     582                 if (cc->begin() != cc->end()) {
     583                     subexprs.push_back(cc);
     584                 }
    583 585                 RE * newOperand = makeAlt(subexprs.begin(), subexprs.end());
    584 586                 if (havePendingOperation) {
     
    588 590                     else {
    589 591                         newOperand = makeDiff(pendingOperand, newOperand);
     592                     }
     593                 }
     594                 if (fModeFlagSet & CASE_INSENSITIVE_MODE_FLAG) {
     595                     if (CC * cc1 = dyn_cast<CC>(newOperand)) {
     596                         newOperand = caseInsensitize(cc1);
    590 597                     }
    591 598                 }
     
    634 641             case rangeHyphen:
    635 642                 if (lastItemKind != CodepointItem) throw ParseFailure("Range operator - has illegal left operand.");
    636                 CC_add_range(cc, lastCodepointItem, parse_codepoint());
     643                 cc->insert_range(lastCodepointItem, parse_codepoint());
    637 644                 lastItemKind = RangeItem;
    638 645                 break;
     
    655 662                 else {
    656 663                     lastCodepointItem = parse_escaped_codepoint();
    657                     CC_add_codepoint(cc, lastCodepointItem);
     664                     cc->insert(lastCodepointItem);
    658 665                     lastItemKind = CodepointItem;
    659 666                 }
     
    661 668             case emptyOperator:
    662 669                 lastCodepointItem = parse_utf8_codepoint();
    663                 CC_add_codepoint(cc, lastCodepointItem);
     670                 cc->insert(lastCodepointItem);
    664 671                 lastItemKind = CodepointItem;
    665 672                 break;
     
    805 812     if (count < mindigits) throw ParseFailure("Hexadecimal sequence has too few digits");
    806 813     if (value > CC::UNICODE_MAX) throw ParseFailure("Hexadecimal value too large");
    807     std::cerr << value << " parsed.\n";
    808 814     return value;
    809 815 }
Note: See TracChangeset for help on using the changeset viewer.