Ignore:
Timestamp:
Oct 9, 2017, 9:28:24 AM (23 months ago)
Author:
cameron
Message:

Refactoring progress: \N uses name property; delay resolution of recursive property expressions; property object regexp support

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/re/re_parser.cpp

    r5673 r5679  
    2222#include <re/re_assertion.h>
    2323#include <re/printer_re.h>
    24 #include <UCD/UnicodeNameData.h>
    2524#include <UCD/resolve_properties.h>
    2625#include <UCD/CaseFolding.h>
     
    676675            }
    677676            ++mCursor;
    678             return parseRegexPropertyValue(canonicalize(start, prop_end), std::string(val_start, current));
     677            //return parseRegexPropertyValue(canonicalize(start, prop_end), std::string(val_start, current));
     678            return createName(canonicalize(start, prop_end), std::string(val_start-1, current));
    679679        }
    680680    }
     
    682682}
    683683
    684 RE * RE_Parser::parseRegexPropertyValue(const std::string & propName, const std::string& regexValue) {
    685     RE * propValueRe = RE_Parser::parse("^" + regexValue + "$", fModeFlagSet, mReSyntax);
    686     const auto matches = grep::grepPropertyValues(propName, propValueRe);
    687     if (matches.empty()) {
    688         ParseFailure("regex " + regexValue + " match no property values");
    689     } else if (matches.size() == 1) {
    690         return createName(propName, matches.front());
    691     } else {
    692         std::vector<re::RE *> alt;
    693         for (auto value : matches) {
    694             alt.push_back(createName(propName, value));
    695         }
    696         return makeAlt(alt.begin(), alt.end());
    697     }
    698 }
    699 
    700684Name * RE_Parser::parseNamePatternExpression(){
    701685
    702     ModeFlagSet outerFlags = fModeFlagSet;
    703     fModeFlagSet = 1;
    704 
    705     bool outerNested = fNested;
    706     fNested = true;
    707 
    708     RE * nameRE = parse_RE();
    709 
    710     // Reset outer parsing state.
    711     fModeFlagSet = outerFlags;
    712     fNested = outerNested;
    713 
    714     // Embed the nameRE in ";.*$nameRE" to skip the codepoint field of Uname.txt
    715     RE * embedded = makeSeq({mMemoizer.memoize(makeCC(0x3B)), makeRep(makeAny(), 0, Rep::UNBOUNDED_REP), nameRE});
    716    
    717     CC * codepoints = grep::grepCodepoints(embedded, getUnicodeNameDataPtr(), getUnicodeNameDataSize());
    718    
    719     if (codepoints) {
    720         Name * const result = mMemoizer.memoize(codepoints);
    721         assert (*cast<CC>(result->getDefinition()) == *codepoints);
    722         return result;
    723     }
    724     return nullptr;
     686    const auto start = mCursor.pos();
     687    while (mCursor.more()) {
     688        if (*mCursor == '\\') {
     689            ++mCursor;
     690            if (!mCursor.more()) {
     691                break;
     692            }
     693        }
     694        else if (*mCursor == '}') {
     695            break;
     696        }
     697        ++mCursor;
     698    }
     699    std::string nameRegexp = "/(?i)" + std::string(start, mCursor.pos());
     700    return createName("na", nameRegexp);
    725701}
    726702
Note: See TracChangeset for help on using the changeset viewer.