Changeset 4377


Ignore:
Timestamp:
Dec 31, 2014, 9:19:55 AM (4 years ago)
Author:
cameron
Message:

Add support for \p{property=value}; Names are now namespace/name pairs.

Location:
icGREP/icgrep-devel/icgrep/re
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/re/re_compiler.cpp

    r4358 r4377  
    146146       
    147147PabloAST * RE_Compiler::character_class_strm(Name * name, PabloBlock & pb) {
    148     if (name->getType() == Name::Type::UnicodeCategory) {
     148    if (name->getType() == Name::Type::UnicodeProperty) {
    149149        return pb.createCall(name->getName());
    150150    }
     
    375375            return makePostPositionMarker("unbounded", pb.createMatchStar(base, cc), pb);
    376376        }
    377         else { // Name::Unicode and Name::UnicodeCategory
     377        else { // Name::Unicode and Name::UnicodeProperty
    378378            return makePostPositionMarker("unbounded", pb.createAnd(pb.createMatchStar(base, pb.createOr(mNonFinal, cc)), mInitial), pb);
    379379        }       
  • icGREP/icgrep-devel/icgrep/re/re_name.h

    r4337 r4377  
    2626        Byte
    2727        , Unicode
    28         , UnicodeCategory
     28        , UnicodeProperty
    2929        , Unknown
    3030    };
     31    const std::string & getNamespace() const;
    3132    const std::string & getName() const;
    3233    Type getType() const;
     
    4445    friend Name * makeByteName(const std::string, RE *);
    4546    friend Name * makeName(const std::string, const Type);
     47    friend Name * makeName(const std::string, const std::string, const Type);   
    4648    void* operator new (std::size_t size) noexcept {
    4749        return mAllocator.allocate(size);
    4850    }
    49     Name(const std::string && name, const Type type, RE * defn)
     51    Name(const std::string && nameSpace, const std::string && name, const Type type, RE * defn)
    5052    : RE(ClassTypeId::Name)
     53    , mNamespace(std::move(nameSpace))
    5154    , mName(std::move(name))
    5255    , mType(type)
     
    5861
    5962private:
     63    const std::string   mNamespace;
    6064    const std::string   mName;
    6165    const Type          mType;
     
    6367    pablo::Var *        mCompiled;
    6468};
     69
     70inline const std::string & Name::getNamespace() const {
     71    return mNamespace;
     72}
    6573
    6674inline const std::string & Name::getName() const {
     
    8189
    8290inline Name * makeName(const std::string name, const Name::Type type = Name::Type::Unicode) {
    83     return new Name(std::move(name), type, nullptr);
     91    return new Name("", std::move(name), type, nullptr);
     92}
     93
     94inline Name * makeName(const std::string property, const std::string value, const Name::Type type = Name::Type::Unicode) {
     95    return new Name(std::move(property), std::move(value), type, nullptr);
    8496}
    8597
     
    90102    else if (isa<CC>(cc)) {
    91103        Name::Type ccType = cast<CC>(cc)->max_codepoint() <= 0x7F ? Name::Type::Byte : Name::Type::Unicode;
    92         return new Name(std::move(name), ccType, cc);
     104        return new Name("", std::move(name), ccType, cc);
    93105    }
    94     else return new Name(std::move(name), Name::Type::Unknown, cc);
     106    else return new Name("", std::move(name), Name::Type::Unknown, cc);
    95107}
    96108
     
    100112    }
    101113    else {
    102         return new Name(std::move(name), Name::Type::Byte, cc);
     114        return new Name("", std::move(name), Name::Type::Byte, cc);
    103115    }
    104116}
  • icGREP/icgrep-devel/icgrep/re/re_parser.cpp

    r4358 r4377  
    364364
    365365RE * makeDigitSet() {
    366   return makeName("Nd", Name::Type::UnicodeCategory);
     366  return makeName("Nd", Name::Type::UnicodeProperty);
    367367}
    368368
     
    463463Name * RE_Parser::parse_property_expression() {
    464464    const cursor_t start = _cursor;
    465     while (_cursor != _end && *_cursor != '}' and *_cursor != ':') {
     465    while (_cursor != _end && *_cursor != '}' and *_cursor != ':' and *_cursor != '=') {
    466466        _cursor++;
    467467    }
    468     return makeName(std::string(start, _cursor), Name::Type::UnicodeCategory);
    469 }
    470    
     468    if (_cursor != _end && *_cursor == '=') {
     469        const cursor_t prop_end = _cursor;
     470        _cursor++;
     471        const cursor_t val_start = _cursor;
     472        while (_cursor != _end && *_cursor != '}' and *_cursor != ':') {
     473            _cursor++;
     474        }
     475        // We have a property-name = value expression
     476        return makeName(std::string(start, prop_end), std::string(val_start, _cursor), Name::Type::UnicodeProperty);
     477    }
     478    else return makeName(std::string(start, _cursor), Name::Type::UnicodeProperty);
     479}
     480
    471481CharsetOperatorKind RE_Parser::getCharsetOperator() {
    472482    throw_incomplete_expression_error_if_end_of_stream();
Note: See TracChangeset for help on using the changeset viewer.