Changeset 4735


Ignore:
Timestamp:
Aug 21, 2015, 1:44:51 PM (4 years ago)
Author:
cameron
Message:

Add support for the Grapheme Cluster Break, Sentence Break, and Word Break properties

Files:
3 added
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/UCD/PropertyObjectTable.h

    r4636 r4735  
    1616#include "DerivedGeneralCategory.h"
    1717#include "EastAsianWidth.h"
     18#include "GraphemeBreakProperty.h"
    1819#include "HangulSyllableType.h"
    1920#include "LineBreak.h"
     
    2122#include "ScriptExtensions.h"
    2223#include "Scripts.h"
     24#include "SentenceBreakProperty.h"
     25#include "WordBreakProperty.h"
    2326
    2427namespace UCD {
     
    6972    &EA_ns::property_object,
    7073    &GC_ns::property_object,
    71     new UnsupportedPropertyObject(GCB, PropertyObject::ClassTypeId::EnumeratedProperty),
     74    &GCB_ns::property_object,
    7275    &HST_ns::property_object,
    7376    new UnsupportedPropertyObject(InPC, PropertyObject::ClassTypeId::EnumeratedProperty),
     
    8184    new UnsupportedPropertyObject(NFKD_QC, PropertyObject::ClassTypeId::EnumeratedProperty),
    8285    new UnsupportedPropertyObject(nt, PropertyObject::ClassTypeId::EnumeratedProperty),
    83     new UnsupportedPropertyObject(SB, PropertyObject::ClassTypeId::EnumeratedProperty),
    84     new UnsupportedPropertyObject(WB, PropertyObject::ClassTypeId::EnumeratedProperty),
     86    &SB_ns::property_object,
     87    &WB_ns::property_object,
    8588    &AHEX_ns::property_object,
    8689    &ALPHA_ns::property_object,
  • icGREP/icgrep-devel/icgrep/UCD/resolve_properties.cpp

    r4684 r4735  
    7777        property->setFunctionName("__get_blk_" + BLK_ns::enum_names[valcode]);
    7878    }
     79    else if (theprop == GCB) {
     80        // Grapheme Cluster Break property identified
     81        int valcode = GetPropertyValueEnumCode(GCB, value);
     82        if (valcode < 0) {
     83             throw UnicodePropertyExpressionError("Erroneous property value for grapheme cluster break property");
     84        }
     85        property->setFunctionName("__get_gcb_" + GCB_ns::enum_names[valcode]);
     86    }
     87    else if (theprop == WB) {
     88        // Word Break property identified
     89        int valcode = GetPropertyValueEnumCode(WB, value);
     90        if (valcode < 0) {
     91             throw UnicodePropertyExpressionError("Erroneous property value for word break property");
     92        }
     93        property->setFunctionName("__get_wb_" + WB_ns::enum_names[valcode]);
     94    }
     95    else if (theprop == lb) {
     96        // Line Break property identified
     97        int valcode = GetPropertyValueEnumCode(lb, value);
     98        if (valcode < 0) {
     99             throw UnicodePropertyExpressionError("Erroneous property value for line break property");
     100        }
     101        property->setFunctionName("__get_lb_" + LB_ns::enum_names[valcode]);
     102    }
     103    else if (theprop == SB) {
     104        // Sentence Break property identified
     105        int valcode = GetPropertyValueEnumCode(SB, value);
     106        if (valcode < 0) {
     107             throw UnicodePropertyExpressionError("Erroneous property value for sentence break property");
     108        }
     109        property->setFunctionName("__get_lb_" + SB_ns::enum_names[valcode]);
     110    }
    79111    else if (isa<BinaryPropertyObject>(property_object_table[theprop])){
    80112        auto valit = Binary_ns::aliases_only_map.find(value);
     
    187219            }
    188220            auto theprop = propit->second;
    189             if (theprop == gc) {
    190                 // General Category
    191                 return cast<EnumeratedPropertyObject>(property_object_table[gc])->GetCodepointSet(value);
    192             }
    193             else if (theprop == sc) {
    194                 // Script property identified
    195                 return cast<EnumeratedPropertyObject>(property_object_table[sc])->GetCodepointSet(value);
    196             }
    197             else if (theprop == scx) {
    198                 // Script extension property identified
    199                 return cast<ExtensionPropertyObject>(property_object_table[scx])->GetCodepointSet(value);
    200             }
    201             else if (theprop == blk) {
    202                 // Block property identified
    203                 return cast<EnumeratedPropertyObject>(property_object_table[blk])->GetCodepointSet(value);
     221            if (EnumeratedPropertyObject * p = dyn_cast<EnumeratedPropertyObject>(property_object_table[theprop])){
     222                return p->GetCodepointSet(value);
    204223            }
    205224            else if (BinaryPropertyObject * p = dyn_cast<BinaryPropertyObject>(property_object_table[theprop])){
  • proto/charsetcompiler/UCD/UCD_properties.py

    r4634 r4735  
    1414from UCD_parser import *
    1515
    16 UCD_dir = "7.0.0"
     16UCD_dir = "8.0.0"
    1717
    1818PropertyAliases_template = r"""
     
    240240    ucd.generate_property_value_file('LineBreak', 'lb')
    241241    #
     242    # Grapheme Cluster Break property
     243    ucd.generate_property_value_file('auxiliary/GraphemeBreakProperty', 'GCB')
     244    #
     245    # Sentence Break property
     246    ucd.generate_property_value_file('auxiliary/SentenceBreakProperty', 'SB')
     247    #
     248    # Word Break property
     249    ucd.generate_property_value_file('auxiliary/WordBreakProperty', 'WB')
     250    #
    242251    # East Asian Width
    243252    ucd.generate_property_value_file('EastAsianWidth', 'ea')
Note: See TracChangeset for help on using the changeset viewer.