Changeset 5674


Ignore:
Timestamp:
Oct 6, 2017, 2:51:57 PM (20 months ago)
Author:
cameron
Message:

Fix upper vs title case confusion

Location:
icGREP/icgrep-devel
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/UCD-scripts/UCD_parser.py

    r5673 r5674  
    360360        (decval, digitval, numval) = (m.group(7), m.group(8), m.group(9))
    361361        (na1, isc) = (m.group(10), m.group(11))
    362         (suc, slc, stc) = (m.group(13), m.group(14), m.group(15))
     362        (upper, lower, title) = (m.group(13), m.group(14), m.group(15))
    363363        rangeMatch = NameRange_regexp.match(name)
    364364        if rangeMatch:
     
    376376        if not na1 == '':
    377377            property_object_map['na1'].addDataRecord(cp, cp, na1)
    378         if not suc == '':
    379             property_object_map['suc'].addDataRecord(cp, cp, suc)
    380             if stc == '':
    381                 property_object_map['stc'].addDataRecord(cp, cp, uc)
    382         if not slc == '':
    383             property_object_map['slc'].addDataRecord(cp, cp, slc)
    384         if not stc == '':
    385             property_object_map['stc'].addDataRecord(cp, cp, stc)
     378        if not upper == '':
     379            property_object_map['suc'].addDataRecord(cp, cp, upper)
     380        if not lower == '':
     381            property_object_map['slc'].addDataRecord(cp, cp, lower)
     382        if not title == '':
     383            property_object_map['stc'].addDataRecord(cp, cp, title)
     384        elif not upper == '':
     385            property_object_map['stc'].addDataRecord(cp, cp, upper)
    386386        if not decval == '':
    387387            property_object_map['nv'].addDataRecord(cp, cp, decval)
     
    409409        if len(fields) != 4: continue   #  Ignore context-dependent casing
    410410        if fields[3] != '': continue
    411         (lc, uc, tc) = (fields[0], fields[1], fields[2])
    412         if not uc == '':
    413             property_object_map['uc'].addDataRecord(cp, cp, uc)
    414         if not lc == '':
    415             property_object_map['lc'].addDataRecord(cp, cp, lc)
    416         if not tc == '':
    417             property_object_map['tc'].addDataRecord(cp, cp, tc)
     411        (lower, title, upper) = (fields[0], fields[1], fields[2])
     412        if not lower == '':
     413            property_object_map['lc'].addDataRecord(cp, cp, lower)
     414        if not title == '':
     415            property_object_map['tc'].addDataRecord(cp, cp, title)
     416        if not upper == '':
     417            property_object_map['uc'].addDataRecord(cp, cp, upper)
    418418    property_object_map['lc'].finalizeProperty()
     419    property_object_map['tc'].finalizeProperty()
    419420    property_object_map['uc'].finalizeProperty()
    420     property_object_map['tc'].finalizeProperty()
    421421
    422422
  • icGREP/icgrep-devel/icgrep/UCD/SpecialCasing.h

    r5673 r5674  
    168168    namespace UC_ns {
    169169        /** Code Point Ranges for uc (possibly overriding values from SUC)
    170         [00df, 00df], [0130, 0130], [0149, 0149], [01f0, 01f0],
    171         [0390, 0390], [03b0, 03b0], [0587, 0587], [1e96, 1e9a],
    172         [1f50, 1f50], [1f52, 1f52], [1f54, 1f54], [1f56, 1f56],
    173         [1f80, 1faf], [1fb2, 1fb4], [1fb6, 1fb7], [1fbc, 1fbc],
    174         [1fc2, 1fc4], [1fc6, 1fc7], [1fcc, 1fcc], [1fd2, 1fd3],
    175         [1fd6, 1fd7], [1fe2, 1fe4], [1fe6, 1fe7], [1ff2, 1ff4],
    176         [1ff6, 1ff7], [1ffc, 1ffc], [fb00, fb06], [fb13, fb17]**/
    177 
    178         const UnicodeSet explicitly_defined_set
    179                     {{{Empty, 6}, {Mixed, 1}, {Empty, 2}, {Mixed, 2}, {Empty, 4},
    180               {Mixed, 1}, {Empty, 12}, {Mixed, 2}, {Empty, 14}, {Mixed, 1},
    181               {Empty, 199}, {Mixed, 1}, {Empty, 5}, {Mixed, 1}, {Empty, 1},
    182               {Full, 1}, {Mixed, 3}, {Empty, 1752}, {Mixed, 1},
    183               {Empty, 32807}},
    184              {0x80000000, 0x00010000, 0x00000200, 0x00010000, 0x00010000,
    185               0x00010000, 0x00000080, 0x07c00000, 0x00550000, 0x10dcffff,
    186               0x00cc10dc, 0x10dc00dc, 0x00f8007f}};
    187 
    188         const unsigned buffer_length = 469;
    189         const static char __attribute__ ((aligned (32))) string_buffer[512] = u8R"__(Ss
    190 Ä°
    191 ÊŒN
    192 J̌
    193 Î™ÌˆÌ
    194 Î¥ÌˆÌ
    195 ÔµÖ‚
    196 H̱
    197 T̈
    198 W̊
    199 Y̊
    200 AÊŸ
    201 Î¥Ì“
    202 Î¥Ì“Ì€
    203 Î¥Ì“́
    204 Î¥Ì“Í‚
    205 áŸˆ
    206 áŸ‰
    207 áŸŠ
    208 áŸ‹
    209 áŸŒ
    210 áŸ
    211 áŸŽ
    212 áŸ
    213 áŸˆ
    214 áŸ‰
    215 áŸŠ
    216 áŸ‹
    217 áŸŒ
    218 áŸ
    219 áŸŽ
    220 áŸ
    221 áŸ˜
    222ʇ٪
    223 áŸš
    224 áŸ›
    225 áŸœ
    226 áŸ
    227 áŸž
    228 áŸŸ
    229 áŸ˜
    230ʇ٪
    231 áŸš
    232 áŸ›
    233 áŸœ
    234 áŸ
    235 áŸž
    236 áŸŸ
    237 áŸš
    238ʇ٩
    239ʇٻ
    240 áŸ«
    241 áŸ¬
    242 áŸ­
    243ʇ٨
    244 áŸ¯
    245 áŸš
    246ʇ٩
    247ʇٻ
    248 áŸ«
    249 áŸ¬
    250 áŸ­
    251ʇ٨
    252 áŸ¯
    253 áŸºÍ
    254 
    255 áŸŒ
    256 Î†Í
    257 
    258 Î‘Í‚
    259 Î‘Í‚Í
    260 
    261 áŸŒ
    262 á¿ŠÍ
    263 
    264 á¿Œ
    265 Î‰Í
    266 
    267 Î—Í‚
    268 Î—Í‚Í
    269 
    270 á¿Œ
    271 Î™ÌˆÌ€
    272 Î™ÌˆÌ
    273 Î™Í‚
    274 Î™ÌˆÍ‚
    275 Î¥ÌˆÌ€
    276 Î¥ÌˆÌ
    277 Î¡Ì“
    278 Î¥Í‚
    279 Î¥ÌˆÍ‚
    280 á¿ºÍ
    281 
    282 á¿Œ
    283 ÎÍ
    284 
    285 Î©Í‚
    286 Î©Í‚Í
    287 
    288 á¿Œ
    289 Ff
    290 Fi
    291 Fl
    292 Ffi
    293 Ffl
    294 St
    295 St
    296 Õ„Õ¶
    297 Õ„Õ¥
    298 Õ„Õ«
    299 ÕŽÕ¶
    300 Õ„Õ­
    301 )__";
    302 
    303         const static std::vector<codepoint_t> defined_cps = {
    304         0x00df, 0x0130, 0x0149, 0x01f0, 0x0390, 0x03b0, 0x0587, 0x1e96,
    305         0x1e97, 0x1e98, 0x1e99, 0x1e9a, 0x1f50, 0x1f52, 0x1f54, 0x1f56,
    306         0x1f80, 0x1f81, 0x1f82, 0x1f83, 0x1f84, 0x1f85, 0x1f86, 0x1f87,
    307         0x1f88, 0x1f89, 0x1f8a, 0x1f8b, 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f,
    308         0x1f90, 0x1f91, 0x1f92, 0x1f93, 0x1f94, 0x1f95, 0x1f96, 0x1f97,
    309         0x1f98, 0x1f99, 0x1f9a, 0x1f9b, 0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f,
    310         0x1fa0, 0x1fa1, 0x1fa2, 0x1fa3, 0x1fa4, 0x1fa5, 0x1fa6, 0x1fa7,
    311         0x1fa8, 0x1fa9, 0x1faa, 0x1fab, 0x1fac, 0x1fad, 0x1fae, 0x1faf,
    312         0x1fb2, 0x1fb3, 0x1fb4, 0x1fb6, 0x1fb7, 0x1fbc, 0x1fc2, 0x1fc3,
    313         0x1fc4, 0x1fc6, 0x1fc7, 0x1fcc, 0x1fd2, 0x1fd3, 0x1fd6, 0x1fd7,
    314         0x1fe2, 0x1fe3, 0x1fe4, 0x1fe6, 0x1fe7, 0x1ff2, 0x1ff3, 0x1ff4,
    315         0x1ff6, 0x1ff7, 0x1ffc, 0xfb00, 0xfb01, 0xfb02, 0xfb03, 0xfb04,
    316         0xfb05, 0xfb06, 0xfb13, 0xfb14, 0xfb15, 0xfb16, 0xfb17};
    317         static StringOverridePropertyObject property_object(uc,
    318                                                     SUC_ns::property_object,
    319                                                     explicitly_defined_set,
    320                                                     static_cast<const char *>(string_buffer),
    321                                                     buffer_length,
    322                                                     defined_cps);
    323     }
    324     namespace TC_ns {
    325         /** Code Point Ranges for tc (possibly overriding values from STC)
    326170        [00df, 00df], [0130, 0130], [0149, 0149], [01f0, 01f0],
    327171        [0390, 0390], [03b0, 03b0], [0587, 0587], [1e96, 1e9a],
     
    462306        0x1ff6, 0x1ff7, 0x1ffc, 0xfb00, 0xfb01, 0xfb02, 0xfb03, 0xfb04,
    463307        0xfb05, 0xfb06, 0xfb13, 0xfb14, 0xfb15, 0xfb16, 0xfb17};
     308        static StringOverridePropertyObject property_object(uc,
     309                                                    SUC_ns::property_object,
     310                                                    explicitly_defined_set,
     311                                                    static_cast<const char *>(string_buffer),
     312                                                    buffer_length,
     313                                                    defined_cps);
     314    }
     315    namespace TC_ns {
     316        /** Code Point Ranges for tc (possibly overriding values from STC)
     317        [00df, 00df], [0130, 0130], [0149, 0149], [01f0, 01f0],
     318        [0390, 0390], [03b0, 03b0], [0587, 0587], [1e96, 1e9a],
     319        [1f50, 1f50], [1f52, 1f52], [1f54, 1f54], [1f56, 1f56],
     320        [1f80, 1faf], [1fb2, 1fb4], [1fb6, 1fb7], [1fbc, 1fbc],
     321        [1fc2, 1fc4], [1fc6, 1fc7], [1fcc, 1fcc], [1fd2, 1fd3],
     322        [1fd6, 1fd7], [1fe2, 1fe4], [1fe6, 1fe7], [1ff2, 1ff4],
     323        [1ff6, 1ff7], [1ffc, 1ffc], [fb00, fb06], [fb13, fb17]**/
     324
     325        const UnicodeSet explicitly_defined_set
     326                    {{{Empty, 6}, {Mixed, 1}, {Empty, 2}, {Mixed, 2}, {Empty, 4},
     327              {Mixed, 1}, {Empty, 12}, {Mixed, 2}, {Empty, 14}, {Mixed, 1},
     328              {Empty, 199}, {Mixed, 1}, {Empty, 5}, {Mixed, 1}, {Empty, 1},
     329              {Full, 1}, {Mixed, 3}, {Empty, 1752}, {Mixed, 1},
     330              {Empty, 32807}},
     331             {0x80000000, 0x00010000, 0x00000200, 0x00010000, 0x00010000,
     332              0x00010000, 0x00000080, 0x07c00000, 0x00550000, 0x10dcffff,
     333              0x00cc10dc, 0x10dc00dc, 0x00f8007f}};
     334
     335        const unsigned buffer_length = 469;
     336        const static char __attribute__ ((aligned (32))) string_buffer[512] = u8R"__(Ss
     337İ
     338ʼN
     339J̌
     340Ϊ́
     341Ϋ́
     342Եւ
     343H̱
     344T̈
     345W̊
     346Y̊
     347Aʾ
     348Υ̓
     349Υ̓̀
     350Υ̓́
     351Υ̓͂
     352ᾈ
     353ᾉ
     354ᾊ
     355ᾋ
     356ᾌ
     357ᾍ
     358ᾎ
     359ᾏ
     360ᾈ
     361ᾉ
     362ᾊ
     363ᾋ
     364ᾌ
     365ᾍ
     366ᾎ
     367ᾏ
     368ᾘ
     369ᾙ
     370ᾚ
     371ᾛ
     372ᾜ
     373ᾝ
     374ᾞ
     375ᾟ
     376ᾘ
     377ᾙ
     378ᾚ
     379ᾛ
     380ᾜ
     381ᾝ
     382ᾞ
     383ᾟ
     384ᾨ
     385ᾩ
     386ᾪ
     387ᾫ
     388ᾬ
     389ᾭ
     390ᾮ
     391ᾯ
     392ᾨ
     393ᾩ
     394ᾪ
     395ᾫ
     396ᾬ
     397ᾭ
     398ᾮ
     399ᾯ
     400៺Í
     401
     402ᾼ
     403ΆÍ
     404
     405Α͂
     406Α͂Í
     407
     408ᾼ
     409ῊÍ
     410
     411ῌ
     412ΉÍ
     413
     414Η͂
     415Η͂Í
     416
     417ῌ
     418Ϊ̀
     419Ϊ́
     420Ι͂
     421Ϊ͂
     422Ϋ̀
     423Ϋ́
     424Ρ̓
     425Υ͂
     426Ϋ͂
     427ῺÍ
     428
     429ῼ
     430ΏÍ
     431
     432Ω͂
     433Ω͂Í
     434
     435ῼ
     436Ff
     437Fi
     438Fl
     439Ffi
     440Ffl
     441St
     442St
     443Մն
     444Մե
     445Մի
     446Վն
     447Մխ
     448)__";
     449
     450        const static std::vector<codepoint_t> defined_cps = {
     451        0x00df, 0x0130, 0x0149, 0x01f0, 0x0390, 0x03b0, 0x0587, 0x1e96,
     452        0x1e97, 0x1e98, 0x1e99, 0x1e9a, 0x1f50, 0x1f52, 0x1f54, 0x1f56,
     453        0x1f80, 0x1f81, 0x1f82, 0x1f83, 0x1f84, 0x1f85, 0x1f86, 0x1f87,
     454        0x1f88, 0x1f89, 0x1f8a, 0x1f8b, 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f,
     455        0x1f90, 0x1f91, 0x1f92, 0x1f93, 0x1f94, 0x1f95, 0x1f96, 0x1f97,
     456        0x1f98, 0x1f99, 0x1f9a, 0x1f9b, 0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f,
     457        0x1fa0, 0x1fa1, 0x1fa2, 0x1fa3, 0x1fa4, 0x1fa5, 0x1fa6, 0x1fa7,
     458        0x1fa8, 0x1fa9, 0x1faa, 0x1fab, 0x1fac, 0x1fad, 0x1fae, 0x1faf,
     459        0x1fb2, 0x1fb3, 0x1fb4, 0x1fb6, 0x1fb7, 0x1fbc, 0x1fc2, 0x1fc3,
     460        0x1fc4, 0x1fc6, 0x1fc7, 0x1fcc, 0x1fd2, 0x1fd3, 0x1fd6, 0x1fd7,
     461        0x1fe2, 0x1fe3, 0x1fe4, 0x1fe6, 0x1fe7, 0x1ff2, 0x1ff3, 0x1ff4,
     462        0x1ff6, 0x1ff7, 0x1ffc, 0xfb00, 0xfb01, 0xfb02, 0xfb03, 0xfb04,
     463        0xfb05, 0xfb06, 0xfb13, 0xfb14, 0xfb15, 0xfb16, 0xfb17};
    464464        static StringOverridePropertyObject property_object(tc,
    465465                                                    STC_ns::property_object,
Note: See TracChangeset for help on using the changeset viewer.