Ignore:
Timestamp:
Oct 6, 2017, 1:22:53 PM (21 months ago)
Author:
cameron
Message:

Case folding property objects

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/UCD/SpecialCasing.h

    r5672 r5673  
    1717namespace UCD {
    1818    namespace LC_ns {
    19         /** Code Point Ranges for lc overriding values from SLC
    20         [00df, 00df], [0149, 0149], [01f0, 01f0], [0307, 0307],
     19        /** Code Point Ranges for lc (possibly overriding values from SLC)
     20        [00df, 00df], [0130, 0130], [0149, 0149], [01f0, 01f0],
    2121        [0390, 0390], [03b0, 03b0], [0587, 0587], [1e96, 1e9a],
    2222        [1f50, 1f50], [1f52, 1f52], [1f54, 1f54], [1f56, 1f56],
    23         [1f80, 1f87], [1f90, 1f97], [1fa0, 1fa7], [1fb2, 1fb4],
    24         [1fb6, 1fb7], [1fc2, 1fc4], [1fc6, 1fc7], [1fd2, 1fd3],
     23        [1f80, 1faf], [1fb2, 1fb4], [1fb6, 1fb7], [1fbc, 1fbc],
     24        [1fc2, 1fc4], [1fc6, 1fc7], [1fcc, 1fcc], [1fd2, 1fd3],
    2525        [1fd6, 1fd7], [1fe2, 1fe4], [1fe6, 1fe7], [1ff2, 1ff4],
    26         [1ff6, 1ff7], [fb00, fb06], [fb13, fb17]**/
    27 
    28         const UnicodeSet overridden_set
    29                     {{{Empty, 6}, {Mixed, 1}, {Empty, 3}, {Mixed, 1}, {Empty, 4},
    30               {Mixed, 1}, {Empty, 8}, {Mixed, 1}, {Empty, 3}, {Mixed, 2},
    31               {Empty, 14}, {Mixed, 1}, {Empty, 199}, {Mixed, 1}, {Empty, 5},
    32               {Mixed, 1}, {Empty, 1}, {Mixed, 4}, {Empty, 1752}, {Mixed, 1},
     26        [1ff6, 1ff7], [1ffc, 1ffc], [fb00, fb06], [fb13, fb17]**/
     27
     28        const UnicodeSet explicitly_defined_set
     29                    {{{Empty, 6}, {Mixed, 1}, {Empty, 2}, {Mixed, 2}, {Empty, 4},
     30              {Mixed, 1}, {Empty, 12}, {Mixed, 2}, {Empty, 14}, {Mixed, 1},
     31              {Empty, 199}, {Mixed, 1}, {Empty, 5}, {Mixed, 1}, {Empty, 1},
     32              {Full, 1}, {Mixed, 3}, {Empty, 1752}, {Mixed, 1},
    3333              {Empty, 32807}},
    34              {0x80000000, 0x00000200, 0x00010000, 0x00000080, 0x00010000,
    35               0x00010000, 0x00000080, 0x07c00000, 0x00550000, 0x00ff00ff,
    36               0x00dc00ff, 0x00cc00dc, 0x00dc00dc, 0x00f8007f}};
    37 
    38         const unsigned buffer_length = 298;
     34             {0x80000000, 0x00010000, 0x00000200, 0x00010000, 0x00010000,
     35              0x00010000, 0x00000080, 0x07c00000, 0x00550000, 0x10dcffff,
     36              0x00cc10dc, 0x10dc00dc, 0x00f8007f}};
     37
     38        const unsigned buffer_length = 406;
    3939        const static char __attribute__ ((aligned (32))) string_buffer[512] = u8R"__(ß
     40i̇
    4041ʼn
    4142ǰ
    42 i̇
    4343ΐ
    4444ΰ
     
    6262ំ
    6363ះ
     64ៀ
     65េ
     66ែ
     67ៃ
     68ោ
     69áŸ
     70
     71ំ
     72ះ
     73័
     74៑
     75្
     76៓
     77។
     78៕
     79៖
     80ៗ
    6481័
    6582៑
     
    7895៊
    7996៧
     97០
     98១
     99២
     100៣
     101ៀ
     102៥
     103៊
     104៧
    80105៲
    81106៳
     
    83108៶
    84109៷
     110៳
    85111ῂ
    86112ῃ
     
    88114ῆ
    89115ῇ
     116ῃ
    90117ῒ
    91118ΐ
     
    102129ῶ
    103130á¿·
     131ῳ
    104132ff
    105133fi
     
    118146
    119147        const static std::vector<codepoint_t> defined_cps = {
    120         0x00df, 0x0149, 0x01f0, 0x0307, 0x0390, 0x03b0, 0x0587, 0x1e96,
     148        0x00df, 0x0130, 0x0149, 0x01f0, 0x0390, 0x03b0, 0x0587, 0x1e96,
    121149        0x1e97, 0x1e98, 0x1e99, 0x1e9a, 0x1f50, 0x1f52, 0x1f54, 0x1f56,
    122150        0x1f80, 0x1f81, 0x1f82, 0x1f83, 0x1f84, 0x1f85, 0x1f86, 0x1f87,
     151        0x1f88, 0x1f89, 0x1f8a, 0x1f8b, 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f,
    123152        0x1f90, 0x1f91, 0x1f92, 0x1f93, 0x1f94, 0x1f95, 0x1f96, 0x1f97,
     153        0x1f98, 0x1f99, 0x1f9a, 0x1f9b, 0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f,
    124154        0x1fa0, 0x1fa1, 0x1fa2, 0x1fa3, 0x1fa4, 0x1fa5, 0x1fa6, 0x1fa7,
    125         0x1fb2, 0x1fb3, 0x1fb4, 0x1fb6, 0x1fb7, 0x1fc2, 0x1fc3, 0x1fc4,
    126         0x1fc6, 0x1fc7, 0x1fd2, 0x1fd3, 0x1fd6, 0x1fd7, 0x1fe2, 0x1fe3,
    127         0x1fe4, 0x1fe6, 0x1fe7, 0x1ff2, 0x1ff3, 0x1ff4, 0x1ff6, 0x1ff7,
    128         0xfb00, 0xfb01, 0xfb02, 0xfb03, 0xfb04, 0xfb05, 0xfb06, 0xfb13,
    129         0xfb14, 0xfb15, 0xfb16, 0xfb17};
     155        0x1fa8, 0x1fa9, 0x1faa, 0x1fab, 0x1fac, 0x1fad, 0x1fae, 0x1faf,
     156        0x1fb2, 0x1fb3, 0x1fb4, 0x1fb6, 0x1fb7, 0x1fbc, 0x1fc2, 0x1fc3,
     157        0x1fc4, 0x1fc6, 0x1fc7, 0x1fcc, 0x1fd2, 0x1fd3, 0x1fd6, 0x1fd7,
     158        0x1fe2, 0x1fe3, 0x1fe4, 0x1fe6, 0x1fe7, 0x1ff2, 0x1ff3, 0x1ff4,
     159        0x1ff6, 0x1ff7, 0x1ffc, 0xfb00, 0xfb01, 0xfb02, 0xfb03, 0xfb04,
     160        0xfb05, 0xfb06, 0xfb13, 0xfb14, 0xfb15, 0xfb16, 0xfb17};
    130161        static StringOverridePropertyObject property_object(lc,
    131162                                                    SLC_ns::property_object,
    132                                                     overridden_set,
     163                                                    explicitly_defined_set,
    133164                                                    static_cast<const char *>(string_buffer),
    134165                                                    buffer_length,
     
    136167    }
    137168    namespace UC_ns {
    138         /** Code Point Ranges for uc overriding values from SUC
    139         [004e, 004e], [0066, 0066], [0069, 0069], [006c, 006c],
    140         [0073, 0074], [0130, 0130], [02be, 02be], [0300, 0301],
    141         [0308, 0308], [030a, 030a], [030c, 030c], [0313, 0313],
    142         [0331, 0331], [0342, 0342], [0345, 0345], [0565, 0565],
    143         [056b, 056b], [056d, 056d], [0576, 0576], [0582, 0582],
    144         [1f88, 1f8f], [1f98, 1f9f], [1fa8, 1faf], [1fbc, 1fbc],
    145         [1fcc, 1fcc], [1ffc, 1ffc]**/
    146 
    147         const UnicodeSet overridden_set
    148                     {{{Empty, 2}, {Mixed, 2}, {Empty, 5}, {Mixed, 1}, {Empty, 11},
    149               {Mixed, 1}, {Empty, 2}, {Mixed, 3}, {Empty, 16}, {Mixed, 2},
    150               {Empty, 207}, {Mixed, 4}, {Empty, 34560}},
    151              {0x00004000, 0x00181240, 0x00010000, 0x40000000, 0x00081503,
    152               0x00020000, 0x00000024, 0x00402820, 0x00000004, 0xff00ff00,
    153               0x1000ff00, 0x00001000, 0x10000000}};
    154 
    155         const unsigned buffer_length = 208;
    156         const static char __attribute__ ((aligned (32))) string_buffer[256] = u8R"__(ÊŒN
    157 Ff
    158 Ffi
    159 Ffl
    160 Ss
    161 St
     169        /** Code Point Ranges for uc (possibly overriding values from SUC)
     170        [00df, 00df], [0130, 0130], [0149, 0149], [01f0, 01f0],
     171        [0390, 0390], [03b0, 03b0], [0587, 0587], [1e96, 1e9a],
     172        [1f50, 1f50], [1f52, 1f52], [1f54, 1f54], [1f56, 1f56],
     173        [1f80, 1faf], [1fb2, 1fb4], [1fb6, 1fb7], [1fbc, 1fbc],
     174        [1fc2, 1fc4], [1fc6, 1fc7], [1fcc, 1fcc], [1fd2, 1fd3],
     175        [1fd6, 1fd7], [1fe2, 1fe4], [1fe6, 1fe7], [1ff2, 1ff4],
     176        [1ff6, 1ff7], [1ffc, 1ffc], [fb00, fb06], [fb13, fb17]**/
     177
     178        const UnicodeSet explicitly_defined_set
     179                    {{{Empty, 6}, {Mixed, 1}, {Empty, 2}, {Mixed, 2}, {Empty, 4},
     180              {Mixed, 1}, {Empty, 12}, {Mixed, 2}, {Empty, 14}, {Mixed, 1},
     181              {Empty, 199}, {Mixed, 1}, {Empty, 5}, {Mixed, 1}, {Empty, 1},
     182              {Full, 1}, {Mixed, 3}, {Empty, 1752}, {Mixed, 1},
     183              {Empty, 32807}},
     184             {0x80000000, 0x00010000, 0x00000200, 0x00010000, 0x00010000,
     185              0x00010000, 0x00000080, 0x07c00000, 0x00550000, 0x10dcffff,
     186              0x00cc10dc, 0x10dc00dc, 0x00f8007f}};
     187
     188        const unsigned buffer_length = 469;
     189        const static char __attribute__ ((aligned (32))) string_buffer[512] = u8R"__(Ss
    162190İ
     191ÊŒN
     192J̌
     193Ϊ́
     194Ϋ́
     195Եւ
     196H̱
     197T̈
     198W̊
     199Y̊
    163200AÊŸ
    164 Ϋ̀
    165 Ϋ́
    166 T̈
    167 Y̊
    168 J̌
    169 Ρ̓
    170 H̱
    171 Ω͂
    172 Î©Í‚Í
    173 
    174 Մե
    175 Մի
    176 Մխ
    177 Վն
    178 Եւ
     201Υ̓
     202Υ̓̀
     203Υ̓́
     204Υ̓͂
     205ៈ
     206៉
     207៊
     208់
     209៌
     210៍
     211៎
     212៏
    179213ៈ
    180214៉
     
    193227៞
    194228៟
     229៘
     230៙
     231៚
     232៛
     233ៜ
     234៝
     235៞
     236៟
    195237៚
    196238៩
     
    201243៮
    202244៯
     245៚
     246៩
     247៪
     248៫
     249៬
     250៭
     251៮
     252៯
     253៺Í
     254
    203255៌
     256ΆÍ
     257
     258Α͂
     259Α͂Í
     260
     261៌
     262ῊÍ
     263
    204264ῌ
     265ΉÍ
     266
     267Η͂
     268Η͂Í
     269
     270ῌ
     271Ϊ̀
     272Ϊ́
     273Ι͂
     274Ϊ͂
     275Ϋ̀
     276Ϋ́
     277Ρ̓
     278Υ͂
     279Ϋ͂
     280ῺÍ
     281
    205282ῌ
     283ΏÍ
     284
     285Ω͂
     286Ω͂Í
     287
     288á¿Œ
     289Ff
     290Fi
     291Fl
     292Ffi
     293Ffl
     294St
     295St
     296Մն
     297Մե
     298Մի
     299Վն
     300Մխ
    206301)__";
    207302
    208303        const static std::vector<codepoint_t> defined_cps = {
    209         0x004e, 0x0066, 0x0069, 0x006c, 0x0073, 0x0074, 0x0130, 0x02be,
    210         0x0300, 0x0301, 0x0308, 0x030a, 0x030c, 0x0313, 0x0331, 0x0342,
    211         0x0345, 0x0565, 0x056b, 0x056d, 0x0576, 0x0582, 0x1f88, 0x1f89,
    212         0x1f8a, 0x1f8b, 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f, 0x1f98, 0x1f99,
    213         0x1f9a, 0x1f9b, 0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f, 0x1fa8, 0x1fa9,
    214         0x1faa, 0x1fab, 0x1fac, 0x1fad, 0x1fae, 0x1faf, 0x1fbc, 0x1fcc,
    215         0x1ffc};
     304        0x00df, 0x0130, 0x0149, 0x01f0, 0x0390, 0x03b0, 0x0587, 0x1e96,
     305        0x1e97, 0x1e98, 0x1e99, 0x1e9a, 0x1f50, 0x1f52, 0x1f54, 0x1f56,
     306        0x1f80, 0x1f81, 0x1f82, 0x1f83, 0x1f84, 0x1f85, 0x1f86, 0x1f87,
     307        0x1f88, 0x1f89, 0x1f8a, 0x1f8b, 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f,
     308        0x1f90, 0x1f91, 0x1f92, 0x1f93, 0x1f94, 0x1f95, 0x1f96, 0x1f97,
     309        0x1f98, 0x1f99, 0x1f9a, 0x1f9b, 0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f,
     310        0x1fa0, 0x1fa1, 0x1fa2, 0x1fa3, 0x1fa4, 0x1fa5, 0x1fa6, 0x1fa7,
     311        0x1fa8, 0x1fa9, 0x1faa, 0x1fab, 0x1fac, 0x1fad, 0x1fae, 0x1faf,
     312        0x1fb2, 0x1fb3, 0x1fb4, 0x1fb6, 0x1fb7, 0x1fbc, 0x1fc2, 0x1fc3,
     313        0x1fc4, 0x1fc6, 0x1fc7, 0x1fcc, 0x1fd2, 0x1fd3, 0x1fd6, 0x1fd7,
     314        0x1fe2, 0x1fe3, 0x1fe4, 0x1fe6, 0x1fe7, 0x1ff2, 0x1ff3, 0x1ff4,
     315        0x1ff6, 0x1ff7, 0x1ffc, 0xfb00, 0xfb01, 0xfb02, 0xfb03, 0xfb04,
     316        0xfb05, 0xfb06, 0xfb13, 0xfb14, 0xfb15, 0xfb16, 0xfb17};
    216317        static StringOverridePropertyObject property_object(uc,
    217318                                                    SUC_ns::property_object,
    218                                                     overridden_set,
     319                                                    explicitly_defined_set,
    219320                                                    static_cast<const char *>(string_buffer),
    220321                                                    buffer_length,
     
    222323    }
    223324    namespace TC_ns {
    224         /** Code Point Ranges for tc overriding values from STC
    225         [0046, 0046], [0049, 0049], [004c, 004c], [004e, 004e],
    226         [0053, 0054], [0130, 0130], [02be, 02be], [0300, 0301],
    227         [0308, 0308], [030a, 030a], [030c, 030c], [0313, 0313],
    228         [0331, 0331], [0342, 0342], [0399, 0399], [0535, 0535],
    229         [053b, 053b], [053d, 053d], [0546, 0546], [0552, 0552]**/
    230 
    231         const UnicodeSet overridden_set
    232                     {{{Empty, 2}, {Mixed, 1}, {Empty, 6}, {Mixed, 1}, {Empty, 11},
    233               {Mixed, 1}, {Empty, 2}, {Mixed, 3}, {Empty, 1}, {Mixed, 1},
    234               {Empty, 12}, {Mixed, 2}, {Empty, 34773}},
    235              {0x00185240, 0x00010000, 0x40000000, 0x00081503, 0x00020000,
    236               0x00000004, 0x02000000, 0x28200000, 0x00040040}};
    237 
    238         const unsigned buffer_length = 100;
    239         const static char __attribute__ ((aligned (32))) string_buffer[256] = u8R"__(FF
     325        /** Code Point Ranges for tc (possibly overriding values from STC)
     326        [00df, 00df], [0130, 0130], [0149, 0149], [01f0, 01f0],
     327        [0390, 0390], [03b0, 03b0], [0587, 0587], [1e96, 1e9a],
     328        [1f50, 1f50], [1f52, 1f52], [1f54, 1f54], [1f56, 1f56],
     329        [1f80, 1faf], [1fb2, 1fb4], [1fb6, 1fb7], [1fbc, 1fbc],
     330        [1fc2, 1fc4], [1fc6, 1fc7], [1fcc, 1fcc], [1fd2, 1fd3],
     331        [1fd6, 1fd7], [1fe2, 1fe4], [1fe6, 1fe7], [1ff2, 1ff4],
     332        [1ff6, 1ff7], [1ffc, 1ffc], [fb00, fb06], [fb13, fb17]**/
     333
     334        const UnicodeSet explicitly_defined_set
     335                    {{{Empty, 6}, {Mixed, 1}, {Empty, 2}, {Mixed, 2}, {Empty, 4},
     336              {Mixed, 1}, {Empty, 12}, {Mixed, 2}, {Empty, 14}, {Mixed, 1},
     337              {Empty, 199}, {Mixed, 1}, {Empty, 5}, {Mixed, 1}, {Empty, 1},
     338              {Full, 1}, {Mixed, 3}, {Empty, 1752}, {Mixed, 1},
     339              {Empty, 32807}},
     340             {0x80000000, 0x00010000, 0x00000200, 0x00010000, 0x00010000,
     341              0x00010000, 0x00000080, 0x07c00000, 0x00550000, 0x10dcffff,
     342              0x00cc10dc, 0x10dc00dc, 0x00f8007f}};
     343
     344        const unsigned buffer_length = 571;
     345        const static char __attribute__ ((aligned (32))) string_buffer[768] = u8R"__(SS
     346İ
     347ÊŒN
     348J̌
     349Ϊ́
     350Ϋ́
     351ԵՒ
     352H̱
     353T̈
     354W̊
     355Y̊
     356AÊŸ
     357Υ̓
     358Υ̓̀
     359Υ̓́
     360Υ̓͂
     361ገΙ
     362ጉΙ
     363ጊΙ
     364ጋΙ
     365ጌΙ
     366ግΙ
     367ጎΙ
     368ጏΙ
     369ገΙ
     370ጉΙ
     371ጊΙ
     372ጋΙ
     373ጌΙ
     374ግΙ
     375ጎΙ
     376ጏΙ
     377ጚΙ
     378ጩΙ
     379ጪΙ
     380ጫΙ
     381ጬΙ
     382ጭΙ
     383ጮΙ
     384ጯΙ
     385ጚΙ
     386ጩΙ
     387ጪΙ
     388ጫΙ
     389ጬΙ
     390ጭΙ
     391ጮΙ
     392ጯΙ
     393᜚Ι
     394ᜩΙ
     395ᜪΙ
     396ᜫΙ
     397ᜬΙ
     398ᜭΙ
     399ᜮΙ
     400ᜯΙ
     401᜚Ι
     402ᜩΙ
     403ᜪΙ
     404ᜫΙ
     405ᜬΙ
     406ᜭΙ
     407ᜮΙ
     408ᜯΙ
     409៺Ι
     410ΑΙ
     411ΆΙ
     412Α͂
     413Α͂Ι
     414ΑΙ
     415ῊΙ
     416ΗΙ
     417ΉΙ
     418Η͂
     419Η͂Ι
     420ΗΙ
     421Ϊ̀
     422Ϊ́
     423Ι͂
     424Ϊ͂
     425Ϋ̀
     426Ϋ́
     427Ρ̓
     428Υ͂
     429Ϋ͂
     430ῺΙ
     431ΩΙ
     432ΏΙ
     433Ω͂
     434Ω͂Ι
     435ΩΙ
     436FF
     437FI
     438FL
    240439FFI
    241440FFL
    242 ÊŒN
    243 SS
    244441ST
    245 İ
    246 AÊŸ
    247 Ϋ̀
    248 Ϋ́
    249 T̈
    250 Y̊
    251 J̌
    252 Ρ̓
    253 H̱
    254 Ω͂
    255 Ω͂Ι
     442ST
     443ՄՆ
    256444ՄԵ
    257445ՄԻ
     446ՎՆ
    258447ՄԜ
    259 ՎՆ
    260 ԵՒ
    261448)__";
    262449
    263450        const static std::vector<codepoint_t> defined_cps = {
    264         0x0046, 0x0049, 0x004c, 0x004e, 0x0053, 0x0054, 0x0130, 0x02be,
    265         0x0300, 0x0301, 0x0308, 0x030a, 0x030c, 0x0313, 0x0331, 0x0342,
    266         0x0399, 0x0535, 0x053b, 0x053d, 0x0546, 0x0552};
     451        0x00df, 0x0130, 0x0149, 0x01f0, 0x0390, 0x03b0, 0x0587, 0x1e96,
     452        0x1e97, 0x1e98, 0x1e99, 0x1e9a, 0x1f50, 0x1f52, 0x1f54, 0x1f56,
     453        0x1f80, 0x1f81, 0x1f82, 0x1f83, 0x1f84, 0x1f85, 0x1f86, 0x1f87,
     454        0x1f88, 0x1f89, 0x1f8a, 0x1f8b, 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f,
     455        0x1f90, 0x1f91, 0x1f92, 0x1f93, 0x1f94, 0x1f95, 0x1f96, 0x1f97,
     456        0x1f98, 0x1f99, 0x1f9a, 0x1f9b, 0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f,
     457        0x1fa0, 0x1fa1, 0x1fa2, 0x1fa3, 0x1fa4, 0x1fa5, 0x1fa6, 0x1fa7,
     458        0x1fa8, 0x1fa9, 0x1faa, 0x1fab, 0x1fac, 0x1fad, 0x1fae, 0x1faf,
     459        0x1fb2, 0x1fb3, 0x1fb4, 0x1fb6, 0x1fb7, 0x1fbc, 0x1fc2, 0x1fc3,
     460        0x1fc4, 0x1fc6, 0x1fc7, 0x1fcc, 0x1fd2, 0x1fd3, 0x1fd6, 0x1fd7,
     461        0x1fe2, 0x1fe3, 0x1fe4, 0x1fe6, 0x1fe7, 0x1ff2, 0x1ff3, 0x1ff4,
     462        0x1ff6, 0x1ff7, 0x1ffc, 0xfb00, 0xfb01, 0xfb02, 0xfb03, 0xfb04,
     463        0xfb05, 0xfb06, 0xfb13, 0xfb14, 0xfb15, 0xfb16, 0xfb17};
    267464        static StringOverridePropertyObject property_object(tc,
    268465                                                    STC_ns::property_object,
    269                                                     overridden_set,
     466                                                    explicitly_defined_set,
    270467                                                    static_cast<const char *>(string_buffer),
    271468                                                    buffer_length,
Note: See TracChangeset for help on using the changeset viewer.