Ignore:
Timestamp:
Oct 6, 2017, 11:36:55 AM (22 months ago)
Author:
cameron
Message:

StringOverride properties (simple case conversion vs. full case conversion)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/icgrep/UCD/SpecialCasing.h

    r5670 r5672  
    1212#include "PropertyObjects.h"
    1313#include "PropertyValueAliases.h"
     14#include "UnicodeData.h"
    1415#include "unicode_set.h"
    1516
    1617namespace UCD {
    1718    namespace LC_ns {
    18         /** Code Point Ranges for lc mapping to <none>
    19         **/
    20 
    21         const UnicodeSet null_codepoint_set
    22                     {{{Empty, 34816}},
    23              {}};
    24 
    25         /** Code Point Ranges for lc mapping to <codepoint>
    26         [0000, 012f], [0131, 1f87], [1f90, 1f97], [1fa0, 1fa7],
    27         [1fb0, 1fbb], [1fbd, 1fcb], [1fcd, 1ffb], [1ffd, 10ffff]**/
    28         const UnicodeSet reflexive_set
    29                     {{{Full, 9}, {Mixed, 1}, {Full, 242}, {Mixed, 4}, {Full, 34560}},
    30              {0xfffeffff, 0x00ff00ff, 0xefff00ff, 0xffffefff, 0xefffffff}};
    31 
    32         const unsigned buffer_length = 112;
    33         const static char __attribute__ ((aligned (32))) string_buffer[256] = u8R"__(i̇
     19        /** Code Point Ranges for lc overriding values from SLC
     20        [00df, 00df], [0149, 0149], [01f0, 01f0], [0307, 0307],
     21        [0390, 0390], [03b0, 03b0], [0587, 0587], [1e96, 1e9a],
     22        [1f50, 1f50], [1f52, 1f52], [1f54, 1f54], [1f56, 1f56],
     23        [1f80, 1f87], [1f90, 1f97], [1fa0, 1fa7], [1fb2, 1fb4],
     24        [1fb6, 1fb7], [1fc2, 1fc4], [1fc6, 1fc7], [1fd2, 1fd3],
     25        [1fd6, 1fd7], [1fe2, 1fe4], [1fe6, 1fe7], [1ff2, 1ff4],
     26        [1ff6, 1ff7], [fb00, fb06], [fb13, fb17]**/
     27
     28        const UnicodeSet overridden_set
     29                    {{{Empty, 6}, {Mixed, 1}, {Empty, 3}, {Mixed, 1}, {Empty, 4},
     30              {Mixed, 1}, {Empty, 8}, {Mixed, 1}, {Empty, 3}, {Mixed, 2},
     31              {Empty, 14}, {Mixed, 1}, {Empty, 199}, {Mixed, 1}, {Empty, 5},
     32              {Mixed, 1}, {Empty, 1}, {Mixed, 4}, {Empty, 1752}, {Mixed, 1},
     33              {Empty, 32807}},
     34             {0x80000000, 0x00000200, 0x00010000, 0x00000080, 0x00010000,
     35              0x00010000, 0x00000080, 0x07c00000, 0x00550000, 0x00ff00ff,
     36              0x00dc00ff, 0x00cc00dc, 0x00dc00dc, 0x00f8007f}};
     37
     38        const unsigned buffer_length = 298;
     39        const static char __attribute__ ((aligned (32))) string_buffer[512] = u8R"__(ß
     40ʼn
     41Ç°
     42i̇
     43ΐ
     44ΰ
     45և
     46ẖ
     47ẗ
     48ẘ
     49ẙ
     50ẚ
     51ᜐ
     52ᜒ
     53᜔
     54᜖
    3455ៀ
    3556េ
     
    5778៊
    5879៧
     80៲
    5981៳
     82៎
     83៶
     84៷
     85ῂ
    6086ῃ
     87ῄ
     88ῆ
     89ῇ
     90ῒ
     91ΐ
     92ῖ
     93ῗ
     94á¿¢
     95á¿£
     96á¿€
     97á¿Š
     98ῧ
     99ῲ
    61100ῳ
     101á¿Ž
     102ῶ
     103á¿·
     104ff
     105fi
     106fl
     107ffi
     108ffl
     109ï¬
     110
     111st
     112ﬓ
     113ﬔ
     114ﬕ
     115ﬖ
     116ﬗ
    62117)__";
    63118
    64119        const static std::vector<codepoint_t> defined_cps = {
    65         0x0130, 0x1f88, 0x1f89, 0x1f8a, 0x1f8b, 0x1f8c, 0x1f8d, 0x1f8e,
    66         0x1f8f, 0x1f98, 0x1f99, 0x1f9a, 0x1f9b, 0x1f9c, 0x1f9d, 0x1f9e,
    67         0x1f9f, 0x1fa8, 0x1fa9, 0x1faa, 0x1fab, 0x1fac, 0x1fad, 0x1fae,
    68         0x1faf, 0x1fbc, 0x1fcc, 0x1ffc};
    69         static StringPropertyObject property_object(lc,
    70                                                     null_codepoint_set,
    71                                                     reflexive_set,
     120        0x00df, 0x0149, 0x01f0, 0x0307, 0x0390, 0x03b0, 0x0587, 0x1e96,
     121        0x1e97, 0x1e98, 0x1e99, 0x1e9a, 0x1f50, 0x1f52, 0x1f54, 0x1f56,
     122        0x1f80, 0x1f81, 0x1f82, 0x1f83, 0x1f84, 0x1f85, 0x1f86, 0x1f87,
     123        0x1f90, 0x1f91, 0x1f92, 0x1f93, 0x1f94, 0x1f95, 0x1f96, 0x1f97,
     124        0x1fa0, 0x1fa1, 0x1fa2, 0x1fa3, 0x1fa4, 0x1fa5, 0x1fa6, 0x1fa7,
     125        0x1fb2, 0x1fb3, 0x1fb4, 0x1fb6, 0x1fb7, 0x1fc2, 0x1fc3, 0x1fc4,
     126        0x1fc6, 0x1fc7, 0x1fd2, 0x1fd3, 0x1fd6, 0x1fd7, 0x1fe2, 0x1fe3,
     127        0x1fe4, 0x1fe6, 0x1fe7, 0x1ff2, 0x1ff3, 0x1ff4, 0x1ff6, 0x1ff7,
     128        0xfb00, 0xfb01, 0xfb02, 0xfb03, 0xfb04, 0xfb05, 0xfb06, 0xfb13,
     129        0xfb14, 0xfb15, 0xfb16, 0xfb17};
     130        static StringOverridePropertyObject property_object(lc,
     131                                                    SLC_ns::property_object,
     132                                                    overridden_set,
    72133                                                    static_cast<const char *>(string_buffer),
    73134                                                    buffer_length,
     
    75136    }
    76137    namespace UC_ns {
    77         /** Code Point Ranges for uc mapping to <none>
    78         **/
    79 
    80         const UnicodeSet null_codepoint_set
    81                     {{{Empty, 34816}},
    82              {}};
    83 
    84         /** Code Point Ranges for uc mapping to <codepoint>
    85         [0000, 00de], [00e0, 0148], [014a, 01ef], [01f1, 038f],
    86         [0391, 03af], [03b1, 0586], [0588, 1e95], [1e9b, 1f4f],
    87         [1f51, 1f51], [1f53, 1f53], [1f55, 1f55], [1f57, 1f7f],
    88         [1f88, 1f8f], [1f98, 1f9f], [1fa8, 1fb1], [1fb5, 1fb5],
    89         [1fb8, 1fc1], [1fc5, 1fc5], [1fc8, 1fd1], [1fd4, 1fd5],
    90         [1fd8, 1fe1], [1fe5, 1fe5], [1fe8, 1ff1], [1ff5, 1ff5],
    91         [1ff8, faff], [fb07, fb12], [fb18, 10ffff]**/
    92         const UnicodeSet reflexive_set
    93                     {{{Full, 6}, {Mixed, 1}, {Full, 3}, {Mixed, 1}, {Full, 4},
    94               {Mixed, 1}, {Full, 12}, {Mixed, 2}, {Full, 14}, {Mixed, 1},
    95               {Full, 199}, {Mixed, 1}, {Full, 5}, {Mixed, 1}, {Full, 1},
    96               {Mixed, 4}, {Full, 1752}, {Mixed, 1}, {Full, 32807}},
    97              {0x7fffffff, 0xfffffdff, 0xfffeffff, 0xfffeffff, 0xfffeffff,
    98               0xffffff7f, 0xf83fffff, 0xffaaffff, 0xff00ff00, 0xff23ff00,
    99               0xff33ff23, 0xff23ff23, 0xff07ff80}};
    100 
    101         const unsigned buffer_length = 358;
    102         const static char __attribute__ ((aligned (32))) string_buffer[512] = u8R"__(Ss
    103 ÊŒN
     138        /** Code Point Ranges for uc overriding values from SUC
     139        [004e, 004e], [0066, 0066], [0069, 0069], [006c, 006c],
     140        [0073, 0074], [0130, 0130], [02be, 02be], [0300, 0301],
     141        [0308, 0308], [030a, 030a], [030c, 030c], [0313, 0313],
     142        [0331, 0331], [0342, 0342], [0345, 0345], [0565, 0565],
     143        [056b, 056b], [056d, 056d], [0576, 0576], [0582, 0582],
     144        [1f88, 1f8f], [1f98, 1f9f], [1fa8, 1faf], [1fbc, 1fbc],
     145        [1fcc, 1fcc], [1ffc, 1ffc]**/
     146
     147        const UnicodeSet overridden_set
     148                    {{{Empty, 2}, {Mixed, 2}, {Empty, 5}, {Mixed, 1}, {Empty, 11},
     149              {Mixed, 1}, {Empty, 2}, {Mixed, 3}, {Empty, 16}, {Mixed, 2},
     150              {Empty, 207}, {Mixed, 4}, {Empty, 34560}},
     151             {0x00004000, 0x00181240, 0x00010000, 0x40000000, 0x00081503,
     152              0x00020000, 0x00000024, 0x00402820, 0x00000004, 0xff00ff00,
     153              0x1000ff00, 0x00001000, 0x10000000}};
     154
     155        const unsigned buffer_length = 208;
     156        const static char __attribute__ ((aligned (32))) string_buffer[256] = u8R"__(ÊŒN
     157Ff
     158Ffi
     159Ffl
     160Ss
     161St
     162Ä°
     163AÊŸ
     164Ϋ̀
     165Ϋ́
     166T̈
     167Y̊
    104168J̌
    105 Î™ÌˆÌ
    106 Î¥ÌˆÌ
     169Ρ̓
     170H̱
     171Ω͂
     172Ω͂Í
     173
     174Մե
     175Մի
     176Մխ
     177Վն
    107178Եւ
    108 H̱
    109 T̈
    110 W̊
    111 Y̊
    112 AÊŸ
    113 Î¥Ì“
    114 Î¥Ì“Ì€
    115 Î¥Ì“́
    116 Î¥Ì“Í‚
    117179ៈ
    118180៉
     
    139201៮
    140202៯
    141 áŸºÍ
    142 
    143203៌
    144 Î†Í
    145 
    146 Î‘Í‚
    147 Î‘Í‚Í
    148 
    149 á¿ŠÍ
    150 
    151204ῌ
    152 Î‰Í
    153 
    154 Î—Í‚
    155 Î—Í‚Í
    156 
    157 Î™ÌˆÌ€
    158 Î™ÌˆÌ
    159 Î™Í‚
    160 Î™ÌˆÍ‚
    161 Î¥ÌˆÌ€
    162 Î¥ÌˆÌ
    163 Î¡Ì“
    164 Î¥Í‚
    165 Î¥ÌˆÍ‚
    166 á¿ºÍ
    167 
    168205ῌ
    169 ÎÍ
    170 
    171 Î©Í‚
    172 Î©Í‚Í
    173 
    174 Ff
    175 Fi
    176 Fl
    177 Ffi
    178 Ffl
    179 St
    180 St
    181 Õ„Õ¶
    182 Õ„Õ¥
    183 Õ„Õ«
    184 ÕŽÕ¶
    185 Õ„Õ­
    186206)__";
    187207
    188208        const static std::vector<codepoint_t> defined_cps = {
    189         0x00df, 0x0149, 0x01f0, 0x0390, 0x03b0, 0x0587, 0x1e96, 0x1e97,
    190         0x1e98, 0x1e99, 0x1e9a, 0x1f50, 0x1f52, 0x1f54, 0x1f56, 0x1f80,
    191         0x1f81, 0x1f82, 0x1f83, 0x1f84, 0x1f85, 0x1f86, 0x1f87, 0x1f90,
    192         0x1f91, 0x1f92, 0x1f93, 0x1f94, 0x1f95, 0x1f96, 0x1f97, 0x1fa0,
    193         0x1fa1, 0x1fa2, 0x1fa3, 0x1fa4, 0x1fa5, 0x1fa6, 0x1fa7, 0x1fb2,
    194         0x1fb3, 0x1fb4, 0x1fb6, 0x1fb7, 0x1fc2, 0x1fc3, 0x1fc4, 0x1fc6,
    195         0x1fc7, 0x1fd2, 0x1fd3, 0x1fd6, 0x1fd7, 0x1fe2, 0x1fe3, 0x1fe4,
    196         0x1fe6, 0x1fe7, 0x1ff2, 0x1ff3, 0x1ff4, 0x1ff6, 0x1ff7, 0xfb00,
    197         0xfb01, 0xfb02, 0xfb03, 0xfb04, 0xfb05, 0xfb06, 0xfb13, 0xfb14,
    198         0xfb15, 0xfb16, 0xfb17};
    199         static StringPropertyObject property_object(uc,
    200                                                     null_codepoint_set,
    201                                                     reflexive_set,
     209        0x004e, 0x0066, 0x0069, 0x006c, 0x0073, 0x0074, 0x0130, 0x02be,
     210        0x0300, 0x0301, 0x0308, 0x030a, 0x030c, 0x0313, 0x0331, 0x0342,
     211        0x0345, 0x0565, 0x056b, 0x056d, 0x0576, 0x0582, 0x1f88, 0x1f89,
     212        0x1f8a, 0x1f8b, 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f, 0x1f98, 0x1f99,
     213        0x1f9a, 0x1f9b, 0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f, 0x1fa8, 0x1fa9,
     214        0x1faa, 0x1fab, 0x1fac, 0x1fad, 0x1fae, 0x1faf, 0x1fbc, 0x1fcc,
     215        0x1ffc};
     216        static StringOverridePropertyObject property_object(uc,
     217                                                    SUC_ns::property_object,
     218                                                    overridden_set,
    202219                                                    static_cast<const char *>(string_buffer),
    203220                                                    buffer_length,
     
    205222    }
    206223    namespace TC_ns {
    207         /** Code Point Ranges for tc mapping to <none>
    208         **/
    209 
    210         const UnicodeSet null_codepoint_set
    211                     {{{Empty, 34816}},
    212              {}};
    213 
    214         /** Code Point Ranges for tc mapping to <codepoint>
    215         [0000, 00de], [00e0, 0148], [014a, 01ef], [01f1, 038f],
    216         [0391, 03af], [03b1, 0586], [0588, 1e95], [1e9b, 1f4f],
    217         [1f51, 1f51], [1f53, 1f53], [1f55, 1f55], [1f57, 1f7f],
    218         [1fb0, 1fb1], [1fb5, 1fb5], [1fb8, 1fbb], [1fbd, 1fc1],
    219         [1fc5, 1fc5], [1fc8, 1fcb], [1fcd, 1fd1], [1fd4, 1fd5],
    220         [1fd8, 1fe1], [1fe5, 1fe5], [1fe8, 1ff1], [1ff5, 1ff5],
    221         [1ff8, 1ffb], [1ffd, faff], [fb07, fb12], [fb18, 10ffff]**/
    222         const UnicodeSet reflexive_set
    223                     {{{Full, 6}, {Mixed, 1}, {Full, 3}, {Mixed, 1}, {Full, 4},
    224               {Mixed, 1}, {Full, 12}, {Mixed, 2}, {Full, 14}, {Mixed, 1},
    225               {Full, 199}, {Mixed, 1}, {Full, 5}, {Mixed, 1}, {Full, 1},
    226               {Empty, 1}, {Mixed, 3}, {Full, 1752}, {Mixed, 1},
    227               {Full, 32807}},
    228              {0x7fffffff, 0xfffffdff, 0xfffeffff, 0xfffeffff, 0xfffeffff,
    229               0xffffff7f, 0xf83fffff, 0xffaaffff, 0xef230000, 0xff33ef23,
    230               0xef23ff23, 0xff07ff80}};
    231 
    232         const unsigned buffer_length = 568;
    233         const static char __attribute__ ((aligned (32))) string_buffer[768] = u8R"__(SS
     224        /** Code Point Ranges for tc overriding values from STC
     225        [0046, 0046], [0049, 0049], [004c, 004c], [004e, 004e],
     226        [0053, 0054], [0130, 0130], [02be, 02be], [0300, 0301],
     227        [0308, 0308], [030a, 030a], [030c, 030c], [0313, 0313],
     228        [0331, 0331], [0342, 0342], [0399, 0399], [0535, 0535],
     229        [053b, 053b], [053d, 053d], [0546, 0546], [0552, 0552]**/
     230
     231        const UnicodeSet overridden_set
     232                    {{{Empty, 2}, {Mixed, 1}, {Empty, 6}, {Mixed, 1}, {Empty, 11},
     233              {Mixed, 1}, {Empty, 2}, {Mixed, 3}, {Empty, 1}, {Mixed, 1},
     234              {Empty, 12}, {Mixed, 2}, {Empty, 34773}},
     235             {0x00185240, 0x00010000, 0x40000000, 0x00081503, 0x00020000,
     236              0x00000004, 0x02000000, 0x28200000, 0x00040040}};
     237
     238        const unsigned buffer_length = 100;
     239        const static char __attribute__ ((aligned (32))) string_buffer[256] = u8R"__(FF
     240FFI
     241FFL
    234242ÊŒN
    235 J̌
    236 Î™ÌˆÌ
    237 Î¥ÌˆÌ
    238 ÔµÕ’
    239 H̱
    240 T̈
    241 W̊
    242 Y̊
     243SS
     244ST
     245Ä°
    243246AÊŸ
    244 Î¥Ì“
    245 Î¥Ì“Ì€
    246 Î¥Ì“́
    247 Î¥Ì“Í‚
    248 áŒˆÎ™
    249 áŒ‰Î™
    250 áŒŠÎ™
    251 áŒ‹Î™
    252 áŒŒÎ™
    253 áŒÎ™
    254 áŒŽÎ™
    255 áŒÎ™
    256 áŒˆÎ™
    257 áŒ‰Î™
    258 áŒŠÎ™
    259 áŒ‹Î™
    260 áŒŒÎ™
    261 áŒÎ™
    262 áŒŽÎ™
    263 áŒÎ™
    264 áŒšÎ™
    265 áŒ©Î™
    266 áŒªÎ™
    267 áŒ«Î™
    268 áŒ¬Î™
    269 áŒ­Î™
    270 áŒ®Î™
    271 áŒ¯Î™
    272 áŒšÎ™
    273 áŒ©Î™
    274 áŒªÎ™
    275 áŒ«Î™
    276 áŒ¬Î™
    277 áŒ­Î™
    278 áŒ®Î™
    279 áŒ¯Î™
    280 áœšÎ™
    281 áœ©Î™
    282 áœªÎ™
    283 áœ«Î™
    284 áœ¬Î™
    285 áœ­Î™
    286 áœ®Î™
    287 áœ¯Î™
    288 áœšÎ™
    289 áœ©Î™
    290 áœªÎ™
    291 áœ«Î™
    292 áœ¬Î™
    293 áœ­Î™
    294 áœ®Î™
    295 áœ¯Î™
    296 áŸºÎ™
    297 Î‘Ι
    298 Î†Î™
    299 Î‘Í‚
    300 Î‘͂Ι
    301 Î‘Ι
    302 á¿ŠÎ™
    303 Î—Ι
    304 Î‰Î™
    305 Î—Í‚
    306 Î—͂Ι
    307 Î—Ι
    308 Î™ÌˆÌ€
    309 Î™ÌˆÌ
    310 Î™Í‚
    311 Î™ÌˆÍ‚
    312247Ϋ̀
    313248Ϋ́
     249T̈
     250Y̊
     251J̌
    314252Ρ̓
    315 Î¥Í‚
    316 Î¥ÌˆÍ‚
    317 á¿ºÎ™
    318 Î©Î™
    319 ÎÎ™
     253H̱
    320254Ω͂
    321255Ω͂Ι
    322 Î©Î™
    323 FF
    324 FI
    325 FL
    326 FFI
    327 FFL
    328 ST
    329 ST
    330 Õ„Õ†
    331256ՄԵ
    332257ՄԻ
     258Õ„Ôœ
    333259ՎՆ
    334 Õ„Ôœ
     260ԵՒ
    335261)__";
    336262
    337263        const static std::vector<codepoint_t> defined_cps = {
    338         0x00df, 0x0149, 0x01f0, 0x0390, 0x03b0, 0x0587, 0x1e96, 0x1e97,
    339         0x1e98, 0x1e99, 0x1e9a, 0x1f50, 0x1f52, 0x1f54, 0x1f56, 0x1f80,
    340         0x1f81, 0x1f82, 0x1f83, 0x1f84, 0x1f85, 0x1f86, 0x1f87, 0x1f88,
    341         0x1f89, 0x1f8a, 0x1f8b, 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f, 0x1f90,
    342         0x1f91, 0x1f92, 0x1f93, 0x1f94, 0x1f95, 0x1f96, 0x1f97, 0x1f98,
    343         0x1f99, 0x1f9a, 0x1f9b, 0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f, 0x1fa0,
    344         0x1fa1, 0x1fa2, 0x1fa3, 0x1fa4, 0x1fa5, 0x1fa6, 0x1fa7, 0x1fa8,
    345         0x1fa9, 0x1faa, 0x1fab, 0x1fac, 0x1fad, 0x1fae, 0x1faf, 0x1fb2,
    346         0x1fb3, 0x1fb4, 0x1fb6, 0x1fb7, 0x1fbc, 0x1fc2, 0x1fc3, 0x1fc4,
    347         0x1fc6, 0x1fc7, 0x1fcc, 0x1fd2, 0x1fd3, 0x1fd6, 0x1fd7, 0x1fe2,
    348         0x1fe3, 0x1fe4, 0x1fe6, 0x1fe7, 0x1ff2, 0x1ff3, 0x1ff4, 0x1ff6,
    349         0x1ff7, 0x1ffc, 0xfb00, 0xfb01, 0xfb02, 0xfb03, 0xfb04, 0xfb05,
    350         0xfb06, 0xfb13, 0xfb14, 0xfb15, 0xfb16, 0xfb17};
    351         static StringPropertyObject property_object(tc,
    352                                                     null_codepoint_set,
    353                                                     reflexive_set,
     264        0x0046, 0x0049, 0x004c, 0x004e, 0x0053, 0x0054, 0x0130, 0x02be,
     265        0x0300, 0x0301, 0x0308, 0x030a, 0x030c, 0x0313, 0x0331, 0x0342,
     266        0x0399, 0x0535, 0x053b, 0x053d, 0x0546, 0x0552};
     267        static StringOverridePropertyObject property_object(tc,
     268                                                    STC_ns::property_object,
     269                                                    overridden_set,
    354270                                                    static_cast<const char *>(string_buffer),
    355271                                                    buffer_length,
Note: See TracChangeset for help on using the changeset viewer.