Changeset 5748 for icGREP/icgrepdevel/icgrep/UCD/CaseFolding.h
 Timestamp:
 Nov 28, 2017, 1:45:19 AM (21 months ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

icGREP/icgrepdevel/icgrep/UCD/CaseFolding.h
r5673 r5748 12 12 #include "PropertyObjects.h" 13 13 #include "PropertyValueAliases.h" 14 #include "re/re_cc.h"15 14 #include "unicode_set.h" 16 15 #include <vector> 17 16 18 typedef unsigned codepoint_t;19 20 17 struct FoldEntry { 21 re::codepoint_t range_lo;22 int fold_offset;23 std::vector<re::interval_t> fold_pairs;18 const UCD::codepoint_t range_lo; 19 const int fold_offset; 20 const std::vector<UCD::interval_t> fold_pairs; 24 21 }; 25 22 26 27 void caseInsensitiveInsertRange(re::CC * cc, const re::codepoint_t lo, const re::codepoint_t hi); 28 29 inline void caseInsensitiveInsert(re::CC * cc, const re::codepoint_t cp) { 23 void caseInsensitiveInsertRange(UCD::UnicodeSet * const cc, const UCD::codepoint_t lo, const UCD::codepoint_t hi); 24 25 inline void caseInsensitiveInsert(UCD::UnicodeSet * const cc, const UCD::codepoint_t cp) { 30 26 caseInsensitiveInsertRange(cc, cp, cp); 31 27 } … … 312 308 /** Code Point Ranges for scf mapping to <none> 313 309 **/ 314 315 const UnicodeSet null_codepoint_set 316 {{{Empty, 34816}}, 317 {}}; 310 311 312 namespace { 313 const static UnicodeSet::run_t __null_codepoint_set_runs[] = { 314 {Empty, 34816}}; 315 const static UnicodeSet::bitquad_t * const __null_codepoint_set_quads = nullptr; 316 } 317 318 const static UnicodeSet null_codepoint_set{const_cast<UnicodeSet::run_t *>(__null_codepoint_set_runs), 1, 0, const_cast<UnicodeSet::bitquad_t *>(__null_codepoint_set_quads), 0, 0}; 319 320 318 321 319 322 /** Code Point Ranges for scf mapping to <codepoint> … … 468 471 [a7b7, ab6f], [abc0, ff20], [ff3b, 103ff], [10428, 104af], 469 472 [104d4, 10c7f], [10cb3, 1189f], [118c0, 1e8ff], [1e922, 10ffff]**/ 470 const UnicodeSet reflexive_set 471 {{{Full, 2}, {Mixed, 1}, {Full, 2}, {Mixed, 2}, {Full, 1}, 472 {Mixed, 11}, {Full, 7}, {Mixed, 6}, {Empty, 1}, {Mixed, 1}, 473 {Full, 1}, {Mixed, 8}, {Full, 90}, {Empty, 1}, {Mixed, 1}, 474 {Full, 24}, {Mixed, 1}, {Full, 68}, {Mixed, 1}, {Full, 11}, 475 {Mixed, 16}, {Full, 9}, {Mixed, 1}, {Full, 1}, {Mixed, 2}, 476 {Full, 24}, {Mixed, 2}, {Full, 57}, {Empty, 1}, {Mixed, 1}, 477 {Full, 1}, {Mixed, 5}, {Full, 970}, {Mixed, 3}, {Full, 4}, 478 {Mixed, 5}, {Full, 29}, {Mixed, 1}, {Empty, 2}, {Full, 667}, 479 {Mixed, 1}, {Full, 38}, {Empty, 1}, {Mixed, 1}, {Full, 3}, 480 {Mixed, 2}, {Full, 61}, {Empty, 1}, {Mixed, 1}, {Full, 95}, 481 {Empty, 1}, {Full, 1666}, {Empty, 1}, {Mixed, 1}, 482 {Full, 30902}}, 483 {0xf8000001, 0xffdfffff, 0x80800000, 0xaaaaaaaa, 0x55abaaaa, 484 0xaaaaab55, 0x54aaaaaa, 0x4e243129, 0xee512d2a, 0xb555524f, 485 0xaa29aaaa, 0xaaaaaaaa, 0x93faaaaa, 0xffffaa85, 0xffffffdf, 486 0x7fbaffff, 0x000128bf, 0xfffff004, 0xaa9c7ffb, 0x194caaaa, 487 0xffff0000, 0xaaaaaaaa, 0xaaaaabfe, 0xaaaaaaaa, 0xaaaad554, 488 0xaaaaaaaa, 0xaaaaaaaa, 0x0001aaaa, 0xff800000, 0xffffdf40, 489 0xc0ffffff, 0xfffffe00, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 490 0xaaaaaaaa, 0xb7eaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 491 0xc0ff00ff, 0x00ff00ff, 0x55ffc0ff, 0xffff00ff, 0x00ff00ff, 492 0xa0ff00ff, 0xf0ffe0ff, 0xe0ffe0ff, 0xfffbf3bf, 0xffff0000, 493 0xfffffff7, 0x003fffff, 0xffff0000, 0xffff8000, 0x3fda1562, 494 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xfffbd7fa, 0xaaaaaaaa, 495 0xffffeaaa, 0xfaaaaaaa, 0xaaabaaab, 0xaaaaaaaa, 0x95ffaaaa, 496 0xaabad7aa, 0xffa082aa, 0x0000ffff, 0xf8000001, 0xffffff00, 497 0x0000ffff, 0xfff00000, 0xfff80000, 0xfffffffc}}; 473 474 475 namespace { 476 const static UnicodeSet::run_t __reflexive_set_runs[] = { 477 {Full, 2}, {Mixed, 1}, {Full, 2}, {Mixed, 2}, {Full, 1}, 478 {Mixed, 11}, {Full, 7}, {Mixed, 6}, {Empty, 1}, {Mixed, 1}, 479 {Full, 1}, {Mixed, 8}, {Full, 90}, {Empty, 1}, {Mixed, 1}, 480 {Full, 24}, {Mixed, 1}, {Full, 68}, {Mixed, 1}, {Full, 11}, 481 {Mixed, 16}, {Full, 9}, {Mixed, 1}, {Full, 1}, {Mixed, 2}, 482 {Full, 24}, {Mixed, 2}, {Full, 57}, {Empty, 1}, {Mixed, 1}, 483 {Full, 1}, {Mixed, 5}, {Full, 970}, {Mixed, 3}, {Full, 4}, 484 {Mixed, 5}, {Full, 29}, {Mixed, 1}, {Empty, 2}, {Full, 667}, 485 {Mixed, 1}, {Full, 38}, {Empty, 1}, {Mixed, 1}, {Full, 3}, 486 {Mixed, 2}, {Full, 61}, {Empty, 1}, {Mixed, 1}, {Full, 95}, 487 {Empty, 1}, {Full, 1666}, {Empty, 1}, {Mixed, 1}, {Full, 30902}}; 488 const static UnicodeSet::bitquad_t __reflexive_set_quads[] = { 489 0xf8000001, 0xffdfffff, 0x80800000, 0xaaaaaaaa, 0x55abaaaa, 490 0xaaaaab55, 0x54aaaaaa, 0x4e243129, 0xee512d2a, 0xb555524f, 491 0xaa29aaaa, 0xaaaaaaaa, 0x93faaaaa, 0xffffaa85, 0xffffffdf, 492 0x7fbaffff, 0x000128bf, 0xfffff004, 0xaa9c7ffb, 0x194caaaa, 493 0xffff0000, 0xaaaaaaaa, 0xaaaaabfe, 0xaaaaaaaa, 0xaaaad554, 494 0xaaaaaaaa, 0xaaaaaaaa, 0x0001aaaa, 0xff800000, 0xffffdf40, 495 0xc0ffffff, 0xfffffe00, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 496 0xaaaaaaaa, 0xb7eaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 497 0xc0ff00ff, 0x00ff00ff, 0x55ffc0ff, 0xffff00ff, 0x00ff00ff, 498 0xa0ff00ff, 0xf0ffe0ff, 0xe0ffe0ff, 0xfffbf3bf, 0xffff0000, 499 0xfffffff7, 0x003fffff, 0xffff0000, 0xffff8000, 0x3fda1562, 500 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xfffbd7fa, 0xaaaaaaaa, 501 0xffffeaaa, 0xfaaaaaaa, 0xaaabaaab, 0xaaaaaaaa, 0x95ffaaaa, 502 0xaabad7aa, 0xffa082aa, 0x0000ffff, 0xf8000001, 0xffffff00, 503 0x0000ffff, 0xfff00000, 0xfff80000, 0xfffffffc}; 504 } 505 506 const static UnicodeSet reflexive_set{const_cast<UnicodeSet::run_t *>(__reflexive_set_runs), 55, 0, const_cast<UnicodeSet::bitquad_t *>(__reflexive_set_quads), 74, 0}; 507 508 498 509 499 510 const unsigned buffer_length = 4958; 500 const static char __attribute__ ((aligned (32))) string_buffer[5120]= u8R"__(a511 const static char string_buffer[5120] LLVM_ALIGNAS(32) = u8R"__(a 501 512 b 502 513 c … … 1869 1880 )__"; 1870 1881 1871 const static std::vector<codepoint_t> defined_cps ={1882 const static std::vector<codepoint_t> defined_cps{ 1872 1883 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 1873 1884 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 0x0050, … … 2040 2051 0x1e91b, 0x1e91c, 0x1e91d, 0x1e91e, 0x1e91f, 0x1e920, 0x1e921}; 2041 2052 static StringPropertyObject property_object(scf, 2042 null_codepoint_set,2043 reflexive_set,2053 std::move(null_codepoint_set), 2054 std::move(reflexive_set), 2044 2055 static_cast<const char *>(string_buffer), 2045 2056 buffer_length, 2046 defined_cps);2057 std::move(defined_cps)); 2047 2058 } 2048 2059 namespace CF_ns { … … 2057 2068 [fb13, fb17]**/ 2058 2069 2059 const UnicodeSet explicitly_defined_set 2060 {{{Empty, 6}, {Mixed, 1}, {Empty, 2}, {Mixed, 2}, {Empty, 4}, 2061 {Mixed, 1}, {Empty, 12}, {Mixed, 2}, {Empty, 14}, {Mixed, 1}, 2062 {Empty, 199}, {Mixed, 1}, {Empty, 5}, {Mixed, 1}, {Empty, 1}, 2063 {Full, 1}, {Mixed, 3}, {Empty, 1752}, {Mixed, 1}, 2064 {Empty, 32807}}, 2065 {0x80000000, 0x00010000, 0x00000200, 0x00010000, 0x00010000, 2066 0x00010000, 0x00000080, 0x47c00000, 0x00550000, 0x10dcffff, 2067 0x00cc10dc, 0x10dc00dc, 0x00f8007f}}; 2070 2071 namespace { 2072 const static UnicodeSet::run_t __explicitly_defined_set_runs[] = { 2073 {Empty, 6}, {Mixed, 1}, {Empty, 2}, {Mixed, 2}, {Empty, 4}, 2074 {Mixed, 1}, {Empty, 12}, {Mixed, 2}, {Empty, 14}, {Mixed, 1}, 2075 {Empty, 199}, {Mixed, 1}, {Empty, 5}, {Mixed, 1}, {Empty, 1}, 2076 {Full, 1}, {Mixed, 3}, {Empty, 1752}, {Mixed, 1}, {Empty, 32807}}; 2077 const static UnicodeSet::bitquad_t __explicitly_defined_set_quads[] = { 2078 0x80000000, 0x00010000, 0x00000200, 0x00010000, 0x00010000, 2079 0x00010000, 0x00000080, 0x47c00000, 0x00550000, 0x10dcffff, 2080 0x00cc10dc, 0x10dc00dc, 0x00f8007f}; 2081 } 2082 2083 const static UnicodeSet explicitly_defined_set{const_cast<UnicodeSet::run_t *>(__explicitly_defined_set_runs), 20, 0, const_cast<UnicodeSet::bitquad_t *>(__explicitly_defined_set_quads), 13, 0}; 2084 2085 2068 2086 2069 2087 const unsigned buffer_length = 575; 2070 const static char __attribute__ ((aligned (32))) string_buffer[768]= u8R"__(ss2088 const static char string_buffer[768] LLVM_ALIGNAS(32) = u8R"__(ss 2071 2089 iÌ 2072 2090 ÊŒn … … 2185 2203 )__"; 2186 2204 2187 const static std::vector<codepoint_t> defined_cps ={2205 const static std::vector<codepoint_t> defined_cps{ 2188 2206 0x00df, 0x0130, 0x0149, 0x01f0, 0x0390, 0x03b0, 0x0587, 0x1e96, 2189 2207 0x1e97, 0x1e98, 0x1e99, 0x1e9a, 0x1e9e, 0x1f50, 0x1f52, 0x1f54, … … 2201 2219 static StringOverridePropertyObject property_object(cf, 2202 2220 SCF_ns::property_object, 2203 explicitly_defined_set,2221 std::move(explicitly_defined_set), 2204 2222 static_cast<const char *>(string_buffer), 2205 2223 buffer_length, 2206 defined_cps);2224 std::move(defined_cps)); 2207 2225 } 2208 2226 }
Note: See TracChangeset
for help on using the changeset viewer.