Changeset 5673 for icGREP


Ignore:
Timestamp:
Oct 6, 2017, 1:22:53 PM (19 months ago)
Author:
cameron
Message:

Case folding property objects

Location:
icGREP/icgrep-devel
Files:
10 edited
2 moved

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/UCD-scripts/UCD_parser.py

    r5672 r5673  
    427427# There may be multiple entries per codepoint
    428428
    429 def parse_CaseFolding_txt():
     429def parse_CaseFolding_txt(property_object_map):
    430430    fold_map = {}
    431431    f = open(UCD_config.UCD_src_dir + "/" + 'CaseFolding.txt')
     
    438438        if fold_type == 'S' or fold_type == 'C':
    439439            # fold value is guaranteed to be a single codepoint
    440             fold_val = int(fold_val, 16)
     440            property_object_map['scf'].addDataRecord(cp, cp, fold_val)
    441441        else:
    442             fold_val = [int(x, 16) for x in fold_val.split(" ")]
     442            if fold_type == 'F':
     443                property_object_map['cf'].addDataRecord(cp, cp, fold_val)
    443444        fold_map[fold_type][cp] = fold_val
     445    property_object_map['scf'].finalizeProperty()
     446    property_object_map['cf'].finalizeProperty()
    444447    return fold_map
    445448
  • icGREP/icgrep-devel/UCD-scripts/UCD_properties.py

    r5672 r5673  
    8787def emit_string_override_property(f, property_code, overridden_code, override_set, cp_value_map):
    8888    s = string.Template(r"""    namespace ${prop_enum_up}_ns {
    89         /** Code Point Ranges for ${prop_enum} overriding values from ${overridden}
     89        /** Code Point Ranges for ${prop_enum} (possibly overriding values from ${overridden})
    9090        ${overridden_set_ranges}**/
    9191
    92         const UnicodeSet overridden_set
     92        const UnicodeSet explicitly_defined_set
    9393        ${overridden_set_value};
    9494
     
    100100        static StringOverridePropertyObject property_object(${prop_enum},
    101101                                                    ${overridden}_ns::property_object,
    102                                                     overridden_set,
     102                                                    explicitly_defined_set,
    103103                                                    static_cast<const char *>(string_buffer),
    104104                                                    buffer_length,
     
    195195""")
    196196    f.write(s.substitute(prop_enum = property_code, prop_enum_up = property_code.upper()))
     197
     198
     199def simple_CaseClosure_map(fold_data):
     200   simpleFoldMap = {}
     201   for k in fold_data['S'].keys(): simpleFoldMap[k] = int(fold_data['S'][k], 16)
     202   for k in fold_data['C'].keys(): simpleFoldMap[k] = int(fold_data['C'][k], 16)
     203   cl_map = {}
     204   for k in simpleFoldMap.keys():
     205      v = simpleFoldMap[k]
     206      if not v in cl_map: cl_map[v] = [k]
     207      else: cl_map[v].append(k)
     208      if not k in cl_map: cl_map[k] = [v]
     209      else: cl_map[k].append(v)
     210   newEntries = True
     211   while newEntries:
     212      newEntries = False
     213      for k in cl_map.keys():
     214         vlist = cl_map[k]
     215         for v in vlist:
     216            for w in cl_map[v]:
     217               if k != w and not k in cl_map[w]:
     218                  cl_map[w].append(k)
     219                  newEntries = True
     220   return cl_map
     221
     222#
     223# Simple case fold map.     
     224# The simple case fold map is an ordered list of fold entries, each of
     225# the form (lo_codepoint, hi_codepoint, offset).  Each entry describes
     226# the case fold that applies to the consecutive codepoints in the given
     227# range, according to the following equations:
     228# casefold(x) = x + offset, if ((x - lo_codepoint) div |offset|) mod 2 = 0
     229#             = x - offset, if ((x - lo_codepoint) div |offset|) mod 2 = 1
     230#
     231#
     232def caseFoldRangeMap(casemap):
     233   foldable = sorted(casemap.keys())
     234   entries = []
     235   cp = foldable[0]
     236   open_entries = [(cp, f - cp) for f in casemap[cp]]
     237   last_cp = cp
     238   for cp in foldable[1:]:
     239      if cp != last_cp + 1:
     240         # Close the pending range entries
     241         for (cp0, offset) in open_entries:
     242            entries.append((cp0, last_cp, offset))
     243         open_entries = [(cp, f - cp) for f in casemap[cp]]
     244      else:
     245         new_open = []
     246         projected = []
     247         for (cp0, offset) in open_entries:
     248            even_odd_offset_group = int(abs(cp - cp0)/ abs(offset)) & 1
     249            if even_odd_offset_group == 0:
     250               projected_foldcp = cp + offset
     251            else: projected_foldcp = cp - offset
     252            if not projected_foldcp in casemap[cp]:
     253               entries.append((cp0, last_cp, offset))
     254            else:
     255               new_open.append((cp0, offset))
     256               projected.append(projected_foldcp)
     257         open_entries = new_open
     258         for f in casemap[cp]:
     259            if not f in projected:
     260               open_entries.append((cp, f-cp))
     261      last_cp = cp
     262   # Close the final entries.
     263   for (cp0, offset) in open_entries:
     264      entries.append((cp0, last_cp, offset))
     265   return entries
     266
     267
     268
     269def genFoldEntryData(casemap):
     270   rMap = caseFoldRangeMap(casemap)
     271   individuals = [(m[0],m[0]+m[2]) for m in rMap if m[0] == m[1]]
     272   ranges = [m for m in rMap if m[0] != m[1]]
     273   last_hi = -1
     274   generated = "const FoldEntry foldTable[foldTableSize] = {\n"
     275   foldTableSize = 0
     276   for (lo, hi, offset) in ranges:
     277      if lo != last_hi + 1:
     278         pairs = ["{0x%x, 0x%x}" % (m[0], m[1]) for m in individuals if m[0]>last_hi and m[0]< lo]
     279         generated += "  {0x%x, 0, {" % (last_hi + 1) + cformat.multiline_fill(pairs) + "}},\n"
     280         foldTableSize += 1
     281      last_hi = hi
     282      pairs = ["{0x%x, 0x%x}" % (m[0], m[1]) for m in individuals if m[0]>=lo and m[0]<= hi]
     283      generated += "  {0x%x, %i, {" % (lo, offset) + cformat.multiline_fill(pairs) + "}},\n"
     284      foldTableSize += 1
     285   if last_hi != 0x10FFFF:
     286      pairs = ["{0x%x, 0x%x}" % (m[0], m[1]) for m in individuals if m[0]>last_hi]
     287      generated += "  {0x%x, 0, {" % (last_hi + 1) + cformat.multiline_fill(pairs) + "}},\n"
     288      foldTableSize += 1
     289   generated += "  {0x110000, 0, {}}};"
     290   foldTableSize += 1
     291   generated = "\nconst int foldTableSize = %s;\n\n" % foldTableSize  + generated
     292   return generated
     293
     294foldDeclarations = r"""
     295typedef unsigned codepoint_t;
     296
     297struct FoldEntry {
     298    re::codepoint_t range_lo;
     299    int fold_offset;
     300    std::vector<re::interval_t> fold_pairs;
     301};
     302
     303
     304void caseInsensitiveInsertRange(re::CC * cc, const re::codepoint_t lo, const re::codepoint_t hi);
     305
     306inline void caseInsensitiveInsert(re::CC * cc, const re::codepoint_t cp) {
     307    caseInsensitiveInsertRange(cc, cp, cp);
     308}
     309"""
    197310
    198311
     
    410523
    411524
     525    def genCaseFolding_h(self):
     526        basename = 'CaseFolding'
     527        fold_data = parse_CaseFolding_txt(self.property_object_map)
     528        cm = simple_CaseClosure_map(fold_data)
     529        f = cformat.open_header_file_for_write(basename, 'casefold.py')
     530        cformat.write_imports(f, ['"PropertyAliases.h"', '"PropertyObjects.h"', '"PropertyValueAliases.h"', '"unicode_set.h"', "<vector>", '"re/re_cc.h"'])
     531        f.write(foldDeclarations)
     532        f.write(genFoldEntryData(cm))
     533        f.write("\nnamespace UCD {\n")
     534        self.emit_property(f, 'scf')
     535        self.emit_property(f, 'cf')
     536        f.write("}\n")
     537        cformat.close_header_file(f)
     538        self.supported_props.append(['scf', 'cf'])
     539        self.property_data_headers.append(basename)
     540
     541
    412542
    413543def UCD_main():
     
    427557   
    428558    ucd.generate_SpecialCasing_h()
     559   
     560    ucd.genCaseFolding_h()
    429561   
    430562    ucd.generate_multicolumn_properties_file('NameAliases', ['Name_Alias', 'Alias_Kind'])
  • icGREP/icgrep-devel/UCD-scripts/UCD_property_objects.py

    r5672 r5673  
    249249        else:
    250250            raise Exception("Expecting codepoint string, but got " + stringValue)
    251         self.cp_value_map[cp] = stringValue
     251        for cp in range(cp_lo, cp_hi+1): self.cp_value_map[cp] = stringValue
    252252
    253253    def finalizeProperty(self):
  • icGREP/icgrep-devel/UCD-scripts/casefold.py

    r5672 r5673  
    1313import UCD_config
    1414from unicode_set import *
    15 from UCD_parser import parse_CaseFolding_txt
     15from UCD_parser import parse_PropertyAlias_txt, parse_CaseFolding_txt
    1616
    1717def simple_CaseFolding_BitSets(fold_map):
     
    4040def simple_CaseClosure_map(fold_data):
    4141   simpleFoldMap = {}
    42    for k in fold_data['S'].keys(): simpleFoldMap[k] = fold_data['S'][k]
    43    for k in fold_data['C'].keys(): simpleFoldMap[k] = fold_data['C'][k]
     42   for k in fold_data['S'].keys(): simpleFoldMap[k] = int(fold_data['S'][k], 16)
     43   for k in fold_data['C'].keys(): simpleFoldMap[k] = int(fold_data['C'][k], 16)
    4444   cl_map = {}
    4545   for k in simpleFoldMap.keys():
     
    151151
    152152def genCaseFolding_txt_h():
    153    fold_data = parse_CaseFolding_txt()
    154    cm = simple_CaseClosure_map(fold_data)
    155    f = cformat.open_header_file_for_write('CaseFolding_txt', 'casefold.py')
    156    cformat.write_imports(f, ["<vector>", '"re/re_cc.h"'])
    157    f.write(foldDeclarations)
    158    f.write(genFoldEntryData(cm))
    159    cformat.close_header_file(f)
     153    (property_enum_name_list, property_object_map) = parse_PropertyAlias_txt()
     154    fold_data = parse_CaseFolding_txt(property_object_map)
     155    cm = simple_CaseClosure_map(fold_data)
     156    f = cformat.open_header_file_for_write('CaseFolding_txt', 'casefold.py')
     157    cformat.write_imports(f, ["<vector>", '"re/re_cc.h"'])
     158    f.write(foldDeclarations)
     159    f.write(genFoldEntryData(cm))
     160    #emit_property(f, 'scf')
     161    #emit_property(f, 'cf')
     162    cformat.close_header_file(f)
    160163
    161164if __name__ == "__main__":
  • icGREP/icgrep-devel/icgrep/CMakeLists.txt

    r5646 r5673  
    8888add_library(RegExpADT re/re_re.cpp re/re_cc.cpp re/re_rep.cpp re/re_diff.cpp re/re_intersect.cpp re/printer_re.cpp)
    8989add_library(RegExpCompiler re/re_parser.cpp re/re_memoizer.cpp re/re_nullable.cpp re/re_simplifier.cpp re/re_star_normal.cpp re/re_minimizer.cpp re/re_local.cpp re/re_compiler.cpp re/re_analysis.cpp re/re_toolchain.cpp re/re_name_resolve.cpp re/re_name_gather.cpp re/re_collect_unicodesets.cpp re/re_multiplex.cpp re/re_parser_pcre.cpp re/re_parser_ere.cpp re/re_parser_bre.cpp re/re_parser_prosite.cpp re/re_utility.cpp)
    90 add_library(CCADT cc/cc_compiler.cpp utf8_encoder.cpp utf16_encoder.cpp UCD/CaseFolding_txt.cpp cc/alphabet.cpp cc/multiplex_CCs.cpp)
     90add_library(CCADT cc/cc_compiler.cpp utf8_encoder.cpp utf16_encoder.cpp UCD/CaseFolding.cpp cc/alphabet.cpp cc/multiplex_CCs.cpp)
    9191add_library(UCDlib UCD/unicode_set.cpp UCD/ucd_compiler.cpp UCD/PropertyObjects.cpp UCD/resolve_properties.cpp UCD/UnicodeNameData.cpp)
    9292
  • icGREP/icgrep-devel/icgrep/UCD/CaseFolding.cpp

    r5672 r5673  
    66 */
    77
    8 #include "CaseFolding_txt.h"
     8#include "CaseFolding.h"
    99#include <algorithm>
    1010
  • icGREP/icgrep-devel/icgrep/UCD/CaseFolding.h

    r5672 r5673  
    1 #ifndef CASEFOLDING_TXT_H
    2 #define CASEFOLDING_TXT_H
     1#ifndef CASEFOLDING_H
     2#define CASEFOLDING_H
    33/*
    44 *  Copyright (c) 2017 International Characters, Inc.
     
    99 */
    1010
     11#include "PropertyAliases.h"
     12#include "PropertyObjects.h"
     13#include "PropertyValueAliases.h"
    1114#include "re/re_cc.h"
     15#include "unicode_set.h"
    1216#include <vector>
    1317
     
    304308  {0x1e944, 0, {}},
    305309  {0x110000, 0, {}}};
     310namespace UCD {
     311    namespace SCF_ns {
     312        /** Code Point Ranges for scf mapping to <none>
     313        **/
     314
     315        const UnicodeSet null_codepoint_set
     316                    {{{Empty, 34816}},
     317             {}};
     318
     319        /** Code Point Ranges for scf mapping to <codepoint>
     320        [0000, 0040], [005b, 00b4], [00b6, 00bf], [00d7, 00d7],
     321        [00df, 00ff], [0101, 0101], [0103, 0103], [0105, 0105],
     322        [0107, 0107], [0109, 0109], [010b, 010b], [010d, 010d],
     323        [010f, 010f], [0111, 0111], [0113, 0113], [0115, 0115],
     324        [0117, 0117], [0119, 0119], [011b, 011b], [011d, 011d],
     325        [011f, 011f], [0121, 0121], [0123, 0123], [0125, 0125],
     326        [0127, 0127], [0129, 0129], [012b, 012b], [012d, 012d],
     327        [012f, 0131], [0133, 0133], [0135, 0135], [0137, 0138],
     328        [013a, 013a], [013c, 013c], [013e, 013e], [0140, 0140],
     329        [0142, 0142], [0144, 0144], [0146, 0146], [0148, 0149],
     330        [014b, 014b], [014d, 014d], [014f, 014f], [0151, 0151],
     331        [0153, 0153], [0155, 0155], [0157, 0157], [0159, 0159],
     332        [015b, 015b], [015d, 015d], [015f, 015f], [0161, 0161],
     333        [0163, 0163], [0165, 0165], [0167, 0167], [0169, 0169],
     334        [016b, 016b], [016d, 016d], [016f, 016f], [0171, 0171],
     335        [0173, 0173], [0175, 0175], [0177, 0177], [017a, 017a],
     336        [017c, 017c], [017e, 017e], [0180, 0180], [0183, 0183],
     337        [0185, 0185], [0188, 0188], [018c, 018d], [0192, 0192],
     338        [0195, 0195], [0199, 019b], [019e, 019e], [01a1, 01a1],
     339        [01a3, 01a3], [01a5, 01a5], [01a8, 01a8], [01aa, 01ab],
     340        [01ad, 01ad], [01b0, 01b0], [01b4, 01b4], [01b6, 01b6],
     341        [01b9, 01bb], [01bd, 01c3], [01c6, 01c6], [01c9, 01c9],
     342        [01cc, 01cc], [01ce, 01ce], [01d0, 01d0], [01d2, 01d2],
     343        [01d4, 01d4], [01d6, 01d6], [01d8, 01d8], [01da, 01da],
     344        [01dc, 01dd], [01df, 01df], [01e1, 01e1], [01e3, 01e3],
     345        [01e5, 01e5], [01e7, 01e7], [01e9, 01e9], [01eb, 01eb],
     346        [01ed, 01ed], [01ef, 01f0], [01f3, 01f3], [01f5, 01f5],
     347        [01f9, 01f9], [01fb, 01fb], [01fd, 01fd], [01ff, 01ff],
     348        [0201, 0201], [0203, 0203], [0205, 0205], [0207, 0207],
     349        [0209, 0209], [020b, 020b], [020d, 020d], [020f, 020f],
     350        [0211, 0211], [0213, 0213], [0215, 0215], [0217, 0217],
     351        [0219, 0219], [021b, 021b], [021d, 021d], [021f, 021f],
     352        [0221, 0221], [0223, 0223], [0225, 0225], [0227, 0227],
     353        [0229, 0229], [022b, 022b], [022d, 022d], [022f, 022f],
     354        [0231, 0231], [0233, 0239], [023c, 023c], [023f, 0240],
     355        [0242, 0242], [0247, 0247], [0249, 0249], [024b, 024b],
     356        [024d, 024d], [024f, 0344], [0346, 036f], [0371, 0371],
     357        [0373, 0375], [0377, 037e], [0380, 0385], [0387, 0387],
     358        [038b, 038b], [038d, 038d], [0390, 0390], [03a2, 03a2],
     359        [03ac, 03c1], [03c3, 03ce], [03d2, 03d4], [03d7, 03d7],
     360        [03d9, 03d9], [03db, 03db], [03dd, 03dd], [03df, 03df],
     361        [03e1, 03e1], [03e3, 03e3], [03e5, 03e5], [03e7, 03e7],
     362        [03e9, 03e9], [03eb, 03eb], [03ed, 03ed], [03ef, 03ef],
     363        [03f2, 03f3], [03f6, 03f6], [03f8, 03f8], [03fb, 03fc],
     364        [0430, 045f], [0461, 0461], [0463, 0463], [0465, 0465],
     365        [0467, 0467], [0469, 0469], [046b, 046b], [046d, 046d],
     366        [046f, 046f], [0471, 0471], [0473, 0473], [0475, 0475],
     367        [0477, 0477], [0479, 0479], [047b, 047b], [047d, 047d],
     368        [047f, 047f], [0481, 0489], [048b, 048b], [048d, 048d],
     369        [048f, 048f], [0491, 0491], [0493, 0493], [0495, 0495],
     370        [0497, 0497], [0499, 0499], [049b, 049b], [049d, 049d],
     371        [049f, 049f], [04a1, 04a1], [04a3, 04a3], [04a5, 04a5],
     372        [04a7, 04a7], [04a9, 04a9], [04ab, 04ab], [04ad, 04ad],
     373        [04af, 04af], [04b1, 04b1], [04b3, 04b3], [04b5, 04b5],
     374        [04b7, 04b7], [04b9, 04b9], [04bb, 04bb], [04bd, 04bd],
     375        [04bf, 04bf], [04c2, 04c2], [04c4, 04c4], [04c6, 04c6],
     376        [04c8, 04c8], [04ca, 04ca], [04cc, 04cc], [04ce, 04cf],
     377        [04d1, 04d1], [04d3, 04d3], [04d5, 04d5], [04d7, 04d7],
     378        [04d9, 04d9], [04db, 04db], [04dd, 04dd], [04df, 04df],
     379        [04e1, 04e1], [04e3, 04e3], [04e5, 04e5], [04e7, 04e7],
     380        [04e9, 04e9], [04eb, 04eb], [04ed, 04ed], [04ef, 04ef],
     381        [04f1, 04f1], [04f3, 04f3], [04f5, 04f5], [04f7, 04f7],
     382        [04f9, 04f9], [04fb, 04fb], [04fd, 04fd], [04ff, 04ff],
     383        [0501, 0501], [0503, 0503], [0505, 0505], [0507, 0507],
     384        [0509, 0509], [050b, 050b], [050d, 050d], [050f, 050f],
     385        [0511, 0511], [0513, 0513], [0515, 0515], [0517, 0517],
     386        [0519, 0519], [051b, 051b], [051d, 051d], [051f, 051f],
     387        [0521, 0521], [0523, 0523], [0525, 0525], [0527, 0527],
     388        [0529, 0529], [052b, 052b], [052d, 052d], [052f, 0530],
     389        [0557, 109f], [10c6, 10c6], [10c8, 10cc], [10ce, 13f7],
     390        [13fe, 1c7f], [1c89, 1dff], [1e01, 1e01], [1e03, 1e03],
     391        [1e05, 1e05], [1e07, 1e07], [1e09, 1e09], [1e0b, 1e0b],
     392        [1e0d, 1e0d], [1e0f, 1e0f], [1e11, 1e11], [1e13, 1e13],
     393        [1e15, 1e15], [1e17, 1e17], [1e19, 1e19], [1e1b, 1e1b],
     394        [1e1d, 1e1d], [1e1f, 1e1f], [1e21, 1e21], [1e23, 1e23],
     395        [1e25, 1e25], [1e27, 1e27], [1e29, 1e29], [1e2b, 1e2b],
     396        [1e2d, 1e2d], [1e2f, 1e2f], [1e31, 1e31], [1e33, 1e33],
     397        [1e35, 1e35], [1e37, 1e37], [1e39, 1e39], [1e3b, 1e3b],
     398        [1e3d, 1e3d], [1e3f, 1e3f], [1e41, 1e41], [1e43, 1e43],
     399        [1e45, 1e45], [1e47, 1e47], [1e49, 1e49], [1e4b, 1e4b],
     400        [1e4d, 1e4d], [1e4f, 1e4f], [1e51, 1e51], [1e53, 1e53],
     401        [1e55, 1e55], [1e57, 1e57], [1e59, 1e59], [1e5b, 1e5b],
     402        [1e5d, 1e5d], [1e5f, 1e5f], [1e61, 1e61], [1e63, 1e63],
     403        [1e65, 1e65], [1e67, 1e67], [1e69, 1e69], [1e6b, 1e6b],
     404        [1e6d, 1e6d], [1e6f, 1e6f], [1e71, 1e71], [1e73, 1e73],
     405        [1e75, 1e75], [1e77, 1e77], [1e79, 1e79], [1e7b, 1e7b],
     406        [1e7d, 1e7d], [1e7f, 1e7f], [1e81, 1e81], [1e83, 1e83],
     407        [1e85, 1e85], [1e87, 1e87], [1e89, 1e89], [1e8b, 1e8b],
     408        [1e8d, 1e8d], [1e8f, 1e8f], [1e91, 1e91], [1e93, 1e93],
     409        [1e95, 1e9a], [1e9c, 1e9d], [1e9f, 1e9f], [1ea1, 1ea1],
     410        [1ea3, 1ea3], [1ea5, 1ea5], [1ea7, 1ea7], [1ea9, 1ea9],
     411        [1eab, 1eab], [1ead, 1ead], [1eaf, 1eaf], [1eb1, 1eb1],
     412        [1eb3, 1eb3], [1eb5, 1eb5], [1eb7, 1eb7], [1eb9, 1eb9],
     413        [1ebb, 1ebb], [1ebd, 1ebd], [1ebf, 1ebf], [1ec1, 1ec1],
     414        [1ec3, 1ec3], [1ec5, 1ec5], [1ec7, 1ec7], [1ec9, 1ec9],
     415        [1ecb, 1ecb], [1ecd, 1ecd], [1ecf, 1ecf], [1ed1, 1ed1],
     416        [1ed3, 1ed3], [1ed5, 1ed5], [1ed7, 1ed7], [1ed9, 1ed9],
     417        [1edb, 1edb], [1edd, 1edd], [1edf, 1edf], [1ee1, 1ee1],
     418        [1ee3, 1ee3], [1ee5, 1ee5], [1ee7, 1ee7], [1ee9, 1ee9],
     419        [1eeb, 1eeb], [1eed, 1eed], [1eef, 1eef], [1ef1, 1ef1],
     420        [1ef3, 1ef3], [1ef5, 1ef5], [1ef7, 1ef7], [1ef9, 1ef9],
     421        [1efb, 1efb], [1efd, 1efd], [1eff, 1f07], [1f10, 1f17],
     422        [1f1e, 1f27], [1f30, 1f37], [1f40, 1f47], [1f4e, 1f58],
     423        [1f5a, 1f5a], [1f5c, 1f5c], [1f5e, 1f5e], [1f60, 1f67],
     424        [1f70, 1f87], [1f90, 1f97], [1fa0, 1fa7], [1fb0, 1fb7],
     425        [1fbd, 1fbd], [1fbf, 1fc7], [1fcd, 1fd7], [1fdc, 1fe7],
     426        [1fed, 1ff7], [1ffd, 2125], [2127, 2129], [212c, 2131],
     427        [2133, 215f], [2170, 2182], [2184, 24b5], [24d0, 2bff],
     428        [2c2f, 2c5f], [2c61, 2c61], [2c65, 2c66], [2c68, 2c68],
     429        [2c6a, 2c6a], [2c6c, 2c6c], [2c71, 2c71], [2c73, 2c74],
     430        [2c76, 2c7d], [2c81, 2c81], [2c83, 2c83], [2c85, 2c85],
     431        [2c87, 2c87], [2c89, 2c89], [2c8b, 2c8b], [2c8d, 2c8d],
     432        [2c8f, 2c8f], [2c91, 2c91], [2c93, 2c93], [2c95, 2c95],
     433        [2c97, 2c97], [2c99, 2c99], [2c9b, 2c9b], [2c9d, 2c9d],
     434        [2c9f, 2c9f], [2ca1, 2ca1], [2ca3, 2ca3], [2ca5, 2ca5],
     435        [2ca7, 2ca7], [2ca9, 2ca9], [2cab, 2cab], [2cad, 2cad],
     436        [2caf, 2caf], [2cb1, 2cb1], [2cb3, 2cb3], [2cb5, 2cb5],
     437        [2cb7, 2cb7], [2cb9, 2cb9], [2cbb, 2cbb], [2cbd, 2cbd],
     438        [2cbf, 2cbf], [2cc1, 2cc1], [2cc3, 2cc3], [2cc5, 2cc5],
     439        [2cc7, 2cc7], [2cc9, 2cc9], [2ccb, 2ccb], [2ccd, 2ccd],
     440        [2ccf, 2ccf], [2cd1, 2cd1], [2cd3, 2cd3], [2cd5, 2cd5],
     441        [2cd7, 2cd7], [2cd9, 2cd9], [2cdb, 2cdb], [2cdd, 2cdd],
     442        [2cdf, 2cdf], [2ce1, 2ce1], [2ce3, 2cea], [2cec, 2cec],
     443        [2cee, 2cf1], [2cf3, a63f], [a641, a641], [a643, a643],
     444        [a645, a645], [a647, a647], [a649, a649], [a64b, a64b],
     445        [a64d, a64d], [a64f, a64f], [a651, a651], [a653, a653],
     446        [a655, a655], [a657, a657], [a659, a659], [a65b, a65b],
     447        [a65d, a65d], [a65f, a65f], [a661, a661], [a663, a663],
     448        [a665, a665], [a667, a667], [a669, a669], [a66b, a66b],
     449        [a66d, a67f], [a681, a681], [a683, a683], [a685, a685],
     450        [a687, a687], [a689, a689], [a68b, a68b], [a68d, a68d],
     451        [a68f, a68f], [a691, a691], [a693, a693], [a695, a695],
     452        [a697, a697], [a699, a699], [a69b, a721], [a723, a723],
     453        [a725, a725], [a727, a727], [a729, a729], [a72b, a72b],
     454        [a72d, a72d], [a72f, a731], [a733, a733], [a735, a735],
     455        [a737, a737], [a739, a739], [a73b, a73b], [a73d, a73d],
     456        [a73f, a73f], [a741, a741], [a743, a743], [a745, a745],
     457        [a747, a747], [a749, a749], [a74b, a74b], [a74d, a74d],
     458        [a74f, a74f], [a751, a751], [a753, a753], [a755, a755],
     459        [a757, a757], [a759, a759], [a75b, a75b], [a75d, a75d],
     460        [a75f, a75f], [a761, a761], [a763, a763], [a765, a765],
     461        [a767, a767], [a769, a769], [a76b, a76b], [a76d, a76d],
     462        [a76f, a778], [a77a, a77a], [a77c, a77c], [a77f, a77f],
     463        [a781, a781], [a783, a783], [a785, a785], [a787, a78a],
     464        [a78c, a78c], [a78e, a78f], [a791, a791], [a793, a795],
     465        [a797, a797], [a799, a799], [a79b, a79b], [a79d, a79d],
     466        [a79f, a79f], [a7a1, a7a1], [a7a3, a7a3], [a7a5, a7a5],
     467        [a7a7, a7a7], [a7a9, a7a9], [a7af, a7af], [a7b5, a7b5],
     468        [a7b7, ab6f], [abc0, ff20], [ff3b, 103ff], [10428, 104af],
     469        [104d4, 10c7f], [10cb3, 1189f], [118c0, 1e8ff], [1e922, 10ffff]**/
     470        const UnicodeSet reflexive_set
     471                    {{{Full, 2}, {Mixed, 1}, {Full, 2}, {Mixed, 2}, {Full, 1},
     472              {Mixed, 11}, {Full, 7}, {Mixed, 6}, {Empty, 1}, {Mixed, 1},
     473              {Full, 1}, {Mixed, 8}, {Full, 90}, {Empty, 1}, {Mixed, 1},
     474              {Full, 24}, {Mixed, 1}, {Full, 68}, {Mixed, 1}, {Full, 11},
     475              {Mixed, 16}, {Full, 9}, {Mixed, 1}, {Full, 1}, {Mixed, 2},
     476              {Full, 24}, {Mixed, 2}, {Full, 57}, {Empty, 1}, {Mixed, 1},
     477              {Full, 1}, {Mixed, 5}, {Full, 970}, {Mixed, 3}, {Full, 4},
     478              {Mixed, 5}, {Full, 29}, {Mixed, 1}, {Empty, 2}, {Full, 667},
     479              {Mixed, 1}, {Full, 38}, {Empty, 1}, {Mixed, 1}, {Full, 3},
     480              {Mixed, 2}, {Full, 61}, {Empty, 1}, {Mixed, 1}, {Full, 95},
     481              {Empty, 1}, {Full, 1666}, {Empty, 1}, {Mixed, 1},
     482              {Full, 30902}},
     483             {0xf8000001, 0xffdfffff, 0x80800000, 0xaaaaaaaa, 0x55abaaaa,
     484              0xaaaaab55, 0x54aaaaaa, 0x4e243129, 0xee512d2a, 0xb555524f,
     485              0xaa29aaaa, 0xaaaaaaaa, 0x93faaaaa, 0xffffaa85, 0xffffffdf,
     486              0x7fbaffff, 0x000128bf, 0xfffff004, 0xaa9c7ffb, 0x194caaaa,
     487              0xffff0000, 0xaaaaaaaa, 0xaaaaabfe, 0xaaaaaaaa, 0xaaaad554,
     488              0xaaaaaaaa, 0xaaaaaaaa, 0x0001aaaa, 0xff800000, 0xffffdf40,
     489              0xc0ffffff, 0xfffffe00, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
     490              0xaaaaaaaa, 0xb7eaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
     491              0xc0ff00ff, 0x00ff00ff, 0x55ffc0ff, 0xffff00ff, 0x00ff00ff,
     492              0xa0ff00ff, 0xf0ffe0ff, 0xe0ffe0ff, 0xfffbf3bf, 0xffff0000,
     493              0xfffffff7, 0x003fffff, 0xffff0000, 0xffff8000, 0x3fda1562,
     494              0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xfffbd7fa, 0xaaaaaaaa,
     495              0xffffeaaa, 0xfaaaaaaa, 0xaaabaaab, 0xaaaaaaaa, 0x95ffaaaa,
     496              0xaabad7aa, 0xffa082aa, 0x0000ffff, 0xf8000001, 0xffffff00,
     497              0x0000ffff, 0xfff00000, 0xfff80000, 0xfffffffc}};
     498
     499        const unsigned buffer_length = 4958;
     500        const static char __attribute__ ((aligned (32))) string_buffer[5120] = u8R"__(a
     501b
     502c
     503d
     504e
     505f
     506g
     507h
     508i
     509j
     510k
     511l
     512m
     513n
     514o
     515p
     516q
     517r
     518s
     519t
     520u
     521v
     522w
     523x
     524y
     525z
     526ÎŒ
     527à
     528á
     529â
     530ã
     531À
     532Ã¥
     533Ê
     534ç
     535Ú
     536é
     537ê
     538ë
     539ì
     540í
     541î
     542ï
     543ð
     544ñ
     545ò
     546ó
     547ÃŽ
     548õ
     549ö
     550Þ
     551ù
     552ú
     553û
     554ÃŒ
     555Ãœ
     556ß
     557ā
     558ă
     559Ä
     560
     561ć
     562ĉ
     563ċ
     564č
     565ď
     566đ
     567ē
     568ĕ
     569ė
     570ę
     571ě
     572ĝ
     573ğ
     574Ä¡
     575Ä£
     576Ä¥
     577ħ
     578Ä©
     579Ä«
     580Ä­
     581į
     582ij
     583ĵ
     584Ä·
     585ĺ
     586ÄŒ
     587ÄŸ
     588ŀ
     589ł
     590ń
     591ņ
     592ň
     593ŋ
     594ō
     595ŏ
     596ő
     597œ
     598ŕ
     599ŗ
     600ř
     601ś
     602ŝ
     603ş
     604Å¡
     605Å£
     606Å¥
     607ŧ
     608Å©
     609Å«
     610Å­
     611ů
     612ű
     613ų
     614ŵ
     615Å·
     616ÿ
     617ź
     618ÅŒ
     619ÅŸ
     620s
     621ɓ
     622ƃ
     623Æ
     624
     625ɔ
     626ƈ
     627ɖ
     628ɗ
     629ƌ
     630ǝ
     631ə
     632ɛ
     633ƒ
     634É 
     635É£
     636É©
     637Éš
     638ƙ
     639ɯ
     640ɲ
     641ɵ
     642Æ¡
     643Æ£
     644Æ¥
     645ʀ
     646Æš
     647ʃ
     648Æ­
     649ʈ
     650Æ°
     651ʊ
     652ʋ
     653ÆŽ
     654ƶ
     655ʒ
     656ƹ
     657Æœ
     658dž
     659dž
     660lj
     661lj
     662nj
     663nj
     664ǎ
     665ǐ
     666ǒ
     667ǔ
     668ǖ
     669ǘ
     670ǚ
     671ǜ
     672ǟ
     673Ç¡
     674Ç£
     675Ç¥
     676ǧ
     677Ç©
     678Ç«
     679Ç­
     680ǯ
     681dz
     682dz
     683ǵ
     684ƕ
     685Æ¿
     686ǹ
     687Ç»
     688Çœ
     689Ç¿
     690ȁ
     691ȃ
     692È
     693
     694ȇ
     695ȉ
     696ȋ
     697ȍ
     698ȏ
     699ȑ
     700ȓ
     701ȕ
     702ȗ
     703ș
     704ț
     705ȝ
     706ȟ
     707ƞ
     708È£
     709È¥
     710ȧ
     711È©
     712È«
     713È­
     714ȯ
     715ȱ
     716ȳ
     717â±¥
     718ÈŒ
     719ƚ
     720ⱊ
     721ɂ
     722ƀ
     723ʉ
     724ʌ
     725ɇ
     726ɉ
     727ɋ
     728ɍ
     729ɏ
     730ι
     731ͱ
     732ͳ
     733Í·
     734ϳ
     735ά
     736έ
     737ή
     738ί
     739ό
     740ύ
     741ώ
     742α
     743β
     744γ
     745ÎŽ
     746ε
     747ζ
     748η
     749Ξ
     750ι
     751κ
     752λ
     753ÎŒ
     754Îœ
     755Ο
     756ο
     757π
     758ρ
     759σ
     760τ
     761Ï
     762
     763φ
     764χ
     765ψ
     766ω
     767ϊ
     768ϋ
     769σ
     770ϗ
     771β
     772Ξ
     773φ
     774π
     775ϙ
     776ϛ
     777ϝ
     778ϟ
     779Ï¡
     780Ï£
     781Ï¥
     782ϧ
     783Ï©
     784Ï«
     785Ï­
     786ϯ
     787κ
     788ρ
     789Ξ
     790ε
     791Ïž
     792ϲ
     793Ï»
     794Í»
     795ÍŒ
     796Íœ
     797ѐ
     798ё
     799ђ
     800ѓ
     801є
     802ѕ
     803і
     804ї
     805ј
     806љ
     807њ
     808ћ
     809ќ
     810ѝ
     811ў
     812џ
     813а
     814б
     815в
     816г
     817ÐŽ
     818е
     819ж
     820з
     821О
     822й
     823к
     824л
     825ÐŒ
     826Ðœ
     827П
     828п
     829р
     830с
     831т
     832у
     833ф
     834Ñ
     835
     836ц
     837ч
     838ш
     839щ
     840ъ
     841ы
     842ь
     843э
     844ю
     845я
     846Ñ¡
     847Ñ£
     848Ñ¥
     849ѧ
     850Ñ©
     851Ñ«
     852Ñ­
     853ѯ
     854ѱ
     855ѳ
     856ѵ
     857Ñ·
     858ѹ
     859Ñ»
     860Ñœ
     861Ñ¿
     862ҁ
     863ҋ
     864ҍ
     865ҏ
     866ґ
     867ғ
     868ҕ
     869җ
     870ҙ
     871қ
     872ҝ
     873ҟ
     874Ò¡
     875Ò£
     876Ò¥
     877Ò§
     878Ò©
     879Ò«
     880Ò­
     881Ò¯
     882Ò±
     883Ò³
     884Òµ
     885Ò·
     886Ò¹
     887Ò»
     888Òœ
     889Ò¿
     890ӏ
     891ӂ
     892ӄ
     893ӆ
     894ӈ
     895ӊ
     896ӌ
     897ӎ
     898ӑ
     899ӓ
     900ӕ
     901ӗ
     902ә
     903ӛ
     904ӝ
     905ӟ
     906Ó¡
     907Ó£
     908Ó¥
     909Ó§
     910Ó©
     911Ó«
     912Ó­
     913Ó¯
     914Ó±
     915Ó³
     916Óµ
     917Ó·
     918Ó¹
     919Ó»
     920Óœ
     921Ó¿
     922ԁ
     923ԃ
     924Ô
     925
     926ԇ
     927ԉ
     928ԋ
     929ԍ
     930ԏ
     931ԑ
     932ԓ
     933ԕ
     934ԗ
     935ԙ
     936ԛ
     937ԝ
     938ԟ
     939Ô¡
     940Ô£
     941Ô¥
     942Ô§
     943Ô©
     944Ô«
     945Ô­
     946Ô¯
     947Õ¡
     948Õ¢
     949Õ£
     950Õ€
     951Õ¥
     952ÕŠ
     953Õ§
     954Õš
     955Õ©
     956Õª
     957Õ«
     958Õ¬
     959Õ­
     960Õ®
     961Õ¯
     962Õ°
     963Õ±
     964Õ²
     965Õ³
     966ÕŽ
     967Õµ
     968Õ¶
     969Õ·
     970Õž
     971Õ¹
     972Õº
     973Õ»
     974ÕŒ
     975Õœ
     976ÕŸ
     977Õ¿
     978ր
     979ց
     980ւ
     981փ
     982ք
     983Ö
     984
     985ֆ
     986⮀
     987⎁
     988⮂
     989⎃
     990⮄
     991âŽ
     992
     993⮆
     994⮇
     995⎈
     996⮉
     997⮊
     998⮋
     999⮌
     1000⎍
     1001⮎
     1002⎏
     1003⎐
     1004⮑
     1005⮒
     1006⮓
     1007⮔
     1008⮕
     1009⮖
     1010⮗
     1011⎘
     1012⮙
     1013⮚
     1014⮛
     1015⮜
     1016⎝
     1017⮞
     1018⮟
     1019⎠
     1020⎡
     1021⎢
     1022⎣
     1023⎀
     1024⎥
     1025⎧
     1026⎭
     1027Ᏸ
     1028Ᏹ
     1029Ᏺ
     1030Ᏻ
     1031Ꮞ
     1032Ᏽ
     1033в
     1034ÐŽ
     1035П
     1036с
     1037т
     1038т
     1039ъ
     1040Ñ£
     1041ꙋ
     1042ខ
     1043ឃ
     1044áž
     1045
     1046ᾇ
     1047ᾉ
     1048ᾋ
     1049ឍ
     1050ត
     1051ᾑ
     1052ᾓ
     1053ᾕ
     1054ᾗ
     1055ᾙ
     1056ᾛ
     1057ឝ
     1058ᾟ
     1059áž¡
     1060ឣ
     1061ឥ
     1062ឧ
     1063áž©
     1064áž«
     1065áž­
     1066ឯ
     1067áž±
     1068áž³
     1069ážµ
     1070áž·
     1071áž¹
     1072áž»
     1073ážœ
     1074áž¿
     1075ṁ
     1076ṃ
     1077á¹
     1078
     1079ṇ
     1080ṉ
     1081ṋ
     1082ṍ
     1083ṏ
     1084ṑ
     1085ṓ
     1086ṕ
     1087ṗ
     1088ṙ
     1089ṛ
     1090ṝ
     1091ṟ
     1092ṡ
     1093á¹£
     1094á¹¥
     1095ṧ
     1096ṩ
     1097ṫ
     1098á¹­
     1099ṯ
     1100á¹±
     1101á¹³
     1102á¹µ
     1103á¹·
     1104á¹¹
     1105á¹»
     1106Ṝ
     1107ṿ
     1108ẁ
     1109ẃ
     1110áº
     1111
     1112ẇ
     1113ẉ
     1114ẋ
     1115ẍ
     1116ẏ
     1117ẑ
     1118ẓ
     1119ẕ
     1120ṡ
     1121ß
     1122ạ
     1123ả
     1124ấ
     1125ầ
     1126ẩ
     1127ẫ
     1128ậ
     1129ắ
     1130ằ
     1131ẳ
     1132ẵ
     1133ặ
     1134ẹ
     1135ẻ
     1136ẜ
     1137ế
     1138ề
     1139ể
     1140á»
     1141
     1142ệ
     1143ỉ
     1144ị
     1145ọ
     1146ỏ
     1147ố
     1148ồ
     1149ổ
     1150ỗ
     1151ộ
     1152ớ
     1153ờ
     1154ở
     1155ỡ
     1156ợ
     1157ụ
     1158ủ
     1159ứ
     1160ừ
     1161á»­
     1162ữ
     1163á»±
     1164ỳ
     1165ỵ
     1166á»·
     1167ỹ
     1168á»»
     1169Ờ
     1170ỿ
     1171ጀ
     1172ጁ
     1173ጂ
     1174ጃ
     1175ጄ
     1176áŒ
     1177
     1178ጆ
     1179ጇ
     1180ጐ
     1181጑
     1182ጒ
     1183ጓ
     1184ጔ
     1185ጕ
     1186ጠ
     1187ጡ
     1188ጢ
     1189ጣ
     1190ጀ
     1191ጥ
     1192ጊ
     1193ጧ
     1194ጰ
     1195ጱ
     1196ጲ
     1197ጳ
     1198ጎ
     1199ጵ
     1200ጶ
     1201ጷ
     1202ᜀ
     1203ᜁ
     1204ᜂ
     1205ᜃ
     1206ᜄ
     1207áœ
     1208
     1209ᜑ
     1210ᜓ
     1211᜕
     1212᜗
     1213ᜠ
     1214ᜡ
     1215ᜢ
     1216ᜣ
     1217ᜀ
     1218ᜥ
     1219ᜊ
     1220ᜧ
     1221ៀ
     1222េ
     1223ែ
     1224ៃ
     1225ោ
     1226áŸ
     1227
     1228ំ
     1229ះ
     1230័
     1231៑
     1232្
     1233៓
     1234។
     1235៕
     1236៖
     1237ៗ
     1238០
     1239១
     1240២
     1241៣
     1242ៀ
     1243៥
     1244៊
     1245៧
     1246៰
     1247៱
     1248ᜰ
     1249ᜱ
     1250៳
     1251ι
     1252ᜲ
     1253ᜳ
     1254ᜎ
     1255᜵
     1256ῃ
     1257ῐ
     1258ῑ
     1259᜶
     1260᜷
     1261á¿ 
     1262á¿¡
     1263᜺
     1264᜻
     1265á¿¥
     1266᜞
     1267᜹
     1268ᜌ
     1269᜜
     1270ῳ
     1271ω
     1272k
     1273Ã¥
     1274â
     1275Ž
     1276â
     1277°
     1278â
     1279±
     1280â
     1281²
     1282â
     1283³
     1284â
     1285Ž
     1286â
     1287µ
     1288â
     1289
     1290â
     1291·
     1292â
     1293ž
     1294â
     1295¹
     1296â
     1297º
     1298â
     1299»
     1300â
     1301Œ
     1302â
     1303œ
     1304â
     1305Ÿ
     1306â
     1307¿
     1308ↄ
     1309ⓐ
     1310ⓑ
     1311ⓒ
     1312ⓓ
     1313ⓔ
     1314ⓕ
     1315ⓖ
     1316ⓗ
     1317ⓘ
     1318ⓙ
     1319ⓚ
     1320ⓛ
     1321ⓜ
     1322ⓝ
     1323ⓞ
     1324ⓟ
     1325ⓠ
     1326ⓡ
     1327ⓢ
     1328ⓣ
     1329â“€
     1330ⓥ
     1331ⓩ
     1332ⓧ
     1333⓹
     1334ⓩ
     1335â°°
     1336â°±
     1337â°²
     1338â°³
     1339â°Ž
     1340â°µ
     1341â°¶
     1342â°·
     1343â°ž
     1344â°¹
     1345â°º
     1346â°»
     1347â°Œ
     1348â°œ
     1349â°Ÿ
     1350â°¿
     1351ⱀ
     1352ⱁ
     1353ⱂ
     1354ⱃ
     1355ⱄ
     1356â±
     1357
     1358ⱆ
     1359ⱇ
     1360ⱈ
     1361ⱉ
     1362ⱊ
     1363ⱋ
     1364ⱌ
     1365ⱍ
     1366ⱎ
     1367ⱏ
     1368ⱐ
     1369ⱑ
     1370ⱒ
     1371ⱓ
     1372ⱔ
     1373ⱕ
     1374ⱖ
     1375ⱗ
     1376ⱘ
     1377ⱙ
     1378ⱚ
     1379ⱛ
     1380ⱜ
     1381ⱝ
     1382ⱞ
     1383ⱡ
     1384É«
     1385ᵜ
     1386Éœ
     1387ⱚ
     1388ⱪ
     1389ⱬ
     1390ɑ
     1391ɱ
     1392ɐ
     1393ɒ
     1394â±³
     1395ⱶ
     1396È¿
     1397ɀ
     1398ⲁ
     1399ⲃ
     1400â²
     1401
     1402ⲇ
     1403ⲉ
     1404ⲋ
     1405ⲍ
     1406ⲏ
     1407ⲑ
     1408ⲓ
     1409ⲕ
     1410ⲗ
     1411ⲙ
     1412ⲛ
     1413ⲝ
     1414ⲟ
     1415ⲡ
     1416â²£
     1417â²¥
     1418ⲧ
     1419ⲩ
     1420ⲫ
     1421â²­
     1422ⲯ
     1423â²±
     1424â²³
     1425â²µ
     1426â²·
     1427â²¹
     1428â²»
     1429Ⲝ
     1430ⲿ
     1431ⳁ
     1432ⳃ
     1433â³
     1434
     1435ⳇ
     1436ⳉ
     1437ⳋ
     1438ⳍ
     1439ⳏ
     1440ⳑ
     1441ⳓ
     1442ⳕ
     1443ⳗ
     1444ⳙ
     1445ⳛ
     1446ⳝ
     1447ⳟ
     1448ⳡ
     1449â³£
     1450ⳬ
     1451â³®
     1452â³³
     1453ꙁ
     1454ꙃ
     1455ê™
     1456
     1457ꙇ
     1458ꙉ
     1459ꙋ
     1460ꙍ
     1461ꙏ
     1462ꙑ
     1463ꙓ
     1464ꙕ
     1465ꙗ
     1466ꙙ
     1467ꙛ
     1468ꙝ
     1469ꙟ
     1470ꙡ
     1471ꙣ
     1472ꙥ
     1473ꙧ
     1474ꙩ
     1475ꙫ
     1476ꙭ
     1477ꚁ
     1478ꚃ
     1479êš
     1480
     1481ꚇ
     1482ꚉ
     1483ꚋ
     1484ꚍ
     1485ꚏ
     1486ꚑ
     1487ꚓ
     1488ꚕ
     1489ꚗ
     1490ꚙ
     1491ꚛ
     1492ꜣ
     1493ꜥ
     1494ꜧ
     1495ꜩ
     1496ꜫ
     1497ꜭ
     1498ꜯ
     1499ꜳ
     1500ꜵ
     1501ꜷ
     1502ꜹ
     1503ꜻ
     1504ꜜ
     1505ꜿ
     1506ꝁ
     1507ꝃ
     1508ê
     1509
     1510ꝇ
     1511ꝉ
     1512ꝋ
     1513ꝍ
     1514ꝏ
     1515ꝑ
     1516ꝓ
     1517ꝕ
     1518ꝗ
     1519ꝙ
     1520ꝛ
     1521ꝝ
     1522ꝟ
     1523ꝡ
     1524ꝣ
     1525ꝥ
     1526ꝧ
     1527ꝩ
     1528ꝫ
     1529ꝭ
     1530ꝯ
     1531ꝺ
     1532Ꝍ
     1533áµ¹
     1534ꝿ
     1535ꞁ
     1536ꞃ
     1537êž
     1538
     1539ꞇ
     1540ꞌ
     1541É¥
     1542ꞑ
     1543ꞓ
     1544ꞗ
     1545ꞙ
     1546ꞛ
     1547ꞝ
     1548ꞟ
     1549ꞡ
     1550ꞣ
     1551ꞥ
     1552ꞧ
     1553ꞩ
     1554ÉŠ
     1555ɜ
     1556É¡
     1557ɬ
     1558ɪ
     1559ʞ
     1560ʇ
     1561ʝ
     1562ꭓ
     1563ꞵ
     1564ꞷ
     1565Ꭰ
     1566Ꭱ
     1567Ꭲ
     1568Ꭳ
     1569ᎀ
     1570Ꭵ
     1571ᎊ
     1572Ꭷ
     1573᎚
     1574Ꭹ
     1575Ꭺ
     1576Ꭻ
     1577Ꭼ
     1578Ꭽ
     1579Ꭾ
     1580Ꭿ
     1581Ꮀ
     1582Ꮁ
     1583Ꮂ
     1584Ꮃ
     1585Ꭾ
     1586Ꮅ
     1587Ꮆ
     1588Ꮇ
     1589Ꮎ
     1590Ꮉ
     1591Ꮊ
     1592Ꮋ
     1593ᎌ
     1594᎜
     1595᎟
     1596Ꮏ
     1597Ꮐ
     1598Ꮑ
     1599Ꮒ
     1600Ꮓ
     1601Ꮔ
     1602á
     1603
     1604Ꮖ
     1605Ꮗ
     1606Ꮘ
     1607Ꮙ
     1608Ꮚ
     1609Ꮛ
     1610Ꮜ
     1611Ꮝ
     1612Ꮞ
     1613Ꮟ
     1614Ꮠ
     1615Ꮡ
     1616Ꮢ
     1617Ꮣ
     1618Ꮤ
     1619Ꮥ
     1620Ꮦ
     1621Ꮧ
     1622Ꮨ
     1623Ꮩ
     1624Ꮪ
     1625Ꮫ
     1626Ꮬ
     1627Ꮭ
     1628Ꮮ
     1629Ꮯ
     1630Ꮰ
     1631Ꮱ
     1632Ꮲ
     1633Ꮳ
     1634Ꮐ
     1635Ꮵ
     1636Ꮚ
     1637Ꮷ
     1638Ꮪ
     1639Ꮹ
     1640Ꮺ
     1641Ꮻ
     1642Ꮼ
     1643Ꮽ
     1644Ꮾ
     1645Ꮿ
     1646
     1647
     1648
     1649
     1650ïœ
     1651
     1652
     1653
     1654
     1655
     1656
     1657
     1658
     1659
     1660
     1661
     1662
     1663
     1664
     1665
     1666
     1667
     1668
     1669
     1670
     1671
     1672
     1673𐐚
     1674𐐩
     1675𐐪
     1676𐐫
     1677𐐬
     1678𐐭
     1679𐐮
     1680𐐯
     1681𐐰
     1682𐐱
     1683𐐲
     1684𐐳
     1685𐐎
     1686𐐵
     1687𐐶
     1688𐐷
     1689𐐞
     1690𐐹
     1691𐐺
     1692𐐻
     1693𐐌
     1694𐐜
     1695𐐟
     1696𐐿
     1697𐑀
     1698𐑁
     1699𐑂
     1700𐑃
     1701𐑄
     1702ð‘
     1703
     1704𐑆
     1705𐑇
     1706𐑈
     1707𐑉
     1708𐑊
     1709𐑋
     1710𐑌
     1711𐑍
     1712𐑎
     1713𐑏
     1714𐓘
     1715𐓙
     1716𐓚
     1717𐓛
     1718𐓜
     1719𐓝
     1720𐓞
     1721𐓟
     1722𐓠
     1723𐓡
     1724𐓢
     1725𐓣
     1726𐓀
     1727𐓥
     1728𐓊
     1729𐓧
     1730𐓚
     1731𐓩
     1732𐓪
     1733𐓫
     1734𐓬
     1735𐓭
     1736𐓮
     1737𐓯
     1738𐓰
     1739𐓱
     1740𐓲
     1741𐓳
     1742𐓎
     1743𐓵
     1744𐓶
     1745𐓷
     1746𐓞
     1747𐓹
     1748𐓺
     1749𐓻
     1750𐳀
     1751𐳁
     1752𐳂
     1753𐳃
     1754𐳄
     1755ð³
     1756
     1757𐳆
     1758𐳇
     1759𐳈
     1760𐳉
     1761𐳊
     1762𐳋
     1763𐳌
     1764𐳍
     1765𐳎
     1766𐳏
     1767𐳐
     1768𐳑
     1769𐳒
     1770𐳓
     1771𐳔
     1772𐳕
     1773𐳖
     1774𐳗
     1775𐳘
     1776𐳙
     1777𐳚
     1778𐳛
     1779𐳜
     1780𐳝
     1781𐳞
     1782𐳟
     1783𐳠
     1784𐳡
     1785𐳢
     1786𐳣
     1787𐳀
     1788𐳥
     1789𐳊
     1790𐳧
     1791𐳚
     1792𐳩
     1793𐳪
     1794𐳫
     1795𐳬
     1796𐳭
     1797𐳮
     1798𐳯
     1799𐳰
     1800𐳱
     1801𐳲
     1802𑣀
     1803𑣁
     1804𑣂
     1805𑣃
     1806𑣄
     1807ð‘£
     1808
     1809𑣆
     1810𑣇
     1811𑣈
     1812𑣉
     1813𑣊
     1814𑣋
     1815𑣌
     1816𑣍
     1817𑣎
     1818𑣏
     1819𑣐
     1820𑣑
     1821𑣒
     1822𑣓
     1823𑣔
     1824𑣕
     1825𑣖
     1826𑣗
     1827𑣘
     1828𑣙
     1829𑣚
     1830𑣛
     1831𑣜
     1832𑣝
     1833𑣞
     1834𑣟
     1835𞀢
     1836𞀣
     1837𞀀
     1838𞀥
     1839𞀊
     1840𞀧
     1841𞀚
     1842𞀩
     1843𞀪
     1844𞀫
     1845𞀬
     1846𞀭
     1847𞀮
     1848𞀯
     1849𞀰
     1850𞀱
     1851𞀲
     1852𞀳
     1853𞀎
     1854𞀵
     1855𞀶
     1856𞀷
     1857𞀞
     1858𞀹
     1859𞀺
     1860𞀻
     1861𞀌
     1862𞀜
     1863𞀟
     1864𞀿
     1865𞥀
     1866𞥁
     1867𞥂
     1868𞥃
     1869)__";
     1870
     1871        const static std::vector<codepoint_t> defined_cps = {
     1872        0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048,
     1873        0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 0x0050,
     1874        0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058,
     1875        0x0059, 0x005a, 0x00b5, 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4,
     1876        0x00c5, 0x00c6, 0x00c7, 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc,
     1877        0x00cd, 0x00ce, 0x00cf, 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4,
     1878        0x00d5, 0x00d6, 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd,
     1879        0x00de, 0x0100, 0x0102, 0x0104, 0x0106, 0x0108, 0x010a, 0x010c,
     1880        0x010e, 0x0110, 0x0112, 0x0114, 0x0116, 0x0118, 0x011a, 0x011c,
     1881        0x011e, 0x0120, 0x0122, 0x0124, 0x0126, 0x0128, 0x012a, 0x012c,
     1882        0x012e, 0x0132, 0x0134, 0x0136, 0x0139, 0x013b, 0x013d, 0x013f,
     1883        0x0141, 0x0143, 0x0145, 0x0147, 0x014a, 0x014c, 0x014e, 0x0150,
     1884        0x0152, 0x0154, 0x0156, 0x0158, 0x015a, 0x015c, 0x015e, 0x0160,
     1885        0x0162, 0x0164, 0x0166, 0x0168, 0x016a, 0x016c, 0x016e, 0x0170,
     1886        0x0172, 0x0174, 0x0176, 0x0178, 0x0179, 0x017b, 0x017d, 0x017f,
     1887        0x0181, 0x0182, 0x0184, 0x0186, 0x0187, 0x0189, 0x018a, 0x018b,
     1888        0x018e, 0x018f, 0x0190, 0x0191, 0x0193, 0x0194, 0x0196, 0x0197,
     1889        0x0198, 0x019c, 0x019d, 0x019f, 0x01a0, 0x01a2, 0x01a4, 0x01a6,
     1890        0x01a7, 0x01a9, 0x01ac, 0x01ae, 0x01af, 0x01b1, 0x01b2, 0x01b3,
     1891        0x01b5, 0x01b7, 0x01b8, 0x01bc, 0x01c4, 0x01c5, 0x01c7, 0x01c8,
     1892        0x01ca, 0x01cb, 0x01cd, 0x01cf, 0x01d1, 0x01d3, 0x01d5, 0x01d7,
     1893        0x01d9, 0x01db, 0x01de, 0x01e0, 0x01e2, 0x01e4, 0x01e6, 0x01e8,
     1894        0x01ea, 0x01ec, 0x01ee, 0x01f1, 0x01f2, 0x01f4, 0x01f6, 0x01f7,
     1895        0x01f8, 0x01fa, 0x01fc, 0x01fe, 0x0200, 0x0202, 0x0204, 0x0206,
     1896        0x0208, 0x020a, 0x020c, 0x020e, 0x0210, 0x0212, 0x0214, 0x0216,
     1897        0x0218, 0x021a, 0x021c, 0x021e, 0x0220, 0x0222, 0x0224, 0x0226,
     1898        0x0228, 0x022a, 0x022c, 0x022e, 0x0230, 0x0232, 0x023a, 0x023b,
     1899        0x023d, 0x023e, 0x0241, 0x0243, 0x0244, 0x0245, 0x0246, 0x0248,
     1900        0x024a, 0x024c, 0x024e, 0x0345, 0x0370, 0x0372, 0x0376, 0x037f,
     1901        0x0386, 0x0388, 0x0389, 0x038a, 0x038c, 0x038e, 0x038f, 0x0391,
     1902        0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399,
     1903        0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, 0x03a0, 0x03a1,
     1904        0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, 0x03a8, 0x03a9, 0x03aa,
     1905        0x03ab, 0x03c2, 0x03cf, 0x03d0, 0x03d1, 0x03d5, 0x03d6, 0x03d8,
     1906        0x03da, 0x03dc, 0x03de, 0x03e0, 0x03e2, 0x03e4, 0x03e6, 0x03e8,
     1907        0x03ea, 0x03ec, 0x03ee, 0x03f0, 0x03f1, 0x03f4, 0x03f5, 0x03f7,
     1908        0x03f9, 0x03fa, 0x03fd, 0x03fe, 0x03ff, 0x0400, 0x0401, 0x0402,
     1909        0x0403, 0x0404, 0x0405, 0x0406, 0x0407, 0x0408, 0x0409, 0x040a,
     1910        0x040b, 0x040c, 0x040d, 0x040e, 0x040f, 0x0410, 0x0411, 0x0412,
     1911        0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041a,
     1912        0x041b, 0x041c, 0x041d, 0x041e, 0x041f, 0x0420, 0x0421, 0x0422,
     1913        0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042a,
     1914        0x042b, 0x042c, 0x042d, 0x042e, 0x042f, 0x0460, 0x0462, 0x0464,
     1915        0x0466, 0x0468, 0x046a, 0x046c, 0x046e, 0x0470, 0x0472, 0x0474,
     1916        0x0476, 0x0478, 0x047a, 0x047c, 0x047e, 0x0480, 0x048a, 0x048c,
     1917        0x048e, 0x0490, 0x0492, 0x0494, 0x0496, 0x0498, 0x049a, 0x049c,
     1918        0x049e, 0x04a0, 0x04a2, 0x04a4, 0x04a6, 0x04a8, 0x04aa, 0x04ac,
     1919        0x04ae, 0x04b0, 0x04b2, 0x04b4, 0x04b6, 0x04b8, 0x04ba, 0x04bc,
     1920        0x04be, 0x04c0, 0x04c1, 0x04c3, 0x04c5, 0x04c7, 0x04c9, 0x04cb,
     1921        0x04cd, 0x04d0, 0x04d2, 0x04d4, 0x04d6, 0x04d8, 0x04da, 0x04dc,
     1922        0x04de, 0x04e0, 0x04e2, 0x04e4, 0x04e6, 0x04e8, 0x04ea, 0x04ec,
     1923        0x04ee, 0x04f0, 0x04f2, 0x04f4, 0x04f6, 0x04f8, 0x04fa, 0x04fc,
     1924        0x04fe, 0x0500, 0x0502, 0x0504, 0x0506, 0x0508, 0x050a, 0x050c,
     1925        0x050e, 0x0510, 0x0512, 0x0514, 0x0516, 0x0518, 0x051a, 0x051c,
     1926        0x051e, 0x0520, 0x0522, 0x0524, 0x0526, 0x0528, 0x052a, 0x052c,
     1927        0x052e, 0x0531, 0x0532, 0x0533, 0x0534, 0x0535, 0x0536, 0x0537,
     1928        0x0538, 0x0539, 0x053a, 0x053b, 0x053c, 0x053d, 0x053e, 0x053f,
     1929        0x0540, 0x0541, 0x0542, 0x0543, 0x0544, 0x0545, 0x0546, 0x0547,
     1930        0x0548, 0x0549, 0x054a, 0x054b, 0x054c, 0x054d, 0x054e, 0x054f,
     1931        0x0550, 0x0551, 0x0552, 0x0553, 0x0554, 0x0555, 0x0556, 0x10a0,
     1932        0x10a1, 0x10a2, 0x10a3, 0x10a4, 0x10a5, 0x10a6, 0x10a7, 0x10a8,
     1933        0x10a9, 0x10aa, 0x10ab, 0x10ac, 0x10ad, 0x10ae, 0x10af, 0x10b0,
     1934        0x10b1, 0x10b2, 0x10b3, 0x10b4, 0x10b5, 0x10b6, 0x10b7, 0x10b8,
     1935        0x10b9, 0x10ba, 0x10bb, 0x10bc, 0x10bd, 0x10be, 0x10bf, 0x10c0,
     1936        0x10c1, 0x10c2, 0x10c3, 0x10c4, 0x10c5, 0x10c7, 0x10cd, 0x13f8,
     1937        0x13f9, 0x13fa, 0x13fb, 0x13fc, 0x13fd, 0x1c80, 0x1c81, 0x1c82,
     1938        0x1c83, 0x1c84, 0x1c85, 0x1c86, 0x1c87, 0x1c88, 0x1e00, 0x1e02,
     1939        0x1e04, 0x1e06, 0x1e08, 0x1e0a, 0x1e0c, 0x1e0e, 0x1e10, 0x1e12,
     1940        0x1e14, 0x1e16, 0x1e18, 0x1e1a, 0x1e1c, 0x1e1e, 0x1e20, 0x1e22,
     1941        0x1e24, 0x1e26, 0x1e28, 0x1e2a, 0x1e2c, 0x1e2e, 0x1e30, 0x1e32,
     1942        0x1e34, 0x1e36, 0x1e38, 0x1e3a, 0x1e3c, 0x1e3e, 0x1e40, 0x1e42,
     1943        0x1e44, 0x1e46, 0x1e48, 0x1e4a, 0x1e4c, 0x1e4e, 0x1e50, 0x1e52,
     1944        0x1e54, 0x1e56, 0x1e58, 0x1e5a, 0x1e5c, 0x1e5e, 0x1e60, 0x1e62,
     1945        0x1e64, 0x1e66, 0x1e68, 0x1e6a, 0x1e6c, 0x1e6e, 0x1e70, 0x1e72,
     1946        0x1e74, 0x1e76, 0x1e78, 0x1e7a, 0x1e7c, 0x1e7e, 0x1e80, 0x1e82,
     1947        0x1e84, 0x1e86, 0x1e88, 0x1e8a, 0x1e8c, 0x1e8e, 0x1e90, 0x1e92,
     1948        0x1e94, 0x1e9b, 0x1e9e, 0x1ea0, 0x1ea2, 0x1ea4, 0x1ea6, 0x1ea8,
     1949        0x1eaa, 0x1eac, 0x1eae, 0x1eb0, 0x1eb2, 0x1eb4, 0x1eb6, 0x1eb8,
     1950        0x1eba, 0x1ebc, 0x1ebe, 0x1ec0, 0x1ec2, 0x1ec4, 0x1ec6, 0x1ec8,
     1951        0x1eca, 0x1ecc, 0x1ece, 0x1ed0, 0x1ed2, 0x1ed4, 0x1ed6, 0x1ed8,
     1952        0x1eda, 0x1edc, 0x1ede, 0x1ee0, 0x1ee2, 0x1ee4, 0x1ee6, 0x1ee8,
     1953        0x1eea, 0x1eec, 0x1eee, 0x1ef0, 0x1ef2, 0x1ef4, 0x1ef6, 0x1ef8,
     1954        0x1efa, 0x1efc, 0x1efe, 0x1f08, 0x1f09, 0x1f0a, 0x1f0b, 0x1f0c,
     1955        0x1f0d, 0x1f0e, 0x1f0f, 0x1f18, 0x1f19, 0x1f1a, 0x1f1b, 0x1f1c,
     1956        0x1f1d, 0x1f28, 0x1f29, 0x1f2a, 0x1f2b, 0x1f2c, 0x1f2d, 0x1f2e,
     1957        0x1f2f, 0x1f38, 0x1f39, 0x1f3a, 0x1f3b, 0x1f3c, 0x1f3d, 0x1f3e,
     1958        0x1f3f, 0x1f48, 0x1f49, 0x1f4a, 0x1f4b, 0x1f4c, 0x1f4d, 0x1f59,
     1959        0x1f5b, 0x1f5d, 0x1f5f, 0x1f68, 0x1f69, 0x1f6a, 0x1f6b, 0x1f6c,
     1960        0x1f6d, 0x1f6e, 0x1f6f, 0x1f88, 0x1f89, 0x1f8a, 0x1f8b, 0x1f8c,
     1961        0x1f8d, 0x1f8e, 0x1f8f, 0x1f98, 0x1f99, 0x1f9a, 0x1f9b, 0x1f9c,
     1962        0x1f9d, 0x1f9e, 0x1f9f, 0x1fa8, 0x1fa9, 0x1faa, 0x1fab, 0x1fac,
     1963        0x1fad, 0x1fae, 0x1faf, 0x1fb8, 0x1fb9, 0x1fba, 0x1fbb, 0x1fbc,
     1964        0x1fbe, 0x1fc8, 0x1fc9, 0x1fca, 0x1fcb, 0x1fcc, 0x1fd8, 0x1fd9,
     1965        0x1fda, 0x1fdb, 0x1fe8, 0x1fe9, 0x1fea, 0x1feb, 0x1fec, 0x1ff8,
     1966        0x1ff9, 0x1ffa, 0x1ffb, 0x1ffc, 0x2126, 0x212a, 0x212b, 0x2132,
     1967        0x2160, 0x2161, 0x2162, 0x2163, 0x2164, 0x2165, 0x2166, 0x2167,
     1968        0x2168, 0x2169, 0x216a, 0x216b, 0x216c, 0x216d, 0x216e, 0x216f,
     1969        0x2183, 0x24b6, 0x24b7, 0x24b8, 0x24b9, 0x24ba, 0x24bb, 0x24bc,
     1970        0x24bd, 0x24be, 0x24bf, 0x24c0, 0x24c1, 0x24c2, 0x24c3, 0x24c4,
     1971        0x24c5, 0x24c6, 0x24c7, 0x24c8, 0x24c9, 0x24ca, 0x24cb, 0x24cc,
     1972        0x24cd, 0x24ce, 0x24cf, 0x2c00, 0x2c01, 0x2c02, 0x2c03, 0x2c04,
     1973        0x2c05, 0x2c06, 0x2c07, 0x2c08, 0x2c09, 0x2c0a, 0x2c0b, 0x2c0c,
     1974        0x2c0d, 0x2c0e, 0x2c0f, 0x2c10, 0x2c11, 0x2c12, 0x2c13, 0x2c14,
     1975        0x2c15, 0x2c16, 0x2c17, 0x2c18, 0x2c19, 0x2c1a, 0x2c1b, 0x2c1c,
     1976        0x2c1d, 0x2c1e, 0x2c1f, 0x2c20, 0x2c21, 0x2c22, 0x2c23, 0x2c24,
     1977        0x2c25, 0x2c26, 0x2c27, 0x2c28, 0x2c29, 0x2c2a, 0x2c2b, 0x2c2c,
     1978        0x2c2d, 0x2c2e, 0x2c60, 0x2c62, 0x2c63, 0x2c64, 0x2c67, 0x2c69,
     1979        0x2c6b, 0x2c6d, 0x2c6e, 0x2c6f, 0x2c70, 0x2c72, 0x2c75, 0x2c7e,
     1980        0x2c7f, 0x2c80, 0x2c82, 0x2c84, 0x2c86, 0x2c88, 0x2c8a, 0x2c8c,
     1981        0x2c8e, 0x2c90, 0x2c92, 0x2c94, 0x2c96, 0x2c98, 0x2c9a, 0x2c9c,
     1982        0x2c9e, 0x2ca0, 0x2ca2, 0x2ca4, 0x2ca6, 0x2ca8, 0x2caa, 0x2cac,
     1983        0x2cae, 0x2cb0, 0x2cb2, 0x2cb4, 0x2cb6, 0x2cb8, 0x2cba, 0x2cbc,
     1984        0x2cbe, 0x2cc0, 0x2cc2, 0x2cc4, 0x2cc6, 0x2cc8, 0x2cca, 0x2ccc,
     1985        0x2cce, 0x2cd0, 0x2cd2, 0x2cd4, 0x2cd6, 0x2cd8, 0x2cda, 0x2cdc,
     1986        0x2cde, 0x2ce0, 0x2ce2, 0x2ceb, 0x2ced, 0x2cf2, 0xa640, 0xa642,
     1987        0xa644, 0xa646, 0xa648, 0xa64a, 0xa64c, 0xa64e, 0xa650, 0xa652,
     1988        0xa654, 0xa656, 0xa658, 0xa65a, 0xa65c, 0xa65e, 0xa660, 0xa662,
     1989        0xa664, 0xa666, 0xa668, 0xa66a, 0xa66c, 0xa680, 0xa682, 0xa684,
     1990        0xa686, 0xa688, 0xa68a, 0xa68c, 0xa68e, 0xa690, 0xa692, 0xa694,
     1991        0xa696, 0xa698, 0xa69a, 0xa722, 0xa724, 0xa726, 0xa728, 0xa72a,
     1992        0xa72c, 0xa72e, 0xa732, 0xa734, 0xa736, 0xa738, 0xa73a, 0xa73c,
     1993        0xa73e, 0xa740, 0xa742, 0xa744, 0xa746, 0xa748, 0xa74a, 0xa74c,
     1994        0xa74e, 0xa750, 0xa752, 0xa754, 0xa756, 0xa758, 0xa75a, 0xa75c,
     1995        0xa75e, 0xa760, 0xa762, 0xa764, 0xa766, 0xa768, 0xa76a, 0xa76c,
     1996        0xa76e, 0xa779, 0xa77b, 0xa77d, 0xa77e, 0xa780, 0xa782, 0xa784,
     1997        0xa786, 0xa78b, 0xa78d, 0xa790, 0xa792, 0xa796, 0xa798, 0xa79a,
     1998        0xa79c, 0xa79e, 0xa7a0, 0xa7a2, 0xa7a4, 0xa7a6, 0xa7a8, 0xa7aa,
     1999        0xa7ab, 0xa7ac, 0xa7ad, 0xa7ae, 0xa7b0, 0xa7b1, 0xa7b2, 0xa7b3,
     2000        0xa7b4, 0xa7b6, 0xab70, 0xab71, 0xab72, 0xab73, 0xab74, 0xab75,
     2001        0xab76, 0xab77, 0xab78, 0xab79, 0xab7a, 0xab7b, 0xab7c, 0xab7d,
     2002        0xab7e, 0xab7f, 0xab80, 0xab81, 0xab82, 0xab83, 0xab84, 0xab85,
     2003        0xab86, 0xab87, 0xab88, 0xab89, 0xab8a, 0xab8b, 0xab8c, 0xab8d,
     2004        0xab8e, 0xab8f, 0xab90, 0xab91, 0xab92, 0xab93, 0xab94, 0xab95,
     2005        0xab96, 0xab97, 0xab98, 0xab99, 0xab9a, 0xab9b, 0xab9c, 0xab9d,
     2006        0xab9e, 0xab9f, 0xaba0, 0xaba1, 0xaba2, 0xaba3, 0xaba4, 0xaba5,
     2007        0xaba6, 0xaba7, 0xaba8, 0xaba9, 0xabaa, 0xabab, 0xabac, 0xabad,
     2008        0xabae, 0xabaf, 0xabb0, 0xabb1, 0xabb2, 0xabb3, 0xabb4, 0xabb5,
     2009        0xabb6, 0xabb7, 0xabb8, 0xabb9, 0xabba, 0xabbb, 0xabbc, 0xabbd,
     2010        0xabbe, 0xabbf, 0xff21, 0xff22, 0xff23, 0xff24, 0xff25, 0xff26,
     2011        0xff27, 0xff28, 0xff29, 0xff2a, 0xff2b, 0xff2c, 0xff2d, 0xff2e,
     2012        0xff2f, 0xff30, 0xff31, 0xff32, 0xff33, 0xff34, 0xff35, 0xff36,
     2013        0xff37, 0xff38, 0xff39, 0xff3a, 0x10400, 0x10401, 0x10402, 0x10403,
     2014        0x10404, 0x10405, 0x10406, 0x10407, 0x10408, 0x10409, 0x1040a,
     2015        0x1040b, 0x1040c, 0x1040d, 0x1040e, 0x1040f, 0x10410, 0x10411,
     2016        0x10412, 0x10413, 0x10414, 0x10415, 0x10416, 0x10417, 0x10418,
     2017        0x10419, 0x1041a, 0x1041b, 0x1041c, 0x1041d, 0x1041e, 0x1041f,
     2018        0x10420, 0x10421, 0x10422, 0x10423, 0x10424, 0x10425, 0x10426,
     2019        0x10427, 0x104b0, 0x104b1, 0x104b2, 0x104b3, 0x104b4, 0x104b5,
     2020        0x104b6, 0x104b7, 0x104b8, 0x104b9, 0x104ba, 0x104bb, 0x104bc,
     2021        0x104bd, 0x104be, 0x104bf, 0x104c0, 0x104c1, 0x104c2, 0x104c3,
     2022        0x104c4, 0x104c5, 0x104c6, 0x104c7, 0x104c8, 0x104c9, 0x104ca,
     2023        0x104cb, 0x104cc, 0x104cd, 0x104ce, 0x104cf, 0x104d0, 0x104d1,
     2024        0x104d2, 0x104d3, 0x10c80, 0x10c81, 0x10c82, 0x10c83, 0x10c84,
     2025        0x10c85, 0x10c86, 0x10c87, 0x10c88, 0x10c89, 0x10c8a, 0x10c8b,
     2026        0x10c8c, 0x10c8d, 0x10c8e, 0x10c8f, 0x10c90, 0x10c91, 0x10c92,
     2027        0x10c93, 0x10c94, 0x10c95, 0x10c96, 0x10c97, 0x10c98, 0x10c99,
     2028        0x10c9a, 0x10c9b, 0x10c9c, 0x10c9d, 0x10c9e, 0x10c9f, 0x10ca0,
     2029        0x10ca1, 0x10ca2, 0x10ca3, 0x10ca4, 0x10ca5, 0x10ca6, 0x10ca7,
     2030        0x10ca8, 0x10ca9, 0x10caa, 0x10cab, 0x10cac, 0x10cad, 0x10cae,
     2031        0x10caf, 0x10cb0, 0x10cb1, 0x10cb2, 0x118a0, 0x118a1, 0x118a2,
     2032        0x118a3, 0x118a4, 0x118a5, 0x118a6, 0x118a7, 0x118a8, 0x118a9,
     2033        0x118aa, 0x118ab, 0x118ac, 0x118ad, 0x118ae, 0x118af, 0x118b0,
     2034        0x118b1, 0x118b2, 0x118b3, 0x118b4, 0x118b5, 0x118b6, 0x118b7,
     2035        0x118b8, 0x118b9, 0x118ba, 0x118bb, 0x118bc, 0x118bd, 0x118be,
     2036        0x118bf, 0x1e900, 0x1e901, 0x1e902, 0x1e903, 0x1e904, 0x1e905,
     2037        0x1e906, 0x1e907, 0x1e908, 0x1e909, 0x1e90a, 0x1e90b, 0x1e90c,
     2038        0x1e90d, 0x1e90e, 0x1e90f, 0x1e910, 0x1e911, 0x1e912, 0x1e913,
     2039        0x1e914, 0x1e915, 0x1e916, 0x1e917, 0x1e918, 0x1e919, 0x1e91a,
     2040        0x1e91b, 0x1e91c, 0x1e91d, 0x1e91e, 0x1e91f, 0x1e920, 0x1e921};
     2041        static StringPropertyObject property_object(scf,
     2042                                                    null_codepoint_set,
     2043                                                    reflexive_set,
     2044                                                    static_cast<const char *>(string_buffer),
     2045                                                    buffer_length,
     2046                                                    defined_cps);
     2047    }
     2048    namespace CF_ns {
     2049        /** Code Point Ranges for cf (possibly overriding values from SCF)
     2050        [00df, 00df], [0130, 0130], [0149, 0149], [01f0, 01f0],
     2051        [0390, 0390], [03b0, 03b0], [0587, 0587], [1e96, 1e9a],
     2052        [1e9e, 1e9e], [1f50, 1f50], [1f52, 1f52], [1f54, 1f54],
     2053        [1f56, 1f56], [1f80, 1faf], [1fb2, 1fb4], [1fb6, 1fb7],
     2054        [1fbc, 1fbc], [1fc2, 1fc4], [1fc6, 1fc7], [1fcc, 1fcc],
     2055        [1fd2, 1fd3], [1fd6, 1fd7], [1fe2, 1fe4], [1fe6, 1fe7],
     2056        [1ff2, 1ff4], [1ff6, 1ff7], [1ffc, 1ffc], [fb00, fb06],
     2057        [fb13, fb17]**/
     2058
     2059        const UnicodeSet explicitly_defined_set
     2060                    {{{Empty, 6}, {Mixed, 1}, {Empty, 2}, {Mixed, 2}, {Empty, 4},
     2061              {Mixed, 1}, {Empty, 12}, {Mixed, 2}, {Empty, 14}, {Mixed, 1},
     2062              {Empty, 199}, {Mixed, 1}, {Empty, 5}, {Mixed, 1}, {Empty, 1},
     2063              {Full, 1}, {Mixed, 3}, {Empty, 1752}, {Mixed, 1},
     2064              {Empty, 32807}},
     2065             {0x80000000, 0x00010000, 0x00000200, 0x00010000, 0x00010000,
     2066              0x00010000, 0x00000080, 0x47c00000, 0x00550000, 0x10dcffff,
     2067              0x00cc10dc, 0x10dc00dc, 0x00f8007f}};
     2068
     2069        const unsigned buffer_length = 575;
     2070        const static char __attribute__ ((aligned (32))) string_buffer[768] = u8R"__(ss
     2071i̇
     2072ÊŒn
     2073ǰ
     2074ΐ
     2075Ï
     2076̈́
     2077եւ
     2078ẖ
     2079ẗ
     2080ẘ
     2081ẙ
     2082aÊŸ
     2083ss
     2084Ï
     2085̓
     2086Ï
     2087̓̀
     2088Ï
     2089̓́
     2090Ï
     2091̓͂
     2092ጀι
     2093ጁι
     2094ጂι
     2095ጃι
     2096ጄι
     2097áŒ
     2098ι
     2099ጆι
     2100ጇι
     2101ጀι
     2102ጁι
     2103ጂι
     2104ጃι
     2105ጄι
     2106áŒ
     2107ι
     2108ጆι
     2109ጇι
     2110ጠι
     2111ጡι
     2112ጢι
     2113ጣι
     2114ጀι
     2115ጥι
     2116ጊι
     2117ጧι
     2118ጠι
     2119ጡι
     2120ጢι
     2121ጣι
     2122ጀι
     2123ጥι
     2124ጊι
     2125ጧι
     2126ᜠι
     2127ᜡι
     2128ᜢι
     2129ᜣι
     2130ᜀι
     2131ᜥι
     2132ᜊι
     2133ᜧι
     2134ᜠι
     2135ᜡι
     2136ᜢι
     2137ᜣι
     2138ᜀι
     2139ᜥι
     2140ᜊι
     2141ᜧι
     2142ᜰι
     2143αι
     2144άι
     2145ᾶ
     2146ᾶι
     2147αι
     2148ᜎι
     2149ηι
     2150ήι
     2151ῆ
     2152ῆι
     2153ηι
     2154ῒ
     2155ΐ
     2156ῖ
     2157ῗ
     2158Ï
     2159̈̀
     2160Ï
     2161̈́
     2162ῤ
     2163Ï
     2164͂
     2165Ï
     2166̈͂
     2167ᜌι
     2168ωι
     2169ώι
     2170ῶ
     2171ῶι
     2172ωι
     2173ff
     2174fi
     2175fl
     2176ffi
     2177ffl
     2178st
     2179st
     2180ÕŽÕ¶
     2181ÕŽÕ¥
     2182ÕŽÕ«
     2183ÕŸÕ¶
     2184ÕŽÕ­
     2185)__";
     2186
     2187        const static std::vector<codepoint_t> defined_cps = {
     2188        0x00df, 0x0130, 0x0149, 0x01f0, 0x0390, 0x03b0, 0x0587, 0x1e96,
     2189        0x1e97, 0x1e98, 0x1e99, 0x1e9a, 0x1e9e, 0x1f50, 0x1f52, 0x1f54,
     2190        0x1f56, 0x1f80, 0x1f81, 0x1f82, 0x1f83, 0x1f84, 0x1f85, 0x1f86,
     2191        0x1f87, 0x1f88, 0x1f89, 0x1f8a, 0x1f8b, 0x1f8c, 0x1f8d, 0x1f8e,
     2192        0x1f8f, 0x1f90, 0x1f91, 0x1f92, 0x1f93, 0x1f94, 0x1f95, 0x1f96,
     2193        0x1f97, 0x1f98, 0x1f99, 0x1f9a, 0x1f9b, 0x1f9c, 0x1f9d, 0x1f9e,
     2194        0x1f9f, 0x1fa0, 0x1fa1, 0x1fa2, 0x1fa3, 0x1fa4, 0x1fa5, 0x1fa6,
     2195        0x1fa7, 0x1fa8, 0x1fa9, 0x1faa, 0x1fab, 0x1fac, 0x1fad, 0x1fae,
     2196        0x1faf, 0x1fb2, 0x1fb3, 0x1fb4, 0x1fb6, 0x1fb7, 0x1fbc, 0x1fc2,
     2197        0x1fc3, 0x1fc4, 0x1fc6, 0x1fc7, 0x1fcc, 0x1fd2, 0x1fd3, 0x1fd6,
     2198        0x1fd7, 0x1fe2, 0x1fe3, 0x1fe4, 0x1fe6, 0x1fe7, 0x1ff2, 0x1ff3,
     2199        0x1ff4, 0x1ff6, 0x1ff7, 0x1ffc, 0xfb00, 0xfb01, 0xfb02, 0xfb03,
     2200        0xfb04, 0xfb05, 0xfb06, 0xfb13, 0xfb14, 0xfb15, 0xfb16, 0xfb17};
     2201        static StringOverridePropertyObject property_object(cf,
     2202                                                    SCF_ns::property_object,
     2203                                                    explicitly_defined_set,
     2204                                                    static_cast<const char *>(string_buffer),
     2205                                                    buffer_length,
     2206                                                    defined_cps);
     2207    }
     2208}
     2209
    3062210#endif
  • icGREP/icgrep-devel/icgrep/UCD/PropertyObjectTable.h

    r5672 r5673  
    1515#include "BidiMirroring.h"
    1616#include "Blocks.h"
     17#include "CaseFolding.h"
    1718#include "CompositionExclusions.h"
    1819#include "DerivedAge.h"
     
    5051    new UnsupportedPropertyObject(cjkPrimaryNumeric, PropertyObject::ClassTypeId::NumericProperty),
    5152    &NV_ns::property_object,
    52     new UnsupportedPropertyObject(cf, PropertyObject::ClassTypeId::StringOverrideProperty),
     53    &CF_ns::property_object,
    5354    new UnsupportedPropertyObject(cjkCompatibilityVariant, PropertyObject::ClassTypeId::StringProperty),
    5455    &DM_ns::property_object,
     
    5657    &LC_ns::property_object,
    5758    &NFKC_CF_ns::property_object,
    58     new UnsupportedPropertyObject(scf, PropertyObject::ClassTypeId::StringProperty),
     59    &SCF_ns::property_object,
    5960    &SLC_ns::property_object,
    6061    &STC_ns::property_object,
  • icGREP/icgrep-devel/icgrep/UCD/PropertyObjects.h

    r5672 r5673  
    220220        return false;
    221221    }
    222     StringOverridePropertyObject(UCD::property_t p, PropertyObject & baseObj, UnicodeSet overridden, const char * string_buffer, unsigned bufsize, const std::vector<UCD::codepoint_t> & cps)
     222    StringOverridePropertyObject(UCD::property_t p, PropertyObject & baseObj, UnicodeSet explicitly_defined, const char * string_buffer, unsigned bufsize, const std::vector<UCD::codepoint_t> & cps)
    223223    : PropertyObject(p, ClassTypeId::StringOverrideProperty)
    224224    , mBaseObject(baseObj)
    225     , mOverriddenSet(overridden)
     225    , mOverriddenSet(explicitly_defined)
    226226    , mStringBuffer(string_buffer)
    227227    , mBufSize(bufsize)
  • icGREP/icgrep-devel/icgrep/UCD/SpecialCasing.h

    r5672 r5673  
    1717namespace UCD {
    1818    namespace LC_ns {
    19         /** Code Point Ranges for lc overriding values from SLC
    20         [00df, 00df], [0149, 0149], [01f0, 01f0], [0307, 0307],
     19        /** Code Point Ranges for lc (possibly overriding values from SLC)
     20        [00df, 00df], [0130, 0130], [0149, 0149], [01f0, 01f0],
    2121        [0390, 0390], [03b0, 03b0], [0587, 0587], [1e96, 1e9a],
    2222        [1f50, 1f50], [1f52, 1f52], [1f54, 1f54], [1f56, 1f56],
    23         [1f80, 1f87], [1f90, 1f97], [1fa0, 1fa7], [1fb2, 1fb4],
    24         [1fb6, 1fb7], [1fc2, 1fc4], [1fc6, 1fc7], [1fd2, 1fd3],
     23        [1f80, 1faf], [1fb2, 1fb4], [1fb6, 1fb7], [1fbc, 1fbc],
     24        [1fc2, 1fc4], [1fc6, 1fc7], [1fcc, 1fcc], [1fd2, 1fd3],
    2525        [1fd6, 1fd7], [1fe2, 1fe4], [1fe6, 1fe7], [1ff2, 1ff4],
    26         [1ff6, 1ff7], [fb00, fb06], [fb13, fb17]**/
    27 
    28         const UnicodeSet overridden_set
    29                     {{{Empty, 6}, {Mixed, 1}, {Empty, 3}, {Mixed, 1}, {Empty, 4},
    30               {Mixed, 1}, {Empty, 8}, {Mixed, 1}, {Empty, 3}, {Mixed, 2},
    31               {Empty, 14}, {Mixed, 1}, {Empty, 199}, {Mixed, 1}, {Empty, 5},
    32               {Mixed, 1}, {Empty, 1}, {Mixed, 4}, {Empty, 1752}, {Mixed, 1},
     26        [1ff6, 1ff7], [1ffc, 1ffc], [fb00, fb06], [fb13, fb17]**/
     27
     28        const UnicodeSet explicitly_defined_set
     29                    {{{Empty, 6}, {Mixed, 1}, {Empty, 2}, {Mixed, 2}, {Empty, 4},
     30              {Mixed, 1}, {Empty, 12}, {Mixed, 2}, {Empty, 14}, {Mixed, 1},
     31              {Empty, 199}, {Mixed, 1}, {Empty, 5}, {Mixed, 1}, {Empty, 1},
     32              {Full, 1}, {Mixed, 3}, {Empty, 1752}, {Mixed, 1},
    3333              {Empty, 32807}},
    34              {0x80000000, 0x00000200, 0x00010000, 0x00000080, 0x00010000,
    35               0x00010000, 0x00000080, 0x07c00000, 0x00550000, 0x00ff00ff,
    36               0x00dc00ff, 0x00cc00dc, 0x00dc00dc, 0x00f8007f}};
    37 
    38         const unsigned buffer_length = 298;
     34             {0x80000000, 0x00010000, 0x00000200, 0x00010000, 0x00010000,
     35              0x00010000, 0x00000080, 0x07c00000, 0x00550000, 0x10dcffff,
     36              0x00cc10dc, 0x10dc00dc, 0x00f8007f}};
     37
     38        const unsigned buffer_length = 406;
    3939        const static char __attribute__ ((aligned (32))) string_buffer[512] = u8R"__(ß
     40i̇
    4041ʼn
    4142Ç°
    42 i̇
    4343ΐ
    4444ΰ
     
    6262ំ
    6363ះ
     64ៀ
     65េ
     66ែ
     67ៃ
     68ោ
     69áŸ
     70
     71ំ
     72ះ
     73័
     74៑
     75្
     76៓
     77។
     78៕
     79៖
     80ៗ
    6481័
    6582៑
     
    7895៊
    7996៧
     97០
     98១
     99២
     100៣
     101ៀ
     102៥
     103៊
     104៧
    80105៲
    81106៳
     
    83108៶
    84109៷
     110៳
    85111ῂ
    86112ῃ
     
    88114ῆ
    89115ῇ
     116ῃ
    90117ῒ
    91118ΐ
     
    102129ῶ
    103130á¿·
     131ῳ
    104132ff
    105133fi
     
    118146
    119147        const static std::vector<codepoint_t> defined_cps = {
    120         0x00df, 0x0149, 0x01f0, 0x0307, 0x0390, 0x03b0, 0x0587, 0x1e96,
     148        0x00df, 0x0130, 0x0149, 0x01f0, 0x0390, 0x03b0, 0x0587, 0x1e96,
    121149        0x1e97, 0x1e98, 0x1e99, 0x1e9a, 0x1f50, 0x1f52, 0x1f54, 0x1f56,
    122150        0x1f80, 0x1f81, 0x1f82, 0x1f83, 0x1f84, 0x1f85, 0x1f86, 0x1f87,
     151        0x1f88, 0x1f89, 0x1f8a, 0x1f8b, 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f,
    123152        0x1f90, 0x1f91, 0x1f92, 0x1f93, 0x1f94, 0x1f95, 0x1f96, 0x1f97,
     153        0x1f98, 0x1f99, 0x1f9a, 0x1f9b, 0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f,
    124154        0x1fa0, 0x1fa1, 0x1fa2, 0x1fa3, 0x1fa4, 0x1fa5, 0x1fa6, 0x1fa7,
    125         0x1fb2, 0x1fb3, 0x1fb4, 0x1fb6, 0x1fb7, 0x1fc2, 0x1fc3, 0x1fc4,
    126         0x1fc6, 0x1fc7, 0x1fd2, 0x1fd3, 0x1fd6, 0x1fd7, 0x1fe2, 0x1fe3,
    127         0x1fe4, 0x1fe6, 0x1fe7, 0x1ff2, 0x1ff3, 0x1ff4, 0x1ff6, 0x1ff7,
    128         0xfb00, 0xfb01, 0xfb02, 0xfb03, 0xfb04, 0xfb05, 0xfb06, 0xfb13,
    129         0xfb14, 0xfb15, 0xfb16, 0xfb17};
     155        0x1fa8, 0x1fa9, 0x1faa, 0x1fab, 0x1fac, 0x1fad, 0x1fae, 0x1faf,
     156        0x1fb2, 0x1fb3, 0x1fb4, 0x1fb6, 0x1fb7, 0x1fbc, 0x1fc2, 0x1fc3,
     157        0x1fc4, 0x1fc6, 0x1fc7, 0x1fcc, 0x1fd2, 0x1fd3, 0x1fd6, 0x1fd7,
     158        0x1fe2, 0x1fe3, 0x1fe4, 0x1fe6, 0x1fe7, 0x1ff2, 0x1ff3, 0x1ff4,
     159        0x1ff6, 0x1ff7, 0x1ffc, 0xfb00, 0xfb01, 0xfb02, 0xfb03, 0xfb04,
     160        0xfb05, 0xfb06, 0xfb13, 0xfb14, 0xfb15, 0xfb16, 0xfb17};
    130161        static StringOverridePropertyObject property_object(lc,
    131162                                                    SLC_ns::property_object,
    132                                                     overridden_set,
     163                                                    explicitly_defined_set,
    133164                                                    static_cast<const char *>(string_buffer),
    134165                                                    buffer_length,
     
    136167    }
    137168    namespace UC_ns {
    138         /** Code Point Ranges for uc overriding values from SUC
    139         [004e, 004e], [0066, 0066], [0069, 0069], [006c, 006c],
    140         [0073, 0074], [0130, 0130], [02be, 02be], [0300, 0301],
    141         [0308, 0308], [030a, 030a], [030c, 030c], [0313, 0313],
    142         [0331, 0331], [0342, 0342], [0345, 0345], [0565, 0565],
    143         [056b, 056b], [056d, 056d], [0576, 0576], [0582, 0582],
    144         [1f88, 1f8f], [1f98, 1f9f], [1fa8, 1faf], [1fbc, 1fbc],
    145         [1fcc, 1fcc], [1ffc, 1ffc]**/
    146 
    147         const UnicodeSet overridden_set
    148                     {{{Empty, 2}, {Mixed, 2}, {Empty, 5}, {Mixed, 1}, {Empty, 11},
    149               {Mixed, 1}, {Empty, 2}, {Mixed, 3}, {Empty, 16}, {Mixed, 2},
    150               {Empty, 207}, {Mixed, 4}, {Empty, 34560}},
    151              {0x00004000, 0x00181240, 0x00010000, 0x40000000, 0x00081503,
    152               0x00020000, 0x00000024, 0x00402820, 0x00000004, 0xff00ff00,
    153               0x1000ff00, 0x00001000, 0x10000000}};
    154 
    155         const unsigned buffer_length = 208;
    156         const static char __attribute__ ((aligned (32))) string_buffer[256] = u8R"__(ÊŒN
    157 Ff
    158 Ffi
    159 Ffl
    160 Ss
    161 St
     169        /** Code Point Ranges for uc (possibly overriding values from SUC)
     170        [00df, 00df], [0130, 0130], [0149, 0149], [01f0, 01f0],
     171        [0390, 0390], [03b0, 03b0], [0587, 0587], [1e96, 1e9a],
     172        [1f50, 1f50], [1f52, 1f52], [1f54, 1f54], [1f56, 1f56],
     173        [1f80, 1faf], [1fb2, 1fb4], [1fb6, 1fb7], [1fbc, 1fbc],
     174        [1fc2, 1fc4], [1fc6, 1fc7], [1fcc, 1fcc], [1fd2, 1fd3],
     175        [1fd6, 1fd7], [1fe2, 1fe4], [1fe6, 1fe7], [1ff2, 1ff4],
     176        [1ff6, 1ff7], [1ffc, 1ffc], [fb00, fb06], [fb13, fb17]**/
     177
     178        const UnicodeSet explicitly_defined_set
     179                    {{{Empty, 6}, {Mixed, 1}, {Empty, 2}, {Mixed, 2}, {Empty, 4},
     180              {Mixed, 1}, {Empty, 12}, {Mixed, 2}, {Empty, 14}, {Mixed, 1},
     181              {Empty, 199}, {Mixed, 1}, {Empty, 5}, {Mixed, 1}, {Empty, 1},
     182              {Full, 1}, {Mixed, 3}, {Empty, 1752}, {Mixed, 1},
     183              {Empty, 32807}},
     184             {0x80000000, 0x00010000, 0x00000200, 0x00010000, 0x00010000,
     185              0x00010000, 0x00000080, 0x07c00000, 0x00550000, 0x10dcffff,
     186              0x00cc10dc, 0x10dc00dc, 0x00f8007f}};
     187
     188        const unsigned buffer_length = 469;
     189        const static char __attribute__ ((aligned (32))) string_buffer[512] = u8R"__(Ss
    162190Ä°
     191ÊŒN
     192J̌
     193Ϊ́
     194Ϋ́
     195Եւ
     196H̱
     197T̈
     198W̊
     199Y̊
    163200AÊŸ
    164 Î¥ÌˆÌ€
    165 Î¥ÌˆÌ
    166 T̈
    167 Y̊
    168 J̌
    169 Î¡Ì“
    170 H̱
    171 Î©Í‚
    172 Î©Í‚Í
    173 
    174 Õ„Õ¥
    175 Õ„Õ«
    176 Õ„Õ­
    177 ÕŽÕ¶
    178 ÔµÖ‚
     201Υ̓
     202Υ̓̀
     203Υ̓́
     204Υ̓͂
     205ៈ
     206៉
     207៊
     208់
     209៌
     210៍
     211៎
     212៏
    179213ៈ
    180214៉
     
    193227៞
    194228៟
     229៘
     230៙
     231៚
     232៛
     233ៜ
     234៝
     235៞
     236៟
    195237៚
    196238៩
     
    201243៮
    202244៯
     245៚
     246៩
     247៪
     248៫
     249៬
     250៭
     251៮
     252៯
     253៺Í
     254
    203255៌
     256ΆÍ
     257
     258Α͂
     259Α͂Í
     260
     261៌
     262ῊÍ
     263
    204264ῌ
     265ΉÍ
     266
     267Η͂
     268Η͂Í
     269
     270ῌ
     271Ϊ̀
     272Ϊ́
     273Ι͂
     274Ϊ͂
     275Ϋ̀
     276Ϋ́
     277Ρ̓
     278Υ͂
     279Ϋ͂
     280ῺÍ
     281
    205282ῌ
     283ΏÍ
     284
     285Ω͂
     286Ω͂Í
     287
     288á¿Œ
     289Ff
     290Fi
     291Fl
     292Ffi
     293Ffl
     294St
     295St
     296Մն
     297Մե
     298Մի
     299Վն
     300Մխ
    206301)__";
    207302
    208303        const static std::vector<codepoint_t> defined_cps = {
    209         0x004e, 0x0066, 0x0069, 0x006c, 0x0073, 0x0074, 0x0130, 0x02be,
    210         0x0300, 0x0301, 0x0308, 0x030a, 0x030c, 0x0313, 0x0331, 0x0342,
    211         0x0345, 0x0565, 0x056b, 0x056d, 0x0576, 0x0582, 0x1f88, 0x1f89,
    212         0x1f8a, 0x1f8b, 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f, 0x1f98, 0x1f99,
    213         0x1f9a, 0x1f9b, 0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f, 0x1fa8, 0x1fa9,
    214         0x1faa, 0x1fab, 0x1fac, 0x1fad, 0x1fae, 0x1faf, 0x1fbc, 0x1fcc,
    215         0x1ffc};
     304        0x00df, 0x0130, 0x0149, 0x01f0, 0x0390, 0x03b0, 0x0587, 0x1e96,
     305        0x1e97, 0x1e98, 0x1e99, 0x1e9a, 0x1f50, 0x1f52, 0x1f54, 0x1f56,
     306        0x1f80, 0x1f81, 0x1f82, 0x1f83, 0x1f84, 0x1f85, 0x1f86, 0x1f87,
     307        0x1f88, 0x1f89, 0x1f8a, 0x1f8b, 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f,
     308        0x1f90, 0x1f91, 0x1f92, 0x1f93, 0x1f94, 0x1f95, 0x1f96, 0x1f97,
     309        0x1f98, 0x1f99, 0x1f9a, 0x1f9b, 0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f,
     310        0x1fa0, 0x1fa1, 0x1fa2, 0x1fa3, 0x1fa4, 0x1fa5, 0x1fa6, 0x1fa7,
     311        0x1fa8, 0x1fa9, 0x1faa, 0x1fab, 0x1fac, 0x1fad, 0x1fae, 0x1faf,
     312        0x1fb2, 0x1fb3, 0x1fb4, 0x1fb6, 0x1fb7, 0x1fbc, 0x1fc2, 0x1fc3,
     313        0x1fc4, 0x1fc6, 0x1fc7, 0x1fcc, 0x1fd2, 0x1fd3, 0x1fd6, 0x1fd7,
     314        0x1fe2, 0x1fe3, 0x1fe4, 0x1fe6, 0x1fe7, 0x1ff2, 0x1ff3, 0x1ff4,
     315        0x1ff6, 0x1ff7, 0x1ffc, 0xfb00, 0xfb01, 0xfb02, 0xfb03, 0xfb04,
     316        0xfb05, 0xfb06, 0xfb13, 0xfb14, 0xfb15, 0xfb16, 0xfb17};
    216317        static StringOverridePropertyObject property_object(uc,
    217318                                                    SUC_ns::property_object,
    218                                                     overridden_set,
     319                                                    explicitly_defined_set,
    219320                                                    static_cast<const char *>(string_buffer),
    220321                                                    buffer_length,
     
    222323    }
    223324    namespace TC_ns {
    224         /** Code Point Ranges for tc overriding values from STC
    225         [0046, 0046], [0049, 0049], [004c, 004c], [004e, 004e],
    226         [0053, 0054], [0130, 0130], [02be, 02be], [0300, 0301],
    227         [0308, 0308], [030a, 030a], [030c, 030c], [0313, 0313],
    228         [0331, 0331], [0342, 0342], [0399, 0399], [0535, 0535],
    229         [053b, 053b], [053d, 053d], [0546, 0546], [0552, 0552]**/
    230 
    231         const UnicodeSet overridden_set
    232                     {{{Empty, 2}, {Mixed, 1}, {Empty, 6}, {Mixed, 1}, {Empty, 11},
    233               {Mixed, 1}, {Empty, 2}, {Mixed, 3}, {Empty, 1}, {Mixed, 1},
    234               {Empty, 12}, {Mixed, 2}, {Empty, 34773}},
    235              {0x00185240, 0x00010000, 0x40000000, 0x00081503, 0x00020000,
    236               0x00000004, 0x02000000, 0x28200000, 0x00040040}};
    237 
    238         const unsigned buffer_length = 100;
    239         const static char __attribute__ ((aligned (32))) string_buffer[256] = u8R"__(FF
     325        /** Code Point Ranges for tc (possibly overriding values from STC)
     326        [00df, 00df], [0130, 0130], [0149, 0149], [01f0, 01f0],
     327        [0390, 0390], [03b0, 03b0], [0587, 0587], [1e96, 1e9a],
     328        [1f50, 1f50], [1f52, 1f52], [1f54, 1f54], [1f56, 1f56],
     329        [1f80, 1faf], [1fb2, 1fb4], [1fb6, 1fb7], [1fbc, 1fbc],
     330        [1fc2, 1fc4], [1fc6, 1fc7], [1fcc, 1fcc], [1fd2, 1fd3],
     331        [1fd6, 1fd7], [1fe2, 1fe4], [1fe6, 1fe7], [1ff2, 1ff4],
     332        [1ff6, 1ff7], [1ffc, 1ffc], [fb00, fb06], [fb13, fb17]**/
     333
     334        const UnicodeSet explicitly_defined_set
     335                    {{{Empty, 6}, {Mixed, 1}, {Empty, 2}, {Mixed, 2}, {Empty, 4},
     336              {Mixed, 1}, {Empty, 12}, {Mixed, 2}, {Empty, 14}, {Mixed, 1},
     337              {Empty, 199}, {Mixed, 1}, {Empty, 5}, {Mixed, 1}, {Empty, 1},
     338              {Full, 1}, {Mixed, 3}, {Empty, 1752}, {Mixed, 1},
     339              {Empty, 32807}},
     340             {0x80000000, 0x00010000, 0x00000200, 0x00010000, 0x00010000,
     341              0x00010000, 0x00000080, 0x07c00000, 0x00550000, 0x10dcffff,
     342              0x00cc10dc, 0x10dc00dc, 0x00f8007f}};
     343
     344        const unsigned buffer_length = 571;
     345        const static char __attribute__ ((aligned (32))) string_buffer[768] = u8R"__(SS
     346Ä°
     347ÊŒN
     348J̌
     349Ϊ́
     350Ϋ́
     351ԵՒ
     352H̱
     353T̈
     354W̊
     355Y̊
     356AÊŸ
     357Υ̓
     358Υ̓̀
     359Υ̓́
     360Υ̓͂
     361ገΙ
     362ጉΙ
     363ጊΙ
     364ጋΙ
     365ጌΙ
     366ግΙ
     367ጎΙ
     368ጏΙ
     369ገΙ
     370ጉΙ
     371ጊΙ
     372ጋΙ
     373ጌΙ
     374ግΙ
     375ጎΙ
     376ጏΙ
     377ጚΙ
     378ጩΙ
     379ጪΙ
     380ጫΙ
     381ጬΙ
     382ጭΙ
     383ጮΙ
     384ጯΙ
     385ጚΙ
     386ጩΙ
     387ጪΙ
     388ጫΙ
     389ጬΙ
     390ጭΙ
     391ጮΙ
     392ጯΙ
     393᜚Ι
     394ᜩΙ
     395ᜪΙ
     396ᜫΙ
     397ᜬΙ
     398ᜭΙ
     399ᜮΙ
     400ᜯΙ
     401᜚Ι
     402ᜩΙ
     403ᜪΙ
     404ᜫΙ
     405ᜬΙ
     406ᜭΙ
     407ᜮΙ
     408ᜯΙ
     409៺Ι
     410ΑΙ
     411ΆΙ
     412Α͂
     413Α͂Ι
     414ΑΙ
     415ῊΙ
     416ΗΙ
     417ΉΙ
     418Η͂
     419Η͂Ι
     420ΗΙ
     421Ϊ̀
     422Ϊ́
     423Ι͂
     424Ϊ͂
     425Ϋ̀
     426Ϋ́
     427Ρ̓
     428Υ͂
     429Ϋ͂
     430ῺΙ
     431ΩΙ
     432ΏΙ
     433Ω͂
     434Ω͂Ι
     435ΩΙ
     436FF
     437FI
     438FL
    240439FFI
    241440FFL
    242 ÊŒN
    243 SS
    244441ST
    245 Ä°
    246 AÊŸ
    247 Î¥ÌˆÌ€
    248 Î¥ÌˆÌ
    249 T̈
    250 Y̊
    251 J̌
    252 Î¡Ì“
    253 H̱
    254 Î©Í‚
    255 Î©Í‚Ι
     442ST
     443ՄՆ
    256444ՄԵ
    257445ՄԻ
     446ՎՆ
    258447ՄԜ
    259 ÕŽÕ†
    260 ÔµÕ’
    261448)__";
    262449
    263450        const static std::vector<codepoint_t> defined_cps = {
    264         0x0046, 0x0049, 0x004c, 0x004e, 0x0053, 0x0054, 0x0130, 0x02be,
    265         0x0300, 0x0301, 0x0308, 0x030a, 0x030c, 0x0313, 0x0331, 0x0342,
    266         0x0399, 0x0535, 0x053b, 0x053d, 0x0546, 0x0552};
     451        0x00df, 0x0130, 0x0149, 0x01f0, 0x0390, 0x03b0, 0x0587, 0x1e96,
     452        0x1e97, 0x1e98, 0x1e99, 0x1e9a, 0x1f50, 0x1f52, 0x1f54, 0x1f56,
     453        0x1f80, 0x1f81, 0x1f82, 0x1f83, 0x1f84, 0x1f85, 0x1f86, 0x1f87,
     454        0x1f88, 0x1f89, 0x1f8a, 0x1f8b, 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f,
     455        0x1f90, 0x1f91, 0x1f92, 0x1f93, 0x1f94, 0x1f95, 0x1f96, 0x1f97,
     456        0x1f98, 0x1f99, 0x1f9a, 0x1f9b, 0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f,
     457        0x1fa0, 0x1fa1, 0x1fa2, 0x1fa3, 0x1fa4, 0x1fa5, 0x1fa6, 0x1fa7,
     458        0x1fa8, 0x1fa9, 0x1faa, 0x1fab, 0x1fac, 0x1fad, 0x1fae, 0x1faf,
     459        0x1fb2, 0x1fb3, 0x1fb4, 0x1fb6, 0x1fb7, 0x1fbc, 0x1fc2, 0x1fc3,
     460        0x1fc4, 0x1fc6, 0x1fc7, 0x1fcc, 0x1fd2, 0x1fd3, 0x1fd6, 0x1fd7,
     461        0x1fe2, 0x1fe3, 0x1fe4, 0x1fe6, 0x1fe7, 0x1ff2, 0x1ff3, 0x1ff4,
     462        0x1ff6, 0x1ff7, 0x1ffc, 0xfb00, 0xfb01, 0xfb02, 0xfb03, 0xfb04,
     463        0xfb05, 0xfb06, 0xfb13, 0xfb14, 0xfb15, 0xfb16, 0xfb17};
    267464        static StringOverridePropertyObject property_object(tc,
    268465                                                    STC_ns::property_object,
    269                                                     overridden_set,
     466                                                    explicitly_defined_set,
    270467                                                    static_cast<const char *>(string_buffer),
    271468                                                    buffer_length,
  • icGREP/icgrep-devel/icgrep/re/re_cc.cpp

    r4829 r5673  
    77#include "re_cc.h"
    88#include <llvm/Support/Compiler.h>
    9 #include <UCD/CaseFolding_txt.h>
     9#include <UCD/CaseFolding.h>
    1010#include <sstream>
    1111
  • icGREP/icgrep-devel/icgrep/re/re_parser.cpp

    r5663 r5673  
    2424#include <UCD/UnicodeNameData.h>
    2525#include <UCD/resolve_properties.h>
    26 #include <UCD/CaseFolding_txt.h>
     26#include <UCD/CaseFolding.h>
    2727#include <grep_engine.h>
    2828#include <sstream>
Note: See TracChangeset for help on using the changeset viewer.