Ignore:
Timestamp:
Sep 30, 2017, 9:49:24 AM (23 months ago)
Author:
cameron
Message:

UCD_Config.h, add Indic and CompositionExclusion properties

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/UCD-scripts/UnicodeNameData.py

    r5642 r5655  
    1616#include "UnicodeNameData.h"
    1717const int Unamesize = %s;
    18 char __attribute__ ((aligned (32))) Unamedata[Unamesize + %s] = "%s";
     18char __attribute__ ((aligned (32))) Unamedata[Unamesize + %s] = R"___(%s)___";
    1919
    2020char * getUnicodeNameDataPtr() {
     
    2626"""
    2727
     28NonName_regexp = re.compile("<[^>]*>")
    2829
    2930def genUnicodeNameData():
    30     parsed_data = parse_UnicodeData_txt()
     31    (parsed_data, ranges) = parse_UnicodeData_txt()
    3132    name_data_string = ""
    3233    name_data_len = 0
    3334    for record in parsed_data:
    3435        (cp, name, gc, ccc, bidic, decomp, decval, digitval, numval, bidim, uc, lc, tc) = record
    35         name_data_string += cp + ";" + name + "\\n"
    36         name_data_len += len(cp) + len(name) + 2
     36        if NonName_regexp.match(name): continue   # Skip codepoints whose name field is not actually a name.
     37        name_data_string += cp + ";" + name + "\n"
     38    # for range_record in ranges:
     39    #     (lo_cp, hi_cp, range_name, gc, ccc, bidic, decomp, decval, digitval, numval, bidim, uc, lc, tc) = range_record
     40    #     print(lo_cp, hi_cp, range_name)
     41    #     if range_name[:13] == "CJK Ideograph":
     42    #         for cp in range(int(lo_cp,16), int(hi_cp,16)):
     43    #             name_data_string += "%04X;CJK UNIFIED IDEOGRAPH-%04X\n" % (cp, cp)
     44    #     elif range_name[:16] == "Tangut Ideograph":
     45    #         for cp in range(int(lo_cp,16), int(hi_cp,16)):
     46    #             name_data_string += "%04X;TANGUT IDEOGRAPH-%04X\n" % (cp, cp)
     47    #     elif range_name[:5] == "Nushu":
     48    #         for cp in range(int(lo_cp,16), int(hi_cp,16)):
     49    #             name_data_string += "%04X;NUSHU CHARACTER-%04X\n" % (cp, cp)
     50    name_data_len = len(name_data_string)
    3751    f = open(UCD_config.UCD_output_dir + '/UnicodeNameData.cpp', 'w')
    3852    f.write(UnicodeNameData_cpp_template % (name_data_len + 1, 255 - (name_data_len % 256), name_data_string))
Note: See TracChangeset for help on using the changeset viewer.