Changeset 5143 for proto


Ignore:
Timestamp:
Sep 7, 2016, 11:54:14 AM (2 years ago)
Author:
cameron
Message:

Updates for Unicode 9.0; clean-ups

Location:
proto/charsetcompiler/UCD
Files:
1 added
2 deleted
5 edited
2 moved

Legend:

Unmodified
Added
Removed
  • proto/charsetcompiler/UCD/README-generate-UCD.txt

    r5142 r5143  
    old new  text
      1      Generating Precompiled UCD files.
          1  Generating UCD data files.
          2
          3  1. Edit UCD_config.py - set UCD_src_dir, UCD_output_dir
          4
          5  2. UnicodeNameData
          6     python UnicodeNameData.py
          7
          8  3. python UCD_properties.py
          9
         10  4. python casefold.py
         11
         12  5. python generate_UCD_tests.py
         13      copy output to icgrep-devel/QA/proptest.xml
         14  -------------------------------------------
         15
         16  Generating Precompiled UCD files.  (Now Deprecated)
      2  17
      3  18  1.   python generate_UCD_property_functions.py
  • proto/charsetcompiler/UCD/UCD_parser.py

    r4948 r5143  
    99#
    1010import re, string, os.path
     11import UCD_config
    1112from unicode_set import *
    12 
    13 UCD_dir = "8.0.0"
    14 def set_UCD_dir(d):
    15     global UCD_dir
    16     UCD_dir = d
    1713
    1814trivial_name_char_re = re.compile('[-_\s]')
     
    3935    property_kind_map = {}
    4036    property_kind = "unspecified"
    41     f = open(UCD_dir + "/" + 'PropertyAliases.txt')
     37    f = open(UCD_config.UCD_src_dir + "/" + 'PropertyAliases.txt')
    4238    lines = f.readlines()
    4339    for t in lines:
     
    8682    property_value_lookup_map = {}
    8783    missing_specs = {}
    88     f = open(UCD_dir + "/" + 'PropertyValueAliases.txt')
     84    f = open(UCD_config.UCD_src_dir + "/" + 'PropertyValueAliases.txt')
    8985    lines = f.readlines()
    9086    for t in lines:
     
    168164    value_map = {}
    169165    name_list_order = []
    170     f = open(UCD_dir + "/" + mapfile)
     166    f = open(UCD_config.UCD_src_dir + "/" + mapfile)
    171167    lines = f.readlines()
    172168    for t in lines:
     
    256252    value_map = {}
    257253    name_list_order = []
    258     f = open(UCD_dir + "/" + mapfile)
     254    f = open(UCD_config.UCD_src_dir + "/" + mapfile)
    259255    lines = f.readlines()
    260256    for t in lines:
     
    286282def parse_UnicodeData_txt():
    287283   data_records = []
    288    f = open(UCD_dir + "/UnicodeData.txt")
     284   f = open(UCD_config.UCD_src_dir + "/UnicodeData.txt")
    289285   lines = f.readlines()
    290286   for t in lines:
  • proto/charsetcompiler/UCD/UCD_properties.py

    r4745 r5143  
    1313from unicode_set import *
    1414from UCD_parser import *
    15 
    16 UCD_dir = "8.0.0"
    1715
    1816PropertyAliases_template = r"""
  • proto/charsetcompiler/UCD/UnicodeNameData.py

    r5131 r5143  
    99#
    1010import re, string, os.path, cformat
     11import UCD_config
    1112from UCD_parser import *
    1213
    13 UCD_dir = "8.0.0"
    1414
    1515UnicodeNameData_cpp_template = r"""
     
    3535        name_data_string += cp + ";" + name + "\\n"
    3636        name_data_len += len(cp) + len(name) + 2
    37     f = open('UnicodeNameData.cpp', 'w')
     37    f = open(UCD_config.UCD_output_dir + '/UnicodeNameData.cpp', 'w')
    3838    f.write(UnicodeNameData_cpp_template % (name_data_len + 1, name_data_string))
    3939    f.close()
  • proto/charsetcompiler/UCD/casefold.py

    r4317 r5143  
    1111#
    1212import re, string, cformat
     13import UCD_config
    1314from unicode_set import *
    1415
    15 UCD_dir = "7.0.0"
    1616
    1717
     
    3131   fold_type = {}
    3232   fold_value = {}
    33    f = open(UCD_dir + "/" + 'CaseFolding.txt')
     33   f = open(UCD_config.UCD_src_dir + "/" + 'CaseFolding.txt')
    3434   lines = f.readlines()
    3535   for t in lines:
     
    174174
    175175struct FoldEntry {
    176     codepoint_t range_lo;
     176    re::codepoint_t range_lo;
    177177    int fold_offset;
    178     std::vector<std::pair<codepoint_t, codepoint_t> > fold_pairs;
     178    std::vector<re::interval_t> fold_pairs;
    179179};
    180180
    181 void caseInsensitiveInsert(re::CC * cc, codepoint_t cp);
    182 
    183 void caseInsensitiveInsertRange(re::CC * cc, codepoint_t lo, codepoint_t hi);
    184    
     181
     182void caseInsensitiveInsertRange(re::CC * cc, const re::codepoint_t lo, const re::codepoint_t hi);
     183
     184inline void caseInsensitiveInsert(re::CC * cc, const re::codepoint_t cp) {
     185    caseInsensitiveInsertRange(cc, cp, cp);
     186}
    185187"""
    186188
  • proto/charsetcompiler/UCD/cformat.py

    r4632 r5143  
     1import UCD_config
    12
    23header_template = r"""#ifndef %s
    34#define %s
    45/*
    5  *  Copyright (c) 2015 International Characters, Inc.
     6 *  Copyright (c) 2016 International Characters, Inc.
    67 *  This software is licensed to the public under the Open Software License 3.0.
    78 *  icgrep is a trademark of International Characters, Inc.
     
    1415cpp_template = r"""
    1516/*
    16  *  Copyright (c) 2015 International Characters, Inc.
     17 *  Copyright (c) 2016 International Characters, Inc.
    1718 *  This software is licensed to the public under the Open Software License 3.0.
    1819 *  icgrep is a trademark of International Characters, Inc.
     
    2728
    2829def open_header_file_for_write(filename, generator_name='UCD_properties.py'):
    29    f = open(filename + '.h', 'w')
     30   f = open(UCD_config.UCD_output_dir + '/' + filename + '.h', 'w')
    3031   hname = filename.upper() + '_H'
    3132   f.write(header_template % (hname, hname, generator_name))
     
    3334
    3435def open_cpp_file_for_write(filename, generator_name='UCD_properties.py'):
    35    f = open(filename + '.cpp', 'w')
     36   f = open(UCD_config.UCD_output_dir + '/' + filename + '.cpp', 'w')
    3637   f.write(cpp_template % (generator_name, filename))
    3738   return f
  • proto/charsetcompiler/UCD/generate_UCD_tests.py

    r5142 r5143  
    99#
    1010#
    11 import re, string, os.path, UCD.cformat
     11import re, string, os.path, cformat
    1212from random import randint
    13 from UCD.unicode_set import *
    14 from UCD.UCD_parser import *
    15 from if_hierarchy import *
     13from unicode_set import *
     14from UCD_parser import *
    1615from string import Template
    1716
     
    189188
    190189if __name__ == "__main__":
    191     set_UCD_dir('UCD/8.0.0')
    192190    UCD_main()
Note: See TracChangeset for help on using the changeset viewer.