Changeset 5652 for icGREP


Ignore:
Timestamp:
Sep 28, 2017, 2:11:38 PM (19 months ago)
Author:
cameron
Message:

Parse Unicode version; parse decomposition mapping fields

Location:
icGREP/icgrep-devel/UCD-scripts
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/UCD-scripts/UCD_config.py

    r5143 r5652  
    99
    1010UCD_output_dir = "generated"
     11
     12version = "Unknown"
  • icGREP/icgrep-devel/UCD-scripts/UCD_parser.py

    r5155 r5652  
    1111import UCD_config
    1212from unicode_set import *
     13
     14version_regexp = re.compile(".*Version\s+([0-9.]*)\s+of the Unicode Standard.*")
     15
     16def setVersionfromReadMe_txt():
     17    f = open(UCD_config.UCD_src_dir + "/" + 'ReadMe.txt')
     18    lines = f.readlines()
     19    for t in lines:
     20        m = version_regexp.match(t)
     21        if m:
     22            UCD_config.version = m.group(1)
     23            print "Version %s" % m.group(1)
    1324
    1425trivial_name_char_re = re.compile('[-_\s]')
     
    289300   return data_records
    290301
     302#  Parse a decomposition mapping field in one of two forms:
     303#  (a) compatibility mappings:  "<" decomp_type:[A-Za-z]* ">" {codepoint}
     304#  (b) canonical mappings:  {codepoint} 
     305compatibility_regexp = re.compile("^<([^>]*)>\s*([0-9A-F ]*)$")
     306codepoints_regexp = re.compile("^[0-9A-F]{4,6}(?: +[0-9A-F]{4,6})*$")
     307def parse_decomposition(s):
     308    m = compatibility_regexp.match(s)
     309    if m:
     310        decomp_type = m.group(1)
     311        mapping = m.group(2)
     312    else:
     313        decomp_type = "Canonical"
     314        mapping = s
     315    m = codepoints_regexp.match(mapping)
     316    if not m: raise Exception("Bad codepoint string syntax in parse_decomposition: %s" % mapping)
     317    cps = [int(x, 16) for x in mapping.split(" ")]
     318    return (decomp_type, cps)
     319
  • icGREP/icgrep-devel/UCD-scripts/UCD_properties.py

    r5159 r5652  
    1010#
    1111#
    12 import re, string, os.path, cformat
     12import re, string, os.path, cformat, UCD_config
    1313from unicode_set import *
    1414from UCD_parser import *
     
    208208        cformat.write_imports(f, ['"%s.h"' % fname for fname in self.property_data_headers])
    209209        f.write("\nnamespace UCD {\n")
     210        f.write("   const std::string UnicodeVersion = \"%s\";\n" % UCD_config.version)
    210211        objlist = []
    211212        for p in self.property_enum_name_list:
     
    227228
    228229def UCD_main():
     230    setVersionfromReadMe_txt()
     231   
    229232    ucd = UCD_generator()
    230233
Note: See TracChangeset for help on using the changeset viewer.