Changeset 4948


Ignore:
Timestamp:
Mar 2, 2016, 5:45:41 PM (3 years ago)
Author:
cameron
Message:

Generator for UnicodeNameData.cpp

Location:
proto/charsetcompiler/UCD
Files:
1 added
1 edited

Legend:

Unmodified
Added
Removed
  • proto/charsetcompiler/UCD/UCD_parser.py

    r4743 r4948  
    282282
    283283
     284UnicodeData_txt_regexp = re.compile("^([0-9A-F]{4,6});([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);(.*)$")
     285
     286def parse_UnicodeData_txt():
     287   data_records = []
     288   f = open(UCD_dir + "/UnicodeData.txt")
     289   lines = f.readlines()
     290   for t in lines:
     291      if UCD_skip.match(t):
     292        continue  # skip comment and blank lines
     293      m = UnicodeData_txt_regexp.match(t)
     294      if not m: raise Exception("Unknown syntax: %s" % t)
     295      (cp, name, gc) = (m.group(1), m.group(2), m.group(3))
     296      (ccc, bidic, decomp, bidim) = (m.group(4), m.group(5), m.group(6), m.group(10))
     297      (decval, digitval, numval) = (m.group(7), m.group(8), m.group(9))
     298      # Unicode 1 name and ISO comment are obolete
     299      (uc, lc, tc) = (m.group(13), m.group(14), m.group(15))
     300      data_records.append((cp, name, gc, ccc, bidic, decomp, decval, digitval, numval, bidim, uc, lc, tc))
     301   return data_records
     302
Note: See TracChangeset for help on using the changeset viewer.