Ignore:
Timestamp:
Sep 8, 2014, 8:58:26 PM (5 years ago)
Author:
cameron
Message:

parse UCsets from UCD format; print UCsets in C format

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/charsetcompiler/UCD/unicode_set.py

    r4135 r4142  
    5353        self.quads.append(q)
    5454
     55   # printing
     56   def showC(self, name, indent = 8, entries_per_line = 4):
     57      runtype = {-1:"Full", 0:"Empty", 1: "Mixed"}
     58      setrep = (" " * indent) + ("%s.runs = {" % name)
     59      setrep += '{%s, %i}' % (runtype[self.runs[0][0]], self.runs[0][1])
     60      for i in range(1, len(self.runs)):
     61         setrep += ', '
     62         if i % entries_per_line == 0: setrep += "\n" + (" " * (indent+1))
     63         setrep += '{%s, %i}' % (runtype[self.runs[i][0]], self.runs[i][1])
     64      setrep += '};\n'
     65      setrep += (" " * indent) + ("%s.quads = {" % name)
     66      if self.quads != []:
     67         setrep += "%#018x" % self.quads[0]
     68         for i in range(1, len(self.quads)):
     69            setrep += ', '
     70            if i % entries_per_line == 0: setrep += "\n" + (" " * (indent+1))
     71            setrep += "%#018x" % (self.quads[i])
     72      setrep += '};\n'
     73      return setrep   
     74
    5575
    5676# helper
     
    7090   e.quads = []
    7191   e.quad_count = UnicodeQuadCount
     92   return e
    7293
    7394def singleton_set(codepoint):
     
    248269   return iset
    249270
    250 
     271UCD_point_regexp = re.compile("^([0-9A-F]{4,6})\s+;")
     272UCD_range_regexp = re.compile("^([0-9A-F]{4,6})[.][.]([0-9A-F]{4,6})\s+;")
     273
     274def parse_UCD_set(lines):
     275    pset = empty_set()
     276    for t in lines:
     277        m = UCD_point_regexp.match(t)
     278        if m:
     279            point = m.group(1)
     280            pval = int(point, 16)
     281            pset = union(pset, singleton_set(pval))
     282        m = UCD_range_regexp.match(t)
     283        if m:
     284            point1 = m.group(1)
     285            point2 = m.group(2)
     286            pval1 = int(point1, 16)
     287            pval2 = int(point2, 16)
     288            pset = union(pset, make_range_set(pval1, pval2))
     289    return pset
     290
     291def parse_UCD_file(fname, vname):
     292    f = open(fname)
     293    lines = f.readlines()
     294    f.close()
     295    s = parse_UCD_set(lines)
     296    print s.showC(vname)
     297
     298
     299
     300
Note: See TracChangeset for help on using the changeset viewer.