Changeset 4176 for proto


Ignore:
Timestamp:
Sep 16, 2014, 4:36:24 PM (4 years ago)
Author:
cameron
Message:

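Parameterize UCset on quad size: replace the hard-coded 64-bit quad constants (FullQuad, UnicodeQuadCount) with per-instance values derived from a log2_quad_bits argument (default 6, i.e. 64-bit quads).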

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/charsetcompiler/UCD/unicode_set.py

    r4146 r4176  
    1919# (c) Explicitly listing all the quads of Mixed type.
    2020#
    21 UnicodeQuadCount = 0x110000 >> 6 #  2**6 codepoints per quad
    2221
    2322Empty = 0
     
    2524Mixed = 1
    2625
    27 FullQuad = (1<<64) - 1
     26default_log_2_quad_bits = 6
    2827
    2928class UCset:
    30    def __init__(self):
     29   def __init__(self, log2_quad_bits = default_log_2_quad_bits):
    3130      self.runs = []
    3231      self.quads = []
    3332      self.quad_count = 0
     33      self.run_bytes = 2
     34      self.log2_quad_bits = log2_quad_bits
     35      self.quad_bits = 1 << log2_quad_bits
     36      self.mod_quad_bit_mask = self.quad_bits - 1
     37      self.UnicodeQuadCount = 0x110000 / self.quad_bits #  2**log2_quad_bits codepoints per quad
     38      self.FullQuadMask = (1<<(self.quad_bits)) - 1
     39     
    3440   # internal methods
    3541   def append_run(self, runtype, runlength):
     
    4753      if q == 0:
    4854        self.append_run(Empty, 1)
    49       elif q & FullQuad == FullQuad:
     55      elif q & self.FullQuadMask == self.FullQuadMask:
    5056        self.append_run(Full, 1)
    5157      else:
     
    5561   # printing
    5662   def showC(self, name, indent = 8, entries_per_line = 4):
     63      hex_specifier =  "%%#0%ix" % (self.quad_bits/4 + 2)
    5764      runtype = {-1:"Full", 0:"Empty", 1: "Mixed"}
    5865      setrep = (" " * indent) + ("%s.runs = {" % name)
     
    6774      if len(self.quads) >= entries_per_line: setrep += "\n" + (" " * (indent+1))
    6875      if self.quads != []:
    69          setrep += "%#018x" % self.quads[0]
     76         setrep += hex_specifier % self.quads[0]
    7077         for i in range(1, len(self.quads)):
    7178            setrep += ', '
    7279            if i % entries_per_line == 0: setrep += "\n" + (" " * (indent+1))
    73             setrep += "%#018x" % (self.quads[i])
     80            setrep += hex_specifier % (self.quads[i])
    7481      setrep += '};\n'
    75       return setrep   
     82      return setrep
     83
     84   def bytes(self):
     85       return (len(self.runs) * self.run_bytes) + (len(self.quads) * self.quad_bits/8)
    7686
    7787
     
    8797# Set Operations
    8898#
    89 def empty_set():
    90    e = UCset()
     99def empty_set(log2_quad_bits = default_log_2_quad_bits):
     100   e = UCset(log2_quad_bits)
    91101   e.runs = [(Empty, UnicodeQuadCount)]
    92102   e.quads = []
     
    94104   return e
    95105
    96 def singleton_set(codepoint):
    97    e = UCset()
    98    quad_no = codepoint >> 6
    99    quad_val = 1 << (codepoint & 0x3F)
     106def singleton_set(codepoint, log2_quad_bits = default_log_2_quad_bits):
     107   e = UCset(log2_quad_bits)
     108   quad_no = codepoint >> log2_quad_bits
     109   quad_val = 1 << (codepoint & e.mod_quad_bit_mask)
    100110   if quad_no > 0: e.append_run(Empty, quad_no)
    101111   e.append_run(Mixed, 1)
    102112   e.quads = [quad_val]
    103    if quad_no < UnicodeQuadCount - 1: e.append_run(Empty, UnicodeQuadCount - (quad_no + 1))
    104    e.quad_count = UnicodeQuadCount
     113   if quad_no < e.UnicodeQuadCount - 1: e.append_run(Empty, e.UnicodeQuadCount - (quad_no + 1))
     114   e.quad_count = e.UnicodeQuadCount
    105115   return e
    106116
    107 def make_range_set(lo_codepoint, hi_codepoint):
    108    e = UCset()
    109    lo_quad_no = lo_codepoint >> 6   
    110    hi_quad_no = hi_codepoint >> 6
    111    lo_offset = lo_codepoint & 0x3F
    112    hi_offset = hi_codepoint & 0x3F
     117def make_range_set(lo_codepoint, hi_codepoint, log2_quad_bits = default_log_2_quad_bits):
     118   e = UCset(log2_quad_bits)
     119   lo_quad_no = lo_codepoint >> e.log2_quad_bits   
     120   hi_quad_no = hi_codepoint >> e.log2_quad_bits
     121   lo_offset = lo_codepoint & e.mod_quad_bit_mask
     122   hi_offset = hi_codepoint & e.mod_quad_bit_mask
    113123   if lo_quad_no > 0:  e.append_run(Empty, lo_quad_no)
    114124   if lo_quad_no == hi_quad_no:
    115       quad = (FullQuad << lo_offset) & (FullQuad >> (63 - hi_offset))
     125      quad = (e.FullQuadMask << lo_offset) & (e.FullQuadMask >> (e.quad_bits - 1 - hi_offset))
    116126      e.append_quad(quad)
    117127   else:
    118       e.append_quad((FullQuad << lo_offset) & FullQuad)
     128      e.append_quad((e.FullQuadMask << lo_offset) & e.FullQuadMask)
    119129      e.append_run(Full, hi_quad_no - (lo_quad_no + 1))
    120       e.append_quad((FullQuad >> (63 - hi_offset)) & FullQuad)
    121    if hi_quad_no < UnicodeQuadCount - 1: e.append_run(Empty, UnicodeQuadCount - (hi_quad_no + 1))
     130      e.append_quad((e.FullQuadMask >> (e.quad_bits - 1 - hi_offset)) & e.FullQuadMask)
     131   if hi_quad_no < e.UnicodeQuadCount - 1: e.append_run(Empty, e.UnicodeQuadCount - (hi_quad_no + 1))
    122132   return e
    123133
    124134
    125135def complement (s):
    126    assert s.quad_count == UnicodeQuadCount
    127    iset = UCset()
     136   assert s.quad_count == s.UnicodeQuadCount
     137   iset = UCset(s.log2_quad_bits)
    128138   R = s.runs
    129139   Q = s.quads
     
    135145         iset.append_run(Empty, n)
    136146      else:
    137          iset.append_mixed_run(n, [FullQuad ^ q for q in Q[0:n]])
     147         iset.append_mixed_run(n, [s.FullQuadMask ^ q for q in Q[0:n]])
    138148         Q = Q[n:]
    139149      R = advance_run_list(R, n)
     
    141151
    142152def intersect (s1, s2):
    143    assert s1.quad_count == UnicodeQuadCount
    144    assert s2.quad_count == UnicodeQuadCount
     153   assert s1.quad_count == s1.UnicodeQuadCount
     154   assert s2.quad_count == s1.UnicodeQuadCount
    145155   iset = UCset()
    146156   r1 = s1.runs
     
    172182
    173183def union (s1, s2):
    174    assert s1.quad_count == UnicodeQuadCount
    175    assert s2.quad_count == UnicodeQuadCount
     184   assert s1.quad_count == s1.UnicodeQuadCount
     185   assert s2.quad_count == s1.UnicodeQuadCount
    176186   iset = UCset()
    177187   r1 = s1.runs
     
    203213
    204214def difference (s1, s2):
    205    assert s1.quad_count == UnicodeQuadCount
    206    assert s2.quad_count == UnicodeQuadCount
     215   assert s1.quad_count == s1.UnicodeQuadCount
     216   assert s2.quad_count == s1.UnicodeQuadCount
    207217   iset = UCset()
    208218   r1 = s1.runs
     
    222232         q1 = q1[n:]
    223233      elif s1_type == Full:
    224          iset.append_mixed_run(n, [FullQuad ^ q for q in q2[0:n]])
     234         iset.append_mixed_run(n, [s1.FullQuadMask ^ q for q in q2[0:n]])
    225235         q2 = q2[n:]
    226236      else: # both s1 and s2 have mixed blocks; form block-by-block difference
     
    235245
    236246def symmetric_difference (s1, s2):
    237    assert s1.quad_count == UnicodeQuadCount
    238    assert s2.quad_count == UnicodeQuadCount
     247   assert s1.quad_count == s1.UnicodeQuadCount
     248   assert s2.quad_count == s1.UnicodeQuadCount
    239249   iset = UCset()
    240250   r1 = s1.runs
     
    257267         q1 = q1[n:]
    258268      elif s1_type == Full:
    259          iset.append_mixed_run(n, [FullQuad ^ q for q in q2[0:n]])
     269         iset.append_mixed_run(n, [s1.FullQuadMask ^ q for q in q2[0:n]])
    260270         q2 = q2[n:]
    261271      elif s2_type == Full:
    262          iset.append_mixed_run(n, [FullQuad ^ q for q in q1[0:n]])
     272         iset.append_mixed_run(n, [s1.FullQuadMask ^ q for q in q1[0:n]])
    263273         q1 = q1[n:]
    264274      else: # both s1 and s2 have mixed blocks; form block-by-block symmetric difference
Note: See TracChangeset for help on using the changeset viewer.