Changeset 4178 for proto


Ignore:
Timestamp:
Sep 19, 2014, 9:35:09 AM (4 years ago)
Author:
cameron
Message:

Restructure for compatibility with C++ version; uset_member

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/charsetcompiler/UCD/unicode_set.py

    r4177 r4178  
    2424Mixed = 1
    2525
    26 default_log_2_quad_bits = 6
     26default_log2_quad_bits = 5
     27
     28log2_quad_bits = default_log2_quad_bits
     29quad_bits = 1 << log2_quad_bits
     30mod_quad_bit_mask = quad_bits - 1
     31UnicodeQuadCount = 0x110000 / quad_bits #  2**log2_quad_bits codepoints per quad
     32FullQuadMask = (1<<(quad_bits)) - 1
     33run_bytes = 2
     34
    2735
    2836class UCset:
    29    def __init__(self, log2_quad_bits = default_log_2_quad_bits):
     37   def __init__(self):
    3038      self.runs = []
    3139      self.quads = []
    3240      self.quad_count = 0
    33       self.run_bytes = 2
    34       self.log2_quad_bits = log2_quad_bits
    35       self.quad_bits = 1 << log2_quad_bits
    36       self.mod_quad_bit_mask = self.quad_bits - 1
    37       self.UnicodeQuadCount = 0x110000 / self.quad_bits #  2**log2_quad_bits codepoints per quad
    38       self.FullQuadMask = (1<<(self.quad_bits)) - 1
    3941     
    4042   # internal methods
     
    5355      if q == 0:
    5456        self.append_run(Empty, 1)
    55       elif q & self.FullQuadMask == self.FullQuadMask:
     57      elif q & FullQuadMask == FullQuadMask:
    5658        self.append_run(Full, 1)
    5759      else:
     
    6163   # printing
    6264   def showC(self, name, indent = 8, entries_per_line = 4):
    63       hex_specifier =  "%%#0%ix" % (self.quad_bits/4 + 2)
     65      hex_specifier =  "%%#0%ix" % (quad_bits/4 + 2)
    6466      runtype = {-1:"Full", 0:"Empty", 1: "Mixed"}
    6567      setrep = (" " * indent) + ("%s.runs = {" % name)
     
    8385
    8486   def bytes(self):
    85        return (len(self.runs) * self.run_bytes) + (len(self.quads) * self.quad_bits/8)
     87       return (len(self.runs) * run_bytes) + (len(self.quads) * quad_bits/8)
    8688
    8789
     
    8991# Set Operations
    9092#
    91 def empty_set(log2_quad_bits = default_log_2_quad_bits):
    92    e = UCset(log2_quad_bits)
    93    e.runs = [(Empty, e.UnicodeQuadCount)]
     93def empty_uset():
     94   e = UCset()
     95   e.runs = [(Empty, UnicodeQuadCount)]
    9496   e.quads = []
    95    e.quad_count = e.UnicodeQuadCount
     97   e.quad_count = UnicodeQuadCount
    9698   return e
    9799
    98 def singleton_set(codepoint, log2_quad_bits = default_log_2_quad_bits):
    99    e = UCset(log2_quad_bits)
     100def singleton_uset(codepoint):
     101   e = UCset()
    100102   quad_no = codepoint >> log2_quad_bits
    101    quad_val = 1 << (codepoint & e.mod_quad_bit_mask)
     103   quad_val = 1 << (codepoint & mod_quad_bit_mask)
    102104   if quad_no > 0: e.append_run(Empty, quad_no)
    103105   e.append_run(Mixed, 1)
    104106   e.quads = [quad_val]
    105    if quad_no < e.UnicodeQuadCount - 1: e.append_run(Empty, e.UnicodeQuadCount - (quad_no + 1))
    106    e.quad_count = e.UnicodeQuadCount
     107   if quad_no < UnicodeQuadCount - 1: e.append_run(Empty, UnicodeQuadCount - (quad_no + 1))
     108   e.quad_count = UnicodeQuadCount
    107109   return e
    108110
    109 def make_range_set(lo_codepoint, hi_codepoint, log2_quad_bits = default_log_2_quad_bits):
    110    e = UCset(log2_quad_bits)
    111    lo_quad_no = lo_codepoint >> e.log2_quad_bits   
    112    hi_quad_no = hi_codepoint >> e.log2_quad_bits
    113    lo_offset = lo_codepoint & e.mod_quad_bit_mask
    114    hi_offset = hi_codepoint & e.mod_quad_bit_mask
     111def range_uset(lo_codepoint, hi_codepoint):
     112   e = UCset()
     113   lo_quad_no = lo_codepoint >> log2_quad_bits   
     114   hi_quad_no = hi_codepoint >> log2_quad_bits
     115   lo_offset = lo_codepoint & mod_quad_bit_mask
     116   hi_offset = hi_codepoint & mod_quad_bit_mask
    115117   if lo_quad_no > 0:  e.append_run(Empty, lo_quad_no)
    116118   if lo_quad_no == hi_quad_no:
    117       quad = (e.FullQuadMask << lo_offset) & (e.FullQuadMask >> (e.quad_bits - 1 - hi_offset))
     119      quad = (FullQuadMask << lo_offset) & (FullQuadMask >> (quad_bits - 1 - hi_offset))
    118120      e.append_quad(quad)
    119121   else:
    120       e.append_quad((e.FullQuadMask << lo_offset) & e.FullQuadMask)
     122      e.append_quad((FullQuadMask << lo_offset) & FullQuadMask)
    121123      e.append_run(Full, hi_quad_no - (lo_quad_no + 1))
    122       e.append_quad((e.FullQuadMask >> (e.quad_bits - 1 - hi_offset)) & e.FullQuadMask)
    123    if hi_quad_no < e.UnicodeQuadCount - 1: e.append_run(Empty, e.UnicodeQuadCount - (hi_quad_no + 1))
     124      e.append_quad((FullQuadMask >> (quad_bits - 1 - hi_offset)) & FullQuadMask)
     125   if hi_quad_no < UnicodeQuadCount - 1: e.append_run(Empty, UnicodeQuadCount - (hi_quad_no + 1))
    124126   return e
    125127
     
    159161               if this_run_type == Mixed: self.quad_no += remain
    160162               n -= remain
     163
     164
     165def uset_member(s, codepoint):
     166   quad_no = codepoint / quad_bits
     167   quad_val = 1 << (codepoint & mod_quad_bit_mask)
     168   it = Uset_Iterator(s)   
     169   it.advance(quad_no)
     170   return (it.get_quad() & quad_val) != 0
    161171 
    162 def complement (s):
    163    assert s.quad_count == s.UnicodeQuadCount
    164    iset = UCset(s.log2_quad_bits)
     172def uset_complement (s):
     173   assert s.quad_count == UnicodeQuadCount
     174   iset = UCset()
    165175   it = Uset_Iterator(s)
    166    R = s.runs
    167    Q = s.quads
    168176   while not it.at_end():
    169177      (runtype, n) = it.current_run()
     
    181189
    182190
    183 def intersect (s1, s2):
    184    assert s1.quad_count == s1.UnicodeQuadCount
    185    assert s2.quad_count == s1.UnicodeQuadCount
     191def uset_intersection (s1, s2):
     192   assert s1.quad_count == UnicodeQuadCount
     193   assert s2.quad_count == UnicodeQuadCount
    186194   iset = UCset()
    187195   i1 = Uset_Iterator(s1)
     
    216224   return iset
    217225
    218 def union (s1, s2):
    219    assert s1.quad_count == s1.UnicodeQuadCount
    220    assert s2.quad_count == s1.UnicodeQuadCount
     226def uset_union (s1, s2):
     227   assert s1.quad_count == UnicodeQuadCount
     228   assert s2.quad_count == UnicodeQuadCount
    221229   iset = UCset()
    222230   i1 = Uset_Iterator(s1)
     
    251259   return iset
    252260
    253 def difference (s1, s2):
    254    assert s1.quad_count == s1.UnicodeQuadCount
    255    assert s2.quad_count == s1.UnicodeQuadCount
     261def uset_difference (s1, s2):
     262   assert s1.quad_count == UnicodeQuadCount
     263   assert s2.quad_count == UnicodeQuadCount
    256264   iset = UCset()
    257265   i1 = Uset_Iterator(s1)
     
    286294   return iset
    287295
    288 def symmetric_difference (s1, s2):
    289    assert s1.quad_count == s1.UnicodeQuadCount
    290    assert s2.quad_count == s1.UnicodeQuadCount
     296def uset_symmetric_difference (s1, s2):
     297   assert s1.quad_count == UnicodeQuadCount
     298   assert s2.quad_count == UnicodeQuadCount
    291299   iset = UCset()
    292300   i1 = Uset_Iterator(s1)
     
    298306      if s1_type == Empty and s2_type == Full or s1_type == Full and s2_type == Empty:
    299307         iset.append_run(Full, n)
     308         i1.advance(n)
     309         i2.advance(n)
    300310      elif s1_type == Full and s2_type == Full or s1_type == Empty and s2_type == Empty:
    301311         iset.append_run(Empty, n)
     312         i1.advance(n)
     313         i2.advance(n)
    302314      elif s1_type == Empty:
    303315         for i in range(n):
Note: See TracChangeset for help on using the changeset viewer.