Ignore:
Timestamp:
Nov 28, 2017, 1:48:14 AM (20 months ago)
Author:
nmedfort
Message:

updated UCD python scripts

File:
1 edited

Legend:

Unmodified
Added
Removed
  • icGREP/icgrep-devel/UCD-scripts/unicode_set.py

    r5653 r5749  
    77#
    88# Licensed under Open Software License 3.0.
    9 import re, cformat
     9import cformat
     10import re
     11
    1012#
    1113# Unicode Sparse Bitset Representation
     
    2830quad_bits = 1 << log2_quad_bits
    2931mod_quad_bit_mask = quad_bits - 1
    30 UnicodeQuadCount = int(0x110000 / quad_bits) # 2**log2_quad_bits codepoints per quad
    31 FullQuadMask = (1<<(quad_bits)) - 1
     32UnicodeQuadCount = int(0x110000 / quad_bits)  # 2**log2_quad_bits codepoints per quad
     33FullQuadMask = (1 << (quad_bits)) - 1
    3234run_bytes = 4
    3335
    3436
    3537class UCset:
    36    def __init__(self):
    37       self.runs = []
    38       self.quads = []
    39       self.quad_count = 0
    40      
    41    # internal methods
    42    def append_run(self, runtype, runlength):
    43       if runlength == 0: return
    44       if self.runs == []:  self.runs = [(runtype, runlength)]
    45       else:
    46          (lastruntype, lastrunlength) = self.runs[-1]
    47          if lastruntype == runtype:  self.runs[-1] = (runtype, lastrunlength + runlength)
    48          else: self.runs.append((runtype, runlength))
    49       self.quad_count += runlength
    50    def append_mixed_run(self, n, quadlist):
    51       self.append_run(Mixed, n)
    52       self.quads += quadlist
    53    def append_quad(self, q):
    54       if q == 0:
    55         self.append_run(Empty, 1)
    56       elif q & FullQuadMask == FullQuadMask:
    57         self.append_run(Full, 1)
    58       else:
    59         self.append_run(Mixed, 1)
    60         self.quads.append(q)
    61 
    62    # printing
    63    def showC(self, indent = 4):
    64       hex_specifier =  "%%#0%ix" % (int(quad_bits/4) + 2)
    65       runtype = {-1:"Full", 0:"Empty", 1: "Mixed"}
    66       formatted_runs = ['{%s, %i}' % (runtype[r[0]], r[1]) for r in self.runs]
    67       formatted_quads = [hex_specifier % q for q in self.quads]
    68       setrep = (" " * indent) + "{{"
    69       setrep += cformat.multiline_fill(formatted_runs, ',', indent+2)
    70       setrep += '},\n'
    71       setrep += (" " * indent) + " {"
    72       setrep += cformat.multiline_fill(formatted_quads, ',', indent+2)
    73       setrep += '}}'
    74       return setrep
    75 
    76    def bytes(self):
    77        return (len(self.runs) * run_bytes) + (len(self.quads) * int(quad_bits/8))
     38    def __init__(self):
     39        self.runs = []
     40        self.quads = []
     41
     42    # internal methods
     43    def append_run(self, runtype, runlength):
     44        if runlength == 0: return
     45        if self.runs == []:
     46            self.runs = [(runtype, runlength)]
     47        else:
     48            (lastruntype, lastrunlength) = self.runs[-1]
     49            if lastruntype == runtype:
     50                self.runs[-1] = (runtype, lastrunlength + runlength)
     51            else:
     52                self.runs.append((runtype, runlength))
     53
     54    def append_quad(self, q):
     55        if q == 0:
     56            self.append_run(Empty, 1)
     57        elif (q & FullQuadMask) == FullQuadMask:
     58            self.append_run(Full, 1)
     59        else:
     60            self.append_run(Mixed, 1)
     61            self.quads.append(q)
     62
     63    # printing
     64    def generate(self, propertyName, indent=4):
     65        hex_specifier = "%%#0%ix" % (int(quad_bits / 4) + 2)
     66        runtype = {-1: "Full", 0: "Empty", 1: "Mixed"}
     67
     68        str = "\n" + (" " * indent) + "namespace {\n" + \
     69              (" " * indent) + "const static UnicodeSet::run_t __%s_runs[] = {\n" % propertyName + \
     70              (" " * indent) + cformat.multiline_fill(['{%s, %i}' % (runtype[r[0]], r[1]) for r in self.runs], ',',
     71                                                      indent) + \
     72              "};\n"
     73
     74        if len(self.quads) == 0:
     75            str += (" " * indent) + "const static UnicodeSet::bitquad_t * const __%s_quads = nullptr;\n" % propertyName
     76        else:
     77            str += (" " * indent) + "const static UnicodeSet::bitquad_t  __%s_quads[] = {\n" % propertyName + \
     78                   (" " * indent) + cformat.multiline_fill([hex_specifier % q for q in self.quads], ',', indent) + \
     79                   "};\n"
     80
     81        # Despite being const_cast below, neither runs nor quads will be modified by the UnicodeSet. If any
     82        # modifications are made, they first test the run/quad capacity, observe that it is 0,
     83        # and allocate heap memory to make any changes
     84
     85        str += (" " * indent) + "}\n\n" + \
     86               (" " * indent) + \
     87               "const static UnicodeSet %s{const_cast<UnicodeSet::run_t *>(__%s_runs), %i, 0, " \
     88               "const_cast<UnicodeSet::bitquad_t *>(__%s_quads), %i, 0};\n\n" \
     89               % (propertyName, propertyName, len(self.runs), propertyName, len(self.quads))
     90
     91        return str
     92
     93    def bytes(self):
     94        return (len(self.runs) * run_bytes) + (len(self.quads) * int(quad_bits / 8))
    7895
    7996
     
    8299#
    83100def empty_uset():
    84    e = UCset()
    85    e.runs = [(Empty, UnicodeQuadCount)]
    86    e.quads = []
    87    e.quad_count = UnicodeQuadCount
    88    return e
     101    e = UCset()
     102    e.runs = [(Empty, UnicodeQuadCount)]
     103    e.quads = []
     104    return e
     105
    89106
    90107def singleton_uset(codepoint):
    91    e = UCset()
    92    quad_no = codepoint >> log2_quad_bits
    93    quad_val = 1 << (codepoint & mod_quad_bit_mask)
    94    if quad_no > 0: e.append_run(Empty, quad_no)
    95    e.append_run(Mixed, 1)
    96    e.quads = [quad_val]
    97    if quad_no < UnicodeQuadCount - 1: e.append_run(Empty, UnicodeQuadCount - (quad_no + 1))
    98    e.quad_count = UnicodeQuadCount
    99    return e
     108    e = UCset()
     109    quad_no = codepoint >> log2_quad_bits
     110    quad_val = 1 << (codepoint & mod_quad_bit_mask)
     111    if quad_no > 0: e.append_run(Empty, quad_no)
     112    e.append_run(Mixed, 1)
     113    e.quads = [quad_val]
     114    if quad_no < UnicodeQuadCount - 1:
     115        e.append_run(Empty, UnicodeQuadCount - (quad_no + 1))
     116    return e
     117
    100118
    101119def range_uset(lo_codepoint, hi_codepoint):
    102    e = UCset()
    103    lo_quad_no = lo_codepoint >> log2_quad_bits   
    104    hi_quad_no = hi_codepoint >> log2_quad_bits
    105    lo_offset = lo_codepoint & mod_quad_bit_mask
    106    hi_offset = hi_codepoint & mod_quad_bit_mask
    107    if lo_quad_no > 0:  e.append_run(Empty, lo_quad_no)
    108    if lo_quad_no == hi_quad_no:
    109       quad = (FullQuadMask << lo_offset) & (FullQuadMask >> (quad_bits - 1 - hi_offset))
    110       e.append_quad(quad)
    111    else:
    112       e.append_quad((FullQuadMask << lo_offset) & FullQuadMask)
    113       e.append_run(Full, hi_quad_no - (lo_quad_no + 1))
    114       e.append_quad((FullQuadMask >> (quad_bits - 1 - hi_offset)) & FullQuadMask)
    115    if hi_quad_no < UnicodeQuadCount - 1: e.append_run(Empty, UnicodeQuadCount - (hi_quad_no + 1))
    116    return e
     120    e = UCset()
     121    lo_quad_no = lo_codepoint >> log2_quad_bits
     122    hi_quad_no = hi_codepoint >> log2_quad_bits
     123    lo_offset = lo_codepoint & mod_quad_bit_mask
     124    hi_offset = hi_codepoint & mod_quad_bit_mask
     125    if lo_quad_no > 0:  e.append_run(Empty, lo_quad_no)
     126    if lo_quad_no == hi_quad_no:
     127        quad = (FullQuadMask << lo_offset) & (FullQuadMask >> (quad_bits - 1 - hi_offset))
     128        e.append_quad(quad)
     129    else:
     130        e.append_quad((FullQuadMask << lo_offset) & FullQuadMask)
     131        e.append_run(Full, hi_quad_no - (lo_quad_no + 1))
     132        e.append_quad((FullQuadMask >> (quad_bits - 1 - hi_offset)) & FullQuadMask)
     133    if hi_quad_no < UnicodeQuadCount - 1:
     134        e.append_run(Empty, UnicodeQuadCount - (hi_quad_no + 1))
     135    return e
    117136
    118137
     
    123142        self.offset = 0
    124143        self.quad_no = 0
     144
    125145    def at_end(self):
    126146        return self.run_no == len(self.uSet.runs)
     147
    127148    def current_run(self):
    128149        (this_run_type, this_run_length) = self.uSet.runs[self.run_no]
    129150        return (this_run_type, this_run_length - self.offset)
     151
    130152    def get_quad(self):
    131153        (this_run_type, this_run_length) = self.uSet.runs[self.run_no]
    132         if this_run_type == Empty: return 0
    133         elif this_run_type == Full: return FullQuadMask
    134         else: return self.uSet.quads[self.quad_no]
     154        if this_run_type == Empty:
     155            return 0
     156        elif this_run_type == Full:
     157            return FullQuadMask
     158        else:
     159            return self.uSet.quads[self.quad_no]
     160
    135161    def advance(self, n):
    136162        while n > 0:
    137            (this_run_type, this_run_length) = self.uSet.runs[self.run_no]
    138            remain = this_run_length - self.offset
    139            if remain > n:
    140                self.offset += n
    141                if this_run_type == Mixed: self.quad_no += n
    142                n = 0
    143            elif remain == n:
    144                self.run_no += 1
    145                self.offset = 0
    146                if this_run_type == Mixed: self.quad_no += n
    147                n = 0
    148            else:
    149                self.run_no += 1
    150                self.offset = 0
    151                if this_run_type == Mixed: self.quad_no += remain
    152                n -= remain
     163            (this_run_type, this_run_length) = self.uSet.runs[self.run_no]
     164            remain = this_run_length - self.offset
     165            if remain > n:
     166                self.offset += n
     167                if this_run_type == Mixed: self.quad_no += n
     168                n = 0
     169            elif remain == n:
     170                self.run_no += 1
     171                self.offset = 0
     172                if this_run_type == Mixed: self.quad_no += n
     173                n = 0
     174            else:
     175                self.run_no += 1
     176                self.offset = 0
     177                if this_run_type == Mixed: self.quad_no += remain
     178                n -= remain
    153179
    154180
    155181def uset_member(s, codepoint):
    156    quad_no = int(codepoint / quad_bits)
    157    quad_val = 1 << (codepoint & mod_quad_bit_mask)
    158    it = Uset_Iterator(s)   
    159    it.advance(quad_no)
    160    return (it.get_quad() & quad_val) != 0
     182    quad_no = int(codepoint / quad_bits)
     183    quad_val = 1 << (codepoint & mod_quad_bit_mask)
     184    it = Uset_Iterator(s)
     185    it.advance(quad_no)
     186    return (it.get_quad() & quad_val) != 0
     187
    161188
    162189def uset_popcount(s):
     
    175202    return popcount
    176203
     204
    177205def popcount_quad(q):
    178206    c = 0
    179207    while q != 0:
    180         q = q & (q - 1) # clear low bit
     208        q = q & (q - 1)  # clear low bit
    181209        c += 1
    182210    return c
    183211
    184 def uset_complement (s):
    185    assert s.quad_count == UnicodeQuadCount
    186    iset = UCset()
    187    it = Uset_Iterator(s)
    188    while not it.at_end():
    189       (runtype, n) = it.current_run()
    190       if runtype == Empty:
    191          iset.append_run(Full, n)
    192          it.advance(n)
    193       elif runtype == Full:
    194          iset.append_run(Empty, n)
    195          it.advance(n)
    196       else:
    197          for i in range(n):
    198             iset.append_quad(FullQuadMask ^ it.get_quad())
    199             it.advance(1)
    200    return iset
    201 
    202 def uset_intersection (s1, s2):
    203    assert s1.quad_count == UnicodeQuadCount
    204    assert s2.quad_count == UnicodeQuadCount
    205    iset = UCset()
    206    i1 = Uset_Iterator(s1)
    207    i2 = Uset_Iterator(s2)
    208    while not i1.at_end():
    209       (s1_type, s1_length) = i1.current_run()
    210       (s2_type, s2_length) = i2.current_run()
    211       n = min(s1_length, s2_length)
    212       if s1_type == Empty or s2_type == Empty:
    213          iset.append_run(Empty, n)
    214          i1.advance(n)
    215          i2.advance(n)
    216       elif s1_type == Full and s2_type == Full:
    217          iset.append_run(Full, n)
    218          i1.advance(n)
    219          i2.advance(n)
    220       elif s1_type == Full:
    221          for i in range(n):
    222             iset.append_quad(i2.get_quad())
    223             i2.advance(1)
    224          i1.advance(n)
    225       elif s2_type == Full:
    226          for i in range(n):
    227             iset.append_quad(i1.get_quad())
    228             i1.advance(1)
    229          i2.advance(n)
    230       else: # both s1 and s2 have mixed blocks; form block-by-block intersection
    231          for i in range(n):
    232             iset.append_quad(i1.get_quad() & i2.get_quad())
    233             i1.advance(1)
    234             i2.advance(1)
    235    return iset
    236 
    237 def uset_union (s1, s2):
    238    assert s1.quad_count == UnicodeQuadCount
    239    assert s2.quad_count == UnicodeQuadCount
    240    iset = UCset()
    241    i1 = Uset_Iterator(s1)
    242    i2 = Uset_Iterator(s2)
    243    while not i1.at_end():
    244       (s1_type, s1_length) = i1.current_run()
    245       (s2_type, s2_length) = i2.current_run()
    246       n = min(s1_length, s2_length)
    247       if s1_type == Empty and s2_type == Empty:
    248          iset.append_run(Empty, n)
    249          i1.advance(n)
    250          i2.advance(n)
    251       elif s1_type == Full or s2_type == Full:
    252          iset.append_run(Full, n)
    253          i1.advance(n)
    254          i2.advance(n)
    255       elif s1_type == Empty:
    256          for i in range(n):
    257             iset.append_quad(i2.get_quad())
    258             i2.advance(1)
    259          i1.advance(n)
    260       elif s2_type == Empty:
    261          for i in range(n):
    262             iset.append_quad(i1.get_quad())
    263             i1.advance(1)
    264          i2.advance(n)
    265       else: # both s1 and s2 have mixed blocks; form block-by-block union
    266          for i in range(n):
    267             iset.append_quad(i1.get_quad() | i2.get_quad())
    268             i1.advance(1)
    269             i2.advance(1)
    270    return iset
    271 
    272 def uset_difference (s1, s2):
    273    assert s1.quad_count == UnicodeQuadCount
    274    assert s2.quad_count == UnicodeQuadCount
    275    iset = UCset()
    276    i1 = Uset_Iterator(s1)
    277    i2 = Uset_Iterator(s2)
    278    while not i1.at_end():
    279       (s1_type, s1_length) = i1.current_run()
    280       (s2_type, s2_length) = i2.current_run()
    281       n = min(s1_length, s2_length)
    282       if s1_type == Empty or s2_type == Full:
    283          iset.append_run(Empty, n)
    284          i1.advance(n)
    285          i2.advance(n)
    286       elif s1_type == Full and s2_type == Empty:
    287          iset.append_run(Full, n)
    288          i1.advance(n)
    289          i2.advance(n)
    290       elif s1_type == Full:
    291          for i in range(n):
    292             iset.append_quad(FullQuadMask ^ i2.get_quad())
    293             i2.advance(1)
    294          i1.advance(n)
    295       elif s2_type == Empty:
    296          for i in range(n):
    297             iset.append_quad(i1.get_quad())
    298             i1.advance(1)
    299          i2.advance(n)
    300       else: # both s1 and s2 have mixed blocks; form block-by-block union
    301          for i in range(n):
    302             iset.append_quad(i1.get_quad() &~ i2.get_quad())
    303             i1.advance(1)
    304             i2.advance(1)
    305    return iset
    306 
    307 def uset_symmetric_difference (s1, s2):
    308    assert s1.quad_count == UnicodeQuadCount
    309    assert s2.quad_count == UnicodeQuadCount
    310    iset = UCset()
    311    i1 = Uset_Iterator(s1)
    312    i2 = Uset_Iterator(s2)
    313    while not i1.at_end():
    314       (s1_type, s1_length) = i1.current_run()
    315       (s2_type, s2_length) = i2.current_run()
    316       n = min(s1_length, s2_length)
    317       if s1_type == Empty and s2_type == Full or s1_type == Full and s2_type == Empty:
    318          iset.append_run(Full, n)
    319          i1.advance(n)
    320          i2.advance(n)
    321       elif s1_type == Full and s2_type == Full or s1_type == Empty and s2_type == Empty:
    322          iset.append_run(Empty, n)
    323          i1.advance(n)
    324          i2.advance(n)
    325       elif s1_type == Empty:
    326          for i in range(n):
    327             iset.append_quad(i2.get_quad())
    328             i2.advance(1)
    329          i1.advance(n)
    330       elif s2_type == Empty:
    331          for i in range(n):
    332             iset.append_quad(i1.get_quad())
    333             i1.advance(1)
    334          i2.advance(n)
    335       elif s1_type == Full:
    336          for i in range(n):
    337             iset.append_quad(FullQuadMask ^ i2.get_quad())
    338             i2.advance(1)
    339          i1.advance(n)
    340       elif s2_type == Full:
    341          for i in range(n):
    342             iset.append_quad(FullQuadMask ^ i1.get_quad())
    343             i1.advance(1)
    344          i2.advance(n)
    345       else: # both s1 and s2 have mixed blocks; form block-by-block union
    346          for i in range(n):
    347             iset.append_quad(i1.get_quad() ^ i2.get_quad())
    348             i1.advance(1)
    349             i2.advance(1)
    350    return iset
     212
     213def uset_complement(s):
     214    iset = UCset()
     215    it = Uset_Iterator(s)
     216    while not it.at_end():
     217        (runtype, n) = it.current_run()
     218        if runtype == Empty:
     219            iset.append_run(Full, n)
     220            it.advance(n)
     221        elif runtype == Full:
     222            iset.append_run(Empty, n)
     223            it.advance(n)
     224        else:
     225            for i in range(n):
     226                iset.append_quad(FullQuadMask ^ it.get_quad())
     227                it.advance(1)
     228    return iset
     229
     230
     231def uset_intersection(s1, s2):
     232    iset = UCset()
     233    i1 = Uset_Iterator(s1)
     234    i2 = Uset_Iterator(s2)
     235    while not i1.at_end():
     236        (s1_type, s1_length) = i1.current_run()
     237        (s2_type, s2_length) = i2.current_run()
     238        n = min(s1_length, s2_length)
     239        if s1_type == Empty or s2_type == Empty:
     240            iset.append_run(Empty, n)
     241            i1.advance(n)
     242            i2.advance(n)
     243        elif s1_type == Full and s2_type == Full:
     244            iset.append_run(Full, n)
     245            i1.advance(n)
     246            i2.advance(n)
     247        elif s1_type == Full:
     248            for i in range(n):
     249                iset.append_quad(i2.get_quad())
     250                i2.advance(1)
     251            i1.advance(n)
     252        elif s2_type == Full:
     253            for i in range(n):
     254                iset.append_quad(i1.get_quad())
     255                i1.advance(1)
     256            i2.advance(n)
     257        else:  # both s1 and s2 have mixed blocks; form block-by-block intersection
     258            for i in range(n):
     259                iset.append_quad(i1.get_quad() & i2.get_quad())
     260                i1.advance(1)
     261                i2.advance(1)
     262    return iset
     263
     264
     265def uset_union(s1, s2):
     266    iset = UCset()
     267    i1 = Uset_Iterator(s1)
     268    i2 = Uset_Iterator(s2)
     269    while not i1.at_end():
     270        (s1_type, s1_length) = i1.current_run()
     271        (s2_type, s2_length) = i2.current_run()
     272        n = min(s1_length, s2_length)
     273        if s1_type == Empty and s2_type == Empty:
     274            iset.append_run(Empty, n)
     275            i1.advance(n)
     276            i2.advance(n)
     277        elif s1_type == Full or s2_type == Full:
     278            iset.append_run(Full, n)
     279            i1.advance(n)
     280            i2.advance(n)
     281        elif s1_type == Empty:
     282            for i in range(n):
     283                iset.append_quad(i2.get_quad())
     284                i2.advance(1)
     285            i1.advance(n)
     286        elif s2_type == Empty:
     287            for i in range(n):
     288                iset.append_quad(i1.get_quad())
     289                i1.advance(1)
     290            i2.advance(n)
     291        else:  # both s1 and s2 have mixed blocks; form block-by-block union
     292            for i in range(n):
     293                iset.append_quad(i1.get_quad() | i2.get_quad())
     294                i1.advance(1)
     295                i2.advance(1)
     296    return iset
     297
     298
     299def uset_difference(s1, s2):
     300    iset = UCset()
     301    i1 = Uset_Iterator(s1)
     302    i2 = Uset_Iterator(s2)
     303    while not i1.at_end():
     304        (s1_type, s1_length) = i1.current_run()
     305        (s2_type, s2_length) = i2.current_run()
     306        n = min(s1_length, s2_length)
     307        if s1_type == Empty or s2_type == Full:
     308            iset.append_run(Empty, n)
     309            i1.advance(n)
     310            i2.advance(n)
     311        elif s1_type == Full and s2_type == Empty:
     312            iset.append_run(Full, n)
     313            i1.advance(n)
     314            i2.advance(n)
     315        elif s1_type == Full:
     316            for i in range(n):
     317                iset.append_quad(FullQuadMask ^ i2.get_quad())
     318                i2.advance(1)
     319            i1.advance(n)
     320        elif s2_type == Empty:
     321            for i in range(n):
     322                iset.append_quad(i1.get_quad())
     323                i1.advance(1)
     324            i2.advance(n)
     325        else:  # both s1 and s2 have mixed blocks; form block-by-block difference
     326            for i in range(n):
     327                iset.append_quad(i1.get_quad() & ~ i2.get_quad())
     328                i1.advance(1)
     329                i2.advance(1)
     330    return iset
     331
     332
     333def uset_symmetric_difference(s1, s2):
     334    iset = UCset()
     335    i1 = Uset_Iterator(s1)
     336    i2 = Uset_Iterator(s2)
     337    while not i1.at_end():
     338        (s1_type, s1_length) = i1.current_run()
     339        (s2_type, s2_length) = i2.current_run()
     340        n = min(s1_length, s2_length)
     341        if s1_type == Empty and s2_type == Full or s1_type == Full and s2_type == Empty:
     342            iset.append_run(Full, n)
     343            i1.advance(n)
     344            i2.advance(n)
     345        elif s1_type == Full and s2_type == Full or s1_type == Empty and s2_type == Empty:
     346            iset.append_run(Empty, n)
     347            i1.advance(n)
     348            i2.advance(n)
     349        elif s1_type == Empty:
     350            for i in range(n):
     351                iset.append_quad(i2.get_quad())
     352                i2.advance(1)
     353            i1.advance(n)
     354        elif s2_type == Empty:
     355            for i in range(n):
     356                iset.append_quad(i1.get_quad())
     357                i1.advance(1)
     358            i2.advance(n)
     359        elif s1_type == Full:
     360            for i in range(n):
     361                iset.append_quad(FullQuadMask ^ i2.get_quad())
     362                i2.advance(1)
     363            i1.advance(n)
     364        elif s2_type == Full:
     365            for i in range(n):
     366                iset.append_quad(FullQuadMask ^ i1.get_quad())
     367                i1.advance(1)
     368            i2.advance(n)
     369        else:  # both s1 and s2 have mixed blocks; form block-by-block symmetric difference
     370            for i in range(n):
     371                iset.append_quad(i1.get_quad() ^ i2.get_quad())
     372                i1.advance(1)
     373                i2.advance(1)
     374    return iset
     375
    351376
    352377def uset_to_range_list(s):
     
    359384        (q_type, q_length) = i.current_run()
    360385        if q_type == Empty:
    361             if open_range: 
     386            if open_range:
    362387                rl.append((range_first, pos - 1))
    363388                open_range = False
     
    373398            q = i.get_quad()
    374399            qpos = pos
    375             for qpos in range(pos, pos+quad_bits):
     400            for qpos in range(pos, pos + quad_bits):
    376401                if q & 1 == 0:
    377402                    if open_range:
     
    390415    return rl
    391416
     417
    392418UCD_point_regexp = re.compile("^([0-9A-F]{4,6})\s+;")
    393419UCD_range_regexp = re.compile("^([0-9A-F]{4,6})[.][.]([0-9A-F]{4,6})\s+;")
    394 
    395 def parse_UCD_set(lines):
    396     pset = empty_set()
    397     for t in lines:
    398         m = UCD_point_regexp.match(t)
    399         if m:
    400             point = m.group(1)
    401             pval = int(point, 16)
    402             pset = union(pset, singleton_set(pval))
    403         m = UCD_range_regexp.match(t)
    404         if m:
    405             point1 = m.group(1)
    406             point2 = m.group(2)
    407             pval1 = int(point1, 16)
    408             pval2 = int(point2, 16)
    409             pset = union(pset, make_range_set(pval1, pval2))
    410     return pset
    411 
    412 def parse_UCD_file(fname, vname):
    413     f = open(fname)
    414     lines = f.readlines()
    415     f.close()
    416     s = parse_UCD_set(lines)
    417     print(s.showC(vname))
    418 
    419 
Note: See TracChangeset for help on using the changeset viewer.