Changeset 3959


Ignore:
Timestamp:
Aug 4, 2014, 3:09:25 PM (5 years ago)
Author:
cameron
Message:

Divide up subclass processing by UTF-8 length

Location:
proto/charsetcompiler
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • proto/charsetcompiler/CC_compiler.py

    r3952 r3959  
    151 151                diff_count += 1
    152 152                diff_bits >>= 1
    153                if n2 < n1 or diff_count > self.mEncoding.bits: raise Exception("Bad range: (%i, %i) % n1, n2.") 
        153            if n2 < n1 or diff_count > self.mEncoding.bits: raise Exception("Bad range: (%x, %x)." % (n1, n2))
    154 154            mask = 2**(diff_count) - 1
    155 155            #common = make_bit_test(n1 >> diff_count, 8 - diff_count)
  • proto/charsetcompiler/unicode_category_compiler.py

    r3954 r3959  
    208 208     subcc1 = rangeIntersect(charClassMap[k], lo, hi)
    209 209     CC_var = "CC_%s_%x_%x" % (k, lo, hi)
    210         cgo.chardef2py(CanonicalCharSetDef(CC_var, subcc1))
        210     # Divide by UTF-8 length
        211     for byte_range in [(0, 0x7F), (0x80, 0x7FF), (0x800, 0xFFFF), (0x10000, 0x10FFFF)]:
        212        subcc2 = rangeIntersect(charClassMap[k], byte_range[0], byte_range[1])
        213        ulen = utf8_length(byte_range[0])
        214        for subrange in subcc2:
        215           matched_sequence_compiler(cgo, subrange[0], subrange[1], 1, ulen, CC_var)
    211 216     cgo.add_assignment("struct_%s.cc" % k, cgo.expr2py(make_or(Var("struct_%s.cc" % k), Var(CC_var))))
    212 217
Note: See TracChangeset for help on using the changeset viewer.