Ignore:
Timestamp:
Aug 13, 2013, 6:48:04 AM (6 years ago)
Author:
cameron
Message:

fix

File:
1 edited

Legend:

Unmodified
Added
Removed
  • proto/RE/doc/UTF8_class.py

    r3250 r3433  
     9  9 #
    10 10 # Definitions for debugging/prototyping
    11    def ByteClassCompiler(lbyte): return "%x" % lbyte
    12    def ByteRangeCompiler(lbyte, hbyte): return "[%x-%x]" % (lbyte, hbyte)
       11 def ByteClassCompiler(lbyte): return "\\x%x" % lbyte
       12 def ByteRangeCompiler(lbyte, hbyte): return "[\\x%x-\\x%x]" % (lbyte, hbyte)
    13 13 def make_or(e1, e2): return "(%s | %s)" % (e1, e2)
    14 14 def make_and(e1, e2): return "(%s & %s)" % (e1, e2)
     
    39 39    else: return 0x10FFFF
    40 40
    41    def UTF8_range_compiler(lo, hi):
    42       hlen = UTF8_length(hi)
    43       # If different length code unit sequences are involved, make
    44       # a union of equilength subranges.
    45       if hlen > UTF8_length(lo):
    46         m = max_codepoint_of_length(hlen - 1)
    47         return make_or(UTF8_range_compiler(lo, m), UTF8_range_compiler(m+1, hi))
    48       #
    49       return matched_sequence_compiler(lo, hi, 1, hlen)
    50 41
    51 42 def matched_sequence_compiler(lo, hi, n, hlen):
       43    """ Helper function to generate the code necessary to match bytes
       44        n through hlen (1-based indexing) of the range of UTF-8 sequences
       45        for codepoints lo through hi. """
    52 46    hbyte = UTF8_byte(hi, n)
    53 47    lbyte = UTF8_byte(lo, n)
     
    79 73    return make_shift_forward(ByteRangeCompiler(lbyte, hbyte), hlen - n)
    80 74
       75 def UTF8_range_compiler(lo, hi):
       76    hlen = UTF8_length(hi)
       77    # If different length code unit sequences are involved, make
       78    # a union of equilength subranges.
       79    if hlen > UTF8_length(lo):
       80      m = max_codepoint_of_length(hlen - 1)
       81      return make_or(UTF8_range_compiler(lo, m), UTF8_range_compiler(m+1, hi))
       82    #
       83    return matched_sequence_compiler(lo, hi, 1, hlen)
    81 84
       85
Note: See TracChangeset for help on using the changeset viewer.