source: proto/u16u8/u16u8_compilable2.py @ 3552

Last change on this file since 3552 was 601, checked in by cameron, 9 years ago

Specify individual output variables

File size: 4.7 KB
Line 
1#!/usr/bin/python
2#  u16u8_compilable2.py  - modified for current compiler as of Feb. 7
3#                        - comment out uncompilable features with #___
4#
5#  Python bit stream logic for u16u8
6#  Robert D. Cameron and Dan Lin
7#
8#  Feb. 7, 2010
9#
10#  Licensed to the public under the Open Software License 3.0.
11#  Licensed to International Characters, Inc., under the Academic
12#  Free License 3.0.
13#
14#___import bitutil
15
16#
17#
18#     Range              Code Point Bits           UTF-16              UTF-8           
19#                                                u16h     u16l      u8_pre   butlast  u8_last
20#  000000-00007F     00000 00000000 0tuvwxyz   00000000 0tuvwxyz                     0tuvwxyz
21#
22#  000080-0007FF     00000 00000pqr stuvwxyz   00000pqr stuvwxyz            110pqrst 10uvwxyz
23#
24#  000800-00FFFF     00000 jklmnpqr stuvwxyz   jklmnpqr stuvwxyz   1110jklm 10npqrst 10uvwxyz
25#
26#  010000-10FFFF     efghi jklmnpqr stuvwxyz   110110ab cdjklmnp            11110efg 10hijklm
27#                                              110111qr stuvwxyz            10npqrst 10uvwxyz
28#                                           where abcd + 1 = efghi
29
# main: bit-stream logic that turns UTF-16 code units into the bits of the
# corresponding UTF-8 bytes, following the encoding table above.
#
# Parameters:
#   u16h, u16l - indexed 0..7; the bits of the high and low byte of each UTF-16
#                code unit, index 0 being the most significant bit (the
#                11011xxx surrogate test below reads u16h[0]..u16h[4]).
#   mask       - not referenced anywhere in this function.
#   u16        - object that receives the classification results as attributes:
#                surrogate, hsurrogate, lsurrogate, utf8_3, utf8_2, ASCII, error.
#
# Returns a 25-tuple: u8_pre[0..7], u8_butlast[0..7], u8_last[0..7], u16.error.
#
# NOTE(review): u8_pre, u8_butlast, u8_last and bitutil are used but never bound
# here (their initialisers / import are commented out with the #___ marker), so
# this text presumably relies on the target compiler to supply them and would
# raise NameError if run as ordinary Python -- confirm before re-enabling them.
30def main(u16h, u16l, mask, u16):
31        #optimize(above_0x7FF,allzero)
32        #optimize(above_0x7F,allzero)
        # OR of the top five bits of the high byte: set where the unit is >= 0x0800.
33        above_0x7FF =  ((u16h[0] | u16h[1]) | (u16h[2] | u16h[3])) | u16h[4]
        # Also folds in u16h[5..7] and u16l[0]: set where the unit is >= 0x0080.
34        above_0x7F = ((above_0x7FF | u16h[5]) | (u16h[6] | u16h[7])) | u16l[0]
35
36        # Surrogate pairs have 0xD8 through 0xDF as the high UTF-16 byte: 11011xxx pattern
        # i.e. bits 0, 1, 3 and 4 set and bit 2 clear.
37        u16.surrogate = (u16h[0] & u16h[1]) & (u16h[3] &~ u16h[2]) & u16h[4]
38        # The first of a surrogate pair is in the 0xD8-0xDB range, the second in 0xDC-0xDF
        # so bit 5 of the high byte distinguishes high (0) from low (1) surrogates.
39        u16.hsurrogate = u16.surrogate &~ u16h[5]
40        u16.lsurrogate = u16.surrogate & u16h[5]
41        # If any of the high 5 bits are set, we are above the 07FF range
        # Surrogates are excluded from the 3-byte class; they form 4-byte sequences.
42        u16.utf8_3  = above_0x7FF &~ u16.surrogate
43        u16.utf8_2 = above_0x7F & ~above_0x7FF
44        u16.ASCII = ~above_0x7F
        # NOTE(review): bitutil.Advance is not defined in this file; presumably it
        # moves each high-surrogate mark onto the following position, so that the
        # XOR flags a high surrogate not followed by a low one, or a low surrogate
        # not preceded by a high one -- confirm against bitutil.
45        u16.error = bitutil.Advance(u16.hsurrogate) ^ u16.lsurrogate
46       
47#___    u8_last = [0,0,0,0,0,0,0,0]
48        # u8_last corresponds to (a) 1st byte of a 1-byte sequence (u8unibyte),
49        # (b) 2nd byte of a 2-byte sequence (u8scope22),
50        # (c) 3rd byte of a 3-byte sequence (u8scope33),
51        # (d) 2nd byte of a 4-byte sequence (u8scope42) (from UTF-16 high surrogate)
52        # (e) 4th byte of a 4-byte sequence (u8scope44) (from UTF-16 low surrogate)
53
54        # efghi = abcd + 1 calculation using || bit streams
55        # Applies only for high surrogate position: u16.hsurrogate = 1
        # In a high surrogate (110110ab cdjklmnp): a = u16h[6], b = u16h[7],
        # c = u16l[0], d = u16l[1].  Ripple-carry increment from the low bit up:
        # i = ~d; h = c ^ d; each carry is written as "x &~ sum", which equals
        # x & carry_in (e.g. hcarry = c & d).  e is the final carry out of a.
56        efghi_i = ~u16l[1] 
57        efghi_h = u16l[0] ^ u16l[1]
58        hcarry = u16l[0] &~ efghi_h
59        efghi_g = u16h[7] ^ hcarry
60        gcarry = u16h[7] &~ efghi_g
61        efghi_f = u16h[6] ^ gcarry
62        efghi_e = u16h[6] &~ efghi_f
63       
        # Bits 0-1: every non-ASCII last byte has the form 10xxxxxx; an ASCII byte
        # is 0tuvwxyz, so bit 1 carries t (u16l[1]) only at ASCII positions.
64        u8_last[0] = ~u16.ASCII
65        u8_last[1] = u16.ASCII & u16l[1]
66#       u8_last[2] = if u16.hsurrogate : efghi_h else: u16l[2]
        # Bits 2-7: normally u16l[2..7]; at a high surrogate the byte is 10hijklm,
        # so h, i and u16l[2..5] (j k l m) are selected instead.  & binds tighter
        # than |, so each line reads (x &~ hsurrogate) | (y & hsurrogate).
67        u8_last[2] = u16l[2] &~ u16.hsurrogate | efghi_h & u16.hsurrogate
68        u8_last[3] = u16l[3] &~ u16.hsurrogate | efghi_i & u16.hsurrogate
69        u8_last[4] = u16l[4] &~ u16.hsurrogate | u16l[2] & u16.hsurrogate
70        u8_last[5] = u16l[5] &~ u16.hsurrogate | u16l[3] & u16.hsurrogate
71        u8_last[6] = u16l[6] &~ u16.hsurrogate | u16l[4] & u16.hsurrogate
72        u8_last[7] = u16l[7] &~ u16.hsurrogate | u16l[5] & u16.hsurrogate
73
74#___    u8_butlast = [0,0,0,0,0,0,0,0]
75        # u8_butlast corresponds to (a) 1st byte of a 2-byte sequence (u8prefix2),
76        # (b) 2nd byte of a 3-byte sequence (u8scope32),
77        # (c) 1st byte of a 4-byte sequence (u8prefix4) (from UTF-16 high surrogate)
78        # (d) 3rd byte of a 4-byte sequence (u8scope43) (from UTF-16 low surrogate)
79
80        u8_2or3 =  u16.utf8_2 | u16.utf8_3
        # Bit 0 is set at every non-ASCII position; bit 1 only for the lead bytes
        # 110pqrst (2-byte) and 11110efg (high surrogate).
81        u8_butlast[0] = ~u16.ASCII
82        u8_butlast[1] = u16.utf8_2 | u16.hsurrogate
        # Bits 2-3: 11 for 11110efg; n (u16h[4]) and p (u16h[5]) for 10npqrst of a
        # 3-byte sequence; 0 and p for 110pqrst.  At a low surrogate n and p come
        # from u16l[6] and u16l[7] via bitutil.Advance -- presumably the bits of
        # the preceding high surrogate; confirm against bitutil.
83        u8_butlast[2] = u16.hsurrogate | (u16.utf8_3 & u16h[4]) | (u16.lsurrogate & bitutil.Advance(u16l[6]))
84        u8_butlast[3] = u16.hsurrogate | (u8_2or3 & u16h[5]) | (u16.lsurrogate & bitutil.Advance(u16l[7]))
        # Bits 4-7: q r s t (u16h[6], u16h[7], u16l[0], u16l[1]) everywhere except
        # ASCII and high surrogate positions; a high surrogate gets 0 e f g.
85        u8_butlast[4] = u16h[6] &~ (u16.hsurrogate | u16.ASCII)
86        u8_butlast[5] = u16h[7] &~ (u16.hsurrogate | u16.ASCII) | efghi_e & u16.hsurrogate
87        u8_butlast[6] = u16l[0] &~ (u16.hsurrogate | u16.ASCII) | efghi_f & u16.hsurrogate
88        u8_butlast[7] = u16l[1] &~ (u16.hsurrogate | u16.ASCII) | efghi_g & u16.hsurrogate
89
90#___    u8_pre = [0,0,0,0,0,0,0,0]
        # u8_pre is the 1110jklm lead byte of a 3-byte sequence; every bit is
        # gated by u16.utf8_3, so the byte is all zero at other positions.
91        u8_pre[0] = u16.utf8_3
92        u8_pre[1] = u16.utf8_3
93        u8_pre[2] = u16.utf8_3
94        u8_pre[3] = 0
95        u8_pre[4] = u16.utf8_3 & u16h[0]
96        u8_pre[5] = u16.utf8_3 & u16h[1]
97        u8_pre[6] = u16.utf8_3 & u16h[2]
98        u8_pre[7] = u16.utf8_3 & u16h[3]
99       
        # Every output bit stream is listed individually, followed by u16.error.
100        return (u8_pre[0],u8_pre[1],u8_pre[2],u8_pre[3],u8_pre[4],u8_pre[5],u8_pre[6], u8_pre[7],  \
101                u8_butlast[0], u8_butlast[1], u8_butlast[2], u8_butlast[3],
102                u8_butlast[4],u8_butlast[5],u8_butlast[6], u8_butlast[7], \
103                u8_last[0],u8_last[1],u8_last[2],u8_last[3],u8_last[4],u8_last[5],u8_last[6], u8_last[7],
104                u16.error)
105
Note: See TracBrowser for help on using the repository browser.