source: proto/u16u8/u16u8.py @ 361

Last change on this file since 361 was 358, checked in by cameron, 9 years ago

u16u8 prototype

File size: 2.7 KB
RevLine 
[358]1#!/usr/bin/python
2#  u16u8.py
3#
4#  Python prototype implementation
5#  Robert D. Cameron and Dan Lin
6#
7#  Feb. 7, 2010
8#
9#  Licensed to the public under the Open Software License 3.0.
10#  Licensed to International Characters, Inc., under the Academic
11#  Free License 3.0.
12#
13#  This file (u16u8.py) is the driver.  It imports two components
14#  (a) a library file of general functions to support parallel
15#      bit stream programming
16#  (b) a custom file of compilable bitstream logic for u16u8.
17#      This file contains python code for the logic of UTF-16 to
18#      UTF-8 conversion, organized according to the conventions
19#      of the bitstream compiler. 
20
21import bitutil
22import u16u8_compilable
23
class u16_streams:
  """Empty attribute container for the UTF-16 bitstream results.

  main() hands a fresh instance to u16u8_compilable.main().  The object
  that call returns as `u16` is later read for its `error`, `ASCII` and
  `utf8_3` attributes (presumably this same instance, filled in by the
  bitstream logic -- confirm against u16u8_compilable).
  """
26#
27# UTF-16 Error Messages
28#
def IllegalSequenceMessage(pos):
        """Return the newline-terminated error text for an illegal UTF-16 sequence.

        pos is substituted into the message with the %i conversion.
        """
        template = "Illegal UTF-16 sequence at position %i in source.\n"
        return template % pos
31
def IncompleteSequenceMessage(pos):
        """Return the newline-terminated error text for input that ends mid-sequence.

        pos is substituted into the message with the %i conversion.
        """
        template = "EOF with incomplete UTF-16 sequence at position %i in source.\n"
        return template % pos
34
def Interleave(u16, U8_pre, U8_butlast, U8_last, mask):
        """Merge the three UTF-8 byte streams and build the matching deletion mask.

        Arguments:
          u16        -- object providing the ASCII and utf8_3 bitstreams
          U8_pre     -- byte string; paired with an equally long run of
                        NUL bytes before merging
          U8_butlast -- byte string merged pairwise with U8_last
          U8_last    -- byte string merged pairwise with U8_butlast
          mask       -- bitstream ANDed into both operands of the first-pair mask

        Returns the tuple (doublepair, delmask), which main() passes on to
        bitutil.filter_bytes.

        NOTE(review): presumably every input position expands to four output
        slots (NUL, pre, butlast, last) and delmask marks the slots to drop;
        confirm against the bitutil merge_* helpers.
        """
        # Data: combine the byte streams pairwise, then combine the pairs.
        tail_pair = bitutil.merge_bytes(U8_butlast, U8_last)
        nul_padding = "\0" * len(U8_pre)
        head_pair = bitutil.merge_bytes(nul_padding, U8_pre)
        merged = bitutil.merge_doublebytes(head_pair, tail_pair)

        # Mask: built with the same two-level pairing as the data above.
        tail_mask = bitutil.merge_1(u16.ASCII, 0)
        every_position = (-1) & mask
        not_three_byte = (~u16.utf8_3) & mask
        head_mask = bitutil.merge_1(every_position, not_three_byte)
        deletions = bitutil.merge_2(head_mask, tail_mask)

        return (merged, deletions)
44       
45
46import sys
def main():
        """Command-line driver: convert a UTF-16 file to UTF-8.

        Usage: u16u8.py u16file [u8file]

        Reads the file named by sys.argv[1] with bitutil.readfile, runs the
        bitstream logic in u16u8_compilable.main, and writes the filtered
        result to sys.argv[2] when exactly two arguments are given, otherwise
        to sys.stdout.

        If the bitstream logic reports an error (u16.error != 0), a message is
        written to sys.stderr and the length handed to
        bitutil.inverse_transpose is cut back to the error position, so only
        the leading part of the input is converted.

        Raises SystemExit (status 1) when no input file is named.
        """
        if len(sys.argv) < 2:
                sys.stderr.write("Usage: u16u8.py u16file [u8file]\n")
                # Actually terminate: a bare `exit` expression is a no-op and
                # would let execution fall through to sys.argv[1] below.
                sys.exit(1)

        u16data = bitutil.readfile(sys.argv[1])
        # Count of 16-bit code units.  `//` keeps this an integer regardless
        # of whether true division is in effect.
        total_units = len(u16data) // 2
        u16len = total_units

        U16h = bitutil.pack_U16h(u16data)
        # NOTE(review): `pack_U16i` may be a typo for `pack_U16l`; bitutil is
        # not visible from this file, so the call is left unchanged -- confirm.
        U16l = bitutil.pack_U16i(u16data)
        (u16h, mask) = bitutil.transpose_streams(U16h)
        (u16l, mask) = bitutil.transpose_streams(U16l)

        # u16u8_compilable.main performs bitstream calculations
        (u16, u8_pre, u8_butlast, u8_last) = u16u8_compilable.main(
                u16h, u16l, mask, u16_streams())

        if u16.error != 0:
                err_pos = bitutil.count_leading_zeroes(u16.error)
                at_EOF = err_pos == total_units
                if at_EOF:
                        err_pos -= 1
                        # Code-unit index times two (presumably the byte
                        # offset in the source).
                        sys.stderr.write(IncompleteSequenceMessage(err_pos * 2))
                else:
                        err_byte = ord(u16data[err_pos * 2])
                        # The error is actually a surrogate prefix at the
                        # previous position unless we're at a surrogate suffix.
                        if err_byte < 0xDC or err_byte > 0xDF:
                                err_pos -= 1
                        sys.stderr.write(IllegalSequenceMessage(err_pos * 2))
                # Convert only the code units that precede the error.
                u16len = err_pos

        U8_pre = bitutil.inverse_transpose(u8_pre, u16len)
        U8_butlast = bitutil.inverse_transpose(u8_butlast, u16len)
        U8_last = bitutil.inverse_transpose(u8_last, u16len)

        (U8, delmask) = Interleave(u16, U8_pre, U8_butlast, U8_last, mask)

        result = bitutil.filter_bytes(U8, delmask)

        # Open the destination only once there is something to write, so a
        # failure while reading or converting does not truncate an existing
        # output file.
        if len(sys.argv) == 3:
                outfile = open(sys.argv[2], "w")
        else:
                outfile = sys.stdout
        try:
                outfile.write(result)
        finally:
                # Close only a file opened here; sys.stdout is merely flushed
                # so it stays usable by whoever called main().
                if outfile is sys.stdout:
                        outfile.flush()
                else:
                        outfile.close()
92               
# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
        main()
94
95       
Note: See TracBrowser for help on using the repository browser.