source: proto/u8check/u8check.pablo @ 5539

Last change on this file since 5539 was 4918, checked in by cameron, 4 years ago

u8check demo

File size: 2.9 KB
Line 
class Basis_bits():
        # One stream per bit position of the input bytes.  The classification
        # logic in this file is only consistent with its own stream names
        # (xE0, xED, xF4, ...) when bit_0 is the high-order bit of each byte
        # and bit_7 is the low-order bit.
        # NOTE(review): presumably filled in by Transpose(bytedata, basis_bits)
        # in Main; confirm against the Pablo toolchain.
        bit_0 = 0
        bit_1 = 0
        bit_2 = 0
        bit_3 = 0
        bit_4 = 0
        bit_5 = 0
        bit_6 = 0
        bit_7 = 0
10
class U8():
        # UTF-8 byte-class streams written by Classify_utf8 and read by
        # Validate_utf8.  Byte ranges below are the ones the classification
        # logic selects, with bit_0 taken as the high-order bit.

        # Structural classes.
        unibyte = 0        # 0x00-0x7F: single-byte (ASCII) sequences
        prefix = 0         # 0xC0-0xFF: any multi-byte lead byte
        prefix2 = 0        # 0xC0-0xDF: lead byte of a 2-byte sequence
        prefix3 = 0        # 0xE0-0xEF: lead byte of a 3-byte sequence
        prefix4 = 0        # 0xF0-0xFF: lead byte of a 4-byte sequence
        suffix = 0         # 0x80-0xBF: continuation byte

        # Lead bytes that are never legal.
        badprefix2 = 0     # 0xC0-0xC1
        badprefix4 = 0     # 0xF5-0xFF

        # Lead bytes whose first continuation byte is range-restricted.
        xE0 = 0
        xED = 0
        xF0 = 0
        xF4 = 0

        # Continuation-byte sub-ranges used for those restrictions.
        xA0_xBF = 0
        x80_x9F = 0
        x90_xBF = 0
        x80_x8F = 0
28
class Error():
        # Stream into which Validate_utf8 ORs every UTF-8 error condition it
        # detects; a set position marks an error at that position.
        mask = 0
31
def Classify_utf8(basis_bits, u8):
        # Compute the UTF-8 byte-class streams in u8 from the eight basis bit
        # streams in basis_bits (bit_0 is the high-order bit of each byte).
        # Only bitwise operations are used, so every stream position is
        # classified independently.  Nothing is returned; results are stored
        # as attributes of u8.
        #
        # generated by python charset_compiler.py inputs/UTF8

        # Structural classes, selected by the leading bits.
        u8.unibyte = (~basis_bits.bit_0)                        # 0xxxxxxx
        u8.prefix = (basis_bits.bit_0 & basis_bits.bit_1)       # 11xxxxxx
        u8.prefix2 = (u8.prefix & ~basis_bits.bit_2)            # 110xxxxx
        temp1 = (basis_bits.bit_2 & ~basis_bits.bit_3)
        u8.prefix3 = (u8.prefix & temp1)                        # 1110xxxx
        temp2 = (basis_bits.bit_2 & basis_bits.bit_3)
        u8.prefix4 = (u8.prefix & temp2)                        # 1111xxxx
        u8.suffix = (basis_bits.bit_0 & ~basis_bits.bit_1)      # 10xxxxxx

        # 0xC0 and 0xC1: 2-byte lead bytes with bits 2..6 all clear.
        temp3 = (basis_bits.bit_2 | basis_bits.bit_3)
        temp4 = (u8.prefix & ~temp3)
        temp5 = (basis_bits.bit_4 | basis_bits.bit_5)
        temp6 = (temp5 | basis_bits.bit_6)
        u8.badprefix2 = (temp4 & ~temp6)

        # 0xE0: 3-byte lead byte with a zero low nybble.
        temp7 = (basis_bits.bit_6 | basis_bits.bit_7)
        temp8 = (temp5 | temp7)
        u8.xE0 = (u8.prefix3 & ~temp8)

        # 0xED: 3-byte lead byte with low nybble 1101.
        temp9 = (basis_bits.bit_4 & basis_bits.bit_5)
        temp10 = (basis_bits.bit_7 & ~basis_bits.bit_6)
        temp11 = (temp9 & temp10)
        u8.xED = (u8.prefix3 & temp11)

        # Continuation bytes split on bit 2: 0xA0-0xBF versus 0x80-0x9F.
        u8.xA0_xBF = (u8.suffix & basis_bits.bit_2)
        u8.x80_x9F = (u8.suffix & ~basis_bits.bit_2)

        # 0xF5-0xFF: 4-byte lead bytes with low nybble above 0100.
        temp12 = (basis_bits.bit_5 & temp7)
        temp13 = (basis_bits.bit_4 | temp12)
        u8.badprefix4 = (u8.prefix4 & temp13)

        # 0xF0 (zero low nybble) and 0xF4 (low nybble 0100).
        u8.xF0 = (u8.prefix4 & ~temp8)
        temp14 = (basis_bits.bit_5 & ~basis_bits.bit_4)
        temp15 = (temp14 & ~temp7)
        u8.xF4 = (u8.prefix4 & temp15)

        # Continuation bytes split on bits 2..3: 0x90-0xBF versus 0x80-0x8F.
        u8.x90_xBF = (u8.suffix & temp3)
        u8.x80_x8F = (u8.suffix & ~temp3)
65
def Validate_utf8(u8, error):
        # Combine the byte-class streams in u8 into error.mask, which ends up
        # set wherever a UTF-8 well-formedness rule is violated.  Nothing is
        # returned; the result is stored in error.mask.
        #
        # NOTE(review): this assumes pablo.Advance moves every marker in a
        # stream forward by exactly one position; confirm against the pablo
        # runtime.

        # Scope streams: u8_scopeNK marks where the K-th byte of an N-byte
        # sequence is expected, derived from the position of its lead byte.
        u8_scope22 = pablo.Advance(u8.prefix2)
        u8_scope32 = pablo.Advance(u8.prefix3)
        u8_scope33 = pablo.Advance(u8_scope32)
        u8_scope42 = pablo.Advance(u8.prefix4)
        u8_scope43 = pablo.Advance(u8_scope42)
        u8_scope44 = pablo.Advance(u8_scope43)
        u8lastscope = u8_scope22 | u8_scope33 | u8_scope44
        u8anyscope = u8lastscope | u8_scope32 | u8_scope42 | u8_scope43

        # A continuation byte must appear exactly where one is expected:
        # the XOR flags both a missing suffix and an unexpected suffix.
        mismatch = u8anyscope ^ u8.suffix
        error.mask = u8.badprefix2 | u8.badprefix4 | mismatch

        # Lead bytes that restrict the range of the byte that follows them.
        error.mask |= pablo.Advance(u8.xE0) & u8.x80_x9F    # E0 then 80-9F
        error.mask |= pablo.Advance(u8.xED) & u8.xA0_xBF    # ED then A0-BF
        error.mask |= pablo.Advance(u8.xF0) & u8.x80_x8F    # F0 then 80-8F
        error.mask |= pablo.Advance(u8.xF4) & u8.x90_xBF    # F4 then 90-BF
82
def Main(basis_bits, u8, error):
        # Run the u8check pipeline in order: transpose the input into basis
        # bit streams, classify each byte, then validate the sequences.
        # NOTE(review): Transpose and bytedata are not defined in this file;
        # presumably both are supplied by the Pablo toolchain.  Confirm before
        # running this outside that environment.
        Transpose(bytedata, basis_bits)
        Classify_utf8(basis_bits, u8)
        Validate_utf8(u8, error)
Note: See TracBrowser for help on using the repository browser.