source: proto/charsetcompiler/inputs/XML10_Lex_UTF8 @ 3903

Last change on this file since 3903 was 1227, checked in by cameron, 8 years ago

Combined XML/UTF-8 inputs for XML 1.0

File size: 1.2 KB
Line 
1# Control Characters and whitespace
2x00_x1F = [\x00-\x1F]
3CR = [\x0D]
4LF = [\x0A]
5HT = [\x09]
6SP = [ ]
7# XML Lexical Items
8lex.RefStart = [&]
9lex.Semicolon = [;]
10lex.LAngle = [<]
11lex.RAngle = [>]
12lex.LBracket = [[]
13lex.RBracket = []]
14lex.Exclam = [!]
15lex.QMark = [?]
16lex.Hyphen =  [-]
17lex.Equals = [=]
18lex.SQuote = [']
19lex.DQuote = ["]
20lex.Slash = [/]
21lex.Hash = [#]
22lex.x = [x]
23lex.Colon = [:]
24lex.ASCII_name_start = [_:A-Za-z]
25lex.ASCII_name_char = [-_.0-:A-Za-z]
26lex.NameScan = [-_.0-:A-Za-z\x80-\xFF]
27lex.WS = [\x0D\x0A\x09 ]
28lex.Digit = [0-9]
29lex.Hex = [0-9A-Fa-f]
30#
31# UTF-8
32#
33u8_unibyte = [\x00-\x7F]
34u8_prefix = [\xC0-\xFF]
35u8_suffix = [\x80-\xBF]
36# 2-byte sequences
37u8_prefix2 = [\xC0-\xDF]
38# For 2-byte sequence validation
39u8_badprefix2 = [\xC0-\xC1]
40# 3-byte sequences
41u8_prefix3or4 = [\xE0-\xFF]
42u8_prefix3 = [\xE0-\xEF]
43# For 3-byte sequence validation
44xE0 = [\xE0]
45xED = [\xED]
46xA0_xBF = [\xA0-\xBF]
47x80_x9F = [\x80-\x9F]
48# For U+FFFE/U+FFFF recognition in XML (UTF-8: EF BF BE / EF BF BF)
49xEF = [\xEF]
50xBF = [\xBF]
51xBE = [\xBE]
52# 4-byte sequences
53u8_prefix4 = [\xF0-\xFF]
54# For 4-byte sequence validation
55u8_badprefix4 = [\xF5-\xFF]
56xF0 = [\xF0]
57xF4 = [\xF4]
58x90_xBF = [\x90-\xBF]
59x80_x8F = [\x80-\x8F]
60
Note: See TracBrowser for help on using the repository browser.