source: proto/CSV/csv2xml/csvclass.py @ 4177

Last change on this file since 4177 was 2588, checked in by linmengl, 7 years ago

initial check in of csv2xml, still using strings in this version

File size: 3.0 KB
Line 
1#
2# csvclass.py
3#
4# Bit stream definitions for CSV character classes.
5# Automatically generated using
6# chardeflist2py(DefinitionSet['CSV'])
7#
8# Forked from Dan Lin
9# March, 06, 2010
10#
11# Meng Lin
12# Oct, 04, 2012
13# Add '&' to lex, use charset_compiler.py and input/CSV
14
15
# Container for the eight basis bit streams, one attribute per bit of an
# input byte.  Every stream starts out as 0.
class Basis_bits:
    # Streams bit_0 .. bit_3.
    bit_0 = bit_1 = bit_2 = bit_3 = 0
    # Streams bit_4 .. bit_7.
    bit_4 = bit_5 = bit_6 = bit_7 = 0
25
26
# Container for the lexical item streams, one attribute per character
# class.  Every stream starts out as 0.
class Lex:
    # Backslash and the two quote characters.
    BackSlash = DQuote = SQuote = 0
    # Line terminators.
    CR = LF = 0
    # Remaining single-character classes.
    Comma = Period = HT = AndSymbol = 0
37
38
# Container for the marker streams derived from the lexical item streams.
# Every stream starts out as 0.
class Marker:
    delim = quote = quote_mask = escape = eol = hide = 0
46
47
# Compute the CSV character-class streams from the eight basis bit streams.
#
# basis_bits -- object providing bit_0 .. bit_7.  The patterns tested below
#               equal the ASCII codes given in the comments when bit_0 is
#               taken as the most significant bit of each byte.
# lex        -- object that receives BackSlash, DQuote, SQuote, CR, LF,
#               Comma, HT, Period and AndSymbol (assigned in that order).
#
# Nothing is returned; the results are stored on lex.
def classify_bytes(basis_bits, lex):
    b0, b1, b2, b3 = (basis_bits.bit_0, basis_bits.bit_1,
                      basis_bits.bit_2, basis_bits.bit_3)
    b4, b5, b6, b7 = (basis_bits.bit_4, basis_bits.bit_5,
                      basis_bits.bit_6, basis_bits.bit_7)

    # High nibble (bit_0 .. bit_3).
    hi_0101 = (b1 & ~b0) & (b3 & ~b2)
    hi_top_pair_set = b0 | b1
    hi_0010 = (b2 & ~b3) & ~hi_top_pair_set
    hi_nonzero = hi_top_pair_set | (b2 | b3)

    # Low nibble, upper pair (bit_4, bit_5).
    lo_11xx = b4 & b5
    lo_10xx = b4 & ~b5
    lo_01xx = b5 & ~b4

    # Low nibble, lower pair (bit_6, bit_7).
    lo_xx10 = b6 & ~b7
    lo_xx01 = b7 & ~b6
    lo_xx11 = b6 & b7

    # Complete low-nibble patterns shared by more than one class.
    lo_1100 = lo_11xx & ~(b6 | b7)

    lex.BackSlash = hi_0101 & lo_1100                    # 0x5C
    lex.DQuote = hi_0010 & (lo_xx10 & ~(b4 | b5))        # 0x22
    lex.SQuote = hi_0010 & (lo_01xx & lo_xx11)           # 0x27
    lex.CR = (lo_11xx & lo_xx01) & ~hi_nonzero           # 0x0D
    lex.LF = (lo_10xx & lo_xx10) & ~hi_nonzero           # 0x0A
    lex.Comma = hi_0010 & lo_1100                        # 0x2C
    lex.HT = (lo_10xx & lo_xx01) & ~hi_nonzero           # 0x09
    lex.Period = hi_0010 & (lo_11xx & lo_xx10)           # 0x2E
    lex.AndSymbol = hi_0010 & (lo_01xx & lo_xx10)        # 0x26
82
83
# Derive the CSV marker streams from the lexical item streams.
#
# lex    -- lexical streams; BackSlash, DQuote, HT, Comma, CR and LF are read.
# marker -- receives escape, quote, quote_mask, delim, eol and hide.
#
# Nothing is returned; the results are stored on marker.
#
# The following names are used here but are not defined in this file:
# Simd_const_odd, Simd_const_even, pablo, parse_quote_mask, FlipSignal and
# UseTabSignal.
def parse_marker(lex, marker):
    #Parse escape
    odd = Simd_const_odd
    even = Simd_const_even

    # First position of every run of backslashes.
    # NOTE(review): assumes pablo.Advance moves each bit to the next
    # position -- confirm against the pablo library.
    start = lex.BackSlash & ~pablo.Advance(lex.BackSlash)
    even_start = start & even

    # NOTE(review): assumes pablo.ScanThru carries each start marker to the
    # first position past its backslash run -- confirm against the pablo
    # library.
    even_final = pablo.ScanThru(even_start, lex.BackSlash)
    escape = even_final & odd

    odd_start = start & odd
    odd_final = pablo.ScanThru(odd_start, lex.BackSlash)
    # A run that starts on one parity and stops on the other is selected;
    # both halves together form the complete escape stream.
    marker.escape = escape | (odd_final & even)

    #Parse_quote
    marker.quote = lex.DQuote & ~marker.escape

    #Parse quote mask, C++ function call
    marker.quote_mask = parse_quote_mask(marker.quote)
    if FlipSignal:
        marker.quote_mask = ~marker.quote_mask

    # Mask delimiters with the complete escape stream (marker.escape), the
    # same stream used for marker.quote.  The local `escape` only covers
    # runs that start at even positions, so using it here would let a
    # delimiter escaped by an odd-start run through.
    if UseTabSignal:
        marker.delim = lex.HT & ~marker.escape & ~marker.quote_mask
    else:
        marker.delim = lex.Comma & ~marker.escape & ~marker.quote_mask

    marker.eol = (lex.CR | lex.LF) & ~marker.quote_mask

    #Hide double quote marks and hide LF after CR
    marker.hide = marker.quote | (pablo.Advance(lex.CR) & lex.LF & ~marker.quote_mask)
    # An LF hidden as the second half of a CR LF pair is not a line end of
    # its own.
    marker.eol = marker.eol & ~marker.hide
117
118
# Run the two passes in order: character classification, then marker
# parsing.
#
# basis_bits, lex and marker are not defined in this file.
# NOTE(review): presumably instances of Basis_bits, Lex and Marker supplied
# by the surrounding tooling -- confirm.
def main():
    # Must run first: parse_marker reads the streams stored on lex here.
    classify_bytes(basis_bits, lex)
    parse_marker(lex, marker)
Note: See TracBrowser for help on using the repository browser.