1 | # |
---|
2 | # Recursive Parenthesis Matching - PDF style with EOL Comments |
---|
3 | # |
---|
4 | # Robert D. Cameron |
---|
5 | # April 17, 2013 |
---|
6 | # |
---|
7 | # This version ignores parentheses within EOL Comments that |
---|
8 | # start with a % character. However, % characters within |
---|
9 | # strings are ordinary data characters. |
---|
10 | # |
---|
11 | # |
---|
12 | import sys |
---|
13 | import pablo |
---|
14 | |
---|
class Basis_bits():
    """Container for the eight basis bit streams of the input.

    Each attribute bit_0 .. bit_7 defaults to 0.  The __main__ section of
    this file hands an instance to pablo.transpose_streams, and
    Classify_bytes reads all eight attributes.  The tests in
    Classify_bytes are consistent with bit_0 being the most significant
    bit of each byte (e.g. '(' = 0x28 is recognised as bit_2 and bit_4
    set with every other bit clear).
    """
    # High nibble of each byte.
    bit_0 = bit_1 = bit_2 = bit_3 = 0
    # Low nibble of each byte.
    bit_4 = bit_5 = bit_6 = bit_7 = 0
---|
24 | |
---|
class Lex ():
    """Container for the lexical character-class streams.

    Classify_bytes stores its results in LParen, RParen, Percent and EOL,
    and Match_Parens_With_Comments reads those same four attributes, so
    all four are declared here with a default of 0.  Declaring them means
    reading lex.Percent or lex.EOL before classification yields 0 rather
    than raising AttributeError.
    """
    LParen = 0
    RParen = 0
    Percent = 0
    EOL = 0
    # Earlier spellings of the two names above.  Nothing in this file
    # reads or writes them; they are retained only so that any outside
    # code referring to Lex.Pct / Lex.LF keeps working.
    Pct = 0
    LF = 0
---|
30 | |
---|
class Matches() :
    """Container for the result streams of parenthesis matching.

    All five attributes default to 0.  Match_Parens_With_Comments ORs
    into closed, instring and error, and assigns comment_start and
    comment_end.
    """
    # Accumulated by OR-ing inside the matching loop.
    closed = instring = error = 0
    # Assigned once, after the matching loop has finished.
    comment_start = comment_end = 0
---|
37 | |
---|
38 | |
---|
def Classify_bytes(basis_bits, lex):
    """Compute the four character-class streams from the basis bit streams.

    Reads basis_bits.bit_0 .. basis_bits.bit_7 and stores on lex:

      lex.LParen   -- bytes equal to 0x28, '('
      lex.RParen   -- bytes equal to 0x29, ')'
      lex.Percent  -- bytes equal to 0x25, '%'
      lex.EOL      -- bytes equal to 0x0A, line feed

    The byte values follow from the bit tests below when bit_0 is taken
    as the most significant bit.  Nothing is returned.
    """
    b0 = basis_bits.bit_0
    b1 = basis_bits.bit_1
    b2 = basis_bits.bit_2
    b3 = basis_bits.bit_3
    b4 = basis_bits.bit_4
    b5 = basis_bits.bit_5
    b6 = basis_bits.bit_6
    b7 = basis_bits.bit_7

    # High nibble 0010: shared by '(', ')' and '%'.
    top_pair_set = b0 | b1
    hi_is_2 = (b2 & ~b3) & ~top_pair_set

    # Low nibble of the form 10xx: shared by '(', ')' and line feed.
    lo_10xx = b4 & ~b5

    # Low nibble 1000 -> 0x28.
    lo_is_8 = lo_10xx & ~(b6 | b7)
    lex.LParen = hi_is_2 & lo_is_8

    # Low nibble 1001 -> 0x29.
    lo_xx01 = b7 & ~b6
    lo_is_9 = lo_10xx & lo_xx01
    lex.RParen = hi_is_2 & lo_is_9

    # Low nibble 0101 -> 0x25.
    lo_01xx = b5 & ~b4
    lo_is_5 = lo_01xx & lo_xx01
    lex.Percent = hi_is_2 & lo_is_5

    # Low nibble 1010 with an all-zero high nibble -> 0x0A.
    hi_nonzero = top_pair_set | (b2 | b3)
    lo_xx10 = b6 & ~b7
    lo_is_A = lo_10xx & lo_xx10
    lex.EOL = lo_is_A & ~hi_nonzero
---|
58 | |
---|
#
# Modified version with comment processing
#
def Match_Parens_With_Comments(lex, matches):
    """Match parentheses while honouring %-to-end-of-line comments.

    Reads lex.LParen, lex.RParen, lex.Percent and lex.EOL.  ORs results
    into matches.closed, matches.instring and matches.error, and assigns
    matches.comment_start and matches.comment_end.  Returns nothing.

    Working sets used below:

      open_marks -- left parentheses (and positions carried forward from
                    an earlier scan) for which we still have a pending
                    obligation to find the corresponding right
                    parenthesis.
      maybe_pct  -- Percent marks that have not yet been ruled out as
                    comment opening delimiters.

    NOTE(review): the pablo helpers (Advance, ScanTo, AdvanceThenScanTo,
    SpanUpTo, ExclusiveSpan, atEOF) are defined outside this file; the
    comments here describe the intent of each step and should be
    confirmed against the pablo module.
    """
    eol_or_pct = lex.EOL | lex.Percent

    # Position 0 plus every position that follows an EOL.
    bol = ~pablo.Advance(~lex.EOL)
    # From each line start, the first EOL or Percent reached.
    stops = pablo.ScanTo(bol, eol_or_pct)
    maybe_pct = stops & lex.Percent
    # Text lying before the first candidate comment on each line.
    code_region = pablo.SpanUpTo(bol, stops)
    open_marks = code_region & lex.LParen

    # Everything a scan is allowed to stop at.  Initially no right
    # parenthesis has been consumed, so all of them are candidates.
    targets = open_marks | lex.RParen | maybe_pct

    while open_marks:
        # Scan from pending ( marks to the next [()%].  Everything we
        # find must be within a string.
        hits = pablo.AdvanceThenScanTo(open_marks, targets)
        not_closing = hits & ~lex.RParen
        matches.instring |= (pablo.ExclusiveSpan(open_marks, hits)
                             | not_closing)
        matches.closed |= (hits & lex.RParen)
        matches.error |= pablo.atEOF(hits)

        # Did we scan into a pending comment region?
        pct_hits = hits & maybe_pct

        # Any stop that was not a closing parenthesis keeps the
        # obligation alive and is scanned from on the next pass.
        open_marks = not_closing

        if pct_hits:
            # Those Percent marks were reached from inside a string, so
            # they no longer end their line's non-comment text; look
            # further along for the next EOL or Percent instead.
            stops = (stops & ~pct_hits) | pablo.AdvanceThenScanTo(pct_hits, eol_or_pct)
            maybe_pct = stops & lex.Percent
            resumed = pablo.SpanUpTo(pct_hits, stops)
            open_marks |= (resumed & lex.LParen)
            # Kept up to date, although nothing reads it after the
            # initial open_marks computation above.
            code_region |= resumed

        # Right parentheses already used as closers are out of play.
        targets = open_marks | (lex.RParen & ~matches.closed) | maybe_pct

    #
    # No more scans to do.  Any pending pct marks are now known
    # as definite comment delimiters.
    matches.comment_start = maybe_pct
    matches.comment_end = pablo.ScanTo(maybe_pct, lex.EOL)
    #
    # Any closing paren that was not actually used to close
    # an opener is in error, unless it lies within a comment.
    in_comment = pablo.SpanUpTo(matches.comment_start, matches.comment_end)
    matches.error |= (lex.RParen & ~matches.closed & ~in_comment)
---|
108 | |
---|
109 | |
---|
110 | |
---|
# Module-level stream containers, filled in by the stages run below.
basis_bits = Basis_bits()
lex = Lex()
matches = Matches()

if __name__ == "__main__":
    # Every print below uses the single-argument print(...) form, which
    # produces the same output as a Python 2 print statement and is also
    # valid Python 3 syntax.
    if len(sys.argv) > 1:
        u8data = pablo.readfile(sys.argv[1])
        pablo.EOF_mask = pablo.transpose_streams(u8data, basis_bits)
        Classify_bytes(basis_bits, lex)
        Match_Parens_With_Comments(lex, matches)
        lgth = len(u8data)
        # Labels are padded to 16 columns so the streams line up under
        # the data.  The error stream is rendered one position longer
        # than the input (lgth+1).
        print("data:".ljust(16) + u8data)
        print("errors:".ljust(16) + pablo.bitstream2string(matches.error, lgth+1))
        print("instring:".ljust(16) + pablo.bitstream2string(matches.instring, lgth))
        print("comment_start:".ljust(16) + pablo.bitstream2string(matches.comment_start, lgth))
        print("comment_end:".ljust(16) + pablo.bitstream2string(matches.comment_end, lgth))

    else:
        print("Usage: python parenmatch.py <file>")
---|
131 | |
---|
132 | |
---|
133 | |
---|