source: proto/matchparens/pdfparenmatch.py @ 3027

Last change on this file since 3027 was 2907, checked in by cameron, 6 years ago

pdf parenmatch

File size: 2.2 KB
Line 
#
# Recursive Parenthesis Matching
#
#
# Robert D. Cameron
# October 14, 2012
#
8import sys
9import pablo
10
class Basis_bits():
        """Holder for the eight basis bit streams of the input bytes.

        Each attribute is one bit stream, defaulting to 0 (empty).  The main
        script passes an instance to pablo.transpose_streams, which presumably
        fills the fields in -- confirm against pablo.

        NOTE(review): bit_0 appears to be the most significant bit of each
        byte: Classify_bytes recognises '(' (0x28) as bit_2 and bit_4 set
        with every other bit clear.
        """
        bit_0 = 0
        bit_1 = 0
        bit_2 = 0
        bit_3 = 0
        bit_4 = 0
        bit_5 = 0
        bit_6 = 0
        bit_7 = 0
20
class Lex():
        """Lexical item streams written by Classify_bytes.

        Both attributes default to 0 (empty stream).
        """
        # Positions whose byte is '(' (0x28).
        LParen = 0
        # Positions whose byte is ')' (0x29).
        RParen = 0
24       
class Matches():
        """Result streams written by Match_Parens.

        All attributes default to 0 (empty stream).
        """
        # Closing parens matched during the most recent matching pass.
        closed = 0
        # Accumulated spans from an opening paren up to the paren it scans to.
        instring = 0
        # Error positions: scans that fall outside pablo.EOF_mask, plus
        # closing parens that never closed an opener.
        error = 0
29
30
def Classify_bytes(basis_bits, lex):
        """Compute the parenthesis marker streams from the basis bit streams.

        Reads basis_bits.bit_0 .. basis_bits.bit_7 and stores two streams:

        lex.LParen -- bit_2 and bit_4 set, every other bit clear
                      (0x28, '(' when bit_0 is the most significant bit)
        lex.RParen -- bit_2, bit_4 and bit_7 set, every other bit clear
                      (0x29, ')')

        Nothing is returned; the results are written into lex.
        """
        # High nibble 0010: bit_2 set, bit_3 clear, and neither bit_0 nor bit_1.
        upper_is_2 = (basis_bits.bit_2 &~ basis_bits.bit_3) &~ (basis_bits.bit_0 | basis_bits.bit_1)
        # Low nibble starts with 10: bit_4 set, bit_5 clear.
        lower_starts_10 = basis_bits.bit_4 &~ basis_bits.bit_5
        # Low nibble 1000 (bits 6 and 7 both clear) completes 0x28.
        lower_is_8 = lower_starts_10 &~ (basis_bits.bit_6 | basis_bits.bit_7)
        # Low nibble 1001 (bit_7 set, bit_6 clear) completes 0x29.
        lower_is_9 = lower_starts_10 & (basis_bits.bit_7 &~ basis_bits.bit_6)
        lex.LParen = upper_is_2 & lower_is_8
        lex.RParen = upper_is_2 & lower_is_9
42       
def Match_Parens(lex, matches):
        """Match opening and closing parentheses, one pass per loop iteration.

        Reads lex.LParen, lex.RParen and pablo.EOF_mask.  Writes:

        matches.instring -- union of the spans from each opener considered
                            in a pass up to the position it scans to
        matches.closed   -- closers matched in the most recent pass only
        matches.error    -- scan results lying outside pablo.EOF_mask, plus
                            every closing paren that never closed an opener

        NOTE(review): the exact semantics of pablo.Advance, pablo.ScanTo,
        pablo.ExclusiveSpan and pablo.SpanUpTo are not visible here; the
        comments below describe only how their results are combined.
        NOTE(review): the first pass uses ExclusiveSpan while later passes
        use SpanUpTo -- confirm that the difference is intentional.
        NOTE(review): matches.closed is overwritten on every pass; the
        accumulated set lives only in the local all_closed.
        """
        parens = lex.LParen | lex.RParen

        # First pass: from just past every paren, scan to the next paren.
        pscan = pablo.ScanTo(pablo.Advance(lex.LParen), parens)
        qscan = pablo.ScanTo(pablo.Advance(lex.RParen), parens)
        matches.instring = pablo.ExclusiveSpan(lex.LParen, pscan)
        # An opener whose scan lands on a closer is matched in this pass.
        matches.closed = pscan & lex.RParen
        # Still pending: parens reached by a scan from a paren of the same
        # kind (opener after opener, closer after closer).
        unclosed = (pscan & lex.LParen) | (qscan & lex.RParen)
        # Scan positions outside the EOF mask found no paren within the input.
        matches.error = pscan &~ pablo.EOF_mask
        all_closed = matches.closed

        # Repeat the same step on the pending parens only, until none remain.
        while unclosed:
                pscan = pablo.ScanTo(pablo.Advance(unclosed & lex.LParen), unclosed)
                qscan = pablo.ScanTo(pablo.Advance(unclosed & lex.RParen), unclosed)
                matches.instring |= pablo.SpanUpTo(unclosed & lex.LParen, pscan)
                matches.closed = pscan & lex.RParen
                unclosed = (pscan & lex.LParen) | (qscan & lex.RParen)
                all_closed |= matches.closed
                matches.error |= pscan &~ pablo.EOF_mask
        #
        # Any closing paren that was not actually used to close
        # an opener is in error.
        matches.error |= lex.RParen &~ all_closed
68
69
70
71
# Module-level stream containers shared by the script below.
basis_bits = Basis_bits()
lex = Lex()
matches = Matches()

if __name__ == "__main__":
        # Script entry point: read the file named on the command line, run
        # the two stages, and print the input with the result streams
        # beneath it.  Labels are padded to a 16-column field.
        if len(sys.argv) > 1:
                u8data = pablo.readfile(sys.argv[1])
                pablo.EOF_mask = pablo.transpose_streams(u8data, basis_bits)
                Classify_bytes(basis_bits, lex)
                Match_Parens(lex, matches)
                lgth = len(u8data)
                # Single-argument print(...) behaves the same on Python 2 and 3.
                print("data:" + " "*(16-5) + u8data)
                # The error stream is rendered one position past the data length.
                print("errors" + " "*(16-6) + pablo.bitstream2string(matches.error, lgth+1))
                print("instring:" + " "*(16-9) + pablo.bitstream2string(matches.instring, lgth))
        else:
                # Report the name the script was actually invoked under.
                print("Usage: python %s <file>" % sys.argv[0])
90       
91
92
Note: See TracBrowser for help on using the repository browser.