source: proto/RE2Pablo/ref/re2pbs/antlr/antlr_python_runtime-3.1.3/build/lib.linux-i686-2.7/antlr3/treewizard.py @ 2226

Last change on this file since 2226 was 2226, checked in by ksherdy, 7 years ago

Initial check in.

File size: 17.9 KB
Line 
1""" @package antlr3.tree
2@brief ANTLR3 runtime package, treewizard module
3
4A utility module to create ASTs at runtime.
5See <http://www.antlr.org/wiki/display/~admin/2007/07/02/Exploring+Concept+of+TreeWizard> for an overview. Note that the API of the Python implementation is slightly different.
6
7"""
8
9# begin[licence]
10#
11# [The "BSD licence"]
12# Copyright (c) 2005-2008 Terence Parr
13# All rights reserved.
14#
15# Redistribution and use in source and binary forms, with or without
16# modification, are permitted provided that the following conditions
17# are met:
18# 1. Redistributions of source code must retain the above copyright
19#    notice, this list of conditions and the following disclaimer.
20# 2. Redistributions in binary form must reproduce the above copyright
21#    notice, this list of conditions and the following disclaimer in the
22#    documentation and/or other materials provided with the distribution.
23# 3. The name of the author may not be used to endorse or promote products
24#    derived from this software without specific prior written permission.
25#
26# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
27# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
28# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
29# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
30# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
31# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
35# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36#
37# end[licence]
38
39from antlr3.constants import INVALID_TOKEN_TYPE
40from antlr3.tokens import CommonToken
41from antlr3.tree import CommonTree, CommonTreeAdaptor
42
43
44def computeTokenTypes(tokenNames):
45    """
46    Compute a dict that is an inverted index of
47    tokenNames (which maps int token types to names).
48    """
49
50    if tokenNames is None:
51        return {}
52
53    return dict((name, type) for type, name in enumerate(tokenNames))
54
55
56## token types for pattern parser
57EOF = -1
58BEGIN = 1
59END = 2
60ID = 3
61ARG = 4
62PERCENT = 5
63COLON = 6
64DOT = 7
65
66class TreePatternLexer(object):
67    def __init__(self, pattern):
68        ## The tree pattern to lex like "(A B C)"
69        self.pattern = pattern
70
71        ## Index into input string
72        self.p = -1
73
74        ## Current char
75        self.c = None
76
77        ## How long is the pattern in char?
78        self.n = len(pattern)
79
80        ## Set when token type is ID or ARG
81        self.sval = None
82
83        self.error = False
84
85        self.consume()
86
87
88    __idStartChar = frozenset(
89        'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
90        )
91    __idChar = __idStartChar | frozenset('0123456789')
92   
93    def nextToken(self):
94        self.sval = ""
95        while self.c != EOF:
96            if self.c in (' ', '\n', '\r', '\t'):
97                self.consume()
98                continue
99
100            if self.c in self.__idStartChar:
101                self.sval += self.c
102                self.consume()
103                while self.c in self.__idChar:
104                    self.sval += self.c
105                    self.consume()
106
107                return ID
108
109            if self.c == '(':
110                self.consume()
111                return BEGIN
112
113            if self.c == ')':
114                self.consume()
115                return END
116
117            if self.c == '%':
118                self.consume()
119                return PERCENT
120
121            if self.c == ':':
122                self.consume()
123                return COLON
124
125            if self.c == '.':
126                self.consume()
127                return DOT
128
129            if self.c == '[': # grab [x] as a string, returning x
130                self.consume()
131                while self.c != ']':
132                    if self.c == '\\':
133                        self.consume()
134                        if self.c != ']':
135                            self.sval += '\\'
136
137                        self.sval += self.c
138
139                    else:
140                        self.sval += self.c
141
142                    self.consume()
143
144                self.consume()
145                return ARG
146
147            self.consume()
148            self.error = True
149            return EOF
150
151        return EOF
152
153
154    def consume(self):
155        self.p += 1
156        if self.p >= self.n:
157            self.c = EOF
158
159        else:
160            self.c = self.pattern[self.p]
161
162
163class TreePatternParser(object):
164    def __init__(self, tokenizer, wizard, adaptor):
165        self.tokenizer = tokenizer
166        self.wizard = wizard
167        self.adaptor = adaptor
168        self.ttype = tokenizer.nextToken() # kickstart
169
170
171    def pattern(self):
172        if self.ttype == BEGIN:
173            return self.parseTree()
174
175        elif self.ttype == ID:
176            node = self.parseNode()
177            if self.ttype == EOF:
178                return node
179
180            return None # extra junk on end
181
182        return None
183
184
185    def parseTree(self):
186        if self.ttype != BEGIN:
187            return None
188
189        self.ttype = self.tokenizer.nextToken()
190        root = self.parseNode()
191        if root is None:
192            return None
193
194        while self.ttype in (BEGIN, ID, PERCENT, DOT):
195            if self.ttype == BEGIN:
196                subtree = self.parseTree()
197                self.adaptor.addChild(root, subtree)
198
199            else:
200                child = self.parseNode()
201                if child is None:
202                    return None
203
204                self.adaptor.addChild(root, child)
205
206        if self.ttype != END:
207            return None
208
209        self.ttype = self.tokenizer.nextToken()
210        return root
211
212
213    def parseNode(self):
214        # "%label:" prefix
215        label = None
216       
217        if self.ttype == PERCENT:
218            self.ttype = self.tokenizer.nextToken()
219            if self.ttype != ID:
220                return None
221
222            label = self.tokenizer.sval
223            self.ttype = self.tokenizer.nextToken()
224            if self.ttype != COLON:
225                return None
226           
227            self.ttype = self.tokenizer.nextToken() # move to ID following colon
228
229        # Wildcard?
230        if self.ttype == DOT:
231            self.ttype = self.tokenizer.nextToken()
232            wildcardPayload = CommonToken(0, ".")
233            node = WildcardTreePattern(wildcardPayload)
234            if label is not None:
235                node.label = label
236            return node
237
238        # "ID" or "ID[arg]"
239        if self.ttype != ID:
240            return None
241
242        tokenName = self.tokenizer.sval
243        self.ttype = self.tokenizer.nextToken()
244       
245        if tokenName == "nil":
246            return self.adaptor.nil()
247
248        text = tokenName
249        # check for arg
250        arg = None
251        if self.ttype == ARG:
252            arg = self.tokenizer.sval
253            text = arg
254            self.ttype = self.tokenizer.nextToken()
255
256        # create node
257        treeNodeType = self.wizard.getTokenType(tokenName)
258        if treeNodeType == INVALID_TOKEN_TYPE:
259            return None
260
261        node = self.adaptor.createFromType(treeNodeType, text)
262        if label is not None and isinstance(node, TreePattern):
263            node.label = label
264
265        if arg is not None and isinstance(node, TreePattern):
266            node.hasTextArg = True
267
268        return node
269
270
271class TreePattern(CommonTree):
272    """
273    When using %label:TOKENNAME in a tree for parse(), we must
274    track the label.
275    """
276
277    def __init__(self, payload):
278        CommonTree.__init__(self, payload)
279
280        self.label = None
281        self.hasTextArg = None
282       
283
284    def toString(self):
285        if self.label is not None:
286            return '%' + self.label + ':' + CommonTree.toString(self)
287       
288        else:
289            return CommonTree.toString(self)
290
291
292class WildcardTreePattern(TreePattern):
293    pass
294
295
296class TreePatternTreeAdaptor(CommonTreeAdaptor):
297    """This adaptor creates TreePattern objects for use during scan()"""
298
299    def createWithPayload(self, payload):
300        return TreePattern(payload)
301
302
303class TreeWizard(object):
304    """
305    Build and navigate trees with this object.  Must know about the names
306    of tokens so you have to pass in a map or array of token names (from which
307    this class can build the map).  I.e., Token DECL means nothing unless the
308    class can translate it to a token type.
309
310    In order to create nodes and navigate, this class needs a TreeAdaptor.
311
312    This class can build a token type -> node index for repeated use or for
313    iterating over the various nodes with a particular type.
314
315    This class works in conjunction with the TreeAdaptor rather than moving
316    all this functionality into the adaptor.  An adaptor helps build and
317    navigate trees using methods.  This class helps you do it with string
318    patterns like "(A B C)".  You can create a tree from that pattern or
319    match subtrees against it.
320    """
321
322    def __init__(self, adaptor=None, tokenNames=None, typeMap=None):
323        self.adaptor = adaptor
324        if typeMap is None:
325            self.tokenNameToTypeMap = computeTokenTypes(tokenNames)
326
327        else:
328            if tokenNames is not None:
329                raise ValueError("Can't have both tokenNames and typeMap")
330
331            self.tokenNameToTypeMap = typeMap
332
333
334    def getTokenType(self, tokenName):
335        """Using the map of token names to token types, return the type."""
336
337        try:
338            return self.tokenNameToTypeMap[tokenName]
339        except KeyError:
340            return INVALID_TOKEN_TYPE
341
342
343    def create(self, pattern):
344        """
345        Create a tree or node from the indicated tree pattern that closely
346        follows ANTLR tree grammar tree element syntax:
347       
348        (root child1 ... child2).
349       
350        You can also just pass in a node: ID
351         
352        Any node can have a text argument: ID[foo]
353        (notice there are no quotes around foo--it's clear it's a string).
354       
355        nil is a special name meaning "give me a nil node".  Useful for
356        making lists: (nil A B C) is a list of A B C.
357        """
358       
359        tokenizer = TreePatternLexer(pattern)
360        parser = TreePatternParser(tokenizer, self, self.adaptor)
361        return parser.pattern()
362
363
364    def index(self, tree):
365        """Walk the entire tree and make a node name to nodes mapping.
366       
367        For now, use recursion but later nonrecursive version may be
368        more efficient.  Returns a dict int -> list where the list is
369        of your AST node type.  The int is the token type of the node.
370        """
371
372        m = {}
373        self._index(tree, m)
374        return m
375
376
377    def _index(self, t, m):
378        """Do the work for index"""
379
380        if t is None:
381            return
382
383        ttype = self.adaptor.getType(t)
384        elements = m.get(ttype)
385        if elements is None:
386            m[ttype] = elements = []
387
388        elements.append(t)
389        for i in range(self.adaptor.getChildCount(t)):
390            child = self.adaptor.getChild(t, i)
391            self._index(child, m)
392
393
394    def find(self, tree, what):
395        """Return a list of matching token.
396
397        what may either be an integer specifzing the token type to find or
398        a string with a pattern that must be matched.
399       
400        """
401       
402        if isinstance(what, (int, long)):
403            return self._findTokenType(tree, what)
404
405        elif isinstance(what, basestring):
406            return self._findPattern(tree, what)
407
408        else:
409            raise TypeError("'what' must be string or integer")
410
411
412    def _findTokenType(self, t, ttype):
413        """Return a List of tree nodes with token type ttype"""
414
415        nodes = []
416
417        def visitor(tree, parent, childIndex, labels):
418            nodes.append(tree)
419
420        self.visit(t, ttype, visitor)
421
422        return nodes
423
424
425    def _findPattern(self, t, pattern):
426        """Return a List of subtrees matching pattern."""
427       
428        subtrees = []
429       
430        # Create a TreePattern from the pattern
431        tokenizer = TreePatternLexer(pattern)
432        parser = TreePatternParser(tokenizer, self, TreePatternTreeAdaptor())
433        tpattern = parser.pattern()
434       
435        # don't allow invalid patterns
436        if (tpattern is None or tpattern.isNil()
437            or isinstance(tpattern, WildcardTreePattern)):
438            return None
439
440        rootTokenType = tpattern.getType()
441
442        def visitor(tree, parent, childIndex, label):
443            if self._parse(tree, tpattern, None):
444                subtrees.append(tree)
445               
446        self.visit(t, rootTokenType, visitor)
447
448        return subtrees
449
450
451    def visit(self, tree, what, visitor):
452        """Visit every node in tree matching what, invoking the visitor.
453
454        If what is a string, it is parsed as a pattern and only matching
455        subtrees will be visited.
456        The implementation uses the root node of the pattern in combination
457        with visit(t, ttype, visitor) so nil-rooted patterns are not allowed.
458        Patterns with wildcard roots are also not allowed.
459
460        If what is an integer, it is used as a token type and visit will match
461        all nodes of that type (this is faster than the pattern match).
462        The labels arg of the visitor action method is never set (it's None)
463        since using a token type rather than a pattern doesn't let us set a
464        label.
465        """
466
467        if isinstance(what, (int, long)):
468            self._visitType(tree, None, 0, what, visitor)
469
470        elif isinstance(what, basestring):
471            self._visitPattern(tree, what, visitor)
472
473        else:
474            raise TypeError("'what' must be string or integer")
475       
476             
477    def _visitType(self, t, parent, childIndex, ttype, visitor):
478        """Do the recursive work for visit"""
479       
480        if t is None:
481            return
482
483        if self.adaptor.getType(t) == ttype:
484            visitor(t, parent, childIndex, None)
485
486        for i in range(self.adaptor.getChildCount(t)):
487            child = self.adaptor.getChild(t, i)
488            self._visitType(child, t, i, ttype, visitor)
489
490
491    def _visitPattern(self, tree, pattern, visitor):
492        """
493        For all subtrees that match the pattern, execute the visit action.
494        """
495
496        # Create a TreePattern from the pattern
497        tokenizer = TreePatternLexer(pattern)
498        parser = TreePatternParser(tokenizer, self, TreePatternTreeAdaptor())
499        tpattern = parser.pattern()
500       
501        # don't allow invalid patterns
502        if (tpattern is None or tpattern.isNil()
503            or isinstance(tpattern, WildcardTreePattern)):
504            return
505
506        rootTokenType = tpattern.getType()
507
508        def rootvisitor(tree, parent, childIndex, labels):
509            labels = {}
510            if self._parse(tree, tpattern, labels):
511                visitor(tree, parent, childIndex, labels)
512               
513        self.visit(tree, rootTokenType, rootvisitor)
514       
515
516    def parse(self, t, pattern, labels=None):
517        """
518        Given a pattern like (ASSIGN %lhs:ID %rhs:.) with optional labels
519        on the various nodes and '.' (dot) as the node/subtree wildcard,
520        return true if the pattern matches and fill the labels Map with
521        the labels pointing at the appropriate nodes.  Return false if
522        the pattern is malformed or the tree does not match.
523
524        If a node specifies a text arg in pattern, then that must match
525        for that node in t.
526        """
527
528        tokenizer = TreePatternLexer(pattern)
529        parser = TreePatternParser(tokenizer, self, TreePatternTreeAdaptor())
530        tpattern = parser.pattern()
531
532        return self._parse(t, tpattern, labels)
533
534
535    def _parse(self, t1, tpattern, labels):
536        """
537        Do the work for parse. Check to see if the tpattern fits the
538        structure and token types in t1.  Check text if the pattern has
539        text arguments on nodes.  Fill labels map with pointers to nodes
540        in tree matched against nodes in pattern with labels.
541        """
542       
543        # make sure both are non-null
544        if t1 is None or tpattern is None:
545            return False
546
547        # check roots (wildcard matches anything)
548        if not isinstance(tpattern, WildcardTreePattern):
549            if self.adaptor.getType(t1) != tpattern.getType():
550                return False
551
552            # if pattern has text, check node text
553            if (tpattern.hasTextArg
554                and self.adaptor.getText(t1) != tpattern.getText()):
555                return False
556
557        if tpattern.label is not None and labels is not None:
558            # map label in pattern to node in t1
559            labels[tpattern.label] = t1
560
561        # check children
562        n1 = self.adaptor.getChildCount(t1)
563        n2 = tpattern.getChildCount()
564        if n1 != n2:
565            return False
566
567        for i in range(n1):
568            child1 = self.adaptor.getChild(t1, i)
569            child2 = tpattern.getChild(i)
570            if not self._parse(child1, child2, labels):
571                return False
572
573        return True
574
575
576    def equals(self, t1, t2, adaptor=None):
577        """
578        Compare t1 and t2; return true if token types/text, structure match
579        exactly.
580        The trees are examined in their entirety so that (A B) does not match
581        (A B C) nor (A (B C)).
582        """
583
584        if adaptor is None:
585            adaptor = self.adaptor
586
587        return self._equals(t1, t2, adaptor)
588
589
590    def _equals(self, t1, t2, adaptor):
591        # make sure both are non-null
592        if t1 is None or t2 is None:
593            return False
594
595        # check roots
596        if adaptor.getType(t1) != adaptor.getType(t2):
597            return False
598
599        if adaptor.getText(t1) != adaptor.getText(t2):
600            return False
601       
602        # check children
603        n1 = adaptor.getChildCount(t1)
604        n2 = adaptor.getChildCount(t2)
605        if n1 != n2:
606            return False
607
608        for i in range(n1):
609            child1 = adaptor.getChild(t1, i)
610            child2 = adaptor.getChild(t2, i)
611            if not self._equals(child1, child2, adaptor):
612                return False
613
614        return True
Note: See TracBrowser for help on using the repository browser.