mirror of
				https://github.com/python/cpython.git
				synced 2025-10-25 07:48:51 +00:00 
			
		
		
		
	 f37eb3a184
			
		
	
	
		f37eb3a184
		
	
	
	
	
		
			
			svn+ssh://pythondev@svn.python.org/sandbox/trunk/2to3/lib2to3 ........ r83852 | benjamin.peterson | 2010-08-08 15:45:44 -0500 (Sun, 08 Aug 2010) | 1 line wrap with parens ........ r83853 | benjamin.peterson | 2010-08-08 15:46:31 -0500 (Sun, 08 Aug 2010) | 1 line use parens ........ r83857 | benjamin.peterson | 2010-08-08 15:59:49 -0500 (Sun, 08 Aug 2010) | 1 line things which use touch_import should be pre order ........ r84042 | george.boutsioukis | 2010-08-14 16:10:19 -0500 (Sat, 14 Aug 2010) | 2 lines This revision incorporates into the 2to3 tool the new, faster, tree matching algorithm developed during a GSOC project. The algorithm resides in the two added modules, btm_matcher and btm_utils. New code has been added to drive the new matching process in refactor.py and a few minor changes were made in other modules. A BM_compatible flag(False by default) has been added in fixer_base and it is set to True in most of the current fixers. ........ r84216 | benjamin.peterson | 2010-08-19 16:44:05 -0500 (Thu, 19 Aug 2010) | 1 line allow star_expr in testlist_gexp ........ r84274 | benjamin.peterson | 2010-08-22 18:40:46 -0500 (Sun, 22 Aug 2010) | 1 line wrap long line ........ r84275 | benjamin.peterson | 2010-08-22 18:42:22 -0500 (Sun, 22 Aug 2010) | 1 line cleanup ........ r84276 | benjamin.peterson | 2010-08-22 18:51:01 -0500 (Sun, 22 Aug 2010) | 1 line when there's a None value and a traceback, don't call type with it #9661 ........ r84375 | george.boutsioukis | 2010-08-31 08:38:53 -0500 (Tue, 31 Aug 2010) | 3 lines Idiomatic code changes & stylistic issues fixed in the BottomMatcher module. Thanks to Benjamin Peterson for taking the time to review the code. ........ r85388 | benjamin.peterson | 2010-10-12 17:27:44 -0500 (Tue, 12 Oct 2010) | 1 line fix urllib fixer with multiple as imports on a line #10069 ........ r85478 | benjamin.peterson | 2010-10-14 08:09:56 -0500 (Thu, 14 Oct 2010) | 1 line stop abusing docstrings ........ r85506 | benjamin.peterson | 2010-10-14 17:45:19 -0500 (Thu, 14 Oct 2010) | 1 line kill sibling import ........ r85507 | benjamin.peterson | 2010-10-14 17:54:15 -0500 (Thu, 14 Oct 2010) | 1 line remove trailing whitespace ........ r85508 | benjamin.peterson | 2010-10-14 17:55:28 -0500 (Thu, 14 Oct 2010) | 1 line typo ........
		
			
				
	
	
		
			168 lines
		
	
	
	
		
			6.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			168 lines
		
	
	
	
		
			6.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """A bottom-up tree matching algorithm implementation meant to speed
 | |
| up 2to3's matching process. After the tree patterns are reduced to
 | |
| their rarest linear path, a linear Aho-Corasick automaton is
 | |
| created. The linear automaton traverses the linear paths from the
 | |
| leaves to the root of the AST and returns a set of nodes for further
 | |
| matching. This reduces significantly the number of candidate nodes."""
 | |
| 
 | |
| __author__ = "George Boutsioukis <gboutsioukis@gmail.com>"
 | |
| 
 | |
| import logging
 | |
| import itertools
 | |
| from collections import defaultdict
 | |
| 
 | |
| from . import pytree
 | |
| from .btm_utils import reduce_tree
 | |
| 
 | |
| class BMNode(object):
 | |
|     """Class for a node of the Aho-Corasick automaton used in matching"""
 | |
|     count = itertools.count()
 | |
|     def __init__(self):
 | |
|         self.transition_table = {}
 | |
|         self.fixers = []
 | |
|         self.id = next(BMNode.count)
 | |
|         self.content = ''
 | |
| 
 | |
| class BottomMatcher(object):
 | |
|     """The main matcher class. After instantiating the patterns should
 | |
|     be added using the add_fixer method"""
 | |
| 
 | |
|     def __init__(self):
 | |
|         self.match = set()
 | |
|         self.root = BMNode()
 | |
|         self.nodes = [self.root]
 | |
|         self.fixers = []
 | |
|         self.logger = logging.getLogger("RefactoringTool")
 | |
| 
 | |
|     def add_fixer(self, fixer):
 | |
|         """Reduces a fixer's pattern tree to a linear path and adds it
 | |
|         to the matcher(a common Aho-Corasick automaton). The fixer is
 | |
|         appended on the matching states and called when they are
 | |
|         reached"""
 | |
|         self.fixers.append(fixer)
 | |
|         tree = reduce_tree(fixer.pattern_tree)
 | |
|         linear = tree.get_linear_subpattern()
 | |
|         match_nodes = self.add(linear, start=self.root)
 | |
|         for match_node in match_nodes:
 | |
|             match_node.fixers.append(fixer)
 | |
| 
 | |
|     def add(self, pattern, start):
 | |
|         "Recursively adds a linear pattern to the AC automaton"
 | |
|         #print("adding pattern", pattern, "to", start)
 | |
|         if not pattern:
 | |
|             #print("empty pattern")
 | |
|             return [start]
 | |
|         if isinstance(pattern[0], tuple):
 | |
|             #alternatives
 | |
|             #print("alternatives")
 | |
|             match_nodes = []
 | |
|             for alternative in pattern[0]:
 | |
|                 #add all alternatives, and add the rest of the pattern
 | |
|                 #to each end node
 | |
|                 end_nodes = self.add(alternative, start=start)
 | |
|                 for end in end_nodes:
 | |
|                     match_nodes.extend(self.add(pattern[1:], end))
 | |
|             return match_nodes
 | |
|         else:
 | |
|             #single token
 | |
|             #not last
 | |
|             if pattern[0] not in start.transition_table:
 | |
|                 #transition did not exist, create new
 | |
|                 next_node = BMNode()
 | |
|                 start.transition_table[pattern[0]] = next_node
 | |
|             else:
 | |
|                 #transition exists already, follow
 | |
|                 next_node = start.transition_table[pattern[0]]
 | |
| 
 | |
|             if pattern[1:]:
 | |
|                 end_nodes = self.add(pattern[1:], start=next_node)
 | |
|             else:
 | |
|                 end_nodes = [next_node]
 | |
|             return end_nodes
 | |
| 
 | |
|     def run(self, leaves):
 | |
|         """The main interface with the bottom matcher. The tree is
 | |
|         traversed from the bottom using the constructed
 | |
|         automaton. Nodes are only checked once as the tree is
 | |
|         retraversed. When the automaton fails, we give it one more
 | |
|         shot(in case the above tree matches as a whole with the
 | |
|         rejected leaf), then we break for the next leaf. There is the
 | |
|         special case of multiple arguments(see code comments) where we
 | |
|         recheck the nodes
 | |
| 
 | |
|         Args:
 | |
|            The leaves of the AST tree to be matched
 | |
| 
 | |
|         Returns:
 | |
|            A dictionary of node matches with fixers as the keys
 | |
|         """
 | |
|         current_ac_node = self.root
 | |
|         results = defaultdict(list)
 | |
|         for leaf in leaves:
 | |
|             current_ast_node = leaf
 | |
|             while current_ast_node:
 | |
|                 current_ast_node.was_checked = True
 | |
|                 for child in current_ast_node.children:
 | |
|                     # multiple statements, recheck
 | |
|                     if isinstance(child, pytree.Leaf) and child.value == ";":
 | |
|                         current_ast_node.was_checked = False
 | |
|                         break
 | |
|                 if current_ast_node.type == 1:
 | |
|                     #name
 | |
|                     node_token = current_ast_node.value
 | |
|                 else:
 | |
|                     node_token = current_ast_node.type
 | |
| 
 | |
|                 if node_token in current_ac_node.transition_table:
 | |
|                     #token matches
 | |
|                     current_ac_node = current_ac_node.transition_table[node_token]
 | |
|                     for fixer in current_ac_node.fixers:
 | |
|                         if not fixer in results:
 | |
|                             results[fixer] = []
 | |
|                         results[fixer].append(current_ast_node)
 | |
| 
 | |
|                 else:
 | |
|                     #matching failed, reset automaton
 | |
|                     current_ac_node = self.root
 | |
|                     if (current_ast_node.parent is not None
 | |
|                         and current_ast_node.parent.was_checked):
 | |
|                         #the rest of the tree upwards has been checked, next leaf
 | |
|                         break
 | |
| 
 | |
|                     #recheck the rejected node once from the root
 | |
|                     if node_token in current_ac_node.transition_table:
 | |
|                         #token matches
 | |
|                         current_ac_node = current_ac_node.transition_table[node_token]
 | |
|                         for fixer in current_ac_node.fixers:
 | |
|                             if not fixer in results.keys():
 | |
|                                 results[fixer] = []
 | |
|                             results[fixer].append(current_ast_node)
 | |
| 
 | |
|                 current_ast_node = current_ast_node.parent
 | |
|         return results
 | |
| 
 | |
|     def print_ac(self):
 | |
|         "Prints a graphviz diagram of the BM automaton(for debugging)"
 | |
|         print("digraph g{")
 | |
|         def print_node(node):
 | |
|             for subnode_key in node.transition_table.keys():
 | |
|                 subnode = node.transition_table[subnode_key]
 | |
|                 print("%d -> %d [label=%s] //%s" %
 | |
|                       (node.id, subnode.id, type_repr(subnode_key), str(subnode.fixers)))
 | |
|                 if subnode_key == 1:
 | |
|                     print(subnode.content)
 | |
|                 print_node(subnode)
 | |
|         print_node(self.root)
 | |
|         print("}")
 | |
| 
 | |
| # taken from pytree.py for debugging; only used by print_ac
 | |
| _type_reprs = {}
 | |
| def type_repr(type_num):
 | |
|     global _type_reprs
 | |
|     if not _type_reprs:
 | |
|         from .pygram import python_symbols
 | |
|         # printing tokens is possible but not as useful
 | |
|         # from .pgen2 import token // token.__dict__.items():
 | |
|         for name, val in python_symbols.__dict__.items():
 | |
|             if type(val) == int: _type_reprs[val] = name
 | |
|     return _type_reprs.setdefault(type_num, type_num)
 |