mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 02:15:10 +00:00 
			
		
		
		
	 3059b00f65
			
		
	
	
		3059b00f65
		
	
	
	
	
		
			
			svn+ssh://pythondev@svn.python.org/python/trunk
................
  r74114 | benjamin.peterson | 2009-07-20 10:33:09 -0500 (Mon, 20 Jul 2009) | 110 lines
  Merged revisions 73771,73811,73840,73842,73848-73849,73861,73957-73960,73964-73969,73972-73974,73977,73981,73984,74065,74113 via svnmerge from
  svn+ssh://pythondev@svn.python.org/sandbox/trunk/2to3/lib2to3
  ........
    r73771 | benjamin.peterson | 2009-07-02 10:56:55 -0500 (Thu, 02 Jul 2009) | 1 line
    force the imports fixer to be run after the import one #6400
  ........
    r73811 | benjamin.peterson | 2009-07-03 09:03:14 -0500 (Fri, 03 Jul 2009) | 1 line
    check for sep, not pathsep when looking for a subpackage #6408
  ........
    r73840 | benjamin.peterson | 2009-07-04 09:52:28 -0500 (Sat, 04 Jul 2009) | 1 line
    don't print diffs by default; it's annoying
  ........
    r73842 | benjamin.peterson | 2009-07-04 09:58:46 -0500 (Sat, 04 Jul 2009) | 1 line
    complain when not showing diffs or writing
  ........
    r73848 | alexandre.vassalotti | 2009-07-04 23:38:19 -0500 (Sat, 04 Jul 2009) | 2 lines
    Fix test_refactor_stdin to handle print_output() method with 4 arguments.
  ........
    r73849 | alexandre.vassalotti | 2009-07-04 23:43:18 -0500 (Sat, 04 Jul 2009) | 5 lines
    Issue 2370: Add fixer for the removal of operator.isCallable() and
    operator.sequenceIncludes().
    Patch contributed by Jeff Balogh (and updated by me).
  ........
    r73861 | benjamin.peterson | 2009-07-05 09:15:53 -0500 (Sun, 05 Jul 2009) | 1 line
    cleanup and use unicode where appropiate
  ........
    r73957 | benjamin.peterson | 2009-07-11 15:49:56 -0500 (Sat, 11 Jul 2009) | 1 line
    fix calls to str() with unicode()
  ........
    r73958 | benjamin.peterson | 2009-07-11 15:51:51 -0500 (Sat, 11 Jul 2009) | 1 line
    more str() -> unicode()
  ........
    r73959 | benjamin.peterson | 2009-07-11 16:40:08 -0500 (Sat, 11 Jul 2009) | 1 line
    add tests for refactor_dir()
  ........
    r73960 | benjamin.peterson | 2009-07-11 16:44:32 -0500 (Sat, 11 Jul 2009) | 1 line
    don't parse files just because they end with 'py' (no dot)
  ........
    r73964 | benjamin.peterson | 2009-07-11 17:30:15 -0500 (Sat, 11 Jul 2009) | 1 line
    simplify
  ........
    r73965 | benjamin.peterson | 2009-07-11 17:31:30 -0500 (Sat, 11 Jul 2009) | 1 line
    remove usage of get_prefix()
  ........
    r73966 | benjamin.peterson | 2009-07-11 17:33:35 -0500 (Sat, 11 Jul 2009) | 1 line
    revert unintended change in 73965
  ........
    r73967 | benjamin.peterson | 2009-07-11 17:34:44 -0500 (Sat, 11 Jul 2009) | 1 line
    avoid expensive checks and assume the node did change
  ........
    r73968 | benjamin.peterson | 2009-07-11 20:46:46 -0500 (Sat, 11 Jul 2009) | 1 line
    use a regular dict for the heads to avoid adding lists in the loop
  ........
    r73969 | benjamin.peterson | 2009-07-11 20:50:43 -0500 (Sat, 11 Jul 2009) | 1 line
    prefix headnode functions with '_'
  ........
    r73972 | benjamin.peterson | 2009-07-11 21:25:45 -0500 (Sat, 11 Jul 2009) | 1 line
    try to make the head node dict as sparse as possible
  ........
    r73973 | benjamin.peterson | 2009-07-11 21:59:49 -0500 (Sat, 11 Jul 2009) | 1 line
    a better idea; add an option to *not* print diffs
  ........
    r73974 | benjamin.peterson | 2009-07-11 22:00:29 -0500 (Sat, 11 Jul 2009) | 1 line
    add space
  ........
    r73977 | benjamin.peterson | 2009-07-12 10:16:07 -0500 (Sun, 12 Jul 2009) | 1 line
    update get_headnode_dict tests for recent changes
  ........
    r73981 | benjamin.peterson | 2009-07-12 12:06:39 -0500 (Sun, 12 Jul 2009) | 4 lines
    detect when "from __future__ import print_function" is given
    Deprecate the 'print_function' option and the -p flag
  ........
    r73984 | benjamin.peterson | 2009-07-12 16:16:37 -0500 (Sun, 12 Jul 2009) | 1 line
    add tests for Call; thanks Joe Amenta
  ........
    r74065 | benjamin.peterson | 2009-07-17 12:52:49 -0500 (Fri, 17 Jul 2009) | 1 line
    pathname2url and url2pathname are in urllib.request not urllib.parse #6496
  ........
    r74113 | benjamin.peterson | 2009-07-20 08:56:57 -0500 (Mon, 20 Jul 2009) | 1 line
    fix deprecation warnings in tests
  ........
................
		
	
			
		
			
				
	
	
		
			184 lines
		
	
	
	
		
			5.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			184 lines
		
	
	
	
		
			5.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
 | |
| # Licensed to PSF under a Contributor Agreement.
 | |
| 
 | |
| """This module defines the data structures used to represent a grammar.
 | |
| 
 | |
| These are a bit arcane because they are derived from the data
 | |
| structures used by Python's 'pgen' parser generator.
 | |
| 
 | |
| There's also a table here mapping operators to their names in the
 | |
| token module; the Python tokenize module reports all operators as the
 | |
| fallback token code OP, but the parser needs the actual token code.
 | |
| 
 | |
| """
 | |
| 
 | |
| # Python imports
 | |
| import pickle
 | |
| 
 | |
| # Local imports
 | |
| from . import token, tokenize
 | |
| 
 | |
| 
 | |
class Grammar(object):
    """Pgen parsing tables conversion class.

    Once initialized, this class supplies the grammar tables for the
    parsing engine implemented by parse.py.  The parsing engine
    accesses the instance variables directly.  The class here does not
    provide initialization of the tables; several subclasses exist to
    do this (see the conv and pgen modules).

    The load() method reads the tables from a pickle file, which is
    much faster than the other ways offered by subclasses.  The pickle
    file is written by calling dump() (after loading the grammar
    tables using a subclass).  The report() method prints a readable
    representation of the tables to stdout, for debugging.

    The instance variables are as follows:

    symbol2number -- a dict mapping symbol names to numbers.  Symbol
                     numbers are always 256 or higher, to distinguish
                     them from token numbers, which are between 0 and
                     255 (inclusive).

    number2symbol -- a dict mapping numbers to symbol names;
                     these two are each other's inverse.

    states        -- a list of DFAs, where each DFA is a list of
                     states, each state is a list of arcs, and each
                     arc is a (i, j) pair where i is a label and j is
                     a state number.  The DFA number is the index into
                     this list.  (This name is slightly confusing.)
                     Final states are represented by a special arc of
                     the form (0, j) where j is its own state number.

    dfas          -- a dict mapping symbol numbers to (DFA, first)
                     pairs, where DFA is an item from the states list
                     above, and first is a set of tokens that can
                     begin this grammar rule (represented by a dict
                     whose values are always 1).

    labels        -- a list of (x, y) pairs where x is either a token
                     number or a symbol number, and y is either None
                     or a string; the strings are keywords.  The label
                     number is the index in this list; label numbers
                     are used to mark state transitions (arcs) in the
                     DFAs.

    start         -- the number of the grammar's start symbol.

    keywords      -- a dict mapping keyword strings to arc labels.

    tokens        -- a dict mapping token numbers to arc labels.

    symbol2label  -- a dict mapping symbol names to labels; filled in
                     by the table-building subclasses (empty here).

    """

    def __init__(self):
        self.symbol2number = {}
        self.number2symbol = {}
        self.states = []
        self.dfas = {}
        # Label 0 is reserved as the EMPTY label.
        self.labels = [(0, "EMPTY")]
        self.keywords = {}
        self.tokens = {}
        self.symbol2label = {}
        # 256 is the first symbol number (token numbers stop at 255).
        self.start = 256

    def dump(self, filename):
        """Dump the grammar tables to a pickle file."""
        # "with" guarantees the file is closed even if pickle.dump raises.
        with open(filename, "wb") as f:
            pickle.dump(self.__dict__, f, 2)

    def load(self, filename):
        """Load the grammar tables from a pickle file."""
        with open(filename, "rb") as f:
            d = pickle.load(f)
        self.__dict__.update(d)

    def copy(self):
        """Return an independent copy of the grammar.

        The dict and list attributes are shallow-copied so the copy
        can be mutated without affecting the original.
        """
        new = self.__class__()
        for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords",
                          "tokens", "symbol2label"):
            setattr(new, dict_attr, getattr(self, dict_attr).copy())
        new.labels = self.labels[:]
        new.states = self.states[:]
        new.start = self.start
        return new

    def report(self):
        """Dump the grammar tables to standard output, for debugging."""
        from pprint import pprint
        print("s2n")
        pprint(self.symbol2number)
        print("n2s")
        pprint(self.number2symbol)
        print("states")
        pprint(self.states)
        print("dfas")
        pprint(self.dfas)
        print("labels")
        pprint(self.labels)
        print("start", self.start)
 | |
| 
 | |
| 
 | |
# Map from operator to number (since tokenize doesn't do this)

opmap_raw = """
( LPAR
) RPAR
[ LSQB
] RSQB
: COLON
, COMMA
; SEMI
+ PLUS
- MINUS
* STAR
/ SLASH
| VBAR
& AMPER
< LESS
> GREATER
= EQUAL
. DOT
% PERCENT
` BACKQUOTE
{ LBRACE
} RBRACE
@ AT
== EQEQUAL
!= NOTEQUAL
<> NOTEQUAL
<= LESSEQUAL
>= GREATEREQUAL
~ TILDE
^ CIRCUMFLEX
<< LEFTSHIFT
>> RIGHTSHIFT
** DOUBLESTAR
+= PLUSEQUAL
-= MINEQUAL
*= STAREQUAL
/= SLASHEQUAL
%= PERCENTEQUAL
&= AMPEREQUAL
|= VBAREQUAL
^= CIRCUMFLEXEQUAL
<<= LEFTSHIFTEQUAL
>>= RIGHTSHIFTEQUAL
**= DOUBLESTAREQUAL
// DOUBLESLASH
//= DOUBLESLASHEQUAL
-> RARROW
"""

# Build the operator -> token-number table; filter(None, ...) drops the
# empty lines produced by the leading/trailing newlines of the literal.
opmap = {}
for entry in filter(None, opmap_raw.splitlines()):
    operator_text, token_name = entry.split()
    opmap[operator_text] = getattr(token, token_name)
 |