Merged revisions 84364 via svnmerge from

svn+ssh://pythondev@svn.python.org/python/branches/py3k

........
  r84364 | benjamin.peterson | 2010-08-30 09:41:20 -0500 (Mon, 30 Aug 2010) | 1 line

  handle names starting with non-ascii characters correctly #9712
........
Benjamin Peterson 2010-08-30 14:44:53 +00:00
parent f8a08d9d36
commit 66428b2e5d
3 changed files with 25 additions and 5 deletions

Lib/test/test_tokenize.py

@@ -533,6 +533,7 @@ pass the '-ucompiler' option to process the full directory.
 True
 Evil tabs
 >>> dump_tokens("def f():\\n\\tif x\\n \\tpass")
 ENCODING 'utf-8' (0, 0) (0, 0)
 NAME 'def' (1, 0) (1, 3)
@@ -549,6 +550,18 @@ Evil tabs
 NAME 'pass' (3, 9) (3, 13)
 DEDENT '' (4, 0) (4, 0)
 DEDENT '' (4, 0) (4, 0)
+Non-ascii identifiers
+>>> dump_tokens("Örter = 'places'\\ngrün = 'green'")
+ENCODING 'utf-8' (0, 0) (0, 0)
+NAME 'Örter' (1, 0) (1, 5)
+OP '=' (1, 6) (1, 7)
+STRING "'places'" (1, 8) (1, 16)
+NEWLINE '\\n' (1, 16) (1, 17)
+NAME 'grün' (2, 0) (2, 4)
+OP '=' (2, 5) (2, 6)
+STRING "'green'" (2, 7) (2, 14)
 """
 from test import support
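
A minimal standalone sketch of what the added doctest verifies, using the
public tokenize API rather than the test suite's dump_tokens() helper:

    # Identifiers that begin with a non-ascii character must come back as
    # NAME tokens once the fix is in place.
    from io import BytesIO
    from tokenize import tokenize, NAME

    source = "Örter = 'places'\ngrün = 'green'\n".encode('utf-8')
    names = [tok.string for tok in tokenize(BytesIO(source).readline)
             if tok.type == NAME]
    assert names == ['Örter', 'grün'], names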

Lib/tokenize.py

@@ -92,7 +92,7 @@ def maybe(*choices): return group(*choices) + '?'
 Whitespace = r'[ \f\t]*'
 Comment = r'#[^\r\n]*'
 Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
-Name = r'[a-zA-Z_]\w*'
+Name = r'\w+'
 Hexnumber = r'0[xX][0-9a-fA-F]+'
 Binnumber = r'0[bB][01]+'
@@ -142,9 +142,12 @@ ContStr = group(r"[bB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
 PseudoExtras = group(r'\\\r?\n', Comment, Triple)
 PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
+def _compile(expr):
+    return re.compile(expr, re.UNICODE)
 tokenprog, pseudoprog, single3prog, double3prog = map(
-    re.compile, (Token, PseudoToken, Single3, Double3))
-endprogs = {"'": re.compile(Single), '"': re.compile(Double),
+    _compile, (Token, PseudoToken, Single3, Double3))
+endprogs = {"'": _compile(Single), '"': _compile(Double),
             "'''": single3prog, '"""': double3prog,
             "r'''": single3prog, 'r"""': double3prog,
             "b'''": single3prog, 'b"""': double3prog,
@@ -171,6 +174,8 @@ for t in ("'", '"',
           "bR'", 'bR"', "BR'", 'BR"' ):
     single_quoted[t] = t
+del _compile
 tabsize = 8
 class TokenError(Exception): pass
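
These regex changes are the core of the fix: the old Name pattern required the
first character to be an ASCII letter or underscore, so an identifier such as
Örter never matched, whereas \w+ accepts any word character. The new _compile()
helper passes re.UNICODE explicitly (it is already the default for str patterns
in Python 3). A quick illustration, separate from the patch itself:

    import re

    old_name = re.compile(r'[a-zA-Z_]\w*')     # pattern before this commit
    new_name = re.compile(r'\w+', re.UNICODE)  # pattern after this commit

    print(old_name.match("grün"))    # matches: only the first char had to be ASCII
    print(old_name.match("Örter"))   # None: 'Ö' is not in [a-zA-Z_]
    print(new_name.match("Örter"))   # matches the whole identifier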
@@ -392,7 +397,7 @@ def tokenize(readline):
 def _tokenize(readline, encoding):
     lnum = parenlev = continued = 0
-    namechars, numchars = string.ascii_letters + '_', '0123456789'
+    numchars = '0123456789'
     contstr, needcont = '', 0
     contline = None
     indents = [0]
@@ -516,7 +521,7 @@ def _tokenize(readline, encoding):
                     break
                 else:                                  # ordinary string
                     yield TokenInfo(STRING, token, spos, epos, line)
-                elif initial in namechars:             # ordinary name
+                elif initial.isidentifier():           # ordinary name
                     yield TokenInfo(NAME, token, spos, epos, line)
                 elif initial == '\\':                  # continued stmt
                     continued = 1
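
Alongside the regex, the classification step no longer checks a token's first
character against an ASCII-only namechars string; it asks the character itself
via str.isidentifier(). Roughly, the difference the change makes:

    import string

    namechars = string.ascii_letters + '_'    # the old lookup set
    for ch in ('x', '_', 'Ö', 'ü', '3'):
        print(ch, ch in namechars, ch.isidentifier())
    # 'Ö' and 'ü' may start an identifier in Python 3 but are not in the
    # ASCII-only set; '3' is rejected by both checks, as it should be.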

Misc/NEWS

@@ -14,6 +14,8 @@ Core and Builtins
 - Restore GIL in nis_cat in case of error.
+- Issue #9712: Fix tokenize on identifiers that start with non-ascii names.
 - Issue #9688: __basicsize__ and __itemsize__ must be accessed as Py_ssize_t.
 - Issue #5319: Print an error if flushing stdout fails at interpreter