mirror of
https://github.com/python/cpython.git
synced 2025-08-02 16:13:13 +00:00
Issue #18037: 2to3 now escapes '\u' and '\U' in native strings.
This commit is contained in:
parent
2a8b3f26b9
commit
def0a4c298
3 changed files with 64 additions and 7 deletions
|
@ -1,25 +1,43 @@
|
||||||
"""Fixer that changes unicode to str, unichr to chr, and u"..." into "...".
|
r"""Fixer for unicode.
|
||||||
|
|
||||||
|
* Changes unicode to str and unichr to chr.
|
||||||
|
|
||||||
|
* If "...\u..." is not a unicode literal, change it into "...\\u...".
|
||||||
|
|
||||||
|
* Change u"..." into "...".
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import re
|
|
||||||
from ..pgen2 import token
|
from ..pgen2 import token
|
||||||
from .. import fixer_base
|
from .. import fixer_base
|
||||||
|
|
||||||
_mapping = {"unichr" : "chr", "unicode" : "str"}
|
_mapping = {"unichr" : "chr", "unicode" : "str"}
|
||||||
_literal_re = re.compile(r"[uU][rR]?[\'\"]")
|
|
||||||
|
|
||||||
class FixUnicode(fixer_base.BaseFix):
|
class FixUnicode(fixer_base.BaseFix):
|
||||||
BM_compatible = True
|
BM_compatible = True
|
||||||
PATTERN = "STRING | 'unicode' | 'unichr'"
|
PATTERN = "STRING | 'unicode' | 'unichr'"
|
||||||
|
|
||||||
|
def start_tree(self, tree, filename):
|
||||||
|
super(FixUnicode, self).start_tree(tree, filename)
|
||||||
|
self.unicode_literals = 'unicode_literals' in tree.future_features
|
||||||
|
|
||||||
def transform(self, node, results):
|
def transform(self, node, results):
|
||||||
if node.type == token.NAME:
|
if node.type == token.NAME:
|
||||||
new = node.clone()
|
new = node.clone()
|
||||||
new.value = _mapping[node.value]
|
new.value = _mapping[node.value]
|
||||||
return new
|
return new
|
||||||
elif node.type == token.STRING:
|
elif node.type == token.STRING:
|
||||||
if _literal_re.match(node.value):
|
val = node.value
|
||||||
|
if (not self.unicode_literals and val[0] in 'rR\'"' and
|
||||||
|
'\\' in val):
|
||||||
|
val = r'\\'.join([
|
||||||
|
v.replace('\\u', r'\\u').replace('\\U', r'\\U')
|
||||||
|
for v in val.split(r'\\')
|
||||||
|
])
|
||||||
|
if val[0] in 'uU':
|
||||||
|
val = val[1:]
|
||||||
|
if val == node.value:
|
||||||
|
return node
|
||||||
new = node.clone()
|
new = node.clone()
|
||||||
new.value = new.value[1:]
|
new.value = val
|
||||||
return new
|
return new
|
||||||
|
|
|
@ -2824,6 +2824,43 @@ class Test_unicode(FixerTestCase):
|
||||||
a = """R'''x''' """
|
a = """R'''x''' """
|
||||||
self.check(b, a)
|
self.check(b, a)
|
||||||
|
|
||||||
|
def test_native_literal_escape_u(self):
|
||||||
|
b = r"""'\\\u20ac\U0001d121\\u20ac'"""
|
||||||
|
a = r"""'\\\\u20ac\\U0001d121\\u20ac'"""
|
||||||
|
self.check(b, a)
|
||||||
|
|
||||||
|
b = r"""r'\\\u20ac\U0001d121\\u20ac'"""
|
||||||
|
a = r"""r'\\\\u20ac\\U0001d121\\u20ac'"""
|
||||||
|
self.check(b, a)
|
||||||
|
|
||||||
|
def test_bytes_literal_escape_u(self):
|
||||||
|
b = r"""b'\\\u20ac\U0001d121\\u20ac'"""
|
||||||
|
a = r"""b'\\\u20ac\U0001d121\\u20ac'"""
|
||||||
|
self.check(b, a)
|
||||||
|
|
||||||
|
b = r"""br'\\\u20ac\U0001d121\\u20ac'"""
|
||||||
|
a = r"""br'\\\u20ac\U0001d121\\u20ac'"""
|
||||||
|
self.check(b, a)
|
||||||
|
|
||||||
|
def test_unicode_literal_escape_u(self):
|
||||||
|
b = r"""u'\\\u20ac\U0001d121\\u20ac'"""
|
||||||
|
a = r"""'\\\u20ac\U0001d121\\u20ac'"""
|
||||||
|
self.check(b, a)
|
||||||
|
|
||||||
|
b = r"""ur'\\\u20ac\U0001d121\\u20ac'"""
|
||||||
|
a = r"""r'\\\u20ac\U0001d121\\u20ac'"""
|
||||||
|
self.check(b, a)
|
||||||
|
|
||||||
|
def test_native_unicode_literal_escape_u(self):
|
||||||
|
f = 'from __future__ import unicode_literals\n'
|
||||||
|
b = f + r"""'\\\u20ac\U0001d121\\u20ac'"""
|
||||||
|
a = f + r"""'\\\u20ac\U0001d121\\u20ac'"""
|
||||||
|
self.check(b, a)
|
||||||
|
|
||||||
|
b = f + r"""r'\\\u20ac\U0001d121\\u20ac'"""
|
||||||
|
a = f + r"""r'\\\u20ac\U0001d121\\u20ac'"""
|
||||||
|
self.check(b, a)
|
||||||
|
|
||||||
class Test_callable(FixerTestCase):
|
class Test_callable(FixerTestCase):
|
||||||
fixer = "callable"
|
fixer = "callable"
|
||||||
|
|
||||||
|
|
|
@ -71,6 +71,8 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #18037: 2to3 now escapes '\u' and '\U' in native strings.
|
||||||
|
|
||||||
- Issue #19137: The pprint module now correctly formats instances of set and
|
- Issue #19137: The pprint module now correctly formats instances of set and
|
||||||
frozenset subclasses.
|
frozenset subclasses.
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue