mirror of
https://github.com/python/cpython.git
synced 2025-07-24 11:44:31 +00:00
bpo-29104: Fixed parsing backslashes in f-strings. (#490)
This commit is contained in:
parent
d1c3c13fed
commit
0cd7a3f196
3 changed files with 48 additions and 21 deletions
|
@ -361,6 +361,20 @@ f'{a * x()}'"""
|
||||||
self.assertEqual(f'2\x203', '2 3')
|
self.assertEqual(f'2\x203', '2 3')
|
||||||
self.assertEqual(f'\x203', ' 3')
|
self.assertEqual(f'\x203', ' 3')
|
||||||
|
|
||||||
|
with self.assertWarns(DeprecationWarning): # invalid escape sequence
|
||||||
|
value = eval(r"f'\{6*7}'")
|
||||||
|
self.assertEqual(value, '\\42')
|
||||||
|
self.assertEqual(f'\\{6*7}', '\\42')
|
||||||
|
self.assertEqual(fr'\{6*7}', '\\42')
|
||||||
|
|
||||||
|
AMPERSAND = 'spam'
|
||||||
|
# Get the right unicode character (&), or pick up local variable
|
||||||
|
# depending on the number of backslashes.
|
||||||
|
self.assertEqual(f'\N{AMPERSAND}', '&')
|
||||||
|
self.assertEqual(f'\\N{AMPERSAND}', '\\Nspam')
|
||||||
|
self.assertEqual(fr'\N{AMPERSAND}', '\\Nspam')
|
||||||
|
self.assertEqual(f'\\\N{AMPERSAND}', '\\&')
|
||||||
|
|
||||||
def test_misformed_unicode_character_name(self):
|
def test_misformed_unicode_character_name(self):
|
||||||
# These tests are needed because unicode names are parsed
|
# These tests are needed because unicode names are parsed
|
||||||
# differently inside f-strings.
|
# differently inside f-strings.
|
||||||
|
|
|
@ -10,6 +10,8 @@ What's New in Python 3.7.0 alpha 1?
|
||||||
Core and Builtins
|
Core and Builtins
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
- bpo-29104: Fixed parsing backslashes in f-strings.
|
||||||
|
|
||||||
- bpo-27945: Fixed various segfaults with dict when input collections are
|
- bpo-27945: Fixed various segfaults with dict when input collections are
|
||||||
mutated during searching, inserting or comparing. Based on patches by
|
mutated during searching, inserting or comparing. Based on patches by
|
||||||
Duane Griffin and Tim Mitchell.
|
Duane Griffin and Tim Mitchell.
|
||||||
|
|
53
Python/ast.c
53
Python/ast.c
|
@ -4197,9 +4197,11 @@ decode_unicode_with_escapes(struct compiling *c, const node *n, const char *s,
|
||||||
while (s < end) {
|
while (s < end) {
|
||||||
if (*s == '\\') {
|
if (*s == '\\') {
|
||||||
*p++ = *s++;
|
*p++ = *s++;
|
||||||
if (*s & 0x80) {
|
if (s >= end || *s & 0x80) {
|
||||||
strcpy(p, "u005c");
|
strcpy(p, "u005c");
|
||||||
p += 5;
|
p += 5;
|
||||||
|
if (s >= end)
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (*s & 0x80) { /* XXX inefficient */
|
if (*s & 0x80) { /* XXX inefficient */
|
||||||
|
@ -4352,30 +4354,37 @@ fstring_find_literal(const char **str, const char *end, int raw,
|
||||||
brace (which isn't part of a unicode name escape such as
|
brace (which isn't part of a unicode name escape such as
|
||||||
"\N{EULER CONSTANT}"), or the end of the string. */
|
"\N{EULER CONSTANT}"), or the end of the string. */
|
||||||
|
|
||||||
const char *literal_start = *str;
|
const char *s = *str;
|
||||||
const char *literal_end;
|
const char *literal_start = s;
|
||||||
int in_named_escape = 0;
|
|
||||||
int result = 0;
|
int result = 0;
|
||||||
|
|
||||||
assert(*literal == NULL);
|
assert(*literal == NULL);
|
||||||
for (; *str < end; (*str)++) {
|
while (s < end) {
|
||||||
char ch = **str;
|
char ch = *s++;
|
||||||
if (!in_named_escape && ch == '{' && (*str)-literal_start >= 2 &&
|
if (!raw && ch == '\\' && s < end) {
|
||||||
*(*str-2) == '\\' && *(*str-1) == 'N') {
|
ch = *s++;
|
||||||
in_named_escape = 1;
|
if (ch == 'N') {
|
||||||
} else if (in_named_escape && ch == '}') {
|
if (s < end && *s++ == '{') {
|
||||||
in_named_escape = 0;
|
while (s < end && *s++ != '}') {
|
||||||
} else if (ch == '{' || ch == '}') {
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (ch == '{' && warn_invalid_escape_sequence(c, n, ch) < 0) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (ch == '{' || ch == '}') {
|
||||||
/* Check for doubled braces, but only at the top level. If
|
/* Check for doubled braces, but only at the top level. If
|
||||||
we checked at every level, then f'{0:{3}}' would fail
|
we checked at every level, then f'{0:{3}}' would fail
|
||||||
with the two closing braces. */
|
with the two closing braces. */
|
||||||
if (recurse_lvl == 0) {
|
if (recurse_lvl == 0) {
|
||||||
if (*str+1 < end && *(*str+1) == ch) {
|
if (s < end && *s == ch) {
|
||||||
/* We're going to tell the caller that the literal ends
|
/* We're going to tell the caller that the literal ends
|
||||||
here, but that they should continue scanning. But also
|
here, but that they should continue scanning. But also
|
||||||
skip over the second brace when we resume scanning. */
|
skip over the second brace when we resume scanning. */
|
||||||
literal_end = *str+1;
|
*str = s + 1;
|
||||||
*str += 2;
|
|
||||||
result = 1;
|
result = 1;
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
|
@ -4383,6 +4392,7 @@ fstring_find_literal(const char **str, const char *end, int raw,
|
||||||
/* Where a single '{' is the start of a new expression, a
|
/* Where a single '{' is the start of a new expression, a
|
||||||
single '}' is not allowed. */
|
single '}' is not allowed. */
|
||||||
if (ch == '}') {
|
if (ch == '}') {
|
||||||
|
*str = s - 1;
|
||||||
ast_error(c, n, "f-string: single '}' is not allowed");
|
ast_error(c, n, "f-string: single '}' is not allowed");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
@ -4390,21 +4400,22 @@ fstring_find_literal(const char **str, const char *end, int raw,
|
||||||
/* We're either at a '{', which means we're starting another
|
/* We're either at a '{', which means we're starting another
|
||||||
expression; or a '}', which means we're at the end of this
|
expression; or a '}', which means we're at the end of this
|
||||||
f-string (for a nested format_spec). */
|
f-string (for a nested format_spec). */
|
||||||
|
s--;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
literal_end = *str;
|
*str = s;
|
||||||
assert(*str <= end);
|
assert(s <= end);
|
||||||
assert(*str == end || **str == '{' || **str == '}');
|
assert(s == end || *s == '{' || *s == '}');
|
||||||
done:
|
done:
|
||||||
if (literal_start != literal_end) {
|
if (literal_start != s) {
|
||||||
if (raw)
|
if (raw)
|
||||||
*literal = PyUnicode_DecodeUTF8Stateful(literal_start,
|
*literal = PyUnicode_DecodeUTF8Stateful(literal_start,
|
||||||
literal_end-literal_start,
|
s - literal_start,
|
||||||
NULL, NULL);
|
NULL, NULL);
|
||||||
else
|
else
|
||||||
*literal = decode_unicode_with_escapes(c, n, literal_start,
|
*literal = decode_unicode_with_escapes(c, n, literal_start,
|
||||||
literal_end-literal_start);
|
s - literal_start);
|
||||||
if (!*literal)
|
if (!*literal)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue