Mirror of https://github.com/python/cpython.git
gh-135148: Correctly handle f/t strings with comments and debug expressions (#135198)
Commit ef66fb597b (parent e89923d366)
3 changed files with 83 additions and 20 deletions
Lib/test/test_fstring.py

@@ -1651,6 +1651,18 @@ x = (
+        self.assertEqual(f"{1+2 = # my comment
+ }", '1+2 = \n 3')
+
+        self.assertEqual(f'{""" # booo
+ """=}', '""" # booo\n """=\' # booo\\n \'')
+
+        self.assertEqual(f'{" # nooo "=}', '" # nooo "=\' # nooo \'')
+        self.assertEqual(f'{" \" # nooo \" "=}', '" \\" # nooo \\" "=\' " # nooo " \'')
+
+        self.assertEqual(f'{ # some comment goes here
+ """hello"""=}', ' \n """hello"""=\'hello\'')
+        self.assertEqual(f'{"""# this is not a comment
+ a""" # this is a comment
+ }', '# this is not a comment\n a')
+
         # These next lines contains tabs. Backslash escapes don't
         # work in f-strings.
         # patchcheck doesn't like these tabs. So the only way to test
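Taken together, the new tests pin down the intended behaviour: a '#' that starts a real comment inside a debug replacement field is dropped from the reproduced expression text (the newline that ends it is kept), while a '#' that only appears inside a string literal is preserved. A minimal sketch of that behaviour, assuming a Python 3.12+ interpreter that includes this fix:

    # Runnable sketch of the behaviour the new tests verify; the exact output
    # assumes an interpreter with this fix applied.
    value = 42
    text = f"{value = # this comment is stripped from the debug text
    }"
    print(text)   # prints "value = ", then 42 on the next line (preceded by
                  # whatever indentation comes before the closing brace)

    # A '#' inside a string literal is not a comment and must be preserved:
    print(f'{" # not a comment "=}')   # " # not a comment "=' # not a comment '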
Misc/NEWS.d entry (new file)

@@ -0,0 +1,3 @@
+Fixed a bug where f-string debug expressions (using =) would incorrectly
+strip out parts of strings containing escaped quotes and # characters. Patch
+by Pablo Galindo.
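The case described here is the one exercised by the escaped-quote test above: before the fix, the '#' inside the literal was mistaken for a comment and part of the reproduced debug text was dropped. A small illustration of the fixed output, again assuming a 3.12+ build with this change:

    s = f'{" \" # nooo \" "=}'
    print(s)    # " \" # nooo \" "=' " # nooo " '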
Parser/lexer/lexer.c

@@ -121,38 +121,88 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
     }
     PyObject *res = NULL;

-    // Check if there is a # character in the expression
+    // Look for a # character outside of string literals
     int hash_detected = 0;
+    int in_string = 0;
+    char quote_char = 0;
+
     for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
-        if (tok_mode->last_expr_buffer[i] == '#') {
+        char ch = tok_mode->last_expr_buffer[i];
+
+        // Skip escaped characters
+        if (ch == '\\') {
+            i++;
+            continue;
+        }
+
+        // Handle quotes
+        if (ch == '"' || ch == '\'') {
+            // The following if/else block works because there is an odd number
+            // of quotes in STRING tokens and the lexer only ever reaches this
+            // function with valid STRING tokens.
+            // For example: """hello"""
+            // First quote: in_string = 1
+            // Second quote: in_string = 0
+            // Third quote: in_string = 1
+            if (!in_string) {
+                in_string = 1;
+                quote_char = ch;
+            }
+            else if (ch == quote_char) {
+                in_string = 0;
+            }
+            continue;
+        }
+
+        // Check for # outside strings
+        if (ch == '#' && !in_string) {
             hash_detected = 1;
             break;
         }
     }

+    // If we found a # character in the expression, we need to handle comments
     if (hash_detected) {
-        Py_ssize_t input_length = tok_mode->last_expr_size - tok_mode->last_expr_end;
-        char *result = (char *)PyMem_Malloc((input_length + 1) * sizeof(char));
+        // Allocate buffer for processed result
+        char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char));
         if (!result) {
             return -1;
         }

-        Py_ssize_t i = 0;
-        Py_ssize_t j = 0;
+        Py_ssize_t i = 0;   // Input position
+        Py_ssize_t j = 0;   // Output position
+        in_string = 0;      // Whether we're in a string
+        quote_char = 0;     // Current string quote char

-        for (i = 0, j = 0; i < input_length; i++) {
-            if (tok_mode->last_expr_buffer[i] == '#') {
-                // Skip characters until newline or end of string
-                while (i < input_length && tok_mode->last_expr_buffer[i] != '\0') {
-                    if (tok_mode->last_expr_buffer[i] == '\n') {
-                        result[j++] = tok_mode->last_expr_buffer[i];
-                        break;
-                    }
-                    i++;
-                }
-            } else {
-                result[j++] = tok_mode->last_expr_buffer[i];
-            }
-        }
+        // Process each character
+        while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
+            char ch = tok_mode->last_expr_buffer[i];
+
+            // Handle string quotes
+            if (ch == '"' || ch == '\'') {
+                // See comment above to understand this part
+                if (!in_string) {
+                    in_string = 1;
+                    quote_char = ch;
+                } else if (ch == quote_char) {
+                    in_string = 0;
+                }
+                result[j++] = ch;
+            }
+            // Skip comments
+            else if (ch == '#' && !in_string) {
+                while (i < tok_mode->last_expr_size - tok_mode->last_expr_end &&
+                       tok_mode->last_expr_buffer[i] != '\n') {
+                    i++;
+                }
+                if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
+                    result[j++] = '\n';
+                }
+            }
+            // Copy other chars
+            else {
+                result[j++] = ch;
+            }
+            i++;
+        }

-        result[j] = '\0';
+        result[j] = '\0';  // Null-terminate the result string
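In prose, the new C logic makes two passes over the expression text: a first scan that looks for a '#' outside any string literal (skipping backslash escapes and toggling an in-string flag on each quote), and, only if one is found, a second pass that copies the text while dropping everything from such a '#' up to the end of the line, keeping the newline itself. The following is a rough Python re-implementation of the same idea, for illustration only; the function name is invented, and the real implementation is the C shown above, which works on the raw expression buffer rather than a str.

    def set_debug_expr_text(expr: str) -> str:
        # Rough Python rendering of the two passes in the C function above.
        # Pass 1: is there a '#' outside of every string literal?
        in_string, quote_char, hash_found = False, "", False
        i = 0
        while i < len(expr):
            ch = expr[i]
            if ch == "\\":                  # skip escaped characters
                i += 2
                continue
            if ch in ("'", '"'):            # toggle in/out of a string literal
                if not in_string:
                    in_string, quote_char = True, ch
                elif ch == quote_char:
                    in_string = False
            elif ch == "#" and not in_string:
                hash_found = True
                break
            i += 1
        if not hash_found:                  # nothing to strip: keep text verbatim
            return expr

        # Pass 2: copy the text, dropping '#'-to-end-of-line runs outside strings.
        out, in_string, quote_char, i = [], False, "", 0
        while i < len(expr):
            ch = expr[i]
            if ch in ("'", '"'):
                if not in_string:
                    in_string, quote_char = True, ch
                elif ch == quote_char:
                    in_string = False
                out.append(ch)
            elif ch == "#" and not in_string:
                while i < len(expr) and expr[i] != "\n":
                    i += 1                  # skip the comment body
                if i < len(expr):
                    out.append("\n")        # keep the terminating newline
            else:
                out.append(ch)
            i += 1
        return "".join(out)

    print(repr(set_debug_expr_text('1+2 = # my comment\n  ')))  # '1+2 = \n  '
    print(repr(set_debug_expr_text('" \\" # nooo \\" "=')))     # unchanged: '#' is inside a string

Note that only the first pass skips backslash escapes; when no '#' is found outside a string (as with an escaped quote around a '#'), the buffer is decoded unchanged, which is exactly what the escaped-quote tests rely on.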
@@ -164,11 +214,9 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
             tok_mode->last_expr_size - tok_mode->last_expr_end,
             NULL
         );
-
     }
-
-
-    if (!res) {
+
+    if (!res) {
         return -1;
     }
     token->metadata = res;