gh-135148: Correctly handle f/t strings with comments and debug expressions (#135198)

This commit is contained in:
Pablo Galindo Salgado 2025-07-16 11:47:13 +02:00 committed by GitHub
parent e89923d366
commit ef66fb597b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 83 additions and 20 deletions

View file

@ -1651,6 +1651,18 @@ x = (
self.assertEqual(f"{1+2 = # my comment
}", '1+2 = \n 3')
self.assertEqual(f'{""" # booo
"""=}', '""" # booo\n """=\' # booo\\n \'')
self.assertEqual(f'{" # nooo "=}', '" # nooo "=\' # nooo \'')
self.assertEqual(f'{" \" # nooo \" "=}', '" \\" # nooo \\" "=\' " # nooo " \'')
self.assertEqual(f'{ # some comment goes here
"""hello"""=}', ' \n """hello"""=\'hello\'')
self.assertEqual(f'{"""# this is not a comment
a""" # this is a comment
}', '# this is not a comment\n a')
# These next lines contain tabs. Backslash escapes don't
# work in f-strings.
# patchcheck doesn't like these tabs. So the only way to test

View file

@ -0,0 +1,3 @@
Fixed a bug where f-string debug expressions (using =) would incorrectly
strip out parts of strings containing escaped quotes and # characters. Patch
by Pablo Galindo.

View file

@ -121,38 +121,88 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
}
PyObject *res = NULL;
// Check if there is a # character in the expression
// Look for a # character outside of string literals
int hash_detected = 0;
int in_string = 0;
char quote_char = 0;
for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
if (tok_mode->last_expr_buffer[i] == '#') {
char ch = tok_mode->last_expr_buffer[i];
// Skip escaped characters
if (ch == '\\') {
i++;
continue;
}
// Handle quotes
if (ch == '"' || ch == '\'') {
// The following if/else block works because there is an odd number
// of quotes in STRING tokens and the lexer only ever reaches this
// function with valid STRING tokens.
// For example: """hello"""
// First quote: in_string = 1
// Second quote: in_string = 0
// Third quote: in_string = 1
if (!in_string) {
in_string = 1;
quote_char = ch;
}
else if (ch == quote_char) {
in_string = 0;
}
continue;
}
// Check for # outside strings
if (ch == '#' && !in_string) {
hash_detected = 1;
break;
}
}
// If we found a # character in the expression, we need to handle comments
if (hash_detected) {
Py_ssize_t input_length = tok_mode->last_expr_size - tok_mode->last_expr_end;
char *result = (char *)PyMem_Malloc((input_length + 1) * sizeof(char));
// Allocate buffer for processed result
char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char));
if (!result) {
return -1;
}
Py_ssize_t i = 0;
Py_ssize_t j = 0;
Py_ssize_t i = 0; // Input position
Py_ssize_t j = 0; // Output position
in_string = 0; // Whether we're in a string
quote_char = 0; // Current string quote char
for (i = 0, j = 0; i < input_length; i++) {
if (tok_mode->last_expr_buffer[i] == '#') {
// Skip characters until newline or end of string
while (i < input_length && tok_mode->last_expr_buffer[i] != '\0') {
if (tok_mode->last_expr_buffer[i] == '\n') {
result[j++] = tok_mode->last_expr_buffer[i];
break;
}
// Process each character
while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
char ch = tok_mode->last_expr_buffer[i];
// Handle string quotes
if (ch == '"' || ch == '\'') {
// See comment above to understand this part
if (!in_string) {
in_string = 1;
quote_char = ch;
} else if (ch == quote_char) {
in_string = 0;
}
result[j++] = ch;
}
// Skip comments
else if (ch == '#' && !in_string) {
while (i < tok_mode->last_expr_size - tok_mode->last_expr_end &&
tok_mode->last_expr_buffer[i] != '\n') {
i++;
}
} else {
result[j++] = tok_mode->last_expr_buffer[i];
if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
result[j++] = '\n';
}
}
// Copy other chars
else {
result[j++] = ch;
}
i++;
}
result[j] = '\0'; // Null-terminate the result string
@ -164,11 +214,9 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
tok_mode->last_expr_size - tok_mode->last_expr_end,
NULL
);
}
if (!res) {
if (!res) {
return -1;
}
token->metadata = res;