[3.12] gh-121130: Fix f-string format specifiers with debug expressions (GH-121150) (#122063)

This commit is contained in:
Pablo Galindo Salgado 2024-07-20 19:05:01 +02:00 committed by GitHub
parent ca531e4326
commit a9daa4fd04
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 8569 additions and 5795 deletions

View file

@ -302,9 +302,7 @@ Literals
Name(id='a', ctx=Load())], Name(id='a', ctx=Load())],
keywords=[]), keywords=[]),
conversion=-1, conversion=-1,
format_spec=JoinedStr( format_spec=Constant(value='.3'))]))
values=[
Constant(value='.3')]))]))
.. class:: List(elts, ctx) .. class:: List(elts, ctx)

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,2 @@
Fix f-strings with debug expressions in format specifiers. Patch by Pablo
Galindo.

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -14,133 +14,136 @@ extern "C" {
#define MAXLEVEL 200 /* Max parentheses level */ #define MAXLEVEL 200 /* Max parentheses level */
#define MAXFSTRINGLEVEL 150 /* Max f-string nesting level */ #define MAXFSTRINGLEVEL 150 /* Max f-string nesting level */
enum decoding_state { enum decoding_state { STATE_INIT, STATE_SEEK_CODING, STATE_NORMAL };
STATE_INIT,
STATE_SEEK_CODING,
STATE_NORMAL
};
enum interactive_underflow_t { enum interactive_underflow_t {
/* Normal mode of operation: return a new token when asked in interactive mode */ /* Normal mode of operation: return a new token when asked in interactive mode
IUNDERFLOW_NORMAL, */
/* Forcefully return ENDMARKER when asked for a new token in interactive mode. This IUNDERFLOW_NORMAL,
* can be used to prevent the tokenizer to prompt the user for new tokens */ /* Forcefully return ENDMARKER when asked for a new token in interactive mode.
IUNDERFLOW_STOP, * This can be used to prevent the tokenizer to prompt the user for new tokens
*/
IUNDERFLOW_STOP,
}; };
struct token { struct token {
int level; int level;
int lineno, col_offset, end_lineno, end_col_offset; int lineno, col_offset, end_lineno, end_col_offset;
const char *start, *end; const char *start, *end;
PyObject *metadata; PyObject *metadata;
}; };
enum tokenizer_mode_kind_t { enum tokenizer_mode_kind_t {
TOK_REGULAR_MODE, TOK_REGULAR_MODE,
TOK_FSTRING_MODE, TOK_FSTRING_MODE,
}; };
#define MAX_EXPR_NESTING 3 #define MAX_EXPR_NESTING 3
typedef struct _tokenizer_mode { typedef struct _tokenizer_mode {
enum tokenizer_mode_kind_t kind; enum tokenizer_mode_kind_t kind;
int curly_bracket_depth; int curly_bracket_depth;
int curly_bracket_expr_start_depth; int curly_bracket_expr_start_depth;
char f_string_quote; char f_string_quote;
int f_string_quote_size; int f_string_quote_size;
int f_string_raw; int f_string_raw;
const char* f_string_start; const char *f_string_start;
const char* f_string_multi_line_start; const char *f_string_multi_line_start;
int f_string_line_start; int f_string_line_start;
Py_ssize_t f_string_start_offset; Py_ssize_t f_string_start_offset;
Py_ssize_t f_string_multi_line_start_offset; Py_ssize_t f_string_multi_line_start_offset;
Py_ssize_t last_expr_size; Py_ssize_t last_expr_size;
Py_ssize_t last_expr_end; Py_ssize_t last_expr_end;
char* last_expr_buffer; char *last_expr_buffer;
int f_string_debug; int f_string_debug;
int in_format_spec;
} tokenizer_mode; } tokenizer_mode;
/* Tokenizer state */ /* Tokenizer state */
struct tok_state { struct tok_state {
/* Input state; buf <= cur <= inp <= end */ /* Input state; buf <= cur <= inp <= end */
/* NB an entire line is held in the buffer */ /* NB an entire line is held in the buffer */
char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL or readline != NULL */ char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL or readline !=
char *cur; /* Next character in buffer */ NULL */
char *inp; /* End of data in buffer */ char *cur; /* Next character in buffer */
int fp_interactive; /* If the file descriptor is interactive */ char *inp; /* End of data in buffer */
char *interactive_src_start; /* The start of the source parsed so far in interactive mode */ int fp_interactive; /* If the file descriptor is interactive */
char *interactive_src_end; /* The end of the source parsed so far in interactive mode */ char *interactive_src_start; /* The start of the source parsed so far in
const char *end; /* End of input buffer if buf != NULL */ interactive mode */
const char *start; /* Start of current token if not NULL */ char *interactive_src_end; /* The end of the source parsed so far in
int done; /* E_OK normally, E_EOF at EOF, otherwise error code */ interactive mode */
/* NB If done != E_OK, cur must be == inp!!! */ const char *end; /* End of input buffer if buf != NULL */
FILE *fp; /* Rest of input; NULL if tokenizing a string */ const char *start; /* Start of current token if not NULL */
int tabsize; /* Tab spacing */ int done; /* E_OK normally, E_EOF at EOF, otherwise error code */
int indent; /* Current indentation index */ /* NB If done != E_OK, cur must be == inp!!! */
int indstack[MAXINDENT]; /* Stack of indents */ FILE *fp; /* Rest of input; NULL if tokenizing a string */
int atbol; /* Nonzero if at begin of new line */ int tabsize; /* Tab spacing */
int pendin; /* Pending indents (if > 0) or dedents (if < 0) */ int indent; /* Current indentation index */
const char *prompt, *nextprompt; /* For interactive prompting */ int indstack[MAXINDENT]; /* Stack of indents */
int lineno; /* Current line number */ int atbol; /* Nonzero if at begin of new line */
int first_lineno; /* First line of a single line or multi line string int pendin; /* Pending indents (if > 0) or dedents (if < 0) */
expression (cf. issue 16806) */ const char *prompt, *nextprompt; /* For interactive prompting */
int starting_col_offset; /* The column offset at the beginning of a token */ int lineno; /* Current line number */
int col_offset; /* Current col offset */ int first_lineno; /* First line of a single line or multi line string
int level; /* () [] {} Parentheses nesting level */ expression (cf. issue 16806) */
/* Used to allow free continuations inside them */ int starting_col_offset; /* The column offset at the beginning of a token */
char parenstack[MAXLEVEL]; int col_offset; /* Current col offset */
int parenlinenostack[MAXLEVEL]; int level; /* () [] {} Parentheses nesting level */
int parencolstack[MAXLEVEL]; /* Used to allow free continuations inside them */
PyObject *filename; char parenstack[MAXLEVEL];
/* Stuff for checking on different tab sizes */ int parenlinenostack[MAXLEVEL];
int altindstack[MAXINDENT]; /* Stack of alternate indents */ int parencolstack[MAXLEVEL];
/* Stuff for PEP 0263 */ PyObject *filename;
enum decoding_state decoding_state; /* Stuff for checking on different tab sizes */
int decoding_erred; /* whether erred in decoding */ int altindstack[MAXINDENT]; /* Stack of alternate indents */
char *encoding; /* Source encoding. */ /* Stuff for PEP 0263 */
int cont_line; /* whether we are in a continuation line. */ enum decoding_state decoding_state;
const char* line_start; /* pointer to start of current line */ int decoding_erred; /* whether erred in decoding */
const char* multi_line_start; /* pointer to start of first line of char *encoding; /* Source encoding. */
a single line or multi line string int cont_line; /* whether we are in a continuation line. */
expression (cf. issue 16806) */ const char *line_start; /* pointer to start of current line */
PyObject *decoding_readline; /* open(...).readline */ const char *multi_line_start; /* pointer to start of first line of
PyObject *decoding_buffer; a single line or multi line string
PyObject *readline; /* readline() function */ expression (cf. issue 16806) */
const char* enc; /* Encoding for the current str. */ PyObject *decoding_readline; /* open(...).readline */
char* str; /* Source string being tokenized (if tokenizing from a string)*/ PyObject *decoding_buffer;
char* input; /* Tokenizer's newline translated copy of the string. */ PyObject *readline; /* readline() function */
const char *enc; /* Encoding for the current str. */
char *str; /* Source string being tokenized (if tokenizing from a string)*/
char *input; /* Tokenizer's newline translated copy of the string. */
int type_comments; /* Whether to look for type comments */ int type_comments; /* Whether to look for type comments */
/* async/await related fields (still needed depending on feature_version) */ /* async/await related fields (still needed depending on feature_version) */
int async_hacks; /* =1 if async/await aren't always keywords */ int async_hacks; /* =1 if async/await aren't always keywords */
int async_def; /* =1 if tokens are inside an 'async def' body. */ int async_def; /* =1 if tokens are inside an 'async def' body. */
int async_def_indent; /* Indentation level of the outermost 'async def'. */ int async_def_indent; /* Indentation level of the outermost 'async def'. */
int async_def_nl; /* =1 if the outermost 'async def' had at least one int async_def_nl; /* =1 if the outermost 'async def' had at least one
NEWLINE token after it. */ NEWLINE token after it. */
/* How to proceed when asked for a new token in interactive mode */ /* How to proceed when asked for a new token in interactive mode */
enum interactive_underflow_t interactive_underflow; enum interactive_underflow_t interactive_underflow;
int report_warnings; int report_warnings;
// TODO: Factor this into its own thing // TODO: Factor this into its own thing
tokenizer_mode tok_mode_stack[MAXFSTRINGLEVEL]; tokenizer_mode tok_mode_stack[MAXFSTRINGLEVEL];
int tok_mode_stack_index; int tok_mode_stack_index;
int tok_extra_tokens; int tok_extra_tokens;
int comment_newline; int comment_newline;
int implicit_newline; int implicit_newline;
#ifdef Py_DEBUG #ifdef Py_DEBUG
int debug; int debug;
#endif #endif
}; };
extern struct tok_state *_PyTokenizer_FromString(const char *, int, int); extern struct tok_state *_PyTokenizer_FromString(const char *, int, int);
extern struct tok_state *_PyTokenizer_FromUTF8(const char *, int, int); extern struct tok_state *_PyTokenizer_FromUTF8(const char *, int, int);
extern struct tok_state *_PyTokenizer_FromReadline(PyObject*, const char*, int, int); extern struct tok_state *_PyTokenizer_FromReadline(PyObject *, const char *,
extern struct tok_state *_PyTokenizer_FromFile(FILE *, const char*, int, int);
const char *, const char *); extern struct tok_state *_PyTokenizer_FromFile(FILE *, const char *,
const char *, const char *);
extern void _PyTokenizer_Free(struct tok_state *); extern void _PyTokenizer_Free(struct tok_state *);
extern void _PyToken_Free(struct token *); extern void _PyToken_Free(struct token *);
extern void _PyToken_Init(struct token *); extern void _PyToken_Init(struct token *);