[3.12] gh-120343: Do not reset byte_col_offset_diff after multiline tokens (GH-120352) (#120356)

(cherry picked from commit 1b62bcee94)
Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com>
Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
2025-09-23 08:53:45 +00:00 · 2024-06-11 19:22:16 +02:00 · 2024-06-11 19:22:16 +02:00 · 0315fdc24d
commit 0315fdc24d
parent 92e1c136b5
3 changed files with 18 additions and 1 deletions
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -1204,6 +1204,17 @@ async def f():
    NAME       'x'           (1, 3) (1, 4)
    """)
    def test_multiline_non_ascii_fstring(self):
        self.check_tokenize("""\
 a = f'''
    Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli'''""", """\
    NAME       'a'           (1, 0) (1, 1)
    OP         '='           (1, 2) (1, 3)
    FSTRING_START "f\'\'\'"        (1, 4) (1, 8)
    FSTRING_MIDDLE '\\n    Autorzy, którzy tą jednostkę mają wpisani jako AKTUALNA -- czyli' (1, 8) (2, 68)
    FSTRING_END "\'\'\'"         (2, 68) (2, 71)
    """)
 class GenerateTokensTest(TokenizeTest):
    def check_tokenize(self, s, expected):
        # Format the tokens in s in a table format.
--- a/Misc/NEWS.d/next/Library/2024-06-11-16-34-41.gh-issue-120343.hdiXeU.rst
+++ b/Misc/NEWS.d/next/Library/2024-06-11-16-34-41.gh-issue-120343.hdiXeU.rst
@@ -0,0 +1 @@
 Fix column offset reporting for tokens that come after multiline f-strings in the :mod:`tokenize` module.
--- a/Python/Python-tokenize.c
+++ b/Python/Python-tokenize.c
@@ -35,6 +35,7 @@ typedef struct
    /* Needed to cache line for performance */
    PyObject *last_line;
    Py_ssize_t last_lineno;
    Py_ssize_t last_end_lineno;
    Py_ssize_t byte_col_offset_diff;
 } tokenizeriterobject;
@@ -76,6 +77,7 @@ tokenizeriter_new_impl(PyTypeObject *type, PyObject *readline,
    self->last_line = NULL;
    self->byte_col_offset_diff = 0;
    self->last_lineno = 0;
    self->last_end_lineno = 0;
    return (PyObject *)self;
 }
@@ -226,7 +228,9 @@ tokenizeriter_next(tokenizeriterobject *it)
            Py_XDECREF(it->last_line);
            line = PyUnicode_DecodeUTF8(line_start, size, "replace");
            it->last_line = line;
            if (it->tok->lineno != it->last_end_lineno) {
                it->byte_col_offset_diff = 0;
            }
        } else {
            // Line hasn't changed so we reuse the cached one.
            line = it->last_line;
@@ -240,6 +244,7 @@ tokenizeriter_next(tokenizeriterobject *it)
    Py_ssize_t lineno = ISSTRINGLIT(type) ? it->tok->first_lineno : it->tok->lineno;
    Py_ssize_t end_lineno = it->tok->lineno;
    it->last_lineno = lineno;
    it->last_end_lineno = end_lineno;
    Py_ssize_t col_offset = -1;
    Py_ssize_t end_col_offset = -1;
		`@ -0,0 +1 @@`
							Fix column offset reporting for tokens that come after multiline f-strings in the :mod:`tokenize` module.