bpo-40958: Avoid buffer overflow in the parser when indexing the current line (GH-20875) (GH-20919)

(cherry picked from commit 51c5896b62) Co-authored-by: Pablo Galindo <Pablogsal@gmail.com>
2025-09-26 10:19:53 +00:00 · 2020-06-16 10:36:59 -07:00 · 2020-06-16 10:36:59 -07:00 · 7795ae8f05
commit 7795ae8f05
parent 3cf809475a
4 changed files with 16 additions and 16 deletions
--- a/Lib/test/test_peg_parser.py
+++ b/Lib/test/test_peg_parser.py
@@ -669,7 +669,7 @@ FSTRINGS_TRACEBACKS = {
            {a$b}
            '''
        """,
-        '(a$b)',
+        '(a$b)\n',
    ),
    'multiline_fstring_brace_on_next_line': (
        """
@@ -677,7 +677,7 @@ FSTRINGS_TRACEBACKS = {
            {a$b
            }'''
        """,
-        '(a$b',
+        '(a$b\n',
    ),
    'multiline_fstring_brace_on_previous_line': (
        """
@@ -685,7 +685,7 @@ FSTRINGS_TRACEBACKS = {
            {
            a$b}'''
        """,
-        'a$b)',
+        'a$b)\n',
    ),
 }
--- a/Misc/NEWS.d/next/Core and Builtins/2020-06-15-01-20-44.bpo-40958.7O2Wh1.rst
+++ b/Misc/NEWS.d/next/Core and Builtins/2020-06-15-01-20-44.bpo-40958.7O2Wh1.rst
@@ -0,0 +1,2 @@
+Fix a possible buffer overflow in the PEG parser when gathering information
+for emitting syntax errors. Patch by Pablo Galindo.
--- a/Parser/pegen/pegen.c
+++ b/Parser/pegen/pegen.c
@@ -140,21 +140,18 @@ _create_dummy_identifier(Parser *p)
 }
 static inline Py_ssize_t
-byte_offset_to_character_offset(PyObject *line, int col_offset)
+byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
 {
    const char *str = PyUnicode_AsUTF8(line);
    if (!str) {
        return 0;
    }
    assert(col_offset >= 0 && (unsigned long)col_offset <= strlen(str));
    PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, "replace");
    if (!text) {
        return 0;
    }
    Py_ssize_t size = PyUnicode_GET_LENGTH(text);
    str = PyUnicode_AsUTF8(text);
    if (str != NULL && (int)strlen(str) == col_offset) {
        size = strlen(str);
    }
    Py_DECREF(text);
    return size;
 }
@@ -366,7 +363,7 @@ void *
 _PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
 {
    Token *t = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1];
-    int col_offset;
+    Py_ssize_t col_offset;
    if (t->col_offset == -1) {
        col_offset = Py_SAFE_DOWNCAST(p->tok->cur - p->tok->buf,
                                      intptr_t, int);
@@ -386,7 +383,7 @@ _PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
 void *
 _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
-                                    int lineno, int col_offset,
+                                    Py_ssize_t lineno, Py_ssize_t col_offset,
                                    const char *errmsg, va_list va)
 {
    PyObject *value = NULL;
@@ -406,16 +403,17 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
    if (!error_line) {
        Py_ssize_t size = p->tok->inp - p->tok->buf;
        if (size && p->tok->buf[size-1] == '\n') {
            size--;
        }
        error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
        if (!error_line) {
            goto error;
        }
    }
-    Py_ssize_t col_number = byte_offset_to_character_offset(error_line, col_offset);
+    Py_ssize_t col_number = col_offset;
    if (p->tok->encoding != NULL) {
        col_number = byte_offset_to_character_offset(error_line, col_offset);
    }
    tmp = Py_BuildValue("(OiiN)", p->tok->filename, lineno, col_number, error_line);
    if (!tmp) {
--- a/Parser/pegen/pegen.h
+++ b/Parser/pegen/pegen.h
@@ -34,7 +34,7 @@ typedef struct _memo {
 typedef struct {
    int type;
    PyObject *bytes;
-    int lineno, col_offset, end_lineno, end_col_offset;
+    Py_ssize_t lineno, col_offset, end_lineno, end_col_offset;
    Memo *memo;
 } Token;
@@ -132,7 +132,7 @@ void *_PyPegen_string_token(Parser *p);
 const char *_PyPegen_get_expr_name(expr_ty);
 void *_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...);
 void *_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
-                                          int lineno, int col_offset,
+                                          Py_ssize_t lineno, Py_ssize_t col_offset,
                                          const char *errmsg, va_list va);
 void *_PyPegen_dummy_name(Parser *p, ...);
@@ -0,0 +1,2 @@
+Fix a possible buffer overflow in the PEG parser when gathering information
+for emitting syntax errors. Patch by Pablo Galindo.