bpo-45494: Fix parser crash when reporting errors involving invalid continuation characters (GH-28993)

This commit fixes two errors:

* The parser was not correctly computing the offset and the source string for E_LINECONT errors, due to incorrect usage of strtok().
* The parser was not correctly unwinding the call stack when a tokenizer exception happened in rules involving optionals ('?', [...]), because we always make them return valid results by using the comma operator. We need to first check that no error has occurred before continuing.
2025-11-02 03:01:58 +00:00 · 2021-10-19 20:24:12 +01:00 · 2021-10-19 20:24:12 +01:00 · a106343f63
commit a106343f63
parent bda69abe84
5 changed files with 137 additions and 123 deletions
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -350,10 +350,18 @@ tokenizer_error(Parser *p)
            errtype = PyExc_IndentationError;
            msg = "too many levels of indentation";
            break;
-        case E_LINECONT:
-            col_offset = strlen(strtok(p->tok->buf, "\n")) - 1;
+        case E_LINECONT: {
+            char* loc = strrchr(p->tok->buf, '\n');
+            const char* last_char = p->tok->cur - 1;
+            if (loc != NULL && loc != last_char) {
+                col_offset = p->tok->cur - loc - 1;
+                p->tok->buf = loc;
+            } else {
+                col_offset = last_char - p->tok->buf - 1;
+            }
            msg = "unexpected character after line continuation character";
            break;
+        }
        default:
            msg = "unknown parsing error";
    }