gh-112943: Correctly compute end offsets for multiline tokens in the tokenize module (#112949)

This commit is contained in:
Pablo Galindo Salgado 2023-12-11 11:44:22 +00:00 committed by GitHub
parent 4c5b9c107a
commit a135a6d2c6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 25 additions and 6 deletions

View file

@ -19,12 +19,8 @@ _PyPegen_interactive_exit(Parser *p)
}
Py_ssize_t
_PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
_PyPegen_byte_offset_to_character_offset_raw(const char* str, Py_ssize_t col_offset)
{
const char *str = PyUnicode_AsUTF8(line);
if (!str) {
return -1;
}
Py_ssize_t len = strlen(str);
if (col_offset > len + 1) {
col_offset = len + 1;
@ -39,6 +35,16 @@ _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
return size;
}
// PyObject-based front end for _PyPegen_byte_offset_to_character_offset_raw.
// Obtains the UTF-8 buffer of `line` with PyUnicode_AsUTF8 and forwards it,
// together with `col_offset`, to the raw variant, returning whatever that
// call returns. If the UTF-8 buffer cannot be obtained, returns -1 without
// calling the raw variant.
Py_ssize_t
_PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
{
    const char *utf8_line = PyUnicode_AsUTF8(line);
    return utf8_line
        ? _PyPegen_byte_offset_to_character_offset_raw(utf8_line, col_offset)
        : -1;
}
// Here, mark is the start of the node, while p->mark is the end.
// If node==NULL, they should be the same.
int