gh-102856: Tokenize performance improvement (#104731)

Marta Gómez Macías 2023-05-22 02:29:04 +02:00 committed by GitHub
parent 4b107d86f3
commit 8817886ae5
2 changed files with 17 additions and 13 deletions

Python/Python-tokenize.c

@@ -207,7 +207,22 @@ tokenizeriter_next(tokenizeriterobject *it)
         end_col_offset = _PyPegen_byte_offset_to_character_offset(line, token.end - it->tok->line_start);
     }
 
-    result = Py_BuildValue("(NinnnnN)", str, type, lineno, end_lineno, col_offset, end_col_offset, line);
+    if (it->tok->tok_extra_tokens) {
+        // Necessary adjustments to match the original Python tokenize
+        // implementation
+        if (type > DEDENT && type < OP) {
+            type = OP;
+        }
+        else if (type == ASYNC || type == AWAIT) {
+            type = NAME;
+        }
+        else if (type == NEWLINE) {
+            str = PyUnicode_FromString("\n");
+            end_col_offset++;
+        }
+    }
+
+    result = Py_BuildValue("(iN(nn)(nn)N)", type, str, lineno, col_offset, end_lineno, end_col_offset, line);
 exit:
     _PyToken_Free(&token);
     return result;
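
For context (not part of the commit): the new Py_BuildValue format "(iN(nn)(nn)N)" builds the same 5-tuple shape that tokenize.TokenInfo uses, (type, string, (start_row, start_col), (end_row, end_col), line), replacing the previous flat "(NinnnnN)" layout. The sketch below, using an assumed sample source of b"x = 1\n" for illustration, shows what the normalized output looks like from the Python side; it demonstrates the behavior the C adjustments are matching, not the commit's own code.

# Sketch only: shows the TokenInfo tuple shape and the OP
# normalization that the C code above mirrors.
import io
import token
import tokenize

source = b"x = 1\n"
for tok in tokenize.tokenize(io.BytesIO(source).readline):
    # Each token is a 5-tuple:
    # (type, string, (start_row, start_col), (end_row, end_col), line),
    # the same layout built by Py_BuildValue("(iN(nn)(nn)N)", ...).
    print(token.tok_name[tok.type], repr(tok.string), tok.start, tok.end)

# The exact operator kinds sit between DEDENT and OP in the token
# numbering, which is what the `type > DEDENT && type < OP` range check
# relies on: "=" comes back as generic OP, with EQUAL kept in exact_type.
eq = next(t for t in tokenize.tokenize(io.BytesIO(source).readline)
          if t.string == "=")
print(token.tok_name[eq.type], token.tok_name[eq.exact_type])  # OP EQUAL

The ASYNC/AWAIT -> NAME and NEWLINE adjustments serve the same purpose: the pure-Python tokenize module reports async and await as plain NAME tokens and gives NEWLINE tokens a "\n" string, so the C path normalizes its internal token types to match, as the in-code comment notes.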