gh-104169: Refactor tokenizer into lexer and wrappers (#110684)

* The lexer, which includes the actual lexeme-producing logic, goes into
  the `lexer` directory.
* The wrappers, one wrapper per input mode (file, string, utf-8, and
  readline), go into the `tokenizer` directory and include logic for
  creating a lexer instance and managing the buffer for different modes.
---------

Co-authored-by: Pablo Galindo <pablogsal@gmail.com>
Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
Author: Lysandros Nikolaou, 2023-10-11 17:14:44 +02:00, committed by GitHub
parent eb50cd37ea
commit 01481f2dc1
29 changed files with 3185 additions and 2988 deletions


@@ -1,6 +1,8 @@
 #include "Python.h"
 #include "errcode.h"
-#include "../Parser/tokenizer.h"
+#include "../Parser/lexer/state.h"
+#include "../Parser/lexer/lexer.h"
+#include "../Parser/tokenizer/tokenizer.h"
 #include "../Parser/pegen.h"  // _PyPegen_byte_offset_to_character_offset()