mirror of
https://github.com/python/cpython.git
synced 2025-10-23 23:22:11 +00:00

* The lexer, which includes the actual lexeme-producing logic, goes into the `lexer` directory. * The wrappers, one wrapper per input mode (file, string, utf-8, and readline), go into the `tokenizer` directory and include logic for creating a lexer instance and managing the buffer for different modes. --------- Co-authored-by: Pablo Galindo <pablogsal@gmail.com> Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
55 lines
1.3 KiB
C
55 lines
1.3 KiB
C
#include "Python.h"
|
|
#include "errcode.h"
|
|
|
|
#include "helpers.h"
|
|
#include "../lexer/state.h"
|
|
|
|
static int
|
|
tok_underflow_string(struct tok_state *tok) {
|
|
char *end = strchr(tok->inp, '\n');
|
|
if (end != NULL) {
|
|
end++;
|
|
}
|
|
else {
|
|
end = strchr(tok->inp, '\0');
|
|
if (end == tok->inp) {
|
|
tok->done = E_EOF;
|
|
return 0;
|
|
}
|
|
}
|
|
if (tok->start == NULL) {
|
|
tok->buf = tok->cur;
|
|
}
|
|
tok->line_start = tok->cur;
|
|
ADVANCE_LINENO();
|
|
tok->inp = end;
|
|
return 1;
|
|
}
|
|
|
|
/* Set up tokenizer for UTF-8 string */
|
|
struct tok_state *
|
|
_PyTokenizer_FromUTF8(const char *str, int exec_input, int preserve_crlf)
|
|
{
|
|
struct tok_state *tok = _PyTokenizer_tok_new();
|
|
char *translated;
|
|
if (tok == NULL)
|
|
return NULL;
|
|
tok->input = translated = _PyTokenizer_translate_newlines(str, exec_input, preserve_crlf, tok);
|
|
if (translated == NULL) {
|
|
_PyTokenizer_Free(tok);
|
|
return NULL;
|
|
}
|
|
tok->decoding_state = STATE_NORMAL;
|
|
tok->enc = NULL;
|
|
tok->str = translated;
|
|
tok->encoding = _PyTokenizer_new_string("utf-8", 5, tok);
|
|
if (!tok->encoding) {
|
|
_PyTokenizer_Free(tok);
|
|
return NULL;
|
|
}
|
|
|
|
tok->buf = tok->cur = tok->inp = translated;
|
|
tok->end = translated;
|
|
tok->underflow = &tok_underflow_string;
|
|
return tok;
|
|
}
|