mirror of
https://github.com/python/cpython.git
synced 2025-07-09 20:35:26 +00:00

We only statically initialize for core code and builtin modules. Extension modules still create the tuple at runtime. We'll solve that part of interpreter isolation separately.

This change includes generated code. The non-generated changes are in:

* Tools/clinic/clinic.py
* Python/getargs.c
* Include/cpython/modsupport.h
* Makefile.pre.in (re-generate global strings after running clinic)
* very minor tweaks to Modules/_codecsmodule.c and Python/Python-tokenize.c

All other changes are generated code (clinic, global strings).
195 lines
5 KiB
C
195 lines
5 KiB
C
#include "Python.h"
|
|
#include "../Parser/tokenizer.h"
|
|
|
|
/* Forward declaration of the module definition.  The full definition comes
   later in the file; it is declared here because the
   _tokenize_get_state_by_type() macro takes its address. */
static struct PyModuleDef _tokenizemodule;
|
|
|
|
typedef struct {
|
|
PyTypeObject *TokenizerIter;
|
|
} tokenize_state;
|
|
|
|
static tokenize_state *
|
|
get_tokenize_state(PyObject *module) {
|
|
return (tokenize_state *)PyModule_GetState(module);
|
|
}
|
|
|
|
/* Look up the module state starting from a type object: find the module
   that was created from _tokenizemodule for `type`, then fetch its state. */
#define _tokenize_get_state_by_type(type) \
    get_tokenize_state(PyType_GetModuleByDef((type), &_tokenizemodule))
|
|
|
|
#include "pycore_runtime.h"
|
|
#include "clinic/Python-tokenize.c.h"
|
|
|
|
/*[clinic input]
|
|
module _tokenizer
|
|
class _tokenizer.tokenizeriter "tokenizeriterobject *" "_tokenize_get_state_by_type(type)->TokenizerIter"
|
|
[clinic start generated code]*/
|
|
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=96d98ee2fef7a8bc]*/
|
|
|
|
typedef struct
|
|
{
|
|
PyObject_HEAD struct tok_state *tok;
|
|
} tokenizeriterobject;
|
|
|
|
/*[clinic input]
|
|
@classmethod
|
|
_tokenizer.tokenizeriter.__new__ as tokenizeriter_new
|
|
|
|
source: str
|
|
[clinic start generated code]*/
|
|
|
|
static PyObject *
|
|
tokenizeriter_new_impl(PyTypeObject *type, const char *source)
|
|
/*[clinic end generated code: output=7fd9f46cf9263cbb input=4384b368407375c6]*/
|
|
{
|
|
tokenizeriterobject *self = (tokenizeriterobject *)type->tp_alloc(type, 0);
|
|
if (self == NULL) {
|
|
return NULL;
|
|
}
|
|
PyObject *filename = PyUnicode_FromString("<string>");
|
|
if (filename == NULL) {
|
|
return NULL;
|
|
}
|
|
self->tok = _PyTokenizer_FromUTF8(source, 1);
|
|
if (self->tok == NULL) {
|
|
Py_DECREF(filename);
|
|
return NULL;
|
|
}
|
|
self->tok->filename = filename;
|
|
return (PyObject *)self;
|
|
}
|
|
|
|
static PyObject *
|
|
tokenizeriter_next(tokenizeriterobject *it)
|
|
{
|
|
const char *start;
|
|
const char *end;
|
|
int type = _PyTokenizer_Get(it->tok, &start, &end);
|
|
if (type == ERRORTOKEN && PyErr_Occurred()) {
|
|
return NULL;
|
|
}
|
|
if (type == ERRORTOKEN || type == ENDMARKER) {
|
|
PyErr_SetString(PyExc_StopIteration, "EOF");
|
|
return NULL;
|
|
}
|
|
PyObject *str = NULL;
|
|
if (start == NULL || end == NULL) {
|
|
str = PyUnicode_FromString("");
|
|
}
|
|
else {
|
|
str = PyUnicode_FromStringAndSize(start, end - start);
|
|
}
|
|
if (str == NULL) {
|
|
return NULL;
|
|
}
|
|
|
|
Py_ssize_t size = it->tok->inp - it->tok->buf;
|
|
PyObject *line = PyUnicode_DecodeUTF8(it->tok->buf, size, "replace");
|
|
if (line == NULL) {
|
|
Py_DECREF(str);
|
|
return NULL;
|
|
}
|
|
const char *line_start = type == STRING ? it->tok->multi_line_start : it->tok->line_start;
|
|
int lineno = type == STRING ? it->tok->first_lineno : it->tok->lineno;
|
|
int end_lineno = it->tok->lineno;
|
|
int col_offset = -1;
|
|
int end_col_offset = -1;
|
|
if (start != NULL && start >= line_start) {
|
|
col_offset = (int)(start - line_start);
|
|
}
|
|
if (end != NULL && end >= it->tok->line_start) {
|
|
end_col_offset = (int)(end - it->tok->line_start);
|
|
}
|
|
|
|
return Py_BuildValue("(NiiiiiN)", str, type, lineno, end_lineno, col_offset, end_col_offset, line);
|
|
}
|
|
|
|
static void
|
|
tokenizeriter_dealloc(tokenizeriterobject *it)
|
|
{
|
|
PyTypeObject *tp = Py_TYPE(it);
|
|
_PyTokenizer_Free(it->tok);
|
|
tp->tp_free(it);
|
|
Py_DECREF(tp);
|
|
}
|
|
|
|
static PyType_Slot tokenizeriter_slots[] = {
|
|
{Py_tp_new, tokenizeriter_new},
|
|
{Py_tp_dealloc, tokenizeriter_dealloc},
|
|
{Py_tp_getattro, PyObject_GenericGetAttr},
|
|
{Py_tp_iter, PyObject_SelfIter},
|
|
{Py_tp_iternext, tokenizeriter_next},
|
|
{0, NULL},
|
|
};
|
|
|
|
static PyType_Spec tokenizeriter_spec = {
|
|
.name = "_tokenize.TokenizerIter",
|
|
.basicsize = sizeof(tokenizeriterobject),
|
|
.flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
|
|
.slots = tokenizeriter_slots,
|
|
};
|
|
|
|
static int
|
|
tokenizemodule_exec(PyObject *m)
|
|
{
|
|
tokenize_state *state = get_tokenize_state(m);
|
|
if (state == NULL) {
|
|
return -1;
|
|
}
|
|
|
|
state->TokenizerIter = (PyTypeObject *)PyType_FromModuleAndSpec(m, &tokenizeriter_spec, NULL);
|
|
if (state->TokenizerIter == NULL) {
|
|
return -1;
|
|
}
|
|
if (PyModule_AddType(m, state->TokenizerIter) < 0) {
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static PyMethodDef tokenize_methods[] = {
|
|
{NULL, NULL, 0, NULL} /* Sentinel */
|
|
};
|
|
|
|
static PyModuleDef_Slot tokenizemodule_slots[] = {
|
|
{Py_mod_exec, tokenizemodule_exec},
|
|
{0, NULL}
|
|
};
|
|
|
|
static int
|
|
tokenizemodule_traverse(PyObject *m, visitproc visit, void *arg)
|
|
{
|
|
tokenize_state *state = get_tokenize_state(m);
|
|
Py_VISIT(state->TokenizerIter);
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
tokenizemodule_clear(PyObject *m)
|
|
{
|
|
tokenize_state *state = get_tokenize_state(m);
|
|
Py_CLEAR(state->TokenizerIter);
|
|
return 0;
|
|
}
|
|
|
|
static void
|
|
tokenizemodule_free(void *m)
|
|
{
|
|
tokenizemodule_clear((PyObject *)m);
|
|
}
|
|
|
|
static struct PyModuleDef _tokenizemodule = {
|
|
PyModuleDef_HEAD_INIT,
|
|
.m_name = "_tokenize",
|
|
.m_size = sizeof(tokenize_state),
|
|
.m_slots = tokenizemodule_slots,
|
|
.m_methods = tokenize_methods,
|
|
.m_traverse = tokenizemodule_traverse,
|
|
.m_clear = tokenizemodule_clear,
|
|
.m_free = tokenizemodule_free,
|
|
};
|
|
|
|
PyMODINIT_FUNC
|
|
PyInit__tokenize(void)
|
|
{
|
|
return PyModuleDef_Init(&_tokenizemodule);
|
|
}
|