mirror of
https://github.com/python/cpython.git
synced 2025-08-02 08:02:56 +00:00
streamline normalizer identification a bit
This commit is contained in:
parent
ea470af71a
commit
55e0043a51
1 changed files with 56 additions and 26 deletions
82
Python/ast.c
82
Python/ast.c
|
@ -500,6 +500,8 @@ struct compiling {
|
||||||
char *c_encoding; /* source encoding */
|
char *c_encoding; /* source encoding */
|
||||||
PyArena *c_arena; /* arena for allocating memeory */
|
PyArena *c_arena; /* arena for allocating memeory */
|
||||||
const char *c_filename; /* filename */
|
const char *c_filename; /* filename */
|
||||||
|
PyObject *c_normalize; /* Normalization function from unicodedata. */
|
||||||
|
PyObject *c_normalize_args; /* Normalization argument tuple. */
|
||||||
};
|
};
|
||||||
|
|
||||||
static asdl_seq *seq_for_testlist(struct compiling *, const node *);
|
static asdl_seq *seq_for_testlist(struct compiling *, const node *);
|
||||||
|
@ -527,36 +529,54 @@ static PyObject *parsestrplus(struct compiling *, const node *n,
|
||||||
#define COMP_LISTCOMP 1
|
#define COMP_LISTCOMP 1
|
||||||
#define COMP_SETCOMP 2
|
#define COMP_SETCOMP 2
|
||||||
|
|
||||||
static identifier
|
static int
|
||||||
new_identifier(const char* n, PyArena *arena)
|
init_normalization(struct compiling *c)
|
||||||
|
{
|
||||||
|
PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
|
||||||
|
if (!m)
|
||||||
|
return 0;
|
||||||
|
c->c_normalize = PyObject_GetAttrString(m, "normalize");
|
||||||
|
Py_DECREF(m);
|
||||||
|
if (!c->c_normalize)
|
||||||
|
return 0;
|
||||||
|
c->c_normalize_args = Py_BuildValue("(sN)", "NFKC", Py_None);
|
||||||
|
PyTuple_SET_ITEM(c->c_normalize_args, 1, NULL);
|
||||||
|
if (!c->c_normalize_args) {
|
||||||
|
Py_CLEAR(c->c_normalize);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static identifier
|
||||||
|
new_identifier(const char* n, struct compiling *c)
|
||||||
{
|
{
|
||||||
_Py_IDENTIFIER(normalize);
|
|
||||||
PyObject* id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
|
PyObject* id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
|
||||||
if (!id)
|
if (!id)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
/* PyUnicode_DecodeUTF8 should always return a ready string. */
|
||||||
assert(PyUnicode_IS_READY(id));
|
assert(PyUnicode_IS_READY(id));
|
||||||
/* Check whether there are non-ASCII characters in the
|
/* Check whether there are non-ASCII characters in the
|
||||||
identifier; if so, normalize to NFKC. */
|
identifier; if so, normalize to NFKC. */
|
||||||
if (!PyUnicode_IS_ASCII(id)) {
|
if (!PyUnicode_IS_ASCII(id)) {
|
||||||
PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
|
|
||||||
PyObject *id2;
|
PyObject *id2;
|
||||||
if (!m) {
|
if (!c->c_normalize && !init_normalization(c)) {
|
||||||
Py_DECREF(id);
|
Py_DECREF(id);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
id2 = _PyObject_CallMethodId(m, &PyId_normalize, "sO", "NFKC", id);
|
PyTuple_SET_ITEM(c->c_normalize_args, 1, id);
|
||||||
Py_DECREF(m);
|
id2 = PyObject_Call(c->c_normalize, c->c_normalize_args, NULL);
|
||||||
Py_DECREF(id);
|
Py_DECREF(id);
|
||||||
if (!id2)
|
if (!id2)
|
||||||
return NULL;
|
return NULL;
|
||||||
id = id2;
|
id = id2;
|
||||||
}
|
}
|
||||||
PyUnicode_InternInPlace(&id);
|
PyUnicode_InternInPlace(&id);
|
||||||
PyArena_AddPyObject(arena, id);
|
PyArena_AddPyObject(c->c_arena, id);
|
||||||
return id;
|
return id;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define NEW_IDENTIFIER(n) new_identifier(STR(n), c->c_arena)
|
#define NEW_IDENTIFIER(n) new_identifier(STR(n), c)
|
||||||
|
|
||||||
/* This routine provides an invalid object for the syntax error.
|
/* This routine provides an invalid object for the syntax error.
|
||||||
The outermost routine must unpack this error and create the
|
The outermost routine must unpack this error and create the
|
||||||
|
@ -706,13 +726,14 @@ PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename,
|
||||||
stmt_ty s;
|
stmt_ty s;
|
||||||
node *ch;
|
node *ch;
|
||||||
struct compiling c;
|
struct compiling c;
|
||||||
|
mod_ty res = NULL;
|
||||||
|
|
||||||
if (flags && flags->cf_flags & PyCF_SOURCE_IS_UTF8) {
|
if (flags && flags->cf_flags & PyCF_SOURCE_IS_UTF8) {
|
||||||
c.c_encoding = "utf-8";
|
c.c_encoding = "utf-8";
|
||||||
if (TYPE(n) == encoding_decl) {
|
if (TYPE(n) == encoding_decl) {
|
||||||
#if 0
|
#if 0
|
||||||
ast_error(n, "encoding declaration in Unicode string");
|
ast_error(n, "encoding declaration in Unicode string");
|
||||||
goto error;
|
goto out;
|
||||||
#endif
|
#endif
|
||||||
n = CHILD(n, 0);
|
n = CHILD(n, 0);
|
||||||
}
|
}
|
||||||
|
@ -725,13 +746,14 @@ PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename,
|
||||||
}
|
}
|
||||||
c.c_arena = arena;
|
c.c_arena = arena;
|
||||||
c.c_filename = filename;
|
c.c_filename = filename;
|
||||||
|
c.c_normalize = c.c_normalize_args = NULL;
|
||||||
|
|
||||||
k = 0;
|
k = 0;
|
||||||
switch (TYPE(n)) {
|
switch (TYPE(n)) {
|
||||||
case file_input:
|
case file_input:
|
||||||
stmts = asdl_seq_new(num_stmts(n), arena);
|
stmts = asdl_seq_new(num_stmts(n), arena);
|
||||||
if (!stmts)
|
if (!stmts)
|
||||||
return NULL;
|
goto out;
|
||||||
for (i = 0; i < NCH(n) - 1; i++) {
|
for (i = 0; i < NCH(n) - 1; i++) {
|
||||||
ch = CHILD(n, i);
|
ch = CHILD(n, i);
|
||||||
if (TYPE(ch) == NEWLINE)
|
if (TYPE(ch) == NEWLINE)
|
||||||
|
@ -741,7 +763,7 @@ PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename,
|
||||||
if (num == 1) {
|
if (num == 1) {
|
||||||
s = ast_for_stmt(&c, ch);
|
s = ast_for_stmt(&c, ch);
|
||||||
if (!s)
|
if (!s)
|
||||||
goto error;
|
goto out;
|
||||||
asdl_seq_SET(stmts, k++, s);
|
asdl_seq_SET(stmts, k++, s);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@ -750,42 +772,44 @@ PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename,
|
||||||
for (j = 0; j < num; j++) {
|
for (j = 0; j < num; j++) {
|
||||||
s = ast_for_stmt(&c, CHILD(ch, j * 2));
|
s = ast_for_stmt(&c, CHILD(ch, j * 2));
|
||||||
if (!s)
|
if (!s)
|
||||||
goto error;
|
goto out;
|
||||||
asdl_seq_SET(stmts, k++, s);
|
asdl_seq_SET(stmts, k++, s);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return Module(stmts, arena);
|
res = Module(stmts, arena);
|
||||||
|
break;
|
||||||
case eval_input: {
|
case eval_input: {
|
||||||
expr_ty testlist_ast;
|
expr_ty testlist_ast;
|
||||||
|
|
||||||
/* XXX Why not comp_for here? */
|
/* XXX Why not comp_for here? */
|
||||||
testlist_ast = ast_for_testlist(&c, CHILD(n, 0));
|
testlist_ast = ast_for_testlist(&c, CHILD(n, 0));
|
||||||
if (!testlist_ast)
|
if (!testlist_ast)
|
||||||
goto error;
|
goto out;
|
||||||
return Expression(testlist_ast, arena);
|
res = Expression(testlist_ast, arena);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
case single_input:
|
case single_input:
|
||||||
if (TYPE(CHILD(n, 0)) == NEWLINE) {
|
if (TYPE(CHILD(n, 0)) == NEWLINE) {
|
||||||
stmts = asdl_seq_new(1, arena);
|
stmts = asdl_seq_new(1, arena);
|
||||||
if (!stmts)
|
if (!stmts)
|
||||||
goto error;
|
goto out;
|
||||||
asdl_seq_SET(stmts, 0, Pass(n->n_lineno, n->n_col_offset,
|
asdl_seq_SET(stmts, 0, Pass(n->n_lineno, n->n_col_offset,
|
||||||
arena));
|
arena));
|
||||||
if (!asdl_seq_GET(stmts, 0))
|
if (!asdl_seq_GET(stmts, 0))
|
||||||
goto error;
|
goto out;
|
||||||
return Interactive(stmts, arena);
|
res = Interactive(stmts, arena);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
n = CHILD(n, 0);
|
n = CHILD(n, 0);
|
||||||
num = num_stmts(n);
|
num = num_stmts(n);
|
||||||
stmts = asdl_seq_new(num, arena);
|
stmts = asdl_seq_new(num, arena);
|
||||||
if (!stmts)
|
if (!stmts)
|
||||||
goto error;
|
goto out;
|
||||||
if (num == 1) {
|
if (num == 1) {
|
||||||
s = ast_for_stmt(&c, n);
|
s = ast_for_stmt(&c, n);
|
||||||
if (!s)
|
if (!s)
|
||||||
goto error;
|
goto out;
|
||||||
asdl_seq_SET(stmts, 0, s);
|
asdl_seq_SET(stmts, 0, s);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@ -796,21 +820,27 @@ PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename,
|
||||||
break;
|
break;
|
||||||
s = ast_for_stmt(&c, CHILD(n, i));
|
s = ast_for_stmt(&c, CHILD(n, i));
|
||||||
if (!s)
|
if (!s)
|
||||||
goto error;
|
goto out;
|
||||||
asdl_seq_SET(stmts, i / 2, s);
|
asdl_seq_SET(stmts, i / 2, s);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return Interactive(stmts, arena);
|
res = Interactive(stmts, arena);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
PyErr_Format(PyExc_SystemError,
|
PyErr_Format(PyExc_SystemError,
|
||||||
"invalid node %d for PyAST_FromNode", TYPE(n));
|
"invalid node %d for PyAST_FromNode", TYPE(n));
|
||||||
goto error;
|
goto out;
|
||||||
|
}
|
||||||
|
out:
|
||||||
|
if (c.c_normalize) {
|
||||||
|
Py_DECREF(c.c_normalize);
|
||||||
|
PyTuple_SET_ITEM(c.c_normalize_args, 1, NULL);
|
||||||
|
Py_DECREF(c.c_normalize_args);
|
||||||
}
|
}
|
||||||
error:
|
|
||||||
ast_error_finish(filename);
|
ast_error_finish(filename);
|
||||||
return NULL;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Return the AST repr. of the operator represented as syntax (|, ^, etc.)
|
/* Return the AST repr. of the operator represented as syntax (|, ^, etc.)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue