bpo-36673: Implement comment/PI parsing support for the TreeBuilder in ElementTree. (#12883)

* bpo-36673: Implement comment/PI parsing support for the TreeBuilder in ElementTree.

* bpo-36673: Rewrite the comment/PI factory handling for the TreeBuilder in "_elementtree" to make it use the same factories as the ElementTree module, and to make it explicit when the comments/PIs are inserted into the tree and when they are not (which is the default).
This commit is contained in:
Stefan Behnel 2019-05-01 21:20:38 +02:00 committed by GitHub
parent 3d37ea25dc
commit 43851a202c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 630 additions and 54 deletions

View file

@ -92,6 +92,8 @@ typedef struct {
PyObject *parseerror_obj;
PyObject *deepcopy_obj;
PyObject *elementpath_obj;
PyObject *comment_factory;
PyObject *pi_factory;
} elementtreestate;
static struct PyModuleDef elementtreemodule;
@ -114,6 +116,8 @@ elementtree_clear(PyObject *m)
Py_CLEAR(st->parseerror_obj);
Py_CLEAR(st->deepcopy_obj);
Py_CLEAR(st->elementpath_obj);
Py_CLEAR(st->comment_factory);
Py_CLEAR(st->pi_factory);
return 0;
}
@ -124,6 +128,8 @@ elementtree_traverse(PyObject *m, visitproc visit, void *arg)
Py_VISIT(st->parseerror_obj);
Py_VISIT(st->deepcopy_obj);
Py_VISIT(st->elementpath_obj);
Py_VISIT(st->comment_factory);
Py_VISIT(st->pi_factory);
return 0;
}
@ -2385,6 +2391,8 @@ typedef struct {
Py_ssize_t index; /* current stack size (0 means empty) */
PyObject *element_factory;
PyObject *comment_factory;
PyObject *pi_factory;
/* element tracing */
PyObject *events_append; /* the append method of the list of events, or NULL */
@ -2392,6 +2400,11 @@ typedef struct {
PyObject *end_event_obj;
PyObject *start_ns_event_obj;
PyObject *end_ns_event_obj;
PyObject *comment_event_obj;
PyObject *pi_event_obj;
char insert_comments;
char insert_pis;
} TreeBuilderObject;
#define TreeBuilder_CheckExact(op) (Py_TYPE(op) == &TreeBuilder_Type)
@ -2413,6 +2426,8 @@ treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
t->data = NULL;
t->element_factory = NULL;
t->comment_factory = NULL;
t->pi_factory = NULL;
t->stack = PyList_New(20);
if (!t->stack) {
Py_DECREF(t->this);
@ -2425,6 +2440,8 @@ treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
t->events_append = NULL;
t->start_event_obj = t->end_event_obj = NULL;
t->start_ns_event_obj = t->end_ns_event_obj = NULL;
t->comment_event_obj = t->pi_event_obj = NULL;
t->insert_comments = t->insert_pis = 0;
}
return (PyObject *)t;
}
@ -2433,17 +2450,53 @@ treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
_elementtree.TreeBuilder.__init__
element_factory: object = NULL
*
comment_factory: object = NULL
pi_factory: object = NULL
insert_comments: bool = False
insert_pis: bool = False
[clinic start generated code]*/
static int
_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
PyObject *element_factory)
/*[clinic end generated code: output=91cfa7558970ee96 input=1b424eeefc35249c]*/
PyObject *element_factory,
PyObject *comment_factory,
PyObject *pi_factory,
int insert_comments, int insert_pis)
/*[clinic end generated code: output=8571d4dcadfdf952 input=1f967b5c245e0a71]*/
{
if (element_factory) {
if (element_factory && element_factory != Py_None) {
Py_INCREF(element_factory);
Py_XSETREF(self->element_factory, element_factory);
} else {
Py_CLEAR(self->element_factory);
}
if (!comment_factory || comment_factory == Py_None) {
elementtreestate *st = ET_STATE_GLOBAL;
comment_factory = st->comment_factory;
}
if (comment_factory) {
Py_INCREF(comment_factory);
Py_XSETREF(self->comment_factory, comment_factory);
self->insert_comments = insert_comments;
} else {
Py_CLEAR(self->comment_factory);
self->insert_comments = 0;
}
if (!pi_factory || pi_factory == Py_None) {
elementtreestate *st = ET_STATE_GLOBAL;
pi_factory = st->pi_factory;
}
if (pi_factory) {
Py_INCREF(pi_factory);
Py_XSETREF(self->pi_factory, pi_factory);
self->insert_pis = insert_pis;
} else {
Py_CLEAR(self->pi_factory);
self->insert_pis = 0;
}
return 0;
@ -2452,6 +2505,8 @@ _elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
static int
treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
{
Py_VISIT(self->pi_event_obj);
Py_VISIT(self->comment_event_obj);
Py_VISIT(self->end_ns_event_obj);
Py_VISIT(self->start_ns_event_obj);
Py_VISIT(self->end_event_obj);
@ -2462,6 +2517,8 @@ treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
Py_VISIT(self->last);
Py_VISIT(self->data);
Py_VISIT(self->stack);
Py_VISIT(self->pi_factory);
Py_VISIT(self->comment_factory);
Py_VISIT(self->element_factory);
return 0;
}
@ -2469,6 +2526,8 @@ treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
static int
treebuilder_gc_clear(TreeBuilderObject *self)
{
Py_CLEAR(self->pi_event_obj);
Py_CLEAR(self->comment_event_obj);
Py_CLEAR(self->end_ns_event_obj);
Py_CLEAR(self->start_ns_event_obj);
Py_CLEAR(self->end_event_obj);
@ -2478,6 +2537,8 @@ treebuilder_gc_clear(TreeBuilderObject *self)
Py_CLEAR(self->data);
Py_CLEAR(self->last);
Py_CLEAR(self->this);
Py_CLEAR(self->pi_factory);
Py_CLEAR(self->comment_factory);
Py_CLEAR(self->element_factory);
Py_CLEAR(self->root);
return 0;
@ -2494,6 +2555,57 @@ treebuilder_dealloc(TreeBuilderObject *self)
/* -------------------------------------------------------------------- */
/* helpers for handling of arbitrary element-like objects */
/*[clinic input]
_elementtree._set_factories
comment_factory: object
pi_factory: object
/
Change the factories used to create comments and processing instructions.
For internal use only.
[clinic start generated code]*/
static PyObject *
_elementtree__set_factories_impl(PyObject *module, PyObject *comment_factory,
PyObject *pi_factory)
/*[clinic end generated code: output=813b408adee26535 input=99d17627aea7fb3b]*/
{
elementtreestate *st = ET_STATE_GLOBAL;
PyObject *old;
if (!PyCallable_Check(comment_factory) && comment_factory != Py_None) {
PyErr_Format(PyExc_TypeError, "Comment factory must be callable, not %.100s",
Py_TYPE(comment_factory)->tp_name);
return NULL;
}
if (!PyCallable_Check(pi_factory) && pi_factory != Py_None) {
PyErr_Format(PyExc_TypeError, "PI factory must be callable, not %.100s",
Py_TYPE(pi_factory)->tp_name);
return NULL;
}
old = PyTuple_Pack(2,
st->comment_factory ? st->comment_factory : Py_None,
st->pi_factory ? st->pi_factory : Py_None);
if (comment_factory == Py_None) {
Py_CLEAR(st->comment_factory);
} else {
Py_INCREF(comment_factory);
Py_XSETREF(st->comment_factory, comment_factory);
}
if (pi_factory == Py_None) {
Py_CLEAR(st->pi_factory);
} else {
Py_INCREF(pi_factory);
Py_XSETREF(st->pi_factory, pi_factory);
}
return old;
}
static int
treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data,
PyObject **dest, _Py_Identifier *name)
@ -2569,7 +2681,7 @@ treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
PyObject *event = PyTuple_Pack(2, action, node);
if (event == NULL)
return -1;
res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
res = _PyObject_FastCall(self->events_append, &event, 1);
Py_DECREF(event);
if (res == NULL)
return -1;
@ -2593,7 +2705,7 @@ treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
return NULL;
}
if (!self->element_factory || self->element_factory == Py_None) {
if (!self->element_factory) {
node = create_new_element(tag, attrib);
} else if (attrib == Py_None) {
attrib = PyDict_New();
@ -2721,6 +2833,84 @@ treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
return (PyObject*) self->last;
}
LOCAL(PyObject*)
treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text)
{
PyObject* comment = NULL;
PyObject* this;
if (treebuilder_flush_data(self) < 0) {
return NULL;
}
if (self->comment_factory) {
comment = _PyObject_FastCall(self->comment_factory, &text, 1);
if (!comment)
return NULL;
this = self->this;
if (self->insert_comments && this != Py_None) {
if (treebuilder_add_subelement(this, comment) < 0)
goto error;
}
} else {
Py_INCREF(text);
comment = text;
}
if (self->events_append && self->comment_event_obj) {
if (treebuilder_append_event(self, self->comment_event_obj, comment) < 0)
goto error;
}
return comment;
error:
Py_DECREF(comment);
return NULL;
}
LOCAL(PyObject*)
treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, PyObject* text)
{
PyObject* pi = NULL;
PyObject* this;
PyObject* stack[2] = {target, text};
if (treebuilder_flush_data(self) < 0) {
return NULL;
}
if (self->pi_factory) {
pi = _PyObject_FastCall(self->pi_factory, stack, 2);
if (!pi) {
return NULL;
}
this = self->this;
if (self->insert_pis && this != Py_None) {
if (treebuilder_add_subelement(this, pi) < 0)
goto error;
}
} else {
pi = PyTuple_Pack(2, target, text);
if (!pi) {
return NULL;
}
}
if (self->events_append && self->pi_event_obj) {
if (treebuilder_append_event(self, self->pi_event_obj, pi) < 0)
goto error;
}
return pi;
error:
Py_DECREF(pi);
return NULL;
}
/* -------------------------------------------------------------------- */
/* methods (in alphabetical order) */
@ -2754,6 +2944,38 @@ _elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
return treebuilder_handle_end(self, tag);
}
/*[clinic input]
_elementtree.TreeBuilder.comment
text: object
/
[clinic start generated code]*/
static PyObject *
_elementtree_TreeBuilder_comment(TreeBuilderObject *self, PyObject *text)
/*[clinic end generated code: output=22835be41deeaa27 input=47e7ebc48ed01dfa]*/
{
return treebuilder_handle_comment(self, text);
}
/*[clinic input]
_elementtree.TreeBuilder.pi
target: object
text: object = None
/
[clinic start generated code]*/
static PyObject *
_elementtree_TreeBuilder_pi_impl(TreeBuilderObject *self, PyObject *target,
PyObject *text)
/*[clinic end generated code: output=21eb95ec9d04d1d9 input=349342bd79c35570]*/
{
return treebuilder_handle_pi(self, target, text);
}
LOCAL(PyObject*)
treebuilder_done(TreeBuilderObject* self)
{
@ -2925,7 +3147,7 @@ expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
if (errmsg == NULL)
return;
error = PyObject_CallFunctionObjArgs(st->parseerror_obj, errmsg, NULL);
error = _PyObject_FastCall(st->parseerror_obj, &errmsg, 1);
Py_DECREF(errmsg);
if (!error)
return;
@ -2988,7 +3210,7 @@ expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
(TreeBuilderObject*) self->target, value
);
else if (self->handle_data)
res = PyObject_CallFunctionObjArgs(self->handle_data, value, NULL);
res = _PyObject_FastCall(self->handle_data, &value, 1);
else
res = NULL;
Py_XDECREF(res);
@ -3099,7 +3321,7 @@ expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
/* shortcut */
res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
else if (self->handle_data)
res = PyObject_CallFunctionObjArgs(self->handle_data, data, NULL);
res = _PyObject_FastCall(self->handle_data, &data, 1);
else
res = NULL;
@ -3126,7 +3348,7 @@ expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
else if (self->handle_end) {
tag = makeuniversal(self, tag_in);
if (tag) {
res = PyObject_CallFunctionObjArgs(self->handle_end, tag, NULL);
res = _PyObject_FastCall(self->handle_end, &tag, 1);
Py_DECREF(tag);
}
}
@ -3176,21 +3398,31 @@ expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
static void
expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
{
PyObject* comment;
PyObject* res;
PyObject* comment = NULL;
PyObject* res = NULL;
if (PyErr_Occurred())
return;
if (self->handle_comment) {
if (TreeBuilder_CheckExact(self->target)) {
/* shortcut */
TreeBuilderObject *target = (TreeBuilderObject*) self->target;
comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
if (comment) {
res = PyObject_CallFunctionObjArgs(self->handle_comment,
comment, NULL);
Py_XDECREF(res);
Py_DECREF(comment);
}
if (!comment)
return; /* parser will look for errors */
res = treebuilder_handle_comment(target, comment);
} else if (self->handle_comment) {
comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
if (!comment)
return;
res = _PyObject_FastCall(self->handle_comment, &comment, 1);
}
Py_XDECREF(res);
Py_DECREF(comment);
}
static void
@ -3258,27 +3490,51 @@ static void
expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
const XML_Char* data_in)
{
PyObject* target;
PyObject* pi_target = NULL;
PyObject* data;
PyObject* res;
PyObject* stack[2];
if (PyErr_Occurred())
return;
if (self->handle_pi) {
target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
if (target && data) {
res = PyObject_CallFunctionObjArgs(self->handle_pi,
target, data, NULL);
if (TreeBuilder_CheckExact(self->target)) {
/* shortcut */
TreeBuilderObject *target = (TreeBuilderObject*) self->target;
if (target->events_append && target->pi_event_obj) {
pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
if (!pi_target)
goto error;
data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
if (!data)
goto error;
res = treebuilder_handle_pi(target, pi_target, data);
Py_XDECREF(res);
Py_DECREF(data);
Py_DECREF(target);
} else {
Py_XDECREF(data);
Py_XDECREF(target);
Py_DECREF(pi_target);
}
} else if (self->handle_pi) {
pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
if (!pi_target)
goto error;
data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
if (!data)
goto error;
stack[0] = pi_target;
stack[1] = data;
res = _PyObject_FastCall(self->handle_pi, stack, 2);
Py_XDECREF(res);
Py_DECREF(data);
Py_DECREF(pi_target);
}
return;
error:
Py_XDECREF(pi_target);
return;
}
/* -------------------------------------------------------------------- */
@ -3695,6 +3951,8 @@ _elementtree_XMLParser__setevents_impl(XMLParserObject *self,
Py_CLEAR(target->end_event_obj);
Py_CLEAR(target->start_ns_event_obj);
Py_CLEAR(target->end_ns_event_obj);
Py_CLEAR(target->comment_event_obj);
Py_CLEAR(target->pi_event_obj);
if (events_to_report == Py_None) {
/* default is "end" only */
@ -3740,6 +3998,18 @@ _elementtree_XMLParser__setevents_impl(XMLParserObject *self,
(XML_StartNamespaceDeclHandler) expat_start_ns_handler,
(XML_EndNamespaceDeclHandler) expat_end_ns_handler
);
} else if (strcmp(event_name, "comment") == 0) {
Py_XSETREF(target->comment_event_obj, event_name_obj);
EXPAT(SetCommentHandler)(
self->parser,
(XML_CommentHandler) expat_comment_handler
);
} else if (strcmp(event_name, "pi") == 0) {
Py_XSETREF(target->pi_event_obj, event_name_obj);
EXPAT(SetProcessingInstructionHandler)(
self->parser,
(XML_ProcessingInstructionHandler) expat_pi_handler
);
} else {
Py_DECREF(event_name_obj);
Py_DECREF(events_seq);
@ -3882,6 +4152,8 @@ static PyMethodDef treebuilder_methods[] = {
_ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
_ELEMENTTREE_TREEBUILDER_START_METHODDEF
_ELEMENTTREE_TREEBUILDER_END_METHODDEF
_ELEMENTTREE_TREEBUILDER_COMMENT_METHODDEF
_ELEMENTTREE_TREEBUILDER_PI_METHODDEF
_ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
{NULL, NULL}
};
@ -3983,6 +4255,7 @@ static PyTypeObject XMLParser_Type = {
static PyMethodDef _functions[] = {
{"SubElement", (PyCFunction)(void(*)(void)) subelement, METH_VARARGS | METH_KEYWORDS},
_ELEMENTTREE__SET_FACTORIES_METHODDEF
{NULL, NULL}
};

View file

@ -635,19 +635,26 @@ exit:
static int
_elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
PyObject *element_factory);
PyObject *element_factory,
PyObject *comment_factory,
PyObject *pi_factory,
int insert_comments, int insert_pis);
static int
_elementtree_TreeBuilder___init__(PyObject *self, PyObject *args, PyObject *kwargs)
{
int return_value = -1;
static const char * const _keywords[] = {"element_factory", NULL};
static const char * const _keywords[] = {"element_factory", "comment_factory", "pi_factory", "insert_comments", "insert_pis", NULL};
static _PyArg_Parser _parser = {NULL, _keywords, "TreeBuilder", 0};
PyObject *argsbuf[1];
PyObject *argsbuf[5];
PyObject * const *fastargs;
Py_ssize_t nargs = PyTuple_GET_SIZE(args);
Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 0;
PyObject *element_factory = NULL;
PyObject *comment_factory = NULL;
PyObject *pi_factory = NULL;
int insert_comments = 0;
int insert_pis = 0;
fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, 0, 1, 0, argsbuf);
if (!fastargs) {
@ -656,9 +663,76 @@ _elementtree_TreeBuilder___init__(PyObject *self, PyObject *args, PyObject *kwar
if (!noptargs) {
goto skip_optional_pos;
}
element_factory = fastargs[0];
if (fastargs[0]) {
element_factory = fastargs[0];
if (!--noptargs) {
goto skip_optional_pos;
}
}
skip_optional_pos:
return_value = _elementtree_TreeBuilder___init___impl((TreeBuilderObject *)self, element_factory);
if (!noptargs) {
goto skip_optional_kwonly;
}
if (fastargs[1]) {
comment_factory = fastargs[1];
if (!--noptargs) {
goto skip_optional_kwonly;
}
}
if (fastargs[2]) {
pi_factory = fastargs[2];
if (!--noptargs) {
goto skip_optional_kwonly;
}
}
if (fastargs[3]) {
insert_comments = PyObject_IsTrue(fastargs[3]);
if (insert_comments < 0) {
goto exit;
}
if (!--noptargs) {
goto skip_optional_kwonly;
}
}
insert_pis = PyObject_IsTrue(fastargs[4]);
if (insert_pis < 0) {
goto exit;
}
skip_optional_kwonly:
return_value = _elementtree_TreeBuilder___init___impl((TreeBuilderObject *)self, element_factory, comment_factory, pi_factory, insert_comments, insert_pis);
exit:
return return_value;
}
PyDoc_STRVAR(_elementtree__set_factories__doc__,
"_set_factories($module, comment_factory, pi_factory, /)\n"
"--\n"
"\n"
"Change the factories used to create comments and processing instructions.\n"
"\n"
"For internal use only.");
#define _ELEMENTTREE__SET_FACTORIES_METHODDEF \
{"_set_factories", (PyCFunction)(void(*)(void))_elementtree__set_factories, METH_FASTCALL, _elementtree__set_factories__doc__},
static PyObject *
_elementtree__set_factories_impl(PyObject *module, PyObject *comment_factory,
PyObject *pi_factory);
static PyObject *
_elementtree__set_factories(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
{
PyObject *return_value = NULL;
PyObject *comment_factory;
PyObject *pi_factory;
if (!_PyArg_CheckPositional("_set_factories", nargs, 2, 2)) {
goto exit;
}
comment_factory = args[0];
pi_factory = args[1];
return_value = _elementtree__set_factories_impl(module, comment_factory, pi_factory);
exit:
return return_value;
@ -680,6 +754,48 @@ PyDoc_STRVAR(_elementtree_TreeBuilder_end__doc__,
#define _ELEMENTTREE_TREEBUILDER_END_METHODDEF \
{"end", (PyCFunction)_elementtree_TreeBuilder_end, METH_O, _elementtree_TreeBuilder_end__doc__},
PyDoc_STRVAR(_elementtree_TreeBuilder_comment__doc__,
"comment($self, text, /)\n"
"--\n"
"\n");
#define _ELEMENTTREE_TREEBUILDER_COMMENT_METHODDEF \
{"comment", (PyCFunction)_elementtree_TreeBuilder_comment, METH_O, _elementtree_TreeBuilder_comment__doc__},
PyDoc_STRVAR(_elementtree_TreeBuilder_pi__doc__,
"pi($self, target, text=None, /)\n"
"--\n"
"\n");
#define _ELEMENTTREE_TREEBUILDER_PI_METHODDEF \
{"pi", (PyCFunction)(void(*)(void))_elementtree_TreeBuilder_pi, METH_FASTCALL, _elementtree_TreeBuilder_pi__doc__},
static PyObject *
_elementtree_TreeBuilder_pi_impl(TreeBuilderObject *self, PyObject *target,
PyObject *text);
static PyObject *
_elementtree_TreeBuilder_pi(TreeBuilderObject *self, PyObject *const *args, Py_ssize_t nargs)
{
PyObject *return_value = NULL;
PyObject *target;
PyObject *text = Py_None;
if (!_PyArg_CheckPositional("pi", nargs, 1, 2)) {
goto exit;
}
target = args[0];
if (nargs < 2) {
goto skip_optional;
}
text = args[1];
skip_optional:
return_value = _elementtree_TreeBuilder_pi_impl(self, target, text);
exit:
return return_value;
}
PyDoc_STRVAR(_elementtree_TreeBuilder_close__doc__,
"close($self, /)\n"
"--\n"
@ -853,4 +969,4 @@ skip_optional:
exit:
return return_value;
}
/*[clinic end generated code: output=440b5d90a4b86590 input=a9049054013a1b77]*/
/*[clinic end generated code: output=386a68425d072b5c input=a9049054013a1b77]*/