gh-133157: remove usage of _Py_NO_SANITIZE_UNDEFINED in pyexpat (#135346)

This was the last usage, so the macro is removed as well.

Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
This commit is contained in:
Petr Viktorin 2025-07-01 10:54:08 +02:00 committed by GitHub
parent 23caccf74c
commit 845263adc6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 96 additions and 29 deletions

View file

@ -667,25 +667,6 @@ extern "C" {
#endif #endif
// _Py_NO_SANITIZE_UNDEFINED(): Disable Undefined Behavior sanitizer (UBsan)
// on a function.
//
// Clang and GCC 9.0+ use __attribute__((no_sanitize("undefined"))).
// GCC 4.9+ uses __attribute__((no_sanitize_undefined)).
#if defined(__has_feature)
# if __has_feature(undefined_behavior_sanitizer)
# define _Py_NO_SANITIZE_UNDEFINED __attribute__((no_sanitize("undefined")))
# endif
#endif
#if !defined(_Py_NO_SANITIZE_UNDEFINED) && defined(__GNUC__) \
&& ((__GNUC__ >= 5) || (__GNUC__ == 4) && (__GNUC_MINOR__ >= 9))
# define _Py_NO_SANITIZE_UNDEFINED __attribute__((no_sanitize_undefined))
#endif
#ifndef _Py_NO_SANITIZE_UNDEFINED
# define _Py_NO_SANITIZE_UNDEFINED
#endif
// _Py_NONSTRING: The nonstring variable attribute specifies that an object or // _Py_NONSTRING: The nonstring variable attribute specifies that an object or
// member declaration with type array of char, signed char, or unsigned char, // member declaration with type array of char, signed char, or unsigned char,
// or pointer to such a type is intended to store character arrays that do not // or pointer to such a type is intended to store character arrays that do not

View file

@ -9,12 +9,11 @@ import traceback
from io import BytesIO from io import BytesIO
from test import support from test import support
from test.support import os_helper from test.support import os_helper
from test.support import sortdict
from unittest import mock
from xml.parsers import expat from xml.parsers import expat
from xml.parsers.expat import errors from xml.parsers.expat import errors
from test.support import sortdict
class SetAttributeTest(unittest.TestCase): class SetAttributeTest(unittest.TestCase):
def setUp(self): def setUp(self):
@ -436,6 +435,19 @@ class BufferTextTest(unittest.TestCase):
"<!--abc-->", "4", "<!--def-->", "5", "</a>"], "<!--abc-->", "4", "<!--def-->", "5", "</a>"],
"buffered text not properly split") "buffered text not properly split")
def test_change_character_data_handler_in_callback(self):
# Test that xmlparse_handler_setter() properly handles
# the special case "parser.CharacterDataHandler = None".
def handler(*args):
parser.CharacterDataHandler = None
handler_wrapper = mock.Mock(wraps=handler)
parser = expat.ParserCreate()
parser.CharacterDataHandler = handler_wrapper
parser.Parse(b"<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", True)
handler_wrapper.assert_called_once()
self.assertIsNone(parser.CharacterDataHandler)
# Test handling of exception from callback: # Test handling of exception from callback:
class HandlerExceptionTest(unittest.TestCase): class HandlerExceptionTest(unittest.TestCase):
@ -595,7 +607,7 @@ class ChardataBufferTest(unittest.TestCase):
def test_disabling_buffer(self): def test_disabling_buffer(self):
xml1 = b"<?xml version='1.0' encoding='iso8859'?><a>" + b'a' * 512 xml1 = b"<?xml version='1.0' encoding='iso8859'?><a>" + b'a' * 512
xml2 = b'b' * 1024 xml2 = b'b' * 1024
xml3 = b'c' * 1024 + b'</a>'; xml3 = b'c' * 1024 + b'</a>'
parser = expat.ParserCreate() parser = expat.ParserCreate()
parser.CharacterDataHandler = self.counting_handler parser.CharacterDataHandler = self.counting_handler
parser.buffer_text = 1 parser.buffer_text = 1

View file

@ -0,0 +1 @@
Remove the private, undocumented macro :c:macro:`!_Py_NO_SANITIZE_UNDEFINED`.

View file

@ -98,7 +98,11 @@ typedef struct {
#define CHARACTER_DATA_BUFFER_SIZE 8192 #define CHARACTER_DATA_BUFFER_SIZE 8192
typedef const void *xmlhandler; // A generic function type for storage.
// To avoid undefined behaviors, a handler must be cast to the correct
// function type before it's called; see SETTER_WRAPPER below.
typedef void (*xmlhandler)(void);
typedef void (*xmlhandlersetter)(XML_Parser self, xmlhandler handler); typedef void (*xmlhandlersetter)(XML_Parser self, xmlhandler handler);
struct HandlerInfo { struct HandlerInfo {
@ -110,9 +114,7 @@ struct HandlerInfo {
static struct HandlerInfo handler_info[64]; static struct HandlerInfo handler_info[64];
// gh-111178: Use _Py_NO_SANITIZE_UNDEFINED, rather than using the exact static inline void
// handler API for each handler.
static inline void _Py_NO_SANITIZE_UNDEFINED
CALL_XML_HANDLER_SETTER(const struct HandlerInfo *handler_info, CALL_XML_HANDLER_SETTER(const struct HandlerInfo *handler_info,
XML_Parser xml_parser, xmlhandler xml_handler) XML_Parser xml_parser, xmlhandler xml_handler)
{ {
@ -1365,7 +1367,7 @@ xmlparse_handler_setter(PyObject *op, PyObject *v, void *closure)
elaborate system of handlers and state could remove the elaborate system of handlers and state could remove the
C handler more effectively. */ C handler more effectively. */
if (handlernum == CharacterData && self->in_callback) { if (handlernum == CharacterData && self->in_callback) {
c_handler = noop_character_data_handler; c_handler = (xmlhandler)noop_character_data_handler;
} }
v = NULL; v = NULL;
} }
@ -2222,13 +2224,84 @@ clear_handlers(xmlparseobject *self, int initial)
} }
} }
/* Calling a function through a pointer of the wrong type is undefined
 * behavior.  Handlers are stored as the generic `xmlhandler`, so every
 * `XML_Set*` function gets a `pyexpat_Set*` wrapper that casts the stored
 * pointer to the matching function type before passing it on.
 */

/* Function-pointer types, one per distinct handler signature. */
typedef void (*parser_only)(void *);
typedef int (*not_standalone)(void *);
typedef void (*parser_and_data)(void *, const XML_Char *);
typedef void (*parser_and_data_and_int)(void *, const XML_Char *, int);
typedef void (*parser_and_data_and_data)(void *,
                                         const XML_Char *,
                                         const XML_Char *);
typedef void (*start_element)(void *, const XML_Char *, const XML_Char **);
typedef void (*element_decl)(void *, const XML_Char *, XML_Content *);
typedef void (*xml_decl)(void *,
                         const XML_Char *,
                         const XML_Char *,
                         int);
typedef void (*start_doctype_decl)(void *,
                                   const XML_Char *,
                                   const XML_Char *,
                                   const XML_Char *,
                                   int);
typedef void (*notation_decl)(void *,
                              const XML_Char *,
                              const XML_Char *,
                              const XML_Char *,
                              const XML_Char *);
typedef void (*attlist_decl)(void *,
                             const XML_Char *,
                             const XML_Char *,
                             const XML_Char *,
                             const XML_Char *,
                             int);
typedef void (*unparsed_entity_decl)(void *,
                                     const XML_Char *,
                                     const XML_Char *,
                                     const XML_Char *,
                                     const XML_Char *,
                                     const XML_Char *);
typedef void (*entity_decl)(void *,
                            const XML_Char *,
                            int,
                            const XML_Char *,
                            int,
                            const XML_Char *,
                            const XML_Char *,
                            const XML_Char *,
                            const XML_Char *);
typedef int (*external_entity_ref)(XML_Parser,
                                   const XML_Char *,
                                   const XML_Char *,
                                   const XML_Char *,
                                   const XML_Char *);

/* Define pyexpat_Set<HANDLER_NAME>(), which forwards to
 * XML_Set<HANDLER_NAME>() with the generic handler cast to FUNC_TYPE.
 * Any value returned by the XML_Set* function is discarded.
 */
#define SETTER_WRAPPER(HANDLER_NAME, FUNC_TYPE) \
    static inline void \
    pyexpat_Set ## HANDLER_NAME (XML_Parser xml_parser, xmlhandler generic) \
    { \
        (void)XML_Set ## HANDLER_NAME (xml_parser, (FUNC_TYPE)generic); \
    }

SETTER_WRAPPER(StartElementHandler, start_element)
SETTER_WRAPPER(EndElementHandler, parser_and_data)
SETTER_WRAPPER(ProcessingInstructionHandler, parser_and_data_and_data)
SETTER_WRAPPER(CharacterDataHandler, parser_and_data_and_int)
SETTER_WRAPPER(UnparsedEntityDeclHandler, unparsed_entity_decl)
SETTER_WRAPPER(NotationDeclHandler, notation_decl)
SETTER_WRAPPER(StartNamespaceDeclHandler, parser_and_data_and_data)
SETTER_WRAPPER(EndNamespaceDeclHandler, parser_and_data)
SETTER_WRAPPER(CommentHandler, parser_and_data)
SETTER_WRAPPER(StartCdataSectionHandler, parser_only)
SETTER_WRAPPER(EndCdataSectionHandler, parser_only)
SETTER_WRAPPER(DefaultHandler, parser_and_data_and_int)
SETTER_WRAPPER(DefaultHandlerExpand, parser_and_data_and_int)
SETTER_WRAPPER(NotStandaloneHandler, not_standalone)
SETTER_WRAPPER(ExternalEntityRefHandler, external_entity_ref)
SETTER_WRAPPER(StartDoctypeDeclHandler, start_doctype_decl)
SETTER_WRAPPER(EndDoctypeDeclHandler, parser_only)
SETTER_WRAPPER(EntityDeclHandler, entity_decl)
SETTER_WRAPPER(XmlDeclHandler, xml_decl)
SETTER_WRAPPER(ElementDeclHandler, element_decl)
SETTER_WRAPPER(AttlistDeclHandler, attlist_decl)
/* This wrapper is only built for XML_COMBINED_VERSION >= 19504. */
#if XML_COMBINED_VERSION >= 19504
SETTER_WRAPPER(SkippedEntityHandler, parser_and_data_and_int)
#endif

#undef SETTER_WRAPPER
static struct HandlerInfo handler_info[] = { static struct HandlerInfo handler_info[] = {
// The cast to `xmlhandlersetter` is needed as the signature of XML // The cast to `xmlhandlersetter` is needed as the signature of XML
// handler functions is not compatible with `xmlhandlersetter` since // handler functions is not compatible with `xmlhandlersetter` since
// their second parameter is narrower than a `const void *`. // their second parameter is narrower than a `const void *`.
#define HANDLER_INFO(name) \ #define HANDLER_INFO(name) \
{#name, (xmlhandlersetter)XML_Set##name, my_##name}, {#name, (xmlhandlersetter)pyexpat_Set##name, (xmlhandler)my_##name},
HANDLER_INFO(StartElementHandler) HANDLER_INFO(StartElementHandler)
HANDLER_INFO(EndElementHandler) HANDLER_INFO(EndElementHandler)