From e3ec869063bc4bf333b112459ba03e6b2b4b0b19 Mon Sep 17 00:00:00 2001 From: Jesper Jensen Date: Wed, 11 Sep 2024 18:32:27 +0200 Subject: [PATCH] Expose GetCurrentByteCount from expat --- Doc/library/pyexpat.rst | 9 +++++++++ Lib/test/test_pyexpat.py | 5 +++-- Misc/ACKS | 1 + .../2024-09-11-16-40-12.gh-issue-123963.TzzIY8.rst | 3 +++ Modules/pyexpat.c | 2 ++ 5 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-09-11-16-40-12.gh-issue-123963.TzzIY8.rst diff --git a/Doc/library/pyexpat.rst b/Doc/library/pyexpat.rst index c0e9999f4b1..1efdb36a4b3 100644 --- a/Doc/library/pyexpat.rst +++ b/Doc/library/pyexpat.rst @@ -316,6 +316,15 @@ just past the last parse event (regardless of whether there was an associated callback). +.. attribute:: xmlparser.CurrentByteCount + + Number of bytes in the current event. ``0`` for the end-tag event of an + *empty-element* tag, or when the event is inside a reference to an internal + entity. + + .. versionadded:: 3.14 + + .. attribute:: xmlparser.CurrentByteIndex Current byte index in the parser input. 
diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index 1d56ccd71cf..d1935aaa764 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -506,6 +506,7 @@ class PositionTest(unittest.TestCase): def check_pos(self, event): pos = (event, self.parser.CurrentByteIndex, + self.parser.CurrentByteCount, self.parser.CurrentLineNumber, self.parser.CurrentColumnNumber) self.assertTrue(self.upto < len(self.expected_list), @@ -520,8 +521,8 @@ class PositionTest(unittest.TestCase): self.parser.StartElementHandler = self.StartElementHandler self.parser.EndElementHandler = self.EndElementHandler self.upto = 0 - self.expected_list = [('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2), - ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)] + self.expected_list = [('s', 0, 3, 1, 0), ('s', 5, 3, 2, 1), ('s', 11, 4, 3, 2), + ('e', 15, 0, 3, 6), ('e', 17, 4, 4, 1), ('e', 22, 4, 5, 0)] xml = b'<a>\n <b>\n  <c/>\n </b>\n</a>' self.parser.Parse(xml, True) diff --git a/Misc/ACKS b/Misc/ACKS index b031eb7c11f..f02dd1b5f3b 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -882,6 +882,7 @@ Muhammad Jehanzeb Drew Jenkins Flemming Kjær Jensen Philip H. Jensen +Jesper Jensen Philip Jenvey MunSic Jeong Chris Jerdonek diff --git a/Misc/NEWS.d/next/Library/2024-09-11-16-40-12.gh-issue-123963.TzzIY8.rst b/Misc/NEWS.d/next/Library/2024-09-11-16-40-12.gh-issue-123963.TzzIY8.rst new file mode 100644 index 00000000000..2b7057d72e0 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-09-11-16-40-12.gh-issue-123963.TzzIY8.rst @@ -0,0 +1,3 @@ +Expose the :attr:`xmlparser.CurrentByteCount` field for :mod:`Expat XML +<xml.parsers.expat>` parsers. +Patch by Jesper Jensen. 
diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index 9733bc34f7c..c4ba8a92b8d 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -1349,6 +1349,7 @@ INT_GETTER(ErrorByteIndex) INT_GETTER(CurrentLineNumber) INT_GETTER(CurrentColumnNumber) INT_GETTER(CurrentByteIndex) +INT_GETTER(CurrentByteCount) #undef INT_GETTER @@ -1529,6 +1530,7 @@ static PyGetSetDef xmlparse_getsetlist[] = { XMLPARSE_GETTER_DEF(CurrentLineNumber) XMLPARSE_GETTER_DEF(CurrentColumnNumber) XMLPARSE_GETTER_DEF(CurrentByteIndex) + XMLPARSE_GETTER_DEF(CurrentByteCount) XMLPARSE_GETTER_SETTER_DEF(buffer_size) XMLPARSE_GETTER_SETTER_DEF(buffer_text) XMLPARSE_GETTER_DEF(buffer_used)