From e7e105f51b4ea4087a3bbd87c445aaff0124d4a6 Mon Sep 17 00:00:00 2001
From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com>
Date: Sat, 10 May 2025 20:02:23 +0200
Subject: [PATCH] [3.14] gh-86155: Fix data loss after unclosed script or style tag in HTMLParser (GH-22658) (GH-133844)
When calling .close() the HTMLParser should flush all remaining content,
even when that content is in an unclosed script or style tag.
(cherry picked from commit 53383e90e4df7029f792b7aa81aa2e4cff348ed0)
Co-authored-by: Waylan Limberg
---
Lib/html/parser.py | 2 +-
Lib/test/test_htmlparser.py | 10 ++++++++++
.../2023-02-13-21-41-34.gh-issue-86155.ppIGSC.rst | 2 ++
3 files changed, 13 insertions(+), 1 deletion(-)
create mode 100644 Misc/NEWS.d/next/Library/2023-02-13-21-41-34.gh-issue-86155.ppIGSC.rst
diff --git a/Lib/html/parser.py b/Lib/html/parser.py
index 1b8b6ea0e5a..1e30956fe24 100644
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@@ -260,7 +260,7 @@ class HTMLParser(_markupbase.ParserBase):
else:
assert 0, "interesting.search() lied"
# end while
- if end and i < n and not self.cdata_elem:
+ if end and i < n:
if self.convert_charrefs and not self.cdata_elem:
self.handle_data(unescape(rawdata[i:n]))
else:
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
index 68649e9d6d5..61fa24fab57 100644
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -317,6 +317,16 @@ text
("endtag", element_lower)],
collector=Collector(convert_charrefs=False))
+ def test_EOF_in_cdata(self):
+ content = """ ¬-an-entity-ref;
+
+ ''"""
+ s = f'