[3.9] gh-98433: Fix quadratic time idna decoding. (GH-99092) (GH-99222) (#99230)

There was an unnecessary quadratic loop in IDNA decoding. This restores
linear-time behavior.

(cherry picked from commit d315722564)
(cherry picked from commit a6f6c3a3d6)

Co-authored-by: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Co-authored-by: Gregory P. Smith <greg@krypto.org>
This commit is contained in:
Miss Islington (bot) 2022-11-10 07:57:41 -08:00 committed by GitHub
parent b43496c01a
commit c09dba57cf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 27 additions and 17 deletions

View file

@ -39,8 +39,7 @@ def nameprep(label):
# Check bidi # Check bidi
RandAL = [stringprep.in_table_d1(x) for x in label] RandAL = [stringprep.in_table_d1(x) for x in label]
for c in RandAL: if any(RandAL):
if c:
# There is a RandAL char in the string. Must perform further # There is a RandAL char in the string. Must perform further
# tests: # tests:
# 1) The characters in section 5.8 MUST be prohibited. # 1) The characters in section 5.8 MUST be prohibited.
@ -49,7 +48,6 @@ def nameprep(label):
# MUST NOT contain any LCat character. # MUST NOT contain any LCat character.
if any(stringprep.in_table_d2(x) for x in label): if any(stringprep.in_table_d2(x) for x in label):
raise UnicodeError("Violation of BIDI requirement 2") raise UnicodeError("Violation of BIDI requirement 2")
# 3) If a string contains any RandALCat character, a # 3) If a string contains any RandALCat character, a
# RandALCat character MUST be the first character of the # RandALCat character MUST be the first character of the
# string, and a RandALCat character MUST be the last # string, and a RandALCat character MUST be the last

View file

@ -1532,6 +1532,12 @@ class IDNACodecTest(unittest.TestCase):
self.assertEqual("pyth\xf6n.org".encode("idna"), b"xn--pythn-mua.org") self.assertEqual("pyth\xf6n.org".encode("idna"), b"xn--pythn-mua.org")
self.assertEqual("pyth\xf6n.org.".encode("idna"), b"xn--pythn-mua.org.") self.assertEqual("pyth\xf6n.org.".encode("idna"), b"xn--pythn-mua.org.")
def test_builtin_decode_length_limit(self):
with self.assertRaisesRegex(UnicodeError, "too long"):
(b"xn--016c"+b"a"*1100).decode("idna")
with self.assertRaisesRegex(UnicodeError, "too long"):
(b"xn--016c"+b"a"*70).decode("idna")
def test_stream(self): def test_stream(self):
r = codecs.getreader("idna")(io.BytesIO(b"abc")) r = codecs.getreader("idna")(io.BytesIO(b"abc"))
r.read(3) r.read(3)

View file

@ -0,0 +1,6 @@
The IDNA codec decoder used on DNS hostnames by :mod:`socket` or :mod:`asyncio`
related name resolution functions no longer involves a quadratic algorithm.
This prevents a potential CPU denial of service if an out-of-spec, excessively
long hostname involving bidirectional characters is decoded. Some protocols,
such as :mod:`urllib` HTTP ``3xx`` redirects, potentially allow an attacker
to supply such a name.