[3.9] gh-98433: Fix quadratic time idna decoding. (GH-99092) (GH-99222) (#99230)

There was an unnecessary quadratic loop in IDNA decoding. This change
restores linear-time behavior.

(cherry picked from commit d315722564)
(cherry picked from commit a6f6c3a3d6)

Co-authored-by: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Co-authored-by: Gregory P. Smith <greg@krypto.org>
This commit is contained in:
Miss Islington (bot) 2022-11-10 07:57:41 -08:00 committed by GitHub
parent b43496c01a
commit c09dba57cf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 27 additions and 17 deletions

View file

@ -39,23 +39,21 @@ def nameprep(label):
# Check bidi # Check bidi
RandAL = [stringprep.in_table_d1(x) for x in label] RandAL = [stringprep.in_table_d1(x) for x in label]
for c in RandAL: if any(RandAL):
if c: # There is a RandAL char in the string. Must perform further
# There is a RandAL char in the string. Must perform further # tests:
# tests: # 1) The characters in section 5.8 MUST be prohibited.
# 1) The characters in section 5.8 MUST be prohibited. # This is table C.8, which was already checked
# This is table C.8, which was already checked # 2) If a string contains any RandALCat character, the string
# 2) If a string contains any RandALCat character, the string # MUST NOT contain any LCat character.
# MUST NOT contain any LCat character. if any(stringprep.in_table_d2(x) for x in label):
if any(stringprep.in_table_d2(x) for x in label): raise UnicodeError("Violation of BIDI requirement 2")
raise UnicodeError("Violation of BIDI requirement 2") # 3) If a string contains any RandALCat character, a
# RandALCat character MUST be the first character of the
# 3) If a string contains any RandALCat character, a # string, and a RandALCat character MUST be the last
# RandALCat character MUST be the first character of the # character of the string.
# string, and a RandALCat character MUST be the last if not RandAL[0] or not RandAL[-1]:
# character of the string. raise UnicodeError("Violation of BIDI requirement 3")
if not RandAL[0] or not RandAL[-1]:
raise UnicodeError("Violation of BIDI requirement 3")
return label return label

View file

@ -1532,6 +1532,12 @@ class IDNACodecTest(unittest.TestCase):
self.assertEqual("pyth\xf6n.org".encode("idna"), b"xn--pythn-mua.org") self.assertEqual("pyth\xf6n.org".encode("idna"), b"xn--pythn-mua.org")
self.assertEqual("pyth\xf6n.org.".encode("idna"), b"xn--pythn-mua.org.") self.assertEqual("pyth\xf6n.org.".encode("idna"), b"xn--pythn-mua.org.")
def test_builtin_decode_length_limit(self):
    # Decoding an over-long ACE ("xn--") label with the built-in "idna"
    # codec must fail with a UnicodeError whose message matches "too long".
    # First case: the prefix followed by 1100 filler bytes.
    with self.assertRaisesRegex(UnicodeError, "too long"):
        (b"xn--016c"+b"a"*1100).decode("idna")
    # Second case: only 70 filler bytes, which must be rejected the same way.
    # NOTE(review): presumably chosen to sit just past the codec's per-label
    # length limit -- confirm against the codec implementation.
    with self.assertRaisesRegex(UnicodeError, "too long"):
        (b"xn--016c"+b"a"*70).decode("idna")
def test_stream(self): def test_stream(self):
r = codecs.getreader("idna")(io.BytesIO(b"abc")) r = codecs.getreader("idna")(io.BytesIO(b"abc"))
r.read(3) r.read(3)

View file

@ -0,0 +1,6 @@
The IDNA codec decoder used on DNS hostnames by :mod:`socket` or :mod:`asyncio`
related name resolution functions no longer involves a quadratic algorithm.
This prevents a potential CPU denial of service if an out-of-spec, excessively
long hostname involving bidirectional characters is decoded. Some protocols,
such as :mod:`urllib` HTTP ``3xx`` redirects, potentially allow an attacker
to supply such a name.