gh-63283: IDNA prefix should be case insensitive (GH-17726)

Any capitalization of "xn--" should be acceptable for the ACE prefix
(see https://tools.ietf.org/html/rfc3490#section-5).

Co-authored-by: Pepijn de Vos <pepijndevos@gmail.com>
Co-authored-by: Erlend E. Aasland <erlend@python.org>
Co-authored-by: Petr Viktorin <encukou@gmail.com>
This commit is contained in:
Zackery Spytz 2024-03-15 07:38:13 -07:00 committed by GitHub
parent ce2c996b2f
commit d180b507c4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 12 additions and 3 deletions

View file

@ -86,7 +86,7 @@ def ToASCII(label):
raise UnicodeError("label empty or too long") raise UnicodeError("label empty or too long")
# Step 5: Check ACE prefix # Step 5: Check ACE prefix
if label.startswith(sace_prefix): if label[:4].lower() == sace_prefix:
raise UnicodeError("Label starts with ACE prefix") raise UnicodeError("Label starts with ACE prefix")
# Step 6: Encode with PUNYCODE # Step 6: Encode with PUNYCODE
@ -129,7 +129,7 @@ def ToUnicode(label):
except UnicodeError: except UnicodeError:
raise UnicodeError("Invalid character in IDN label") raise UnicodeError("Invalid character in IDN label")
# Step 3: Check for ACE prefix # Step 3: Check for ACE prefix
if not label.startswith(ace_prefix): if not label[:4].lower() == ace_prefix:
return str(label, "ascii") return str(label, "ascii")
# Step 4: Remove ACE prefix # Step 4: Remove ACE prefix
@ -202,7 +202,7 @@ class Codec(codecs.Codec):
# XXX obviously wrong, see #3232 # XXX obviously wrong, see #3232
input = bytes(input) input = bytes(input)
if ace_prefix not in input: if ace_prefix not in input.lower():
# Fast path # Fast path
try: try:
return input.decode('ascii'), len(input) return input.decode('ascii'), len(input)

View file

@ -1547,6 +1547,13 @@ class IDNACodecTest(unittest.TestCase):
self.assertEqual(str(b"python.org.", "idna"), "python.org.") self.assertEqual(str(b"python.org.", "idna"), "python.org.")
self.assertEqual(str(b"xn--pythn-mua.org", "idna"), "pyth\xf6n.org") self.assertEqual(str(b"xn--pythn-mua.org", "idna"), "pyth\xf6n.org")
self.assertEqual(str(b"xn--pythn-mua.org.", "idna"), "pyth\xf6n.org.") self.assertEqual(str(b"xn--pythn-mua.org.", "idna"), "pyth\xf6n.org.")
self.assertEqual(str(b"XN--pythn-mua.org.", "idna"), "pyth\xf6n.org.")
self.assertEqual(str(b"xN--pythn-mua.org.", "idna"), "pyth\xf6n.org.")
self.assertEqual(str(b"Xn--pythn-mua.org.", "idna"), "pyth\xf6n.org.")
self.assertEqual(str(b"bugs.xn--pythn-mua.org.", "idna"),
"bugs.pyth\xf6n.org.")
self.assertEqual(str(b"bugs.XN--pythn-mua.org.", "idna"),
"bugs.pyth\xf6n.org.")
def test_builtin_encode(self): def test_builtin_encode(self):
self.assertEqual("python.org".encode("idna"), b"python.org") self.assertEqual("python.org".encode("idna"), b"python.org")

View file

@ -0,0 +1,2 @@
In :mod:`encodings.idna`, any capitalization of the ACE prefix
(``xn--``) is now acceptable. Patch by Pepijn de Vos and Zackery Spytz.