mirror of
https://github.com/python/cpython.git
synced 2025-08-04 00:48:58 +00:00
Enhance the punycode decoder so that it can decode
unicode objects. Fix the idna codec and the tests.
This commit is contained in:
parent
1f05a3b7fb
commit
0ac30f82fe
3 changed files with 136 additions and 130 deletions
|
@ -7,7 +7,8 @@ from unicodedata import ucd_3_2_0 as unicodedata
|
|||
dots = re.compile("[\u002E\u3002\uFF0E\uFF61]")
|
||||
|
||||
# IDNA section 5
|
||||
ace_prefix = "xn--"
|
||||
ace_prefix = b"xn--"
|
||||
sace_prefix = "xn--"
|
||||
|
||||
# This assumes query strings, so AllowUnassigned is true
|
||||
def nameprep(label):
|
||||
|
@ -87,7 +88,7 @@ def ToASCII(label):
|
|||
raise UnicodeError("label empty or too long")
|
||||
|
||||
# Step 5: Check ACE prefix
|
||||
if label.startswith(ace_prefix):
|
||||
if label.startswith(sace_prefix):
|
||||
raise UnicodeError("Label starts with ACE prefix")
|
||||
|
||||
# Step 6: Encode with PUNYCODE
|
||||
|
@ -134,7 +135,7 @@ def ToUnicode(label):
|
|||
|
||||
# Step 7: Compare the result of step 6 with the one of step 3
|
||||
# label2 will already be in lower case.
|
||||
if label.lower() != label2:
|
||||
if str(label, "ascii").lower() != str(label2, "ascii"):
|
||||
raise UnicodeError("IDNA does not round-trip", label, label2)
|
||||
|
||||
# Step 8: return the result of step 5
|
||||
|
@ -143,7 +144,7 @@ def ToUnicode(label):
|
|||
### Codec APIs
|
||||
|
||||
class Codec(codecs.Codec):
|
||||
def encode(self,input,errors='strict'):
|
||||
def encode(self, input, errors='strict'):
|
||||
|
||||
if errors != 'strict':
|
||||
# IDNA is quite clear that implementations must be strict
|
||||
|
@ -152,19 +153,21 @@ class Codec(codecs.Codec):
|
|||
if not input:
|
||||
return b"", 0
|
||||
|
||||
result = []
|
||||
result = b""
|
||||
labels = dots.split(input)
|
||||
if labels and len(labels[-1])==0:
|
||||
if labels and not labels[-1]:
|
||||
trailing_dot = b'.'
|
||||
del labels[-1]
|
||||
else:
|
||||
trailing_dot = b''
|
||||
for label in labels:
|
||||
result.append(ToASCII(label))
|
||||
# Join with U+002E
|
||||
return b".".join(result)+trailing_dot, len(input)
|
||||
if result:
|
||||
# Join with U+002E
|
||||
result.extend(b'.')
|
||||
result.extend(ToASCII(label))
|
||||
return result+trailing_dot, len(input)
|
||||
|
||||
def decode(self,input,errors='strict'):
|
||||
def decode(self, input, errors='strict'):
|
||||
|
||||
if errors != 'strict':
|
||||
raise UnicodeError("Unsupported error handling "+errors)
|
||||
|
@ -199,30 +202,31 @@ class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
|
|||
raise UnicodeError("unsupported error handling "+errors)
|
||||
|
||||
if not input:
|
||||
return ("", 0)
|
||||
return (b'', 0)
|
||||
|
||||
labels = dots.split(input)
|
||||
trailing_dot = ''
|
||||
trailing_dot = b''
|
||||
if labels:
|
||||
if not labels[-1]:
|
||||
trailing_dot = '.'
|
||||
trailing_dot = b'.'
|
||||
del labels[-1]
|
||||
elif not final:
|
||||
# Keep potentially unfinished label until the next call
|
||||
del labels[-1]
|
||||
if labels:
|
||||
trailing_dot = '.'
|
||||
trailing_dot = b'.'
|
||||
|
||||
result = []
|
||||
result = b""
|
||||
size = 0
|
||||
for label in labels:
|
||||
result.append(ToASCII(label))
|
||||
if size:
|
||||
# Join with U+002E
|
||||
result.extend(b'.')
|
||||
size += 1
|
||||
result.extend(ToASCII(label))
|
||||
size += len(label)
|
||||
|
||||
# Join with U+002E
|
||||
result = ".".join(result) + trailing_dot
|
||||
result += trailing_dot
|
||||
size += len(trailing_dot)
|
||||
return (result, size)
|
||||
|
||||
|
@ -239,8 +243,7 @@ class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
|
|||
labels = dots.split(input)
|
||||
else:
|
||||
# Must be ASCII string
|
||||
input = str(input)
|
||||
str(input, "ascii")
|
||||
input = str(input, "ascii")
|
||||
labels = input.split(".")
|
||||
|
||||
trailing_dot = ''
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue