Fixed #32713, Fixed CVE-2021-32052 -- Prevented newlines and tabs from being accepted in URLValidator on Python 3.9.5+.

In Python 3.9.5+, urllib.parse.urlsplit() automatically removes ASCII
newlines and tabs from URLs [1, 2]. Unfortunately, this created an issue
in URLValidator. URLValidator uses urllib.parse.urlsplit() and
urllib.parse.urlunsplit() to build a Punycode variant of the URL, and in
Python 3.9.5+ that variant no longer contains newlines and tabs. As a
consequence, the regular expression matched the rebuilt URL (without
unsafe characters) and the source value (with unsafe characters) was
considered valid.

[1] https://bugs.python.org/issue43882
[2] CPython commit 76cd81d603
This commit is contained in:
Mariusz Felisiak 2021-05-06 08:45:23 +02:00 committed by GitHub
parent a708f39ce6
commit e1e81aa1c4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 73 additions and 3 deletions

View file

@ -92,6 +92,7 @@ class URLValidator(RegexValidator):
r'\Z', re.IGNORECASE)
message = _('Enter a valid URL.')
schemes = ['http', 'https', 'ftp', 'ftps']
unsafe_chars = frozenset('\t\r\n')
def __init__(self, schemes=None, **kwargs):
super().__init__(**kwargs)
@ -101,6 +102,8 @@ class URLValidator(RegexValidator):
def __call__(self, value):
if not isinstance(value, str):
raise ValidationError(self.message, code=self.code, params={'value': value})
if self.unsafe_chars.intersection(value):
raise ValidationError(self.message, code=self.code, params={'value': value})
# Check if the scheme is valid.
scheme = value.split('://')[0].lower()
if scheme not in self.schemes: