Issue #17214: Percent-encode non-ASCII bytes in redirect targets

Some servers send Location header fields with non-ASCII bytes, but
"http.client" requires the request target to be ASCII-encodable; otherwise a
UnicodeEncodeError is raised. Based on a patch by Christian Heimes.

Python 2 does not suffer any problem because it allows non-ASCII bytes in the
HTTP request target.
This commit is contained in:
Martin Panter 2016-05-16 01:14:20 +00:00
parent ce6e06874b
commit e6f060903c
3 changed files with 52 additions and 1 deletions

View file

@@ -91,6 +91,7 @@ import os
import posixpath
import re
import socket
import string
import sys
import time
import collections
@@ -616,8 +617,12 @@ class HTTPRedirectHandler(BaseHandler):
# from the user (of urllib.request, in this case). In practice,
# essentially all clients do redirect in this case, so we do
# the same.
# be conciliant with URIs containing a space
# Be lenient with URIs containing a space. This is mainly
# redundant with the more complete encoding done in http_error_302(),
# but it is kept for compatibility with other callers.
newurl = newurl.replace(' ', '%20')
CONTENT_HEADERS = ("content-length", "content-type")
newheaders = dict((k, v) for k, v in req.headers.items()
if k.lower() not in CONTENT_HEADERS)
@@ -657,6 +662,11 @@ class HTTPRedirectHandler(BaseHandler):
urlparts[2] = "/"
newurl = urlunparse(urlparts)
# http.client.parse_headers() decodes as ISO-8859-1. Recover the
# original bytes and percent-encode non-ASCII bytes, and any special
# characters such as the space.
newurl = quote(
newurl, encoding="iso-8859-1", safe=string.punctuation)
newurl = urljoin(req.full_url, newurl)
# XXX Probably want to forget about the state of the current