Issue #22165: SimpleHTTPRequestHandler now supports undecodable file names.

This commit is contained in:
Serhiy Storchaka 2014-08-17 08:22:11 +03:00
parent 402df0975c
commit cb5bc408ad
3 changed files with 36 additions and 4 deletions

View file

@ -747,7 +747,12 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
return None return None
list.sort(key=lambda a: a.lower()) list.sort(key=lambda a: a.lower())
r = [] r = []
displaypath = html.escape(urllib.parse.unquote(self.path)) try:
displaypath = urllib.parse.unquote(self.path,
errors='surrogatepass')
except UnicodeDecodeError:
displaypath = urllib.parse.unquote(path)
displaypath = html.escape(displaypath)
enc = sys.getfilesystemencoding() enc = sys.getfilesystemencoding()
title = 'Directory listing for %s' % displaypath title = 'Directory listing for %s' % displaypath
r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" ' r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
@ -769,9 +774,11 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
displayname = name + "@" displayname = name + "@"
# Note: a link to a directory displays with @ and links with / # Note: a link to a directory displays with @ and links with /
r.append('<li><a href="%s">%s</a></li>' r.append('<li><a href="%s">%s</a></li>'
% (urllib.parse.quote(linkname), html.escape(displayname))) % (urllib.parse.quote(linkname,
errors='surrogatepass'),
html.escape(displayname)))
r.append('</ul>\n<hr>\n</body>\n</html>\n') r.append('</ul>\n<hr>\n</body>\n</html>\n')
encoded = '\n'.join(r).encode(enc) encoded = '\n'.join(r).encode(enc, 'surrogateescape')
f = io.BytesIO() f = io.BytesIO()
f.write(encoded) f.write(encoded)
f.seek(0) f.seek(0)
@ -794,7 +801,11 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
path = path.split('#',1)[0] path = path.split('#',1)[0]
# Don't forget explicit trailing slash when normalizing. Issue17324 # Don't forget explicit trailing slash when normalizing. Issue17324
trailing_slash = path.rstrip().endswith('/') trailing_slash = path.rstrip().endswith('/')
path = posixpath.normpath(urllib.parse.unquote(path)) try:
path = urllib.parse.unquote(path, errors='surrogatepass')
except UnicodeDecodeError:
path = urllib.parse.unquote(path)
path = posixpath.normpath(path)
words = path.split('/') words = path.split('/')
words = filter(None, words) words = filter(None, words)
path = os.getcwd() path = os.getcwd()

View file

@ -14,6 +14,7 @@ import re
import base64 import base64
import shutil import shutil
import urllib.parse import urllib.parse
import html
import http.client import http.client
import tempfile import tempfile
from io import BytesIO from io import BytesIO
@ -266,6 +267,24 @@ class SimpleHTTPServerTestCase(BaseTestCase):
self.assertIsNotNone(response.reason) self.assertIsNotNone(response.reason)
if data: if data:
self.assertEqual(data, body) self.assertEqual(data, body)
return body
@unittest.skipUnless(support.TESTFN_UNDECODABLE,
                     'need support.TESTFN_UNDECODABLE')
def test_undecodable_filename(self):
    """Check that a file with an undecodable name is listed and served.

    A file named after ``support.TESTFN_UNDECODABLE`` is created in the
    served directory.  The directory listing must contain both the
    percent-quoted link and the HTML-escaped display name, and requesting
    the quoted link must return the file's contents.
    """
    # Imported locally so the test does not depend on a module-level
    # ``import sys`` being present.
    import sys

    # The handler encodes the listing with the filesystem encoding and
    # 'surrogateescape', so the expected bytes must be built the same way;
    # hard-coding 'utf-8' breaks the comparison on non-UTF-8 locales.
    enc = sys.getfilesystemencoding()
    filename = os.fsdecode(support.TESTFN_UNDECODABLE) + '.txt'
    with open(os.path.join(self.tempdir, filename), 'wb') as f:
        f.write(support.TESTFN_UNDECODABLE)

    # The directory listing must mention the file as link and as text.
    response = self.request(self.tempdir_name + '/')
    body = self.check_status_and_reason(response, 200)
    quotedname = urllib.parse.quote(filename, errors='surrogatepass')
    self.assertIn(('href="%s"' % quotedname)
                  .encode(enc, 'surrogateescape'), body)
    self.assertIn(('>%s<' % html.escape(filename))
                  .encode(enc, 'surrogateescape'), body)

    # Following the quoted link must return the bytes written above.
    response = self.request(self.tempdir_name + '/' + quotedname)
    self.check_status_and_reason(response, 200,
                                 data=support.TESTFN_UNDECODABLE)
def test_get(self): def test_get(self):
#constructs the path relative to the root directory of the HTTPServer #constructs the path relative to the root directory of the HTTPServer

View file

@ -27,6 +27,8 @@ Core and Builtins
Library Library
------- -------
- Issue #22165: SimpleHTTPRequestHandler now supports undecodable file names.
- Issue #20729: Restored the use of lazy iterkeys()/itervalues()/iteritems() - Issue #20729: Restored the use of lazy iterkeys()/itervalues()/iteritems()
in the mailbox module. in the mailbox module.