mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
[3.12] gh-66543: Fix mimetypes.guess_type() (GH-117217) (GH-117256)
Fix parsing of the following corner cases:
* URLs with only a host name
* URLs containing a fragment
* URLs containing a query
* filenames with only a UNC sharepoint on Windows
(cherry picked from commit 9654daf793)
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
Co-authored-by: Dong-hee Na <donghee.na92@gmail.com>
This commit is contained in:
parent
54514467c2
commit
36b1a20851
4 changed files with 45 additions and 9 deletions
|
@ -120,7 +120,13 @@ class MimeTypes:
|
||||||
but non-standard types.
|
but non-standard types.
|
||||||
"""
|
"""
|
||||||
url = os.fspath(url)
|
url = os.fspath(url)
|
||||||
scheme, url = urllib.parse._splittype(url)
|
p = urllib.parse.urlparse(url)
|
||||||
|
if p.scheme and len(p.scheme) > 1:
|
||||||
|
scheme = p.scheme
|
||||||
|
url = p.path
|
||||||
|
else:
|
||||||
|
scheme = None
|
||||||
|
url = os.path.splitdrive(url)[1]
|
||||||
if scheme == 'data':
|
if scheme == 'data':
|
||||||
# syntax of data URLs:
|
# syntax of data URLs:
|
||||||
# dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
|
# dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
import io
|
import io
|
||||||
import mimetypes
|
import mimetypes
|
||||||
|
import os
|
||||||
import pathlib
|
import pathlib
|
||||||
import sys
|
import sys
|
||||||
import unittest.mock
|
import unittest.mock
|
||||||
|
@ -111,15 +112,40 @@ class MimeTypesTestCase(unittest.TestCase):
|
||||||
# compared to when interpreted as filename because of the semicolon.
|
# compared to when interpreted as filename because of the semicolon.
|
||||||
eq = self.assertEqual
|
eq = self.assertEqual
|
||||||
gzip_expected = ('application/x-tar', 'gzip')
|
gzip_expected = ('application/x-tar', 'gzip')
|
||||||
eq(self.db.guess_type(";1.tar.gz"), gzip_expected)
|
for name in (
|
||||||
eq(self.db.guess_type("?1.tar.gz"), gzip_expected)
|
';1.tar.gz',
|
||||||
eq(self.db.guess_type("#1.tar.gz"), gzip_expected)
|
'?1.tar.gz',
|
||||||
eq(self.db.guess_type("#1#.tar.gz"), gzip_expected)
|
'#1.tar.gz',
|
||||||
eq(self.db.guess_type(";1#.tar.gz"), gzip_expected)
|
'#1#.tar.gz',
|
||||||
eq(self.db.guess_type(";&1=123;?.tar.gz"), gzip_expected)
|
';1#.tar.gz',
|
||||||
eq(self.db.guess_type("?k1=v1&k2=v2.tar.gz"), gzip_expected)
|
';&1=123;?.tar.gz',
|
||||||
|
'?k1=v1&k2=v2.tar.gz',
|
||||||
|
):
|
||||||
|
for prefix in ('', '/', '\\',
|
||||||
|
'c:', 'c:/', 'c:\\', 'c:/d/', 'c:\\d\\',
|
||||||
|
'//share/server/', '\\\\share\\server\\'):
|
||||||
|
path = prefix + name
|
||||||
|
with self.subTest(path=path):
|
||||||
|
eq(self.db.guess_type(path), gzip_expected)
|
||||||
|
expected = (None, None) if os.name == 'nt' else gzip_expected
|
||||||
|
for prefix in ('//', '\\\\', '//share/', '\\\\share\\'):
|
||||||
|
path = prefix + name
|
||||||
|
with self.subTest(path=path):
|
||||||
|
eq(self.db.guess_type(path), expected)
|
||||||
eq(self.db.guess_type(r" \"\`;b&b&c |.tar.gz"), gzip_expected)
|
eq(self.db.guess_type(r" \"\`;b&b&c |.tar.gz"), gzip_expected)
|
||||||
|
|
||||||
|
def test_url(self):
|
||||||
|
result = self.db.guess_type('http://host.html')
|
||||||
|
msg = 'URL only has a host name, not a file'
|
||||||
|
self.assertSequenceEqual(result, (None, None), msg)
|
||||||
|
result = self.db.guess_type('http://example.com/host.html')
|
||||||
|
msg = 'Should be text/html'
|
||||||
|
self.assertSequenceEqual(result, ('text/html', None), msg)
|
||||||
|
result = self.db.guess_type('http://example.com/host.html#x.tar')
|
||||||
|
self.assertSequenceEqual(result, ('text/html', None))
|
||||||
|
result = self.db.guess_type('http://example.com/host.html?q=x.tar')
|
||||||
|
self.assertSequenceEqual(result, ('text/html', None))
|
||||||
|
|
||||||
def test_guess_all_types(self):
|
def test_guess_all_types(self):
|
||||||
# First try strict. Use a set here for testing the results because if
|
# First try strict. Use a set here for testing the results because if
|
||||||
# test_urllib2 is run before test_mimetypes, global state is modified
|
# test_urllib2 is run before test_mimetypes, global state is modified
|
||||||
|
|
|
@ -776,7 +776,7 @@ class HandlerTests(unittest.TestCase):
|
||||||
["foo", "bar"], "", None),
|
["foo", "bar"], "", None),
|
||||||
("ftp://localhost/baz.gif;type=a",
|
("ftp://localhost/baz.gif;type=a",
|
||||||
"localhost", ftplib.FTP_PORT, "", "", "A",
|
"localhost", ftplib.FTP_PORT, "", "", "A",
|
||||||
[], "baz.gif", None), # XXX really this should guess image/gif
|
[], "baz.gif", "image/gif"),
|
||||||
]:
|
]:
|
||||||
req = Request(url)
|
req = Request(url)
|
||||||
req.timeout = None
|
req.timeout = None
|
||||||
|
|
|
@ -0,0 +1,4 @@
|
||||||
|
Make :func:`mimetypes.guess_type` properly parse URLs with only a host
|
||||||
|
name, URLs containing a fragment or query, and filenames with only a UNC
|
||||||
|
sharepoint on Windows.
|
||||||
|
Based on patch by Dong-hee Na.
|
Loading…
Add table
Add a link
Reference in a new issue