gh-66543: Add mimetypes.guess_file_type() (GH-117258)

This commit is contained in:
Serhiy Storchaka 2024-05-06 15:50:52 +03:00 committed by GitHub
parent d3c7821335
commit d6fa1d4bee
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 129 additions and 35 deletions

View file

@ -40,7 +40,7 @@ except ImportError:
__all__ = [
"knownfiles", "inited", "MimeTypes",
"guess_type", "guess_all_extensions", "guess_extension",
"guess_type", "guess_file_type", "guess_all_extensions", "guess_extension",
"add_type", "init", "read_mime_types",
"suffix_map", "encodings_map", "types_map", "common_types"
]
@ -119,14 +119,14 @@ class MimeTypes:
Optional `strict' argument when False adds a bunch of commonly found,
but non-standard types.
"""
# TODO: Deprecate accepting file paths (in particular path-like objects).
url = os.fspath(url)
p = urllib.parse.urlparse(url)
if p.scheme and len(p.scheme) > 1:
scheme = p.scheme
url = p.path
else:
scheme = None
url = os.path.splitdrive(url)[1]
return self.guess_file_type(url, strict=strict)
if scheme == 'data':
# syntax of data URLs:
# dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
@ -146,13 +146,25 @@ class MimeTypes:
if '=' in type or '/' not in type:
type = 'text/plain'
return type, None # never compressed, so encoding is None
base, ext = posixpath.splitext(url)
return self._guess_file_type(url, strict, posixpath.splitext)
def guess_file_type(self, path, *, strict=True):
"""Guess the type of a file based on its path.

Similar to guess_type(), but takes a file path instead of a URL.

The path is decoded with os.fsdecode(), the drive specification (if
any) is removed, and the remainder is handed to _guess_file_type()
together with `strict' and os.path.splitext as the extension splitter.
The result of that call is returned unchanged.
"""
# os.fsdecode() accepts str, bytes or path-like objects and returns str.
path = os.fsdecode(path)
# Keep only the part after the drive specification.
path = os.path.splitdrive(path)[1]
return self._guess_file_type(path, strict, os.path.splitext)
def _guess_file_type(self, path, strict, splitext):
base, ext = splitext(path)
while (ext_lower := ext.lower()) in self.suffix_map:
base, ext = posixpath.splitext(base + self.suffix_map[ext_lower])
base, ext = splitext(base + self.suffix_map[ext_lower])
# encodings_map is case sensitive
if ext in self.encodings_map:
encoding = self.encodings_map[ext]
base, ext = posixpath.splitext(base)
base, ext = splitext(base)
else:
encoding = None
ext = ext.lower()
@ -310,6 +322,16 @@ def guess_type(url, strict=True):
return _db.guess_type(url, strict)
def guess_file_type(path, *, strict=True):
"""Guess the type of a file based on its path.

Similar to guess_type(), but takes a file path instead of a URL.

Delegates to the guess_file_type() method of the module-level database
`_db', forwarding `strict' as a keyword argument.
"""
# Initialize the module-level database on first use.
if _db is None:
init()
return _db.guess_file_type(path, strict=strict)
def guess_all_extensions(type, strict=True):
"""Guess the extensions for a file based on its MIME type.