mirror of
https://github.com/python/cpython.git
synced 2025-08-29 21:25:01 +00:00
Refactor mimetypes so that it is easier to work with alternate MIME type databases
and to extend the database programmatically in different ways. This closes SF bug (feature request) #439710.
This commit is contained in:
parent
e861365dab
commit
eeee4ec4f1
1 changed files with 138 additions and 67 deletions
205
Lib/mimetypes.py
205
Lib/mimetypes.py
|
@ -12,7 +12,7 @@ Data:
|
||||||
|
|
||||||
knownfiles -- list of files to parse
|
knownfiles -- list of files to parse
|
||||||
inited -- flag set when init() has been called
|
inited -- flag set when init() has been called
|
||||||
suffixes_map -- dictionary mapping suffixes to suffixes
|
suffix_map -- dictionary mapping suffixes to suffixes
|
||||||
encodings_map -- dictionary mapping suffixes to encodings
|
encodings_map -- dictionary mapping suffixes to encodings
|
||||||
types_map -- dictionary mapping suffixes to types
|
types_map -- dictionary mapping suffixes to types
|
||||||
|
|
||||||
|
@ -23,6 +23,7 @@ read_mime_types(file) -- parse one file, return a dictionary or None
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
import posixpath
|
import posixpath
|
||||||
import urllib
|
import urllib
|
||||||
|
|
||||||
|
@ -37,6 +38,117 @@ knownfiles = [
|
||||||
|
|
||||||
inited = 0
|
inited = 0
|
||||||
|
|
||||||
|
|
||||||
|
class MimeTypes:
    """MIME-types datastore.

    This datastore can handle information from mime.types-style files
    and supports basic determination of MIME type from a filename or
    URL, and can guess a reasonable extension given a MIME type.
    """

    def __init__(self, filenames=()):
        """Create a datastore seeded from the module-level tables.

        filenames -- optional sequence of mime.types-style pathnames;
                     each one is read into this instance, in order.
        """
        if not inited:
            init()
        # Work on copies so that changes made to this instance do not
        # leak back into the module-level dictionaries.
        self.encodings_map = encodings_map.copy()
        self.suffix_map = suffix_map.copy()
        self.types_map = types_map.copy()
        for name in filenames:
            self.read(name)

    def guess_type(self, url):
        """Guess the type of a file based on its URL.

        Return value is a tuple (type, encoding) where type is None if
        the type can't be guessed (no or unknown suffix) or a string
        of the form type/subtype, usable for a MIME Content-type
        header; and encoding is None for no encoding or the name of
        the program used to encode (e.g. compress or gzip).  The
        mappings are table driven.  Encoding suffixes are case
        sensitive; type suffixes are first tried case sensitive, then
        case insensitive.

        The suffixes .tgz, .taz and .tz (case sensitive!) are all
        mapped to '.tar.gz'.  (This is table-driven too, using the
        dictionary suffix_map.)
        """
        scheme, url = urllib.splittype(url)
        if scheme == 'data':
            # syntax of data URLs:
            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            # mediatype := [ type "/" subtype ] *( ";" parameter )
            # data      := *urlchar
            # parameter := attribute "=" value
            # type/subtype defaults to "text/plain"
            comma = url.find(',')
            if comma < 0:
                # bad data URL
                return None, None
            semi = url.find(';', 0, comma)
            if semi >= 0:
                type = url[:semi]
            else:
                type = url[:comma]
            if '=' in type or '/' not in type:
                type = 'text/plain'
            return type, None           # never compressed, so encoding is None
        base, ext = posixpath.splitext(url)
        # Expand shorthand suffixes until the suffix is no longer an alias.
        while ext in self.suffix_map:
            base, ext = posixpath.splitext(base + self.suffix_map[ext])
        if ext in self.encodings_map:
            encoding = self.encodings_map[ext]
            # Strip the encoding suffix to expose the real type suffix.
            base, ext = posixpath.splitext(base)
        else:
            encoding = None
        types_map = self.types_map
        if ext in types_map:
            return types_map[ext], encoding
        elif ext.lower() in types_map:
            return types_map[ext.lower()], encoding
        else:
            return None, encoding

    def guess_extension(self, type):
        """Guess the extension for a file based on its MIME type.

        Return value is a string giving a filename extension,
        including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data
        stream, but would be mapped to the MIME type `type' by
        guess_type().  If no extension can be guessed for `type', None
        is returned.
        """
        type = type.lower()
        for ext, stype in self.types_map.items():
            if type == stype:
                return ext
        return None

    def read(self, filename):
        """Read a single mime.types-format file, specified by pathname."""
        fp = open(filename)
        try:
            self.readfp(fp)
        finally:
            # Close the file even if parsing raises.
            fp.close()

    def readfp(self, fp):
        """Read a single mime.types-format file.

        fp -- an open file-like object providing readline().  Each
              non-empty line has the form "type suffix [suffix ...]";
              a word starting with '#' begins a comment that runs to
              the end of the line.  Every suffix is stored in
              self.types_map with a leading dot.
        """
        types_map = self.types_map
        while 1:
            line = fp.readline()
            if not line:
                break
            words = line.split()
            # Drop the first word that starts a comment and everything
            # after it.
            for i in range(len(words)):
                if words[i][0] == '#':
                    del words[i:]
                    break
            if not words:
                continue
            mimetype, suffixes = words[0], words[1:]
            for suff in suffixes:
                types_map['.' + suff] = mimetype
|
||||||
|
|
||||||
|
|
||||||
def guess_type(url):
|
def guess_type(url):
|
||||||
"""Guess the type of a file based on its URL.
|
"""Guess the type of a file based on its URL.
|
||||||
|
|
||||||
|
@ -51,44 +163,10 @@ def guess_type(url):
|
||||||
The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
|
The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
|
||||||
to ".tar.gz". (This is table-driven too, using the dictionary
|
to ".tar.gz". (This is table-driven too, using the dictionary
|
||||||
suffix_map).
|
suffix_map).
|
||||||
|
|
||||||
"""
|
"""
|
||||||
if not inited:
|
init()
|
||||||
init()
|
return guess_type(url)
|
||||||
scheme, url = urllib.splittype(url)
|
|
||||||
if scheme == 'data':
|
|
||||||
# syntax of data URLs:
|
|
||||||
# dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
|
|
||||||
# mediatype := [ type "/" subtype ] *( ";" parameter )
|
|
||||||
# data := *urlchar
|
|
||||||
# parameter := attribute "=" value
|
|
||||||
# type/subtype defaults to "text/plain"
|
|
||||||
comma = url.find(',')
|
|
||||||
if comma < 0:
|
|
||||||
# bad data URL
|
|
||||||
return None, None
|
|
||||||
semi = url.find(';', 0, comma)
|
|
||||||
if semi >= 0:
|
|
||||||
type = url[:semi]
|
|
||||||
else:
|
|
||||||
type = url[:comma]
|
|
||||||
if '=' in type or '/' not in type:
|
|
||||||
type = 'text/plain'
|
|
||||||
return type, None # never compressed, so encoding is None
|
|
||||||
base, ext = posixpath.splitext(url)
|
|
||||||
while suffix_map.has_key(ext):
|
|
||||||
base, ext = posixpath.splitext(base + suffix_map[ext])
|
|
||||||
if encodings_map.has_key(ext):
|
|
||||||
encoding = encodings_map[ext]
|
|
||||||
base, ext = posixpath.splitext(base)
|
|
||||||
else:
|
|
||||||
encoding = None
|
|
||||||
if types_map.has_key(ext):
|
|
||||||
return types_map[ext], encoding
|
|
||||||
elif types_map.has_key(ext.lower()):
|
|
||||||
return types_map[ext.lower()], encoding
|
|
||||||
else:
|
|
||||||
return None, encoding
|
|
||||||
|
|
||||||
def guess_extension(type):
|
def guess_extension(type):
|
||||||
"""Guess the extension for a file based on its MIME type.
|
"""Guess the extension for a file based on its MIME type.
|
||||||
|
@ -99,50 +177,43 @@ def guess_extension(type):
|
||||||
MIME type `type' by guess_type(). If no extension can be guessed for
|
MIME type `type' by guess_type(). If no extension can be guessed for
|
||||||
`type', None is returned.
|
`type', None is returned.
|
||||||
"""
|
"""
|
||||||
global inited
|
init()
|
||||||
if not inited:
|
return guess_extension(type)
|
||||||
init()
|
|
||||||
type = type.lower()
|
|
||||||
for ext, stype in types_map.items():
|
|
||||||
if type == stype:
|
|
||||||
return ext
|
|
||||||
return None
|
|
||||||
|
|
||||||
def init(files=None):
    """Build the module-level MIME database from mime.types-style files.

    files -- optional sequence of pathnames to read; when None, the
             module-level `knownfiles' list is used.  Entries that are
             not existing regular files are skipped.

    The module-level encodings_map, suffix_map and types_map are
    rebound to the dictionaries of a new MimeTypes instance, and the
    module-level guess_extension() and guess_type() are rebound to that
    instance's methods.
    """
    global guess_extension, guess_type
    global suffix_map, types_map, encodings_map
    global inited
    # Set the flag before creating the MimeTypes instance: its
    # constructor calls init() while `inited' is false, which would
    # otherwise recurse.
    inited = 1
    db = MimeTypes()
    if files is None:
        files = knownfiles
    for file in files:
        if os.path.isfile(file):
            fp = open(file)
            try:
                db.readfp(fp)
            finally:
                # Do not leak the handle if parsing fails.
                fp.close()
    encodings_map = db.encodings_map
    suffix_map = db.suffix_map
    types_map = db.types_map
    guess_extension = db.guess_extension
    guess_type = db.guess_type
|
||||||
|
|
||||||
|
|
||||||
def read_mime_types(file):
    """Parse one mime.types-style file, return a dictionary or None.

    file -- pathname of the file to parse.

    Returns None if the file cannot be opened.  Otherwise the file is
    read into a new MimeTypes instance and that instance's types_map
    dictionary (suffix, with leading dot, to MIME type) is returned.
    """
    try:
        f = open(file)
    except IOError:
        return None
    try:
        db = MimeTypes()
        db.readfp(f)
    finally:
        # Always release the file, even if parsing raises.
        f.close()
    return db.types_map
|
|
||||||
|
|
||||||
suffix_map = {
|
suffix_map = {
|
||||||
'.tgz': '.tar.gz',
|
'.tgz': '.tar.gz',
|
||||||
'.taz': '.tar.gz',
|
'.taz': '.tar.gz',
|
||||||
'.tz': '.tar.gz',
|
'.tz': '.tar.gz',
|
||||||
}
|
}
|
||||||
|
|
||||||
encodings_map = {
|
encodings_map = {
|
||||||
'.gz': 'gzip',
|
'.gz': 'gzip',
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue