Issue #3187: Better support for "undecodable" filenames. Code by Victor

Stinner, with small tweaks by GvR.
This commit is contained in:
Guido van Rossum 2008-10-02 18:55:37 +00:00
parent fefeca53ee
commit f0af3e30db
11 changed files with 359 additions and 145 deletions

View file

@ -11,6 +11,7 @@ for manipulation of the pathname component of URLs.
"""
import os
import sys
import stat
import genericpath
from genericpath import *
@ -23,7 +24,8 @@ __all__ = ["normcase","isabs","join","splitdrive","split","splitext",
"curdir","pardir","sep","pathsep","defpath","altsep","extsep",
"devnull","realpath","supports_unicode_filenames","relpath"]
# strings representing various path-related bits and pieces
# Strings representing various path-related bits and pieces.
# These are primarily for export; internally, they are hardcoded.
curdir = '.'
pardir = '..'
extsep = '.'
@ -33,6 +35,12 @@ defpath = ':/bin:/usr/bin'
altsep = None
devnull = '/dev/null'
def _get_sep(path):
if isinstance(path, bytes):
return b'/'
else:
return '/'
# Normalize the case of a pathname. Trivial in Posix, string.lower on Mac.
# On MS-DOS this may also turn slashes into backslashes; however, other
# normalizations (such as optimizing '../' away) are not allowed
@ -40,6 +48,7 @@ devnull = '/dev/null'
def normcase(s):
"""Normalize case of pathname. Has no effect under Posix"""
# TODO: on Mac OS X, this should really return s.lower().
return s
@ -48,7 +57,8 @@ def normcase(s):
def isabs(s):
"""Test whether a path is absolute"""
return s.startswith('/')
sep = _get_sep(s)
return s.startswith(sep)
# Join pathnames.
@ -59,14 +69,15 @@ def join(a, *p):
"""Join two or more pathname components, inserting '/' as needed.
If any component is an absolute path, all previous path components
will be discarded."""
sep = _get_sep(a)
path = a
for b in p:
if b.startswith('/'):
if b.startswith(sep):
path = b
elif path == '' or path.endswith('/'):
elif not path or path.endswith(sep):
path += b
else:
path += '/' + b
path += sep + b
return path
@ -78,10 +89,11 @@ def join(a, *p):
def split(p):
"""Split a pathname. Returns tuple "(head, tail)" where "tail" is
everything after the final slash. Either part may be empty."""
i = p.rfind('/') + 1
sep = _get_sep(p)
i = p.rfind(sep) + 1
head, tail = p[:i], p[i:]
if head and head != '/'*len(head):
head = head.rstrip('/')
if head and head != sep*len(head):
head = head.rstrip(sep)
return head, tail
@ -91,7 +103,13 @@ def split(p):
# It is always true that root + ext == p.
def splitext(p):
return genericpath._splitext(p, sep, altsep, extsep)
if isinstance(p, bytes):
sep = b'/'
extsep = b'.'
else:
sep = '/'
extsep = '.'
return genericpath._splitext(p, sep, None, extsep)
splitext.__doc__ = genericpath._splitext.__doc__
# Split a pathname into a drive specification and the rest of the
@ -100,14 +118,15 @@ splitext.__doc__ = genericpath._splitext.__doc__
def splitdrive(p):
"""Split a pathname into drive and path. On Posix, drive is always
empty."""
return '', p
return p[:0], p
# Return the tail (basename) part of a path, same as split(path)[1].
def basename(p):
"""Returns the final component of a pathname"""
i = p.rfind('/') + 1
sep = _get_sep(p)
i = p.rfind(sep) + 1
return p[i:]
@ -115,10 +134,11 @@ def basename(p):
def dirname(p):
"""Returns the directory component of a pathname"""
i = p.rfind('/') + 1
sep = _get_sep(p)
i = p.rfind(sep) + 1
head = p[:i]
if head and head != '/'*len(head):
head = head.rstrip('/')
if head and head != sep*len(head):
head = head.rstrip(sep)
return head
@ -179,7 +199,11 @@ def ismount(path):
"""Test whether a path is a mount point"""
try:
s1 = os.lstat(path)
s2 = os.lstat(join(path, '..'))
if isinstance(path, bytes):
parent = join(path, b'..')
else:
parent = join(path, '..')
s2 = os.lstat(parent)
except os.error:
return False # It doesn't exist -- so not a mount point :-)
dev1 = s1.st_dev
@ -205,9 +229,14 @@ def ismount(path):
def expanduser(path):
"""Expand ~ and ~user constructions. If user or $HOME is unknown,
do nothing."""
if not path.startswith('~'):
if isinstance(path, bytes):
tilde = b'~'
else:
tilde = '~'
if not path.startswith(tilde):
return path
i = path.find('/', 1)
sep = _get_sep(path)
i = path.find(sep, 1)
if i < 0:
i = len(path)
if i == 1:
@ -218,12 +247,17 @@ def expanduser(path):
userhome = os.environ['HOME']
else:
import pwd
name = path[1:i]
if isinstance(name, bytes):
name = str(name, 'ASCII')
try:
pwent = pwd.getpwnam(path[1:i])
pwent = pwd.getpwnam(name)
except KeyError:
return path
userhome = pwent.pw_dir
userhome = userhome.rstrip('/')
if isinstance(path, bytes):
userhome = userhome.encode(sys.getfilesystemencoding())
userhome = userhome.rstrip(sep)
return userhome + path[i:]
@ -232,28 +266,47 @@ def expanduser(path):
# Non-existent variables are left unchanged.
_varprog = None
_varprogb = None
def expandvars(path):
"""Expand shell variables of form $var and ${var}. Unknown variables
are left unchanged."""
global _varprog
if '$' not in path:
return path
if not _varprog:
import re
_varprog = re.compile(r'\$(\w+|\{[^}]*\})', re.ASCII)
global _varprog, _varprogb
if isinstance(path, bytes):
if b'$' not in path:
return path
if not _varprogb:
import re
_varprogb = re.compile(br'\$(\w+|\{[^}]*\})', re.ASCII)
search = _varprogb.search
start = b'{'
end = b'}'
else:
if '$' not in path:
return path
if not _varprog:
import re
_varprog = re.compile(r'\$(\w+|\{[^}]*\})', re.ASCII)
search = _varprog.search
start = '{'
end = '}'
i = 0
while True:
m = _varprog.search(path, i)
m = search(path, i)
if not m:
break
i, j = m.span(0)
name = m.group(1)
if name.startswith('{') and name.endswith('}'):
if name.startswith(start) and name.endswith(end):
name = name[1:-1]
if isinstance(name, bytes):
name = str(name, 'ASCII')
if name in os.environ:
tail = path[j:]
path = path[:i] + os.environ[name]
value = os.environ[name]
if isinstance(path, bytes):
value = value.encode('ASCII')
path = path[:i] + value
i = len(path)
path += tail
else:
@ -267,35 +320,49 @@ def expandvars(path):
def normpath(path):
"""Normalize path, eliminating double slashes, etc."""
if path == '':
return '.'
initial_slashes = path.startswith('/')
if isinstance(path, bytes):
sep = b'/'
empty = b''
dot = b'.'
dotdot = b'..'
else:
sep = '/'
empty = ''
dot = '.'
dotdot = '..'
if path == empty:
return dot
initial_slashes = path.startswith(sep)
# POSIX allows one or two initial slashes, but treats three or more
# as single slash.
if (initial_slashes and
path.startswith('//') and not path.startswith('///')):
path.startswith(sep*2) and not path.startswith(sep*3)):
initial_slashes = 2
comps = path.split('/')
comps = path.split(sep)
new_comps = []
for comp in comps:
if comp in ('', '.'):
if comp in (empty, dot):
continue
if (comp != '..' or (not initial_slashes and not new_comps) or
(new_comps and new_comps[-1] == '..')):
if (comp != dotdot or (not initial_slashes and not new_comps) or
(new_comps and new_comps[-1] == dotdot)):
new_comps.append(comp)
elif new_comps:
new_comps.pop()
comps = new_comps
path = '/'.join(comps)
path = sep.join(comps)
if initial_slashes:
path = '/'*initial_slashes + path
return path or '.'
path = sep*initial_slashes + path
return path or dot
def abspath(path):
"""Return an absolute path."""
if not isabs(path):
path = join(os.getcwd(), path)
if isinstance(path, bytes):
cwd = os.getcwdb()
else:
cwd = os.getcwd()
path = join(cwd, path)
return normpath(path)
@ -305,10 +372,16 @@ def abspath(path):
def realpath(filename):
"""Return the canonical path of the specified filename, eliminating any
symbolic links encountered in the path."""
if isabs(filename):
bits = ['/'] + filename.split('/')[1:]
if isinstance(filename, bytes):
sep = b'/'
empty = b''
else:
bits = [''] + filename.split('/')
sep = '/'
empty = ''
if isabs(filename):
bits = [sep] + filename.split(sep)[1:]
else:
bits = [empty] + filename.split(sep)
for i in range(2, len(bits)+1):
component = join(*bits[0:i])
@ -347,12 +420,24 @@ def _resolve_link(path):
supports_unicode_filenames = False
def relpath(path, start=curdir):
def relpath(path, start=None):
"""Return a relative version of a path"""
if not path:
raise ValueError("no path specified")
if isinstance(path, bytes):
curdir = b'.'
sep = b'/'
pardir = b'..'
else:
curdir = '.'
sep = '/'
pardir = '..'
if start is None:
start = curdir
start_list = abspath(start).split(sep)
path_list = abspath(path).split(sep)