GH-126601: pathname2url(): handle NTFS alternate data streams (#126760)

Adjust `pathname2url()` to encode embedded colon characters in Windows
paths, rather than bailing out with an `OSError`.

Co-authored-by: Steve Dower <steve.dower@microsoft.com>
This commit is contained in:
Barney Gale 2024-11-22 00:29:05 +00:00 committed by GitHub
parent e8bb053941
commit fd133d4f21
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 21 additions and 14 deletions

View file

@ -40,6 +40,7 @@ def pathname2url(p):
# C:\foo\bar\spam.foo
# becomes
# ///C:/foo/bar/spam.foo
import ntpath
import urllib.parse
# First, clean up some special forms. We are going to sacrifice
# the additional information anyway
@ -48,16 +49,13 @@ def pathname2url(p):
p = p[4:]
if p[:4].upper() == 'UNC/':
p = '//' + p[4:]
elif p[1:2] != ':':
raise OSError('Bad path: ' + p)
if not ':' in p:
# No DOS drive specified, just quote the pathname
return urllib.parse.quote(p)
comp = p.split(':', maxsplit=2)
if len(comp) != 2 or len(comp[0]) > 1:
error = 'Bad path: ' + p
raise OSError(error)
drive, tail = ntpath.splitdrive(p)
if drive[1:] == ':':
# DOS drive specified. Add three slashes to the start, producing
# an authority section with a zero-length authority, and a path
# section starting with a single slash.
drive = f'///{drive.upper()}'
drive = urllib.parse.quote(comp[0].upper())
tail = urllib.parse.quote(comp[1])
return '///' + drive + ':' + tail
drive = urllib.parse.quote(drive, safe='/:')
tail = urllib.parse.quote(tail)
return drive + tail