gh-77102: site: try utf-8 and fallback to locale encoding when reading .pth file (#117802)

This commit is contained in:
Inada Naoki 2024-04-16 12:56:16 +09:00 committed by GitHub
parent 3831144f9c
commit 6dc661bc9f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 49 additions and 26 deletions

View file

@ -74,6 +74,10 @@ with ``import`` (followed by space or tab) are executed.
Limiting a code chunk to a single line is a deliberate measure Limiting a code chunk to a single line is a deliberate measure
to discourage putting anything more complex here. to discourage putting anything more complex here.
.. versionchanged:: 3.13
The :file:`.pth` files are now decoded as UTF-8 first, and then with the
:term:`locale encoding` if UTF-8 decoding fails.
.. index:: .. index::
single: package single: package
triple: path; configuration; file triple: path; configuration; file

View file

@ -630,6 +630,13 @@ re
* Rename :exc:`!re.error` to :exc:`re.PatternError` for improved clarity. * Rename :exc:`!re.error` to :exc:`re.PatternError` for improved clarity.
:exc:`!re.error` is kept for backward compatibility. :exc:`!re.error` is kept for backward compatibility.
site
----
* :file:`.pth` files are now decoded as UTF-8 first, falling back to the
:term:`locale encoding` if the UTF-8 decoding fails.
(Contributed by Inada Naoki in :gh:`117802`.)
sqlite3 sqlite3
------- -------

View file

@ -179,35 +179,44 @@ def addpackage(sitedir, name, known_paths):
return return
_trace(f"Processing .pth file: {fullname!r}") _trace(f"Processing .pth file: {fullname!r}")
try: try:
# locale encoding is not ideal especially on Windows. But we have used with io.open_code(fullname) as f:
# it for a long time. setuptools uses the locale encoding too. pth_content = f.read()
f = io.TextIOWrapper(io.open_code(fullname), encoding="locale")
except OSError: except OSError:
return return
with f:
for n, line in enumerate(f): try:
if line.startswith("#"): pth_content = pth_content.decode()
except UnicodeDecodeError:
# Fallback to locale encoding for backward compatibility.
# We will deprecate this fallback in the future.
import locale
pth_content = pth_content.decode(locale.getencoding())
_trace(f"Cannot read {fullname!r} as UTF-8. "
f"Using fallback encoding {locale.getencoding()!r}")
for n, line in enumerate(pth_content.splitlines(), 1):
if line.startswith("#"):
continue
if line.strip() == "":
continue
try:
if line.startswith(("import ", "import\t")):
exec(line)
continue continue
if line.strip() == "": line = line.rstrip()
continue dir, dircase = makepath(sitedir, line)
try: if dircase not in known_paths and os.path.exists(dir):
if line.startswith(("import ", "import\t")): sys.path.append(dir)
exec(line) known_paths.add(dircase)
continue except Exception as exc:
line = line.rstrip() print(f"Error processing line {n:d} of {fullname}:\n",
dir, dircase = makepath(sitedir, line) file=sys.stderr)
if not dircase in known_paths and os.path.exists(dir): import traceback
sys.path.append(dir) for record in traceback.format_exception(exc):
known_paths.add(dircase) for line in record.splitlines():
except Exception as exc: print(' '+line, file=sys.stderr)
print("Error processing line {:d} of {}:\n".format(n+1, fullname), print("\nRemainder of file ignored", file=sys.stderr)
file=sys.stderr) break
import traceback
for record in traceback.format_exception(exc):
for line in record.splitlines():
print(' '+line, file=sys.stderr)
print("\nRemainder of file ignored", file=sys.stderr)
break
if reset: if reset:
known_paths = None known_paths = None
return known_paths return known_paths

View file

@ -0,0 +1,3 @@
The :mod:`site` module now decodes ``.pth`` files as UTF-8 first, and falls
back to the :term:`locale encoding` if a ``UnicodeDecodeError`` occurs.
Previously, only the locale encoding was supported.