mirror of
https://github.com/python/cpython.git
synced 2025-08-08 10:58:51 +00:00
gh-101144: Allow open and read_text encoding to be positional. (#101145)
The zipfile.Path open() and read_text() encoding parameter can be supplied as a positional argument without causing a TypeError again. 3.10.0b1 included a regression that made it keyword only. Documentation update included as users writing code to be compatible with a wide range of versions will need to consider this for some time.
This commit is contained in:
parent
9e025d305f
commit
5927013e47
4 changed files with 95 additions and 10 deletions
|
@ -551,6 +551,12 @@ Path objects are traversable using the ``/`` operator or ``joinpath``.
|
||||||
Added support for text and binary modes for open. Default
|
Added support for text and binary modes for open. Default
|
||||||
mode is now text.
|
mode is now text.
|
||||||
|
|
||||||
|
.. versionchanged:: 3.11.2
|
||||||
|
The ``encoding`` parameter can be supplied as a positional argument
|
||||||
|
without causing a :exc:`TypeError`. As it could in 3.9. Code needing to
|
||||||
|
be compatible with unpatched 3.10 and 3.11 versions must pass all
|
||||||
|
:class:`io.TextIOWrapper` arguments, ``encoding`` included, as keywords.
|
||||||
|
|
||||||
.. method:: Path.iterdir()
|
.. method:: Path.iterdir()
|
||||||
|
|
||||||
Enumerate the children of the current directory.
|
Enumerate the children of the current directory.
|
||||||
|
@ -596,6 +602,12 @@ Path objects are traversable using the ``/`` operator or ``joinpath``.
|
||||||
:class:`io.TextIOWrapper` (except ``buffer``, which is
|
:class:`io.TextIOWrapper` (except ``buffer``, which is
|
||||||
implied by the context).
|
implied by the context).
|
||||||
|
|
||||||
|
.. versionchanged:: 3.11.2
|
||||||
|
The ``encoding`` parameter can be supplied as a positional argument
|
||||||
|
without causing a :exc:`TypeError`. As it could in 3.9. Code needing to
|
||||||
|
be compatible with unpatched 3.10 and 3.11 versions must pass all
|
||||||
|
:class:`io.TextIOWrapper` arguments, ``encoding`` included, as keywords.
|
||||||
|
|
||||||
.. method:: Path.read_bytes()
|
.. method:: Path.read_bytes()
|
||||||
|
|
||||||
Read the current file as bytes.
|
Read the current file as bytes.
|
||||||
|
|
|
@ -1,11 +1,12 @@
|
||||||
import io
|
import io
|
||||||
import zipfile
|
import itertools
|
||||||
import contextlib
|
import contextlib
|
||||||
import pathlib
|
import pathlib
|
||||||
import unittest
|
|
||||||
import string
|
|
||||||
import pickle
|
import pickle
|
||||||
import itertools
|
import string
|
||||||
|
from test.support.script_helper import assert_python_ok
|
||||||
|
import unittest
|
||||||
|
import zipfile
|
||||||
|
|
||||||
from ._test_params import parameterize, Invoked
|
from ._test_params import parameterize, Invoked
|
||||||
from ._functools import compose
|
from ._functools import compose
|
||||||
|
@ -145,7 +146,69 @@ class TestPath(unittest.TestCase):
|
||||||
a, b, g = root.iterdir()
|
a, b, g = root.iterdir()
|
||||||
with a.open(encoding="utf-8") as strm:
|
with a.open(encoding="utf-8") as strm:
|
||||||
data = strm.read()
|
data = strm.read()
|
||||||
assert data == "content of a"
|
self.assertEqual(data, "content of a")
|
||||||
|
with a.open('r', "utf-8") as strm: # not a kw, no gh-101144 TypeError
|
||||||
|
data = strm.read()
|
||||||
|
self.assertEqual(data, "content of a")
|
||||||
|
|
||||||
|
def test_open_encoding_utf16(self):
|
||||||
|
in_memory_file = io.BytesIO()
|
||||||
|
zf = zipfile.ZipFile(in_memory_file, "w")
|
||||||
|
zf.writestr("path/16.txt", "This was utf-16".encode("utf-16"))
|
||||||
|
zf.filename = "test_open_utf16.zip"
|
||||||
|
root = zipfile.Path(zf)
|
||||||
|
(path,) = root.iterdir()
|
||||||
|
u16 = path.joinpath("16.txt")
|
||||||
|
with u16.open('r', "utf-16") as strm:
|
||||||
|
data = strm.read()
|
||||||
|
self.assertEqual(data, "This was utf-16")
|
||||||
|
with u16.open(encoding="utf-16") as strm:
|
||||||
|
data = strm.read()
|
||||||
|
self.assertEqual(data, "This was utf-16")
|
||||||
|
|
||||||
|
def test_open_encoding_errors(self):
|
||||||
|
in_memory_file = io.BytesIO()
|
||||||
|
zf = zipfile.ZipFile(in_memory_file, "w")
|
||||||
|
zf.writestr("path/bad-utf8.bin", b"invalid utf-8: \xff\xff.")
|
||||||
|
zf.filename = "test_read_text_encoding_errors.zip"
|
||||||
|
root = zipfile.Path(zf)
|
||||||
|
(path,) = root.iterdir()
|
||||||
|
u16 = path.joinpath("bad-utf8.bin")
|
||||||
|
|
||||||
|
# encoding= as a positional argument for gh-101144.
|
||||||
|
data = u16.read_text("utf-8", errors="ignore")
|
||||||
|
self.assertEqual(data, "invalid utf-8: .")
|
||||||
|
with u16.open("r", "utf-8", errors="surrogateescape") as f:
|
||||||
|
self.assertEqual(f.read(), "invalid utf-8: \udcff\udcff.")
|
||||||
|
|
||||||
|
# encoding= both positional and keyword is an error; gh-101144.
|
||||||
|
with self.assertRaisesRegex(TypeError, "encoding"):
|
||||||
|
data = u16.read_text("utf-8", encoding="utf-8")
|
||||||
|
|
||||||
|
# both keyword arguments work.
|
||||||
|
with u16.open("r", encoding="utf-8", errors="strict") as f:
|
||||||
|
# error during decoding with wrong codec.
|
||||||
|
with self.assertRaises(UnicodeDecodeError):
|
||||||
|
f.read()
|
||||||
|
|
||||||
|
def test_encoding_warnings(self):
|
||||||
|
"""EncodingWarning must blame the read_text and open calls."""
|
||||||
|
code = '''\
|
||||||
|
import io, zipfile
|
||||||
|
with zipfile.ZipFile(io.BytesIO(), "w") as zf:
|
||||||
|
zf.filename = '<test_encoding_warnings in memory zip file>'
|
||||||
|
zf.writestr("path/file.txt", b"Spanish Inquisition")
|
||||||
|
root = zipfile.Path(zf)
|
||||||
|
(path,) = root.iterdir()
|
||||||
|
file_path = path.joinpath("file.txt")
|
||||||
|
unused = file_path.read_text() # should warn
|
||||||
|
file_path.open("r").close() # should warn
|
||||||
|
'''
|
||||||
|
proc = assert_python_ok('-X', 'warn_default_encoding', '-c', code)
|
||||||
|
warnings = proc.err.splitlines()
|
||||||
|
self.assertEqual(len(warnings), 2, proc.err)
|
||||||
|
self.assertRegex(warnings[0], rb"^<string>:8: EncodingWarning:")
|
||||||
|
self.assertRegex(warnings[1], rb"^<string>:9: EncodingWarning:")
|
||||||
|
|
||||||
def test_open_write(self):
|
def test_open_write(self):
|
||||||
"""
|
"""
|
||||||
|
@ -187,6 +250,7 @@ class TestPath(unittest.TestCase):
|
||||||
root = zipfile.Path(alpharep)
|
root = zipfile.Path(alpharep)
|
||||||
a, b, g = root.iterdir()
|
a, b, g = root.iterdir()
|
||||||
assert a.read_text(encoding="utf-8") == "content of a"
|
assert a.read_text(encoding="utf-8") == "content of a"
|
||||||
|
a.read_text("utf-8") # No positional arg TypeError per gh-101144.
|
||||||
assert a.read_bytes() == b"content of a"
|
assert a.read_bytes() == b"content of a"
|
||||||
|
|
||||||
@pass_alpharep
|
@pass_alpharep
|
||||||
|
|
|
@ -148,6 +148,11 @@ class FastLookup(CompleteDirs):
|
||||||
return self.__lookup
|
return self.__lookup
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_text_encoding(encoding=None, *args, **kwargs):
|
||||||
|
# stacklevel=3 so that the caller of the caller see any warning.
|
||||||
|
return io.text_encoding(encoding, 3), args, kwargs
|
||||||
|
|
||||||
|
|
||||||
class Path:
|
class Path:
|
||||||
"""
|
"""
|
||||||
A pathlib-compatible interface for zip files.
|
A pathlib-compatible interface for zip files.
|
||||||
|
@ -257,9 +262,9 @@ class Path:
|
||||||
if args or kwargs:
|
if args or kwargs:
|
||||||
raise ValueError("encoding args invalid for binary operation")
|
raise ValueError("encoding args invalid for binary operation")
|
||||||
return stream
|
return stream
|
||||||
else:
|
# Text mode:
|
||||||
kwargs["encoding"] = io.text_encoding(kwargs.get("encoding"))
|
encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
|
||||||
return io.TextIOWrapper(stream, *args, **kwargs)
|
return io.TextIOWrapper(stream, encoding, *args, **kwargs)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def name(self):
|
def name(self):
|
||||||
|
@ -282,8 +287,8 @@ class Path:
|
||||||
return pathlib.Path(self.root.filename).joinpath(self.at)
|
return pathlib.Path(self.root.filename).joinpath(self.at)
|
||||||
|
|
||||||
def read_text(self, *args, **kwargs):
|
def read_text(self, *args, **kwargs):
|
||||||
kwargs["encoding"] = io.text_encoding(kwargs.get("encoding"))
|
encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
|
||||||
with self.open('r', *args, **kwargs) as strm:
|
with self.open('r', encoding, *args, **kwargs) as strm:
|
||||||
return strm.read()
|
return strm.read()
|
||||||
|
|
||||||
def read_bytes(self):
|
def read_bytes(self):
|
||||||
|
|
|
@ -0,0 +1,4 @@
|
||||||
|
Make :func:`zipfile.Path.open` and :func:`zipfile.Path.read_text` also accept
|
||||||
|
``encoding`` as a positional argument. This was the behavior in Python 3.9 and
|
||||||
|
earlier. 3.10 introduced a regression where supplying it as a positional
|
||||||
|
argument would lead to a :exc:`TypeError`.
|
Loading…
Add table
Add a link
Reference in a new issue