Issue #6135: Adds encoding and errors parameters to subprocess

This commit is contained in:
Steve Dower 2016-09-06 20:16:17 -07:00
parent 173a1f3dc7
commit 050acaed99
5 changed files with 154 additions and 100 deletions

View file

@ -38,7 +38,8 @@ compatibility with older versions, see the :ref:`call-function-trio` section.
.. function:: run(args, *, stdin=None, input=None, stdout=None, stderr=None,\ .. function:: run(args, *, stdin=None, input=None, stdout=None, stderr=None,\
shell=False, timeout=None, check=False) shell=False, timeout=None, check=False, \
encoding=None, errors=None)
Run the command described by *args*. Wait for command to complete, then Run the command described by *args*. Wait for command to complete, then
return a :class:`CompletedProcess` instance. return a :class:`CompletedProcess` instance.
@ -60,15 +61,20 @@ compatibility with older versions, see the :ref:`call-function-trio` section.
The *input* argument is passed to :meth:`Popen.communicate` and thus to the The *input* argument is passed to :meth:`Popen.communicate` and thus to the
subprocess's stdin. If used it must be a byte sequence, or a string if subprocess's stdin. If used it must be a byte sequence, or a string if
``universal_newlines=True``. When used, the internal :class:`Popen` object *encoding* or *errors* is specified or *universal_newlines* is True. When
is automatically created with ``stdin=PIPE``, and the *stdin* argument may used, the internal :class:`Popen` object is automatically created with
not be used as well. ``stdin=PIPE``, and the *stdin* argument may not be used as well.
If *check* is True, and the process exits with a non-zero exit code, a If *check* is True, and the process exits with a non-zero exit code, a
:exc:`CalledProcessError` exception will be raised. Attributes of that :exc:`CalledProcessError` exception will be raised. Attributes of that
exception hold the arguments, the exit code, and stdout and stderr if they exception hold the arguments, the exit code, and stdout and stderr if they
were captured. were captured.
If *encoding* or *errors* are specified, or *universal_newlines* is True,
file objects for stdin, stdout and stderr are opened in text mode using the
specified *encoding* and *errors* or the :class:`io.TextIOWrapper` default.
Otherwise, file objects are opened in binary mode.
Examples:: Examples::
>>> subprocess.run(["ls", "-l"]) # doesn't capture output >>> subprocess.run(["ls", "-l"]) # doesn't capture output
@ -85,6 +91,10 @@ compatibility with older versions, see the :ref:`call-function-trio` section.
.. versionadded:: 3.5 .. versionadded:: 3.5
.. versionchanged:: 3.6
Added *encoding* and *errors* parameters.
.. class:: CompletedProcess .. class:: CompletedProcess
The return value from :func:`run`, representing a process that has finished. The return value from :func:`run`, representing a process that has finished.
@ -104,8 +114,8 @@ compatibility with older versions, see the :ref:`call-function-trio` section.
.. attribute:: stdout .. attribute:: stdout
Captured stdout from the child process. A bytes sequence, or a string if Captured stdout from the child process. A bytes sequence, or a string if
:func:`run` was called with ``universal_newlines=True``. None if stdout :func:`run` was called with an encoding or errors, or with ``universal_newlines=True``. None if stdout was not
was not captured. captured.
If you ran the process with ``stderr=subprocess.STDOUT``, stdout and If you ran the process with ``stderr=subprocess.STDOUT``, stdout and
stderr will be combined in this attribute, and :attr:`stderr` will be stderr will be combined in this attribute, and :attr:`stderr` will be
@ -114,8 +124,8 @@ compatibility with older versions, see the :ref:`call-function-trio` section.
.. attribute:: stderr .. attribute:: stderr
Captured stderr from the child process. A bytes sequence, or a string if Captured stderr from the child process. A bytes sequence, or a string if
:func:`run` was called with ``universal_newlines=True``. None if stderr :func:`run` was called with an encoding or errors, or with ``universal_newlines=True``. None if stderr was not
was not captured. captured.
.. method:: check_returncode() .. method:: check_returncode()
@ -249,19 +259,22 @@ default values. The arguments that are most commonly needed are:
.. index:: .. index::
single: universal newlines; subprocess module single: universal newlines; subprocess module
If *universal_newlines* is ``False`` the file objects *stdin*, *stdout* and If *encoding* or *errors* are specified, or *universal_newlines* is True,
*stderr* will be opened as binary streams, and no line ending conversion is the file objects *stdin*, *stdout* and *stderr* will be opened in text
done. mode using the *encoding* and *errors* specified in the call or the
defaults for :class:`io.TextIOWrapper`.
If *universal_newlines* is ``True``, these file objects For *stdin*, line ending characters ``'\n'`` in the input will be converted
will be opened as text streams in :term:`universal newlines` mode to the default line separator :data:`os.linesep`. For *stdout* and *stderr*,
using the encoding returned by :func:`locale.getpreferredencoding(False) all line endings in the output will be converted to ``'\n'``. For more
<locale.getpreferredencoding>`. For *stdin*, line ending characters information see the documentation of the :class:`io.TextIOWrapper` class
``'\n'`` in the input will be converted to the default line separator when the *newline* argument to its constructor is ``None``.
:data:`os.linesep`. For *stdout* and *stderr*, all line endings in the
output will be converted to ``'\n'``. For more information see the If text mode is not used, *stdin*, *stdout* and *stderr* will be opened as
documentation of the :class:`io.TextIOWrapper` class when the *newline* binary streams. No encoding or line ending conversion is performed.
argument to its constructor is ``None``.
.. versionadded:: 3.6
Added *encoding* and *errors* parameters.
.. note:: .. note::
@ -306,7 +319,8 @@ functions.
stderr=None, preexec_fn=None, close_fds=True, shell=False, \ stderr=None, preexec_fn=None, close_fds=True, shell=False, \
cwd=None, env=None, universal_newlines=False, \ cwd=None, env=None, universal_newlines=False, \
startupinfo=None, creationflags=0, restore_signals=True, \ startupinfo=None, creationflags=0, restore_signals=True, \
start_new_session=False, pass_fds=()) start_new_session=False, pass_fds=(), *, \
encoding=None, errors=None)
Execute a child program in a new process. On POSIX, the class uses Execute a child program in a new process. On POSIX, the class uses
:meth:`os.execvp`-like behavior to execute the child program. On Windows, :meth:`os.execvp`-like behavior to execute the child program. On Windows,
@ -482,10 +496,14 @@ functions.
.. _side-by-side assembly: https://en.wikipedia.org/wiki/Side-by-Side_Assembly .. _side-by-side assembly: https://en.wikipedia.org/wiki/Side-by-Side_Assembly
If *universal_newlines* is ``True``, the file objects *stdin*, *stdout* If *encoding* or *errors* are specified, the file objects *stdin*, *stdout*
and *stderr* are opened as text streams in universal newlines mode, as and *stderr* are opened in text mode with the specified encoding and
described above in :ref:`frequently-used-arguments`, otherwise they are *errors*, as described above in :ref:`frequently-used-arguments`. If
opened as binary streams. *universal_newlines* is ``True``, they are opened in text mode with default
encoding. Otherwise, they are opened as binary streams.
.. versionadded:: 3.6
*encoding* and *errors* were added.
If given, *startupinfo* will be a :class:`STARTUPINFO` object, which is If given, *startupinfo* will be a :class:`STARTUPINFO` object, which is
passed to the underlying ``CreateProcess`` function. passed to the underlying ``CreateProcess`` function.
@ -601,11 +619,12 @@ Instances of the :class:`Popen` class have the following methods:
Interact with process: Send data to stdin. Read data from stdout and stderr, Interact with process: Send data to stdin. Read data from stdout and stderr,
until end-of-file is reached. Wait for process to terminate. The optional until end-of-file is reached. Wait for process to terminate. The optional
*input* argument should be data to be sent to the child process, or *input* argument should be data to be sent to the child process, or
``None``, if no data should be sent to the child. The type of *input* ``None``, if no data should be sent to the child. If streams were opened in
must be bytes or, if *universal_newlines* was ``True``, a string. text mode, *input* must be a string. Otherwise, it must be bytes.
:meth:`communicate` returns a tuple ``(stdout_data, stderr_data)``. :meth:`communicate` returns a tuple ``(stdout_data, stderr_data)``.
The data will be bytes or, if *universal_newlines* was ``True``, strings. The data will be strings if streams were opened in text mode; otherwise,
bytes.
Note that if you want to send data to the process's stdin, you need to create Note that if you want to send data to the process's stdin, you need to create
the Popen object with ``stdin=PIPE``. Similarly, to get anything other than the Popen object with ``stdin=PIPE``. Similarly, to get anything other than
@ -672,28 +691,30 @@ The following attributes are also available:
.. attribute:: Popen.stdin .. attribute:: Popen.stdin
If the *stdin* argument was :data:`PIPE`, this attribute is a writeable If the *stdin* argument was :data:`PIPE`, this attribute is a writeable
stream object as returned by :func:`open`. If the *universal_newlines* stream object as returned by :func:`open`. If the *encoding* or *errors*
argument was ``True``, the stream is a text stream, otherwise it is a byte arguments were specified or the *universal_newlines* argument was ``True``,
stream. If the *stdin* argument was not :data:`PIPE`, this attribute is the stream is a text stream, otherwise it is a byte stream. If the *stdin*
``None``. argument was not :data:`PIPE`, this attribute is ``None``.
.. attribute:: Popen.stdout .. attribute:: Popen.stdout
If the *stdout* argument was :data:`PIPE`, this attribute is a readable If the *stdout* argument was :data:`PIPE`, this attribute is a readable
stream object as returned by :func:`open`. Reading from the stream provides stream object as returned by :func:`open`. Reading from the stream provides
output from the child process. If the *universal_newlines* argument was output from the child process. If the *encoding* or *errors* arguments were
``True``, the stream is a text stream, otherwise it is a byte stream. If the specified or the *universal_newlines* argument was ``True``, the stream is a
*stdout* argument was not :data:`PIPE`, this attribute is ``None``. text stream, otherwise it is a byte stream. If the *stdout* argument was not
:data:`PIPE`, this attribute is ``None``.
.. attribute:: Popen.stderr .. attribute:: Popen.stderr
If the *stderr* argument was :data:`PIPE`, this attribute is a readable If the *stderr* argument was :data:`PIPE`, this attribute is a readable
stream object as returned by :func:`open`. Reading from the stream provides stream object as returned by :func:`open`. Reading from the stream provides
error output from the child process. If the *universal_newlines* argument was error output from the child process. If the *encoding* or *errors* arguments
``True``, the stream is a text stream, otherwise it is a byte stream. If the were specified or the *universal_newlines* argument was ``True``, the stream
*stderr* argument was not :data:`PIPE`, this attribute is ``None``. is a text stream, otherwise it is a byte stream. If the *stderr* argument was
not :data:`PIPE`, this attribute is ``None``.
.. warning:: .. warning::
@ -886,7 +907,9 @@ calls these functions.
*timeout* was added. *timeout* was added.
.. function:: check_output(args, *, stdin=None, stderr=None, shell=False, universal_newlines=False, timeout=None) .. function:: check_output(args, *, stdin=None, stderr=None, shell=False, \
encoding=None, errors=None, \
universal_newlines=False, timeout=None)
Run command with arguments and return its output. Run command with arguments and return its output.
@ -1142,7 +1165,7 @@ handling consistency are valid for these functions.
Return ``(status, output)`` of executing *cmd* in a shell. Return ``(status, output)`` of executing *cmd* in a shell.
Execute the string *cmd* in a shell with :func:`check_output` and Execute the string *cmd* in a shell with :func:`check_output` and
return a 2-tuple ``(status, output)``. Universal newlines mode is used; return a 2-tuple ``(status, output)``. The locale encoding is used;
see the notes on :ref:`frequently-used-arguments` for more details. see the notes on :ref:`frequently-used-arguments` for more details.
A trailing newline is stripped from the output. A trailing newline is stripped from the output.

View file

@ -589,6 +589,9 @@ proc: ...``) or call explicitly the :meth:`~subprocess.Popen.wait` method to
read the exit status of the child process (Contributed by Victor Stinner in read the exit status of the child process (Contributed by Victor Stinner in
:issue:`26741`). :issue:`26741`).
The :class:`subprocess.Popen` constructor and all functions that pass arguments
through to it now accept *encoding* and *errors* arguments. Specifying either
of these will enable text mode for the *stdin*, *stdout* and *stderr* streams.
telnetlib telnetlib
--------- ---------

View file

@ -30,7 +30,8 @@ class Popen(args, bufsize=-1, executable=None,
preexec_fn=None, close_fds=True, shell=False, preexec_fn=None, close_fds=True, shell=False,
cwd=None, env=None, universal_newlines=False, cwd=None, env=None, universal_newlines=False,
startupinfo=None, creationflags=0, startupinfo=None, creationflags=0,
restore_signals=True, start_new_session=False, pass_fds=()): restore_signals=True, start_new_session=False, pass_fds=(),
*, encoding=None, errors=None):
Arguments are: Arguments are:
@ -104,20 +105,13 @@ in the child process prior to executing the command.
If env is not None, it defines the environment variables for the new If env is not None, it defines the environment variables for the new
process. process.
If universal_newlines is False, the file objects stdin, stdout and stderr If encoding or errors are specified or universal_newlines is True, the file
are opened as binary files, and no line ending conversion is done. objects stdin, stdout and stderr are opened in text mode. See io.TextIOWrapper for
the interpretation of these parameters.
If universal_newlines is True, the file objects stdout and stderr are If neither encoding nor errors is specified and universal_newlines is False, the file
opened as a text file, but lines may be terminated by any of '\n', objects stdin, stdout and stderr are opened as binary files, and no
the Unix end-of-line convention, '\r', the old Macintosh convention or line ending conversion is done.
'\r\n', the Windows convention. All of these external representations
are seen as '\n' by the Python program. Also, the newlines attribute
of the file objects stdout, stdin and stderr are not updated by the
communicate() method.
In either case, the process being communicated with should start up
expecting to receive bytes on its standard input and decode them with
the same encoding they are sent in.
The startupinfo and creationflags, if given, will be passed to the The startupinfo and creationflags, if given, will be passed to the
underlying CreateProcess() function. They can specify things such as underlying CreateProcess() function. They can specify things such as
@ -234,11 +228,8 @@ communicate(input=None)
and stderr, until end-of-file is reached. Wait for process to and stderr, until end-of-file is reached. Wait for process to
terminate. The optional input argument should be data to be terminate. The optional input argument should be data to be
sent to the child process, or None, if no data should be sent to sent to the child process, or None, if no data should be sent to
the child. If the Popen instance was constructed with universal_newlines the child. If the Popen instance was constructed in text mode, the
set to True, the input argument should be a string and will be encoded input argument should be a string. Otherwise, it should be bytes.
using the preferred system encoding (see locale.getpreferredencoding);
if universal_newlines is False, the input argument should be a
byte string.
communicate() returns a tuple (stdout, stderr). communicate() returns a tuple (stdout, stderr).
@ -808,8 +799,8 @@ def getstatusoutput(cmd):
""" Return (status, output) of executing cmd in a shell. """ Return (status, output) of executing cmd in a shell.
Execute the string 'cmd' in a shell with 'check_output' and Execute the string 'cmd' in a shell with 'check_output' and
return a 2-tuple (status, output). Universal newlines mode is used, return a 2-tuple (status, output). The locale encoding is used
meaning that the result will be decoded to a string. to decode the output and process newlines.
A trailing newline is stripped from the output. A trailing newline is stripped from the output.
The exit status for the command can be interpreted The exit status for the command can be interpreted
@ -859,7 +850,7 @@ class Popen(object):
shell=False, cwd=None, env=None, universal_newlines=False, shell=False, cwd=None, env=None, universal_newlines=False,
startupinfo=None, creationflags=0, startupinfo=None, creationflags=0,
restore_signals=True, start_new_session=False, restore_signals=True, start_new_session=False,
pass_fds=()): pass_fds=(), *, encoding=None, errors=None):
"""Create new Popen instance.""" """Create new Popen instance."""
_cleanup() _cleanup()
# Held while anything is calling waitpid before returncode has been # Held while anything is calling waitpid before returncode has been
@ -912,6 +903,8 @@ class Popen(object):
self.pid = None self.pid = None
self.returncode = None self.returncode = None
self.universal_newlines = universal_newlines self.universal_newlines = universal_newlines
self.encoding = encoding
self.errors = errors
# Input and output objects. The general principle is like # Input and output objects. The general principle is like
# this: # this:
@ -944,22 +937,28 @@ class Popen(object):
if errread != -1: if errread != -1:
errread = msvcrt.open_osfhandle(errread.Detach(), 0) errread = msvcrt.open_osfhandle(errread.Detach(), 0)
if p2cwrite != -1: text_mode = encoding or errors or universal_newlines
self.stdin = io.open(p2cwrite, 'wb', bufsize)
if universal_newlines:
self.stdin = io.TextIOWrapper(self.stdin, write_through=True,
line_buffering=(bufsize == 1))
if c2pread != -1:
self.stdout = io.open(c2pread, 'rb', bufsize)
if universal_newlines:
self.stdout = io.TextIOWrapper(self.stdout)
if errread != -1:
self.stderr = io.open(errread, 'rb', bufsize)
if universal_newlines:
self.stderr = io.TextIOWrapper(self.stderr)
self._closed_child_pipe_fds = False self._closed_child_pipe_fds = False
try: try:
if p2cwrite != -1:
self.stdin = io.open(p2cwrite, 'wb', bufsize)
if text_mode:
self.stdin = io.TextIOWrapper(self.stdin, write_through=True,
line_buffering=(bufsize == 1),
encoding=encoding, errors=errors)
if c2pread != -1:
self.stdout = io.open(c2pread, 'rb', bufsize)
if text_mode:
self.stdout = io.TextIOWrapper(self.stdout,
encoding=encoding, errors=errors)
if errread != -1:
self.stderr = io.open(errread, 'rb', bufsize)
if text_mode:
self.stderr = io.TextIOWrapper(self.stderr,
encoding=encoding, errors=errors)
self._execute_child(args, executable, preexec_fn, close_fds, self._execute_child(args, executable, preexec_fn, close_fds,
pass_fds, cwd, env, pass_fds, cwd, env,
startupinfo, creationflags, shell, startupinfo, creationflags, shell,
@ -993,8 +992,8 @@ class Popen(object):
raise raise
def _translate_newlines(self, data, encoding): def _translate_newlines(self, data, encoding, errors):
data = data.decode(encoding) data = data.decode(encoding, errors)
return data.replace("\r\n", "\n").replace("\r", "\n") return data.replace("\r\n", "\n").replace("\r", "\n")
def __enter__(self): def __enter__(self):
@ -1779,13 +1778,15 @@ class Popen(object):
# Translate newlines, if requested. # Translate newlines, if requested.
# This also turns bytes into strings. # This also turns bytes into strings.
if self.universal_newlines: if self.encoding or self.errors or self.universal_newlines:
if stdout is not None: if stdout is not None:
stdout = self._translate_newlines(stdout, stdout = self._translate_newlines(stdout,
self.stdout.encoding) self.stdout.encoding,
self.stdout.errors)
if stderr is not None: if stderr is not None:
stderr = self._translate_newlines(stderr, stderr = self._translate_newlines(stderr,
self.stderr.encoding) self.stderr.encoding,
self.stderr.errors)
return (stdout, stderr) return (stdout, stderr)
@ -1797,8 +1798,10 @@ class Popen(object):
if self.stdin and self._input is None: if self.stdin and self._input is None:
self._input_offset = 0 self._input_offset = 0
self._input = input self._input = input
if self.universal_newlines and input is not None: if input is not None and (
self._input = self._input.encode(self.stdin.encoding) self.encoding or self.errors or self.universal_newlines):
self._input = self._input.encode(self.stdin.encoding,
self.stdin.errors)
def send_signal(self, sig): def send_signal(self, sig):

View file

@ -894,31 +894,42 @@ class ProcessTestCase(BaseTestCase):
# #
# UTF-16 and UTF-32-BE are sufficient to check both with BOM and # UTF-16 and UTF-32-BE are sufficient to check both with BOM and
# without, and UTF-16 and UTF-32. # without, and UTF-16 and UTF-32.
import _bootlocale
for encoding in ['utf-16', 'utf-32-be']: for encoding in ['utf-16', 'utf-32-be']:
old_getpreferredencoding = _bootlocale.getpreferredencoding
# Indirectly via io.TextIOWrapper, Popen() defaults to
# locale.getpreferredencoding(False) and earlier in Python 3.2 to
# locale.getpreferredencoding().
def getpreferredencoding(do_setlocale=True):
return encoding
code = ("import sys; " code = ("import sys; "
r"sys.stdout.buffer.write('1\r\n2\r3\n4'.encode('%s'))" % r"sys.stdout.buffer.write('1\r\n2\r3\n4'.encode('%s'))" %
encoding) encoding)
args = [sys.executable, '-c', code] args = [sys.executable, '-c', code]
try: # We set stdin to be non-None because, as of this writing,
_bootlocale.getpreferredencoding = getpreferredencoding # a different code path is used when the number of pipes is
# We set stdin to be non-None because, as of this writing, # zero or one.
# a different code path is used when the number of pipes is popen = subprocess.Popen(args,
# zero or one. stdin=subprocess.PIPE,
popen = subprocess.Popen(args, universal_newlines=True, stdout=subprocess.PIPE,
stdin=subprocess.PIPE, encoding=encoding)
stdout=subprocess.PIPE) stdout, stderr = popen.communicate(input='')
stdout, stderr = popen.communicate(input='')
finally:
_bootlocale.getpreferredencoding = old_getpreferredencoding
self.assertEqual(stdout, '1\n2\n3\n4') self.assertEqual(stdout, '1\n2\n3\n4')
def test_communicate_errors(self):
for errors, expected in [
('ignore', ''),
('replace', '\ufffd\ufffd'),
('surrogateescape', '\udc80\udc80'),
('backslashreplace', '\\x80\\x80'),
]:
code = ("import sys; "
r"sys.stdout.buffer.write(b'[\x80\x80]')")
args = [sys.executable, '-c', code]
# We set stdin to be non-None because, as of this writing,
# a different code path is used when the number of pipes is
# zero or one.
popen = subprocess.Popen(args,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
encoding='utf-8',
errors=errors)
stdout, stderr = popen.communicate(input='')
self.assertEqual(stdout, '[{}]'.format(expected))
def test_no_leaking(self): def test_no_leaking(self):
# Make sure we leak no resources # Make sure we leak no resources
if not mswindows: if not mswindows:
@ -2539,6 +2550,18 @@ class Win32ProcessTestCase(BaseTestCase):
with p: with p:
self.assertIn(b"physalis", p.stdout.read()) self.assertIn(b"physalis", p.stdout.read())
def test_shell_encodings(self):
# Run command through the shell (string)
for enc in ['ansi', 'oem']:
newenv = os.environ.copy()
newenv["FRUIT"] = "physalis"
p = subprocess.Popen("set", shell=1,
stdout=subprocess.PIPE,
env=newenv,
encoding=enc)
with p:
self.assertIn("physalis", p.stdout.read(), enc)
def test_call_string(self): def test_call_string(self):
# call() function with string argument on Windows # call() function with string argument on Windows
rc = subprocess.call(sys.executable + rc = subprocess.call(sys.executable +

View file

@ -237,6 +237,8 @@ Build
Windows Windows
------- -------
- Issue #6135: Adds encoding and errors parameters to subprocess.
- Issue #27959: Adds oem encoding, alias ansi to mbcs, move aliasmbcs to - Issue #27959: Adds oem encoding, alias ansi to mbcs, move aliasmbcs to
codec lookup. codec lookup.