mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
gh-103997: Automatically dedent the argument to "-c" (#103998)
Co-authored-by: sunmy2019 <59365878+sunmy2019@users.noreply.github.com> Co-authored-by: Kirill Podoprigora <80244920+Eclips4@users.noreply.github.com> Co-authored-by: Inada Naoki <songofacandy@gmail.com> Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
This commit is contained in:
parent
50e518e886
commit
fc0ec29889
7 changed files with 266 additions and 0 deletions
|
@ -73,6 +73,9 @@ source.
|
||||||
|
|
||||||
.. audit-event:: cpython.run_command command cmdoption-c
|
.. audit-event:: cpython.run_command command cmdoption-c
|
||||||
|
|
||||||
|
.. versionchanged:: next
|
||||||
|
*command* is automatically dedented before execution.
|
||||||
|
|
||||||
.. option:: -m <module-name>
|
.. option:: -m <module-name>
|
||||||
|
|
||||||
Search :data:`sys.path` for the named module and execute its contents as
|
Search :data:`sys.path` for the named module and execute its contents as
|
||||||
|
|
|
@ -474,6 +474,12 @@ Other language changes
|
||||||
explicitly overridden in the subclass.
|
explicitly overridden in the subclass.
|
||||||
(Contributed by Tomasz Pytel in :gh:`132329`.)
|
(Contributed by Tomasz Pytel in :gh:`132329`.)
|
||||||
|
|
||||||
|
* The command line option :option:`-c` now automatically dedents its code
|
||||||
|
argument before execution. The auto-dedentation behavior mirrors
|
||||||
|
:func:`textwrap.dedent`.
|
||||||
|
(Contributed by Jon Crall and Steven Sun in :gh:`103998`.)
|
||||||
|
|
||||||
|
|
||||||
.. _whatsnew314-pep765:
|
.. _whatsnew314-pep765:
|
||||||
|
|
||||||
PEP 765: Disallow return/break/continue that exit a finally block
|
PEP 765: Disallow return/break/continue that exit a finally block
|
||||||
|
|
|
@ -247,6 +247,12 @@ extern Py_ssize_t _PyUnicode_InsertThousandsGrouping(
|
||||||
Py_UCS4 *maxchar,
|
Py_UCS4 *maxchar,
|
||||||
int forward);
|
int forward);
|
||||||
|
|
||||||
|
/* Dedent a string.
|
||||||
|
Behaviour is expected to be an exact match of `textwrap.dedent`.
|
||||||
|
Return a new reference on success, NULL with exception set on error.
|
||||||
|
*/
|
||||||
|
extern PyObject* _PyUnicode_Dedent(PyObject *unicode);
|
||||||
|
|
||||||
/* --- Misc functions ----------------------------------------------------- */
|
/* --- Misc functions ----------------------------------------------------- */
|
||||||
|
|
||||||
extern PyObject* _PyUnicode_FormatLong(PyObject *, int, int, int);
|
extern PyObject* _PyUnicode_FormatLong(PyObject *, int, int, int);
|
||||||
|
|
|
@ -17,6 +17,8 @@ from test.support.script_helper import (
|
||||||
spawn_python, kill_python, assert_python_ok, assert_python_failure,
|
spawn_python, kill_python, assert_python_ok, assert_python_failure,
|
||||||
interpreter_requires_environment
|
interpreter_requires_environment
|
||||||
)
|
)
|
||||||
|
from textwrap import dedent
|
||||||
|
|
||||||
|
|
||||||
if not support.has_subprocess_support:
|
if not support.has_subprocess_support:
|
||||||
raise unittest.SkipTest("test module requires subprocess")
|
raise unittest.SkipTest("test module requires subprocess")
|
||||||
|
@ -1051,6 +1053,88 @@ class CmdLineTest(unittest.TestCase):
|
||||||
)
|
)
|
||||||
self.assertEqual(res2int(res), (6000, 6000))
|
self.assertEqual(res2int(res), (6000, 6000))
|
||||||
|
|
||||||
|
def test_cmd_dedent(self):
    # Test that -c auto-dedents its argument.
    #
    # Every case is a (code, expected_stdout) pair.  The code is run twice:
    # once as-is (relying on the interpreter's auto-dedent) and once after
    # being dedented with textwrap.dedent.  Both runs must succeed and
    # produce the same, expected output.
    test_cases = [
        (
            """
                print('space-auto-dedent')
            """,
            "space-auto-dedent",
        ),
        (
            dedent(
                """
                    ^^^print('tab-auto-dedent')
                """
            ).replace("^", "\t"),
            "tab-auto-dedent",
        ),
        (
            dedent(
                """
                    ^^if 1:
                    ^^^^print('mixed-auto-dedent-1')
                    ^^print('mixed-auto-dedent-2')
                """
            ).replace("^", "\t \t"),
            "mixed-auto-dedent-1\nmixed-auto-dedent-2",
        ),
        (
            # "$" marks the end of each line so that trailing whitespace
            # survives editors; it is stripped before the code is run.
            '''
                data = """$

                this data has an empty newline above and a newline with spaces below $
                                   $
                """$
                if 1: $
                    print(repr(data))$
            '''.replace(
                "$", ""
            ),
            # Note: entirely blank lines are normalized to \n, even if they
            # are part of a data string. This is consistent with
            # textwrap.dedent behavior, but might not be intuitive.
            "'\\n\\nthis data has an empty newline above and a newline with spaces below \\n\\n'",
        ),
        (
            '',
            '',
        ),
        (
            ' \t\n\t\n \t\t\t \t\t \t\n\t\t \n\n\n\t\t\t ',
            '',
        ),
    ]

    def run_command(source):
        # Run `python -c source`, require a zero exit status and return
        # the stripped, decoded stdout.
        args = sys.executable, '-c', source
        proc = subprocess.run(args, stdout=subprocess.PIPE)
        self.assertEqual(proc.returncode, 0, proc)
        return proc.stdout.strip().decode(encoding='utf-8')

    for code, expected in test_cases:
        # subTest keeps the remaining cases running after a failure and
        # identifies the offending input in the report.
        with self.subTest(code=code):
            # Run the auto-dedent case
            output1 = run_command(code)

            # Manually dedent beforehand, check the result is the same.
            output2 = run_command(dedent(code))

            self.assertEqual(output1, output2)
            self.assertEqual(output1.replace('\r\n', '\n'), expected)
|
||||||
|
|
||||||
|
def test_cmd_dedent_failcase(self):
    # Mixing tabs and spaces is not allowed
    #
    # "-" and "+" are placeholders for the two indentation characters; the
    # template is instantiated once for each assignment of space/tab.
    # (dedent comes from the module-level "from textwrap import dedent".)
    template = dedent(
        '''
        -+if 1:
        +-++ print('will fail')
        ''')
    for minus, plus in ((' ', '\t'), ('\t', ' ')):
        code = template.replace('-', minus).replace('+', plus)
        with self.subTest(code=code):
            assert_python_failure('-c', code)
|
||||||
|
|
||||||
def test_cpu_count(self):
|
def test_cpu_count(self):
|
||||||
code = "import os; print(os.cpu_count(), os.process_cpu_count())"
|
code = "import os; print(os.cpu_count(), os.process_cpu_count())"
|
||||||
res = assert_python_ok('-X', 'cpu_count=4321', '-c', code)
|
res = assert_python_ok('-X', 'cpu_count=4321', '-c', code)
|
||||||
|
|
|
@ -0,0 +1,4 @@
|
||||||
|
String arguments passed to "-c" are now automatically dedented as if by
|
||||||
|
:func:`textwrap.dedent`. This allows "python -c" invocations to be indented
|
||||||
|
in shell scripts without causing indentation errors. (Patch by Jon Crall and
|
||||||
|
Steven Sun.)
|
|
@ -11,6 +11,7 @@
|
||||||
#include "pycore_pylifecycle.h" // _Py_PreInitializeFromPyArgv()
|
#include "pycore_pylifecycle.h" // _Py_PreInitializeFromPyArgv()
|
||||||
#include "pycore_pystate.h" // _PyInterpreterState_GET()
|
#include "pycore_pystate.h" // _PyInterpreterState_GET()
|
||||||
#include "pycore_pythonrun.h" // _PyRun_AnyFileObject()
|
#include "pycore_pythonrun.h" // _PyRun_AnyFileObject()
|
||||||
|
#include "pycore_unicodeobject.h" // _PyUnicode_Dedent()
|
||||||
|
|
||||||
/* Includes for exit_sigint() */
|
/* Includes for exit_sigint() */
|
||||||
#include <stdio.h> // perror()
|
#include <stdio.h> // perror()
|
||||||
|
@ -244,6 +245,11 @@ pymain_run_command(wchar_t *command)
|
||||||
return pymain_exit_err_print();
|
return pymain_exit_err_print();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Py_SETREF(unicode, _PyUnicode_Dedent(unicode));
|
||||||
|
if (unicode == NULL) {
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
bytes = PyUnicode_AsUTF8String(unicode);
|
bytes = PyUnicode_AsUTF8String(unicode);
|
||||||
Py_DECREF(unicode);
|
Py_DECREF(unicode);
|
||||||
if (bytes == NULL) {
|
if (bytes == NULL) {
|
||||||
|
|
|
@ -14270,6 +14270,163 @@ unicode_getnewargs(PyObject *v, PyObject *Py_UNUSED(ignored))
|
||||||
return Py_BuildValue("(N)", copy);
|
return Py_BuildValue("(N)", copy);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
   Search for the longest run of leading whitespace (spaces and tabs) that is
   shared by every non-blank line of the buffer [src, end).

   Lines are delimited by '\n'.  A line that is empty or made up solely of
   spaces/tabs is ignored.

   Returns the length, in bytes, of the shared leading whitespace.  When that
   length is > 0, `*output` is set to the start of the first non-blank line,
   whose first bytes are the shared whitespace.  When 0 is returned,
   `*output` is left untouched.
*/
static Py_ssize_t
search_longest_common_leading_whitespace(
    const char *const src,
    const char *const end,
    const char **output)
{
    // [prefix, prefix + prefix_len) is the shared indentation found so far;
    // `prefix` stays NULL until the first non-blank line has been seen.
    const char *prefix = NULL;
    Py_ssize_t prefix_len = 0;

    const char *next_line = src;
    while (next_line < end) {
        const char *const line = next_line;

        // measure the indentation of this line
        const char *indent_end = line;
        while (indent_end < end
               && (*indent_end == ' ' || *indent_end == '\t'))
        {
            ++indent_end;
        }

        // find the end of this line: its '\n', or `end` for the last line
        const char *eol = indent_end;
        while (eol < end && *eol != '\n') {
            ++eol;
        }
        next_line = (eol < end) ? eol + 1 : end;

        if (indent_end == eol) {
            // nothing but whitespace on this line, it does not count
            continue;
        }
        if (indent_end == line) {
            // a non-blank line without any indentation: nothing is shared
            return 0;
        }

        const Py_ssize_t indent_len = indent_end - line;
        if (prefix == NULL) {
            // first non-blank line: its whole indentation is the candidate
            prefix = line;
            prefix_len = indent_len;
            assert(prefix_len > 0);
            continue;
        }

        // shrink the candidate to what it shares with this line's indent
        const Py_ssize_t limit =
            indent_len < prefix_len ? indent_len : prefix_len;
        Py_ssize_t shared = 0;
        while (shared < limit && prefix[shared] == line[shared]) {
            ++shared;
        }
        if (shared == 0) {
            // the indentations already differ at the first byte
            return 0;
        }
        prefix_len = shared;
    }

    assert(prefix_len >= 0);
    if (prefix_len > 0) {
        *output = prefix;
    }
    return prefix_len;
}
|
||||||
|
|
||||||
|
/* Dedent a string.
|
||||||
|
Behaviour is expected to be an exact match of `textwrap.dedent`.
|
||||||
|
Return a new reference on success, NULL with exception set on error.
|
||||||
|
*/
|
||||||
|
PyObject *
|
||||||
|
_PyUnicode_Dedent(PyObject *unicode)
|
||||||
|
{
|
||||||
|
Py_ssize_t src_len = 0;
|
||||||
|
const char *src = PyUnicode_AsUTF8AndSize(unicode, &src_len);
|
||||||
|
if (!src) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
assert(src_len >= 0);
|
||||||
|
if (src_len == 0) {
|
||||||
|
return Py_NewRef(unicode);
|
||||||
|
}
|
||||||
|
|
||||||
|
const char *const end = src + src_len;
|
||||||
|
|
||||||
|
// [whitespace_start, whitespace_start + whitespace_len)
|
||||||
|
// describes the current longest common leading whitespace
|
||||||
|
const char *whitespace_start = NULL;
|
||||||
|
Py_ssize_t whitespace_len = search_longest_common_leading_whitespace(
|
||||||
|
src, end, &whitespace_start);
|
||||||
|
|
||||||
|
if (whitespace_len == 0) {
|
||||||
|
return Py_NewRef(unicode);
|
||||||
|
}
|
||||||
|
|
||||||
|
// now we should trigger a dedent
|
||||||
|
char *dest = PyMem_Malloc(src_len);
|
||||||
|
if (!dest) {
|
||||||
|
PyErr_NoMemory();
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
char *dest_iter = dest;
|
||||||
|
|
||||||
|
for (const char *iter = src; iter < end; ++iter) {
|
||||||
|
const char *line_start = iter;
|
||||||
|
bool in_leading_space = true;
|
||||||
|
|
||||||
|
// iterate over a line to find the end of a line
|
||||||
|
while (iter < end && *iter != '\n') {
|
||||||
|
if (in_leading_space && *iter != ' ' && *iter != '\t') {
|
||||||
|
in_leading_space = false;
|
||||||
|
}
|
||||||
|
++iter;
|
||||||
|
}
|
||||||
|
|
||||||
|
// invariant: *iter == '\n' or iter == end
|
||||||
|
bool append_newline = iter < end;
|
||||||
|
|
||||||
|
// if this line has all white space, write '\n' and continue
|
||||||
|
if (in_leading_space && append_newline) {
|
||||||
|
*dest_iter++ = '\n';
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* copy [new_line_start + whitespace_len, iter) to buffer, then
|
||||||
|
conditionally append '\n' */
|
||||||
|
|
||||||
|
Py_ssize_t new_line_len = iter - line_start - whitespace_len;
|
||||||
|
assert(new_line_len >= 0);
|
||||||
|
memcpy(dest_iter, line_start + whitespace_len, new_line_len);
|
||||||
|
|
||||||
|
dest_iter += new_line_len;
|
||||||
|
|
||||||
|
if (append_newline) {
|
||||||
|
*dest_iter++ = '\n';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
PyObject *res = PyUnicode_FromStringAndSize(dest, dest_iter - dest);
|
||||||
|
PyMem_Free(dest);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
static PyMethodDef unicode_methods[] = {
|
static PyMethodDef unicode_methods[] = {
|
||||||
UNICODE_ENCODE_METHODDEF
|
UNICODE_ENCODE_METHODDEF
|
||||||
UNICODE_REPLACE_METHODDEF
|
UNICODE_REPLACE_METHODDEF
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue