gh-103997: Automatically dedent the argument to "-c" (#103998)

Co-authored-by: sunmy2019 <59365878+sunmy2019@users.noreply.github.com> Co-authored-by: Kirill Podoprigora <80244920+Eclips4@users.noreply.github.com> Co-authored-by: Inada Naoki <songofacandy@gmail.com> Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
2025-09-26 18:29:57 +00:00 · 2025-04-18 04:39:30 -04:00 · 2025-04-18 04:39:30 -04:00 · fc0ec29889
commit fc0ec29889
parent 50e518e886
7 changed files with 266 additions and 0 deletions
--- a/Doc/using/cmdline.rst
+++ b/Doc/using/cmdline.rst
@ -73,6 +73,9 @@ source.
   .. audit-event:: cpython.run_command command cmdoption-c
   .. versionchanged:: next
      *command* is automatically dedented before execution.
 .. option:: -m <module-name>
   Search :data:`sys.path` for the named module and execute its contents as
--- a/Doc/whatsnew/3.14.rst
+++ b/Doc/whatsnew/3.14.rst
@ -474,6 +474,12 @@ Other language changes
  explicitly overridden in the subclass.
  (Contributed by Tomasz Pytel in :gh:`132329`.)
 * The command line option :option:`-c` now automatically dedents its code
  argument before execution. The auto-dedentation behavior mirrors
  :func:`textwrap.dedent`.
  (Contributed by Jon Crall and Steven Sun in :gh:`103998`.)
 .. _whatsnew314-pep765:
 PEP 765: Disallow return/break/continue that exit a finally block
--- a/Include/internal/pycore_unicodeobject.h
+++ b/Include/internal/pycore_unicodeobject.h
@ -247,6 +247,12 @@ extern Py_ssize_t _PyUnicode_InsertThousandsGrouping(
    Py_UCS4 *maxchar,
    int forward);
 /* Dedent a string.
   Remove the longest run of leading spaces and tabs that is shared by every
   line containing something other than spaces and tabs.
   Behaviour is expected to be an exact match of `textwrap.dedent`.
   Return a new reference on success (this may be `unicode` itself when
   there is nothing to remove), NULL with exception set on error.
   */
 extern PyObject* _PyUnicode_Dedent(PyObject *unicode);
 /* --- Misc functions ----------------------------------------------------- */
 extern PyObject* _PyUnicode_FormatLong(PyObject *, int, int, int);
--- a/Lib/test/test_cmd_line.py
+++ b/Lib/test/test_cmd_line.py
@ -17,6 +17,8 @@ from test.support.script_helper import (
    spawn_python, kill_python, assert_python_ok, assert_python_failure,
    interpreter_requires_environment
 )
 from textwrap import dedent
 if not support.has_subprocess_support:
    raise unittest.SkipTest("test module requires subprocess")
@ -1051,6 +1053,88 @@ class CmdLineTest(unittest.TestCase):
        )
        self.assertEqual(res2int(res), (6000, 6000))
    def test_cmd_dedent(self):
        # Test that -c auto-dedents its argument.
        #
        # Each case is a (code, expected_stdout) pair.  The code is run twice:
        # once as-is (relying on the interpreter's auto-dedent) and once after
        # being dedented with textwrap.dedent; both runs must succeed and
        # print the same, expected, text.
        test_cases = [
            (
                """
                    print('space-auto-dedent')
                """,
                "space-auto-dedent",
            ),
            (
                dedent(
                    """
                ^^^print('tab-auto-dedent')
                """
                ).replace("^", "\t"),
                "tab-auto-dedent",
            ),
            (
                dedent(
                    """
                ^^if 1:
                ^^^^print('mixed-auto-dedent-1')
                ^^print('mixed-auto-dedent-2')
                """
                ).replace("^", "\t \t"),
                "mixed-auto-dedent-1\nmixed-auto-dedent-2",
            ),
            (
                # '$' marks the end of each line so that trailing whitespace
                # survives editors; the markers are stripped before running.
                # The empty line right after `data = """$` is significant: it
                # is the "empty newline above" that yields the second leading
                # '\n' in the expected repr.
                '''
                    data = """$

                    this data has an empty newline above and a newline with spaces below $
                                            $
                    """$
                    if 1:         $
                        print(repr(data))$
                '''.replace(
                    "$", ""
                ),
                # Note: entirely blank lines are normalized to \n, even if they
                # are part of a data string. This is consistent with
                # textwrap.dedent behavior, but might not be intuitive.
                "'\\n\\nthis data has an empty newline above and a newline with spaces below \\n\\n'",
            ),
            (
                '',
                '',
            ),
            (
                '  \t\n\t\n \t\t\t  \t\t \t\n\t\t \n\n\n\t\t\t   ',
                '',
            ),
        ]
        for code, expected in test_cases:
            # subTest reports which input failed and lets the remaining
            # cases run instead of stopping at the first failure.
            with self.subTest(code=code):
                # Run the auto-dedent case
                args1 = sys.executable, '-c', code
                proc1 = subprocess.run(args1, stdout=subprocess.PIPE)
                self.assertEqual(proc1.returncode, 0, proc1)
                output1 = proc1.stdout.strip().decode(encoding='utf-8')

                # Manually dedent beforehand, check the result is the same.
                args2 = sys.executable, '-c', dedent(code)
                proc2 = subprocess.run(args2, stdout=subprocess.PIPE)
                self.assertEqual(proc2.returncode, 0, proc2)
                output2 = proc2.stdout.strip().decode(encoding='utf-8')

                self.assertEqual(output1, output2)
                # Normalize Windows line endings before comparing.
                self.assertEqual(output1.replace('\r\n', '\n'), expected)
    def test_cmd_dedent_failcase(self):
        # Mixing tabs and spaces is not allowed: the two lines of the
        # template start with different characters, so they share no common
        # leading whitespace, nothing is dedented, and the still-indented
        # code must make the interpreter exit with a failure.
        # (`dedent` comes from the module-level import.)
        template = dedent(
            '''
            -+if 1:
            +-++ print('will fail')
            ''')
        # Try both assignments of the placeholders to space and tab.
        for space, tab in (('-', '+'), ('+', '-')):
            code = template.replace(space, ' ').replace(tab, '\t')
            with self.subTest(code=code):
                assert_python_failure('-c', code)
    def test_cpu_count(self):
        code = "import os; print(os.cpu_count(), os.process_cpu_count())"
        res = assert_python_ok('-X', 'cpu_count=4321', '-c', code)
--- a/Misc/NEWS.d/next/Core_and_Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst
@ -0,0 +1,4 @@
 String arguments passed to "-c" are now automatically dedented as if by
 :func:`textwrap.dedent`. This allows "python -c" invocations to be indented
 in shell scripts without causing indentation errors. (Patch by Jon Crall and
 Steven Sun.)
--- a/Modules/main.c
+++ b/Modules/main.c
@ -11,6 +11,7 @@
 #include "pycore_pylifecycle.h"   // _Py_PreInitializeFromPyArgv()
 #include "pycore_pystate.h"       // _PyInterpreterState_GET()
 #include "pycore_pythonrun.h"     // _PyRun_AnyFileObject()
 #include "pycore_unicodeobject.h" // _PyUnicode_Dedent()
 /* Includes for exit_sigint() */
 #include <stdio.h>                // perror()
@ -244,6 +245,11 @@ pymain_run_command(wchar_t *command)
        return pymain_exit_err_print();
    }
    Py_SETREF(unicode, _PyUnicode_Dedent(unicode));
    if (unicode == NULL) {
        goto error;
    }
    bytes = PyUnicode_AsUTF8String(unicode);
    Py_DECREF(unicode);
    if (bytes == NULL) {
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@ -14270,6 +14270,163 @@ unicode_getnewargs(PyObject *v, PyObject *Py_UNUSED(ignored))
    return Py_BuildValue("(N)", copy);
 }
 /*
 Search for the longest run of leading whitespace (spaces and tabs) that is
 common to all lines in [src, end).
 Lines consisting only of spaces and tabs, empty lines included, are ignored.
 Returns the length in bytes of that common run.  When the length is > 0,
 `*output` is set to the start of the first line that has visible content
 (whose first `length` bytes are the common run); when the length is 0,
 `*output` is left untouched.
 */
 static Py_ssize_t
 search_longest_common_leading_whitespace(
    const char *const src,
    const char *const end,
    const char **output)
 {
    // [margin, margin + margin_len) is the best candidate found so far.
    const char *margin = NULL;
    Py_ssize_t margin_len = 0;

    const char *line = src;
    while (line < end) {
        // Walk over the spaces and tabs that open this line.
        const char *indent_end = line;
        while (indent_end < end
               && (*indent_end == ' ' || *indent_end == '\t'))
        {
            ++indent_end;
        }

        // Only lines with something besides spaces/tabs take part.
        if (indent_end < end && *indent_end != '\n') {
            if (indent_end == line) {
                // This line is not indented at all: nothing can be common.
                return 0;
            }

            const Py_ssize_t indent_len = indent_end - line;
            if (margin == NULL) {
                // First indented line seen: it is the initial candidate.
                margin = line;
                margin_len = indent_len;
                assert(margin_len > 0);
            }
            else {
                /* Shrink the candidate to the prefix it shares with this
                   line's indentation. */
                const Py_ssize_t limit =
                    indent_len < margin_len ? indent_len : margin_len;
                Py_ssize_t shared = 0;
                while (shared < limit && margin[shared] == line[shared]) {
                    ++shared;
                }
                if (shared == 0) {
                    // The indentations already differ at the first byte.
                    return 0;
                }
                margin_len = shared;
            }
        }

        // Skip the remainder of the line and the '\n' ending it, if any.
        const char *eol = indent_end;
        while (eol < end && *eol != '\n') {
            ++eol;
        }
        line = (eol < end) ? eol + 1 : end;
    }

    assert(margin_len >= 0);
    if (margin_len > 0) {
        *output = margin;
    }
    return margin_len;
 }
 /* Dedent a string.
   Strip from every line the longest run of leading spaces and tabs that is
   common to all lines containing something other than spaces and tabs.
   Lines consisting only of spaces and tabs are reduced to their line ending
   (or to nothing for a final line without one).
   Behaviour is expected to be an exact match of `textwrap.dedent`.
   Return a new reference on success (`unicode` itself when there is nothing
   to strip), NULL with exception set on error.
   */
 PyObject *
 _PyUnicode_Dedent(PyObject *unicode)
 {
    Py_ssize_t src_len = 0;
    const char *src = PyUnicode_AsUTF8AndSize(unicode, &src_len);
    if (!src) {
        return NULL;
    }
    assert(src_len >= 0);
    if (src_len == 0) {
        return Py_NewRef(unicode);
    }

    const char *const end = src + src_len;

    // [whitespace_start, whitespace_start + whitespace_len)
    // describes the current longest common leading whitespace
    const char *whitespace_start = NULL;
    Py_ssize_t whitespace_len = search_longest_common_leading_whitespace(
        src, end, &whitespace_start);

    if (whitespace_len == 0) {
        // nothing to strip: hand back the input unchanged
        return Py_NewRef(unicode);
    }

    // now we should trigger a dedent
    // The result is never longer than the input, so src_len bytes suffice.
    char *dest = PyMem_Malloc(src_len);
    if (!dest) {
        PyErr_NoMemory();
        return NULL;
    }
    char *dest_iter = dest;

    for (const char *iter = src; iter < end; ++iter) {
        const char *line_start = iter;
        bool in_leading_space = true;

        // iterate over a line to find the end of a line
        while (iter < end && *iter != '\n') {
            if (in_leading_space && *iter != ' ' && *iter != '\t') {
                in_leading_space = false;
            }
            ++iter;
        }

        // invariant: *iter == '\n' or iter == end
        bool append_newline = iter < end;

        /* A line made only of spaces and tabs contributes nothing but its
           '\n' (if it has one).  Such lines were ignored when the common
           whitespace was computed, so they may be shorter than
           whitespace_len; this must also hold for a final line without a
           trailing '\n', otherwise the length computed below would be
           negative. */
        if (in_leading_space) {
            if (append_newline) {
                *dest_iter++ = '\n';
            }
            continue;
        }

        /* copy [line_start + whitespace_len, iter) to buffer, then
            conditionally append '\n' */

        Py_ssize_t new_line_len = iter - line_start - whitespace_len;
        assert(new_line_len >= 0);
        memcpy(dest_iter, line_start + whitespace_len, new_line_len);

        dest_iter += new_line_len;

        if (append_newline) {
            *dest_iter++ = '\n';
        }
    }

    PyObject *res = PyUnicode_FromStringAndSize(dest, dest_iter - dest);
    PyMem_Free(dest);
    return res;
 }
 static PyMethodDef unicode_methods[] = {
    UNICODE_ENCODE_METHODDEF
    UNICODE_REPLACE_METHODDEF