gh-62090: Simplify argparse usage formatting (GH-105039)

Rationale ========= argparse performs a complex formatting of the usage for argument grouping and for line wrapping to fit the terminal width. This formatting has been a constant source of bugs for at least 10 years (see linked issues below) where defensive assertion errors are triggered or brackets and paranthesis are not properly handeled. Problem ======= The current implementation of argparse usage formatting relies on regular expressions to group arguments usage only to separate them again later with another set of regular expressions. This is a complex and error prone approach that caused all the issues linked below. Special casing certain argument formats has not solved the problem. The following are some of the most common issues: - empty `metavar` - mutually exclusive groups with `SUPPRESS`ed arguments - metavars with whitespace - metavars with brackets or paranthesis Solution ======== The following two comments summarize the solution: - https://github.com/python/cpython/issues/82091#issuecomment-1093832187 - https://github.com/python/cpython/issues/77048#issuecomment-1093776995 Mainly, the solution is to rewrite the usage formatting to avoid the group-then-separate approach. Instead, the usage parts are kept separate and only joined together at the end. This allows for a much simpler implementation that is easier to understand and maintain. It avoids the regular expressions approach and fixes the corresponding issues. This closes the following GitHub issues: - #62090 - #62549 - #77048 - #82091 - #89743 - #96310 - #98666 These PRs become obsolete: - #15372 - #96311
2025-10-17 04:08:28 +00:00 · 2024-05-07 10:28:51 +03:00 · 2024-05-07 10:28:51 +03:00 · de1428f8c2
commit de1428f8c2
parent 49258efada
3 changed files with 164 additions and 72 deletions
--- a/Lib/argparse.py
+++ b/Lib/argparse.py
@ -328,17 +328,8 @@ class HelpFormatter(object):
            if len(prefix) + len(usage) > text_width:
                # break usage into wrappable parts
-                part_regexp = (
+                opt_parts = self._get_actions_usage_parts(optionals, groups)
-                    r'\(.*?\)+(?=\s|$)|'
+                pos_parts = self._get_actions_usage_parts(positionals, groups)
                    r'\[.*?\]+(?=\s|$)|'
                    r'\S+'
                )
                opt_usage = format(optionals, groups)
                pos_usage = format(positionals, groups)
                opt_parts = _re.findall(part_regexp, opt_usage)
                pos_parts = _re.findall(part_regexp, pos_usage)
                assert ' '.join(opt_parts) == opt_usage
                assert ' '.join(pos_parts) == pos_usage
                # helper for wrapping lines
                def get_lines(parts, indent, prefix=None):
@ -391,6 +382,9 @@ class HelpFormatter(object):
        return '%s%s\n\n' % (prefix, usage)
    def _format_actions_usage(self, actions, groups):
        return ' '.join(self._get_actions_usage_parts(actions, groups))
    def _get_actions_usage_parts(self, actions, groups):
        # find group indices and identify actions in groups
        group_actions = set()
        inserts = {}
@ -398,58 +392,26 @@ class HelpFormatter(object):
            if not group._group_actions:
                raise ValueError(f'empty group {group}')
            if all(action.help is SUPPRESS for action in group._group_actions):
                continue
            try:
                start = actions.index(group._group_actions[0])
            except ValueError:
                continue
            else:
-                group_action_count = len(group._group_actions)
+                end = start + len(group._group_actions)
                end = start + group_action_count
                if actions[start:end] == group._group_actions:
-
+                    group_actions.update(group._group_actions)
-                    suppressed_actions_count = 0
+                    inserts[start, end] = group
                    for action in group._group_actions:
                        group_actions.add(action)
                        if action.help is SUPPRESS:
                            suppressed_actions_count += 1
                    exposed_actions_count = group_action_count - suppressed_actions_count
                    if not exposed_actions_count:
                        continue
                    if not group.required:
                        if start in inserts:
                            inserts[start] += ' ['
                        else:
                            inserts[start] = '['
                        if end in inserts:
                            inserts[end] += ']'
                        else:
                            inserts[end] = ']'
                    elif exposed_actions_count > 1:
                        if start in inserts:
                            inserts[start] += ' ('
                        else:
                            inserts[start] = '('
                        if end in inserts:
                            inserts[end] += ')'
                        else:
                            inserts[end] = ')'
                    for i in range(start + 1, end):
                        inserts[i] = '|'
        # collect all actions format strings
        parts = []
-        for i, action in enumerate(actions):
+        for action in actions:
            # suppressed arguments are marked with None
            # remove | separators for suppressed arguments
            if action.help is SUPPRESS:
-                parts.append(None)
+                part = None
                if inserts.get(i) == '|':
                    inserts.pop(i)
                elif inserts.get(i + 1) == '|':
                    inserts.pop(i + 1)
            # produce all arg strings
            elif not action.option_strings:
@ -461,9 +423,6 @@ class HelpFormatter(object):
                    if part[0] == '[' and part[-1] == ']':
                        part = part[1:-1]
                # add the action string to the list
                parts.append(part)
            # produce the first way to invoke the option in brackets
            else:
                option_string = action.option_strings[0]
@ -484,26 +443,23 @@ class HelpFormatter(object):
                if not action.required and action not in group_actions:
                    part = '[%s]' % part
-                # add the action string to the list
+            # add the action string to the list
-                parts.append(part)
+            parts.append(part)
-        # insert things at the necessary indices
+        # group mutually exclusive actions
-        for i in sorted(inserts, reverse=True):
+        for start, end in sorted(inserts, reverse=True):
-            parts[i:i] = [inserts[i]]
+            group = inserts[start, end]
            group_parts = [item for item in parts[start:end] if item is not None]
            if group.required:
                open, close = "()" if len(group_parts) > 1 else ("", "")
            else:
                open, close = "[]"
            parts[start] = open + " | ".join(group_parts) + close
            for i in range(start + 1, end):
                parts[i] = None
-        # join all the action items with spaces
+        # return the usage parts
-        text = ' '.join([item for item in parts if item is not None])
+        return [item for item in parts if item is not None]
        # clean up separators for mutually exclusive groups
        open = r'[\[(]'
        close = r'[\])]'
        text = _re.sub(r'(%s) ' % open, r'\1', text)
        text = _re.sub(r' (%s)' % close, r'\1', text)
        text = _re.sub(r'%s *%s' % (open, close), r'', text)
        text = text.strip()
        # return the text
        return text
    def _format_text(self, text):
        if '%(prog)' in text:
--- a/Lib/test/test_argparse.py
+++ b/Lib/test/test_argparse.py
@ -4255,6 +4255,140 @@ class TestHelpUsagePositionalsOnlyWrap(HelpTestCase):
    version = ''
 class TestHelpUsageMetavarsSpacesParentheses(HelpTestCase):
    # https://github.com/python/cpython/issues/62549
    # https://github.com/python/cpython/issues/89743
    parser_signature = Sig(prog='PROG')
    argument_signatures = [
        Sig('-n1', metavar='()', help='n1'),
        Sig('-o1', metavar='(1, 2)', help='o1'),
        Sig('-u1', metavar=' (uu) ', help='u1'),
        Sig('-v1', metavar='( vv )', help='v1'),
        Sig('-w1', metavar='(w)w', help='w1'),
        Sig('-x1', metavar='x(x)', help='x1'),
        Sig('-y1', metavar='yy)', help='y1'),
        Sig('-z1', metavar='(zz', help='z1'),
        Sig('-n2', metavar='[]', help='n2'),
        Sig('-o2', metavar='[1, 2]', help='o2'),
        Sig('-u2', metavar=' [uu] ', help='u2'),
        Sig('-v2', metavar='[ vv ]', help='v2'),
        Sig('-w2', metavar='[w]w', help='w2'),
        Sig('-x2', metavar='x[x]', help='x2'),
        Sig('-y2', metavar='yy]', help='y2'),
        Sig('-z2', metavar='[zz', help='z2'),
    ]
    usage = '''\
        usage: PROG [-h] [-n1 ()] [-o1 (1, 2)] [-u1  (uu) ] [-v1 ( vv )] [-w1 (w)w]
                    [-x1 x(x)] [-y1 yy)] [-z1 (zz] [-n2 []] [-o2 [1, 2]] [-u2  [uu] ]
                    [-v2 [ vv ]] [-w2 [w]w] [-x2 x[x]] [-y2 yy]] [-z2 [zz]
        '''
    help = usage + '''\
        options:
          -h, --help  show this help message and exit
          -n1 ()      n1
          -o1 (1, 2)  o1
          -u1  (uu)   u1
          -v1 ( vv )  v1
          -w1 (w)w    w1
          -x1 x(x)    x1
          -y1 yy)     y1
          -z1 (zz     z1
          -n2 []      n2
          -o2 [1, 2]  o2
          -u2  [uu]   u2
          -v2 [ vv ]  v2
          -w2 [w]w    w2
          -x2 x[x]    x2
          -y2 yy]     y2
          -z2 [zz     z2
        '''
    version = ''
 class TestHelpUsageNoWhitespaceCrash(TestCase):
    def test_all_suppressed_mutex_followed_by_long_arg(self):
        # https://github.com/python/cpython/issues/62090
        # https://github.com/python/cpython/issues/96310
        parser = argparse.ArgumentParser(prog='PROG')
        mutex = parser.add_mutually_exclusive_group()
        mutex.add_argument('--spam', help=argparse.SUPPRESS)
        parser.add_argument('--eggs-eggs-eggs-eggs-eggs-eggs')
        usage = textwrap.dedent('''\
        usage: PROG [-h]
                    [--eggs-eggs-eggs-eggs-eggs-eggs EGGS_EGGS_EGGS_EGGS_EGGS_EGGS]
        ''')
        self.assertEqual(parser.format_usage(), usage)
    def test_newline_in_metavar(self):
        # https://github.com/python/cpython/issues/77048
        mapping = ['123456', '12345', '12345', '123']
        parser = argparse.ArgumentParser('11111111111111')
        parser.add_argument('-v', '--verbose',
                            help='verbose mode', action='store_true')
        parser.add_argument('targets',
                            help='installation targets',
                            nargs='+',
                            metavar='\n'.join(mapping))
        usage = textwrap.dedent('''\
        usage: 11111111111111 [-h] [-v]
                              123456
        12345
        12345
        123 [123456
        12345
        12345
        123 ...]
        ''')
        self.assertEqual(parser.format_usage(), usage)
    def test_empty_metavar_required_arg(self):
        # https://github.com/python/cpython/issues/82091
        parser = argparse.ArgumentParser(prog='PROG')
        parser.add_argument('--nil', metavar='', required=True)
        parser.add_argument('--a', metavar='A' * 70)
        usage = (
            'usage: PROG [-h] --nil \n'
            '            [--a AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'
            'AAAAAAAAAAAAAAAAAAAAAAA]\n'
        )
        self.assertEqual(parser.format_usage(), usage)
    def test_all_suppressed_mutex_with_optional_nargs(self):
        # https://github.com/python/cpython/issues/98666
        parser = argparse.ArgumentParser(prog='PROG')
        mutex = parser.add_mutually_exclusive_group()
        mutex.add_argument(
            '--param1',
            nargs='?', const='default', metavar='NAME', help=argparse.SUPPRESS)
        mutex.add_argument(
            '--param2',
            nargs='?', const='default', metavar='NAME', help=argparse.SUPPRESS)
        usage = 'usage: PROG [-h]\n'
        self.assertEqual(parser.format_usage(), usage)
    def test_nested_mutex_groups(self):
        parser = argparse.ArgumentParser(prog='PROG')
        g = parser.add_mutually_exclusive_group()
        g.add_argument("--spam")
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', DeprecationWarning)
            gg = g.add_mutually_exclusive_group()
        gg.add_argument("--hax")
        gg.add_argument("--hox", help=argparse.SUPPRESS)
        gg.add_argument("--hex")
        g.add_argument("--eggs")
        parser.add_argument("--num")
        usage = textwrap.dedent('''\
        usage: PROG [-h] [--spam SPAM | [--hax HAX | --hex HEX] | --eggs EGGS]
                    [--num NUM]
        ''')
        self.assertEqual(parser.format_usage(), usage)
 class TestHelpVariableExpansion(HelpTestCase):
    """Test that variables are expanded properly in help messages"""
--- a/Misc/NEWS.d/next/Library/2023-05-28-11-25-18.gh-issue-62090.opAhDn.rst
+++ b/Misc/NEWS.d/next/Library/2023-05-28-11-25-18.gh-issue-62090.opAhDn.rst
@ -0,0 +1,2 @@
 Fix assertion errors caused by whitespace in metavars or ``SUPPRESS``-ed groups
 in :mod:`argparse` by simplifying usage formatting. Patch by Ali Hamdan.
		`@ -0,0 +1,2 @@`
							Fix assertion errors caused by whitespace in metavars or ``SUPPRESS``-ed groups
							in :mod:`argparse` by simplifying usage formatting. Patch by Ali Hamdan.