[3.12] gh-41872: Fix quick extraction of module docstrings from a file in pydoc (GH-127520) (GH-128621)

It now supports docstrings with single quotes, escape sequences, raw string literals, and other Python syntax.

(cherry picked from commit 474e419792)

Co-authored-by: Srinivas Reddy Thatiparthy (తాటిపర్తి శ్రీనివాస్ రెడ్డి) <thatiparthysreenivas@gmail.com>
Co-authored-by: Éric <merwok@netwok.org>
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
2025-08-08 10:58:51 +00:00 · 2025-01-08 13:21:30 +01:00 · 2025-01-08 13:21:30 +01:00 · b69b9da9b5
commit b69b9da9b5
parent d8890fb82d
3 changed files with 104 additions and 15 deletions
--- a/Lib/pydoc.py
+++ b/Lib/pydoc.py
@ -54,6 +54,7 @@ Richard Chamberlain, for the first implementation of textdoc.
 #     the current directory is changed with os.chdir(), an incorrect
 #     path will be displayed.
 import ast
 import __future__
 import builtins
 import importlib._bootstrap
@ -346,21 +347,29 @@ def ispackage(path):
    return False
 def source_synopsis(file):
     """Return the one-line summary of a file object, if present.

     *file* is a text-mode file object containing Python source.  The
     source is tokenized, so the module docstring may use single or triple
     quotes, raw string prefixes, escape sequences, implicit concatenation
     of adjacent literals, and surrounding parentheses.

     Return the first line of the docstring with surrounding whitespace
     stripped.  Return None if the first logical line is not made up solely
     of string literals and parentheses, if it does not evaluate to a str,
     or if the source cannot be tokenized, decoded or evaluated.
     """
     pieces = []
     try:
         tokens = tokenize.generate_tokens(file.readline)
         for tok_type, tok_string, _, _, _ in tokens:
             if tok_type == tokenize.STRING:
                 pieces.append(tok_string)
             elif tok_type == tokenize.OP and tok_string in ('(', ')'):
                 # Keep parentheses so a parenthesized docstring still
                 # evaluates to a plain string.
                 pieces.append(tok_string)
             elif tok_type == tokenize.NEWLINE:
                 # End of the first logical line: evaluate what was
                 # collected.
                 with warnings.catch_warnings():
                     # Ignore the "invalid escape sequence" warning.
                     warnings.simplefilter("ignore", SyntaxWarning)
                     docstring = ast.literal_eval(''.join(pieces))
                 if not isinstance(docstring, str):
                     # E.g. a bytes literal or an empty tuple "()".
                     return None
                 return docstring.strip().split('\n')[0].strip()
             elif tok_type not in (tokenize.COMMENT, tokenize.NL,
                                   tokenize.ENCODING):
                 # Any other token means the first statement is not a
                 # bare string literal, so there is no docstring.
                 return None
     except (tokenize.TokenError, SyntaxError, ValueError):
         # ValueError covers UnicodeDecodeError (its subclass) as well as
         # the "malformed node" error ast.literal_eval() raises for input
         # such as '"a"("b")', which consists only of strings and
         # parentheses but is a call expression rather than a literal.
         return None
     return None
 def synopsis(filename, cache={}):
    """Get the one-line summary out of a module file."""
--- a/Lib/test/test_pydoc/test_pydoc.py
+++ b/Lib/test/test_pydoc/test_pydoc.py
@ -3,6 +3,7 @@ import sys
 import contextlib
 import importlib.util
 import inspect
 import io
 import pydoc
 import py_compile
 import keyword
@ -841,6 +842,82 @@ class PydocDocTest(unittest.TestCase):
            synopsis = pydoc.synopsis(TESTFN, {})
            self.assertEqual(synopsis, 'line 1: h\xe9')
    def test_source_synopsis(self):
        # Exercise pydoc.source_synopsis() on in-memory sources and on a
        # real temporary file.
        def check(source, expected, encoding=None):
            # str sources are wrapped in a StringIO; bytes sources are read
            # through a TextIOWrapper that decodes them with *encoding*.
            if isinstance(source, str):
                source_file = StringIO(source)
            else:
                source_file = io.TextIOWrapper(io.BytesIO(source), encoding=encoding)
            with source_file:
                result = pydoc.source_synopsis(source_file)
                self.assertEqual(result, expected)

        # Triple-quoted docstrings: only the first line is returned, with
        # surrounding whitespace stripped.  The third case uses an escaped
        # "\n" inside the literal rather than a physical line break.
        check('"""Single line docstring."""',
              'Single line docstring.')
        check('"""First line of docstring.\nSecond line.\nThird line."""',
              'First line of docstring.')
        check('"""First line of docstring.\\nSecond line.\\nThird line."""',
              'First line of docstring.')
        check('"""  Whitespace around docstring.  """',
              'Whitespace around docstring.')
        # A string that follows another statement is not a docstring.
        check('import sys\n"""No docstring"""',
              None)
        # Blank lines and comments before the docstring are skipped.
        check('  \n"""Docstring after empty line."""',
              'Docstring after empty line.')
        check('# Comment\n"""Docstring after comment."""',
              'Docstring after comment.')
        check('  # Indented comment\n"""Docstring after comment."""',
              'Docstring after comment.')
        # Degenerate inputs.
        check('""""""', # Empty docstring
              '')
        check('', # Empty file
              None)
        # A NUL character inside or after the literal gives None.
        check('"""Embedded\0null byte"""',
              None)
        check('"""Embedded null byte"""\0',
              None)
        # Non-ASCII text in a str source is returned unchanged.
        check('"""Café and résumé."""',
              'Café and résumé.')
        # Other quoting styles.
        check("'''Triple single quotes'''",
              'Triple single quotes')
        check('"Single double quotes"',
              'Single double quotes')
        check("'Single single quotes'",
              'Single single quotes')
        # Escape handling: a backslash-newline joins the lines, an
        # unrecognized escape is kept as written, an invalid \u escape
        # gives None, and a raw string keeps its backslashes.
        check('"""split\\\nline"""',
              'splitline')
        check('"""Unrecognized escape \\sequence"""',
              'Unrecognized escape \\sequence')
        check('"""Invalid escape seq\\uence"""',
              None)
        check('r"""Raw \\stri\\ng"""',
              'Raw \\stri\\ng')
        # Bytes literals and f-strings are not docstrings.
        check('b"""Bytes literal"""',
              None)
        check('f"""f-string"""',
              None)
        # Adjacent literals are concatenated, but an explicit "+"
        # expression is rejected.
        check('"""Concatenated""" \\\n"string" \'literals\'',
              'Concatenatedstringliterals')
        check('"""String""" + """expression"""',
              None)
        # Parenthesized strings are accepted; an empty tuple is not.
        check('("""In parentheses""")',
              'In parentheses')
        check('("""Multiple lines """\n"""in parentheses""")',
              'Multiple lines in parentheses')
        check('()', # tuple
              None)
        # Bytes sources: decoded with the given encoding; bytes that are
        # invalid in that encoding give None.
        check(b'# coding: iso-8859-15\n"""\xa4uro sign"""',
              '€uro sign', encoding='iso-8859-15')
        check(b'"""\xa4"""', # Decoding error
              None, encoding='utf-8')

        # Same check against a real file on disk, rewound after writing.
        with tempfile.NamedTemporaryFile(mode='w+', encoding='utf-8') as temp_file:
            temp_file.write('"""Real file test."""\n')
            temp_file.flush()
            temp_file.seek(0)
            result = pydoc.source_synopsis(temp_file)
            self.assertEqual(result, "Real file test.")
    @requires_docstrings
    def test_synopsis_sourceless(self):
        os = import_helper.import_fresh_module('os')
--- a/Misc/NEWS.d/next/Library/2024-12-17-15-23-40.gh-issue-41872.31LjKY.rst
+++ b/Misc/NEWS.d/next/Library/2024-12-17-15-23-40.gh-issue-41872.31LjKY.rst
@ -0,0 +1,3 @@
 Fix quick extraction of module docstrings from a file in :mod:`pydoc`.
 It now supports docstrings with single quotes, escape sequences,
 raw string literals, and other Python syntax.