gh-41872: Fix quick extraction of module docstrings from a file in pydoc (GH-127520)

It now supports docstrings with single quotes, escape sequences,
raw string literals, and other Python syntax.

Co-authored-by: Éric <merwok@netwok.org>
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
Srinivas Reddy Thatiparthy (తాటిపర్తి శ్రీనివాస్ రెడ్డి) 2025-01-08 16:02:07 +05:30 committed by GitHub
parent cdfb8bc93a
commit 474e419792
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 104 additions and 15 deletions

View file

@ -53,6 +53,7 @@ Richard Chamberlain, for the first implementation of textdoc.
# the current directory is changed with os.chdir(), an incorrect
# path will be displayed.
import ast
import __future__
import builtins
import importlib._bootstrap
@ -384,21 +385,29 @@ def ispackage(path):
return False
def source_synopsis(file):
    """Return the one-line summary of a file object, if present.

    *file* is a text-mode file object; only its readline() method is used.
    The first logical line is tokenized, so the docstring may use any
    quoting style, a raw-string prefix, escape sequences, implicit
    concatenation of adjacent literals, or enclosing parentheses.

    Return the stripped first line of the docstring.  Return None if the
    file does not start with a plain string literal (for example a bytes
    literal, an expression, or any other statement) or if the source
    cannot be tokenized, decoded or evaluated.
    """
    pieces = []
    try:
        tokens = tokenize.generate_tokens(file.readline)
        for tok_type, tok_string, _, _, _ in tokens:
            if tok_type == tokenize.STRING or (
                    tok_type == tokenize.OP and tok_string in ('(', ')')):
                # Collect the literal(s) and any grouping parentheses so
                # that the whole first statement can be evaluated at once.
                pieces.append(tok_string)
            elif tok_type == tokenize.NEWLINE:
                # End of the first logical line: evaluate what was collected.
                with warnings.catch_warnings():
                    # Ignore the "invalid escape sequence" warning.
                    warnings.simplefilter("ignore", SyntaxWarning)
                    docstring = ast.literal_eval(''.join(pieces))
                if not isinstance(docstring, str):
                    return None
                return docstring.strip().split('\n')[0].strip()
            elif tok_type not in (tokenize.COMMENT, tokenize.NL, tokenize.ENCODING):
                # Any other token before the end of the first statement
                # means the file does not start with a docstring.
                return None
    except (tokenize.TokenError, SyntaxError, ValueError):
        # ValueError covers both UnicodeDecodeError raised while reading
        # the file and the "malformed node or string" error that
        # ast.literal_eval() raises for input that parses but is not a
        # literal, such as '("a")("b")'.
        return None
    return None
def synopsis(filename, cache={}):
"""Get the one-line summary out of a module file."""

View file

@ -4,6 +4,7 @@ import sys
import contextlib
import importlib.util
import inspect
import io
import pydoc
import py_compile
import keyword
@ -899,6 +900,82 @@ class PydocDocTest(unittest.TestCase):
synopsis = pydoc.synopsis(TESTFN, {})
self.assertEqual(synopsis, 'line 1: h\xe9')
def test_source_synopsis(self):
    # Run pydoc.source_synopsis() on one source and compare the result.
    # str sources are wrapped in a StringIO; bytes sources are decoded
    # through a TextIOWrapper using the given encoding.
    def check(source, expected, encoding=None):
        if not isinstance(source, str):
            stream = io.TextIOWrapper(io.BytesIO(source), encoding=encoding)
        else:
            stream = StringIO(source)
        with stream:
            actual = pydoc.source_synopsis(stream)
            self.assertEqual(actual, expected)

    # (source, expected synopsis, encoding) triples, checked in order.
    cases = [
        ('"""Single line docstring."""',
         'Single line docstring.', None),
        ('"""First line of docstring.\nSecond line.\nThird line."""',
         'First line of docstring.', None),
        ('"""First line of docstring.\\nSecond line.\\nThird line."""',
         'First line of docstring.', None),
        ('""" Whitespace around docstring. """',
         'Whitespace around docstring.', None),
        ('import sys\n"""No docstring"""',
         None, None),
        (' \n"""Docstring after empty line."""',
         'Docstring after empty line.', None),
        ('# Comment\n"""Docstring after comment."""',
         'Docstring after comment.', None),
        (' # Indented comment\n"""Docstring after comment."""',
         'Docstring after comment.', None),
        ('""""""',  # Empty docstring
         '', None),
        ('',  # Empty file
         None, None),
        ('"""Embedded\0null byte"""',
         None, None),
        ('"""Embedded null byte"""\0',
         None, None),
        ('"""Café and résumé."""',
         'Café and résumé.', None),
        ("'''Triple single quotes'''",
         'Triple single quotes', None),
        ('"Single double quotes"',
         'Single double quotes', None),
        ("'Single single quotes'",
         'Single single quotes', None),
        ('"""split\\\nline"""',
         'splitline', None),
        ('"""Unrecognized escape \\sequence"""',
         'Unrecognized escape \\sequence', None),
        ('"""Invalid escape seq\\uence"""',
         None, None),
        ('r"""Raw \\stri\\ng"""',
         'Raw \\stri\\ng', None),
        ('b"""Bytes literal"""',
         None, None),
        ('f"""f-string"""',
         None, None),
        ('"""Concatenated""" \\\n"string" \'literals\'',
         'Concatenatedstringliterals', None),
        ('"""String""" + """expression"""',
         None, None),
        ('("""In parentheses""")',
         'In parentheses', None),
        ('("""Multiple lines """\n"""in parentheses""")',
         'Multiple lines in parentheses', None),
        ('()',  # tuple
         None, None),
        (b'# coding: iso-8859-15\n"""\xa4uro sign"""',
         '€uro sign', 'iso-8859-15'),
        (b'"""\xa4"""',  # Decoding error
         None, 'utf-8'),
    ]
    for source, expected, encoding in cases:
        check(source, expected, encoding=encoding)

    # The function must also work with a real file on disk.
    with tempfile.NamedTemporaryFile(mode='w+', encoding='utf-8') as real_file:
        real_file.write('"""Real file test."""\n')
        real_file.flush()
        real_file.seek(0)
        synopsis_line = pydoc.source_synopsis(real_file)
        self.assertEqual(synopsis_line, "Real file test.")
@requires_docstrings
def test_synopsis_sourceless(self):
os = import_helper.import_fresh_module('os')

View file

@ -0,0 +1,3 @@
Fix quick extraction of module docstrings from a file in :mod:`pydoc`.
It now supports docstrings with single quotes, escape sequences,
raw string literals, and other Python syntax.