mirror of
https://github.com/python/cpython.git
synced 2025-08-08 10:58:51 +00:00
[3.12] gh-41872: Fix quick extraction of module docstrings from a file in pydoc (GH-127520) (GH-128621)
It now supports docstrings with single quotes, escape sequences,
raw string literals, and other Python syntax.
(cherry picked from commit 474e419792)
Co-authored-by: Srinivas Reddy Thatiparthy (తాటిపర్తి శ్రీనివాస్ రెడ్డి) <thatiparthysreenivas@gmail.com>
Co-authored-by: Éric <merwok@netwok.org>
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
parent d8890fb82d
commit b69b9da9b5
3 changed files with 104 additions and 15 deletions
39
Lib/pydoc.py
39
Lib/pydoc.py
|
@ -54,6 +54,7 @@ Richard Chamberlain, for the first implementation of textdoc.
|
||||||
# the current directory is changed with os.chdir(), an incorrect
|
# the current directory is changed with os.chdir(), an incorrect
|
||||||
# path will be displayed.
|
# path will be displayed.
|
||||||
|
|
||||||
|
import ast
|
||||||
import __future__
|
import __future__
|
||||||
import builtins
|
import builtins
|
||||||
import importlib._bootstrap
|
import importlib._bootstrap
|
||||||
|
@ -346,21 +347,29 @@ def ispackage(path):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def source_synopsis(file):
|
def source_synopsis(file):
|
||||||
line = file.readline()
|
"""Return the one-line summary of a file object, if present"""
|
||||||
while line[:1] == '#' or not line.strip():
|
|
||||||
line = file.readline()
|
string = ''
|
||||||
if not line: break
|
try:
|
||||||
line = line.strip()
|
tokens = tokenize.generate_tokens(file.readline)
|
||||||
if line[:4] == 'r"""': line = line[1:]
|
for tok_type, tok_string, _, _, _ in tokens:
|
||||||
if line[:3] == '"""':
|
if tok_type == tokenize.STRING:
|
||||||
line = line[3:]
|
string += tok_string
|
||||||
if line[-1:] == '\\': line = line[:-1]
|
elif tok_type == tokenize.NEWLINE:
|
||||||
while not line.strip():
|
with warnings.catch_warnings():
|
||||||
line = file.readline()
|
# Ignore the "invalid escape sequence" warning.
|
||||||
if not line: break
|
warnings.simplefilter("ignore", SyntaxWarning)
|
||||||
result = line.split('"""')[0].strip()
|
docstring = ast.literal_eval(string)
|
||||||
else: result = None
|
if not isinstance(docstring, str):
|
||||||
return result
|
return None
|
||||||
|
return docstring.strip().split('\n')[0].strip()
|
||||||
|
elif tok_type == tokenize.OP and tok_string in ('(', ')'):
|
||||||
|
string += tok_string
|
||||||
|
elif tok_type not in (tokenize.COMMENT, tokenize.NL, tokenize.ENCODING):
|
||||||
|
return None
|
||||||
|
except (tokenize.TokenError, UnicodeDecodeError, SyntaxError):
|
||||||
|
return None
|
||||||
|
return None
|
||||||
|
|
||||||
def synopsis(filename, cache={}):
|
def synopsis(filename, cache={}):
|
||||||
"""Get the one-line summary out of a module file."""
|
"""Get the one-line summary out of a module file."""
|
||||||
|
|
|
@ -3,6 +3,7 @@ import sys
|
||||||
import contextlib
|
import contextlib
|
||||||
import importlib.util
|
import importlib.util
|
||||||
import inspect
|
import inspect
|
||||||
|
import io
|
||||||
import pydoc
|
import pydoc
|
||||||
import py_compile
|
import py_compile
|
||||||
import keyword
|
import keyword
|
||||||
|
@ -841,6 +842,82 @@ class PydocDocTest(unittest.TestCase):
|
||||||
synopsis = pydoc.synopsis(TESTFN, {})
|
synopsis = pydoc.synopsis(TESTFN, {})
|
||||||
self.assertEqual(synopsis, 'line 1: h\xe9')
|
self.assertEqual(synopsis, 'line 1: h\xe9')
|
||||||
|
|
||||||
|
def test_source_synopsis(self):
|
||||||
|
def check(source, expected, encoding=None):
|
||||||
|
if isinstance(source, str):
|
||||||
|
source_file = StringIO(source)
|
||||||
|
else:
|
||||||
|
source_file = io.TextIOWrapper(io.BytesIO(source), encoding=encoding)
|
||||||
|
with source_file:
|
||||||
|
result = pydoc.source_synopsis(source_file)
|
||||||
|
self.assertEqual(result, expected)
|
||||||
|
|
||||||
|
check('"""Single line docstring."""',
|
||||||
|
'Single line docstring.')
|
||||||
|
check('"""First line of docstring.\nSecond line.\nThird line."""',
|
||||||
|
'First line of docstring.')
|
||||||
|
check('"""First line of docstring.\\nSecond line.\\nThird line."""',
|
||||||
|
'First line of docstring.')
|
||||||
|
check('""" Whitespace around docstring. """',
|
||||||
|
'Whitespace around docstring.')
|
||||||
|
check('import sys\n"""No docstring"""',
|
||||||
|
None)
|
||||||
|
check(' \n"""Docstring after empty line."""',
|
||||||
|
'Docstring after empty line.')
|
||||||
|
check('# Comment\n"""Docstring after comment."""',
|
||||||
|
'Docstring after comment.')
|
||||||
|
check(' # Indented comment\n"""Docstring after comment."""',
|
||||||
|
'Docstring after comment.')
|
||||||
|
check('""""""', # Empty docstring
|
||||||
|
'')
|
||||||
|
check('', # Empty file
|
||||||
|
None)
|
||||||
|
check('"""Embedded\0null byte"""',
|
||||||
|
None)
|
||||||
|
check('"""Embedded null byte"""\0',
|
||||||
|
None)
|
||||||
|
check('"""Café and résumé."""',
|
||||||
|
'Café and résumé.')
|
||||||
|
check("'''Triple single quotes'''",
|
||||||
|
'Triple single quotes')
|
||||||
|
check('"Single double quotes"',
|
||||||
|
'Single double quotes')
|
||||||
|
check("'Single single quotes'",
|
||||||
|
'Single single quotes')
|
||||||
|
check('"""split\\\nline"""',
|
||||||
|
'splitline')
|
||||||
|
check('"""Unrecognized escape \\sequence"""',
|
||||||
|
'Unrecognized escape \\sequence')
|
||||||
|
check('"""Invalid escape seq\\uence"""',
|
||||||
|
None)
|
||||||
|
check('r"""Raw \\stri\\ng"""',
|
||||||
|
'Raw \\stri\\ng')
|
||||||
|
check('b"""Bytes literal"""',
|
||||||
|
None)
|
||||||
|
check('f"""f-string"""',
|
||||||
|
None)
|
||||||
|
check('"""Concatenated""" \\\n"string" \'literals\'',
|
||||||
|
'Concatenatedstringliterals')
|
||||||
|
check('"""String""" + """expression"""',
|
||||||
|
None)
|
||||||
|
check('("""In parentheses""")',
|
||||||
|
'In parentheses')
|
||||||
|
check('("""Multiple lines """\n"""in parentheses""")',
|
||||||
|
'Multiple lines in parentheses')
|
||||||
|
check('()', # tuple
|
||||||
|
None)
|
||||||
|
check(b'# coding: iso-8859-15\n"""\xa4uro sign"""',
|
||||||
|
'€uro sign', encoding='iso-8859-15')
|
||||||
|
check(b'"""\xa4"""', # Decoding error
|
||||||
|
None, encoding='utf-8')
|
||||||
|
|
||||||
|
with tempfile.NamedTemporaryFile(mode='w+', encoding='utf-8') as temp_file:
|
||||||
|
temp_file.write('"""Real file test."""\n')
|
||||||
|
temp_file.flush()
|
||||||
|
temp_file.seek(0)
|
||||||
|
result = pydoc.source_synopsis(temp_file)
|
||||||
|
self.assertEqual(result, "Real file test.")
|
||||||
|
|
||||||
@requires_docstrings
|
@requires_docstrings
|
||||||
def test_synopsis_sourceless(self):
|
def test_synopsis_sourceless(self):
|
||||||
os = import_helper.import_fresh_module('os')
|
os = import_helper.import_fresh_module('os')
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
Fix quick extraction of module docstrings from a file in :mod:`pydoc`.
|
||||||
|
It now supports docstrings with single quotes, escape sequences,
|
||||||
|
raw string literals, and other Python syntax.
|
Loading…
Add table
Add a link
Reference in a new issue