[3.13] gh-41872: Fix quick extraction of module docstrings from a file in pydoc (GH-127520) (GH-128620)

It now supports docstrings with single quotes, escape sequences,
raw string literals, and other Python syntax.

(cherry picked from commit 474e419792)

Co-authored-by: Srinivas Reddy Thatiparthy (తాటిపర్తి శ్రీనివాస్  రెడ్డి) <thatiparthysreenivas@gmail.com>
Co-authored-by: Éric <merwok@netwok.org>
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit was authored by Miss Islington (bot) and committed via GitHub on 2025-01-08 at 13:21:15 +01:00.
parent c0ba0bc547
commit dda55ccf1a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 104 additions and 15 deletions

View file

@@ -54,6 +54,7 @@ Richard Chamberlain, for the first implementation of textdoc.
# the current directory is changed with os.chdir(), an incorrect
# path will be displayed.
import ast
import __future__
import builtins
import importlib._bootstrap
@@ -381,21 +382,29 @@ def ispackage(path):
return False
def source_synopsis(file):
line = file.readline()
while line[:1] == '#' or not line.strip():
line = file.readline()
if not line: break
line = line.strip()
if line[:4] == 'r"""': line = line[1:]
if line[:3] == '"""':
line = line[3:]
if line[-1:] == '\\': line = line[:-1]
while not line.strip():
line = file.readline()
if not line: break
result = line.split('"""')[0].strip()
else: result = None
return result
"""Return the one-line summary of a file object, if present"""
string = ''
try:
tokens = tokenize.generate_tokens(file.readline)
for tok_type, tok_string, _, _, _ in tokens:
if tok_type == tokenize.STRING:
string += tok_string
elif tok_type == tokenize.NEWLINE:
with warnings.catch_warnings():
# Ignore the "invalid escape sequence" warning.
warnings.simplefilter("ignore", SyntaxWarning)
docstring = ast.literal_eval(string)
if not isinstance(docstring, str):
return None
return docstring.strip().split('\n')[0].strip()
elif tok_type == tokenize.OP and tok_string in ('(', ')'):
string += tok_string
elif tok_type not in (tokenize.COMMENT, tokenize.NL, tokenize.ENCODING):
return None
except (tokenize.TokenError, UnicodeDecodeError, SyntaxError):
return None
return None
def synopsis(filename, cache={}):
"""Get the one-line summary out of a module file."""