gh-104400: pygettext: use an AST parser instead of a tokenizer (GH-104402)

This greatly simplifies the code and fixes many corner cases.
This commit is contained in:
Tomas R. 2025-02-11 12:51:42 +01:00 committed by GitHub
parent 1da412e574
commit 374abded07
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 174 additions and 251 deletions

View file

@@ -1,3 +1,5 @@
"""Module docstring"""
# Test docstring extraction
from gettext import gettext as _
@@ -10,10 +12,10 @@ def test(x):
# Leading empty line
def test2(x):
"""docstring""" # XXX This should be extracted but isn't.
"""docstring"""
# XXX Multiline docstrings should be cleaned with `inspect.cleandoc`.
# Multiline docstrings are cleaned with `inspect.cleandoc`.
def test3(x):
"""multiline
docstring
@@ -27,15 +29,15 @@ def test4(x):
def test5(x):
"""Hello, {}!""".format("world!") # XXX This should not be extracted.
"""Hello, {}!""".format("world!") # This should not be extracted.
# Nested docstrings
def test6(x):
def inner(y):
"""nested docstring""" # XXX This should be extracted but isn't.
"""nested docstring"""
class Outer:
class Inner:
"nested class docstring" # XXX This should be extracted but isn't.
"nested class docstring"