gh-104400: pygettext: use an AST parser instead of a tokenizer (GH-104402)

This greatly simplifies the code and fixes many corner cases.
2025-10-17 12:18:23 +00:00 · 2025-02-11 12:51:42 +01:00 · 2025-02-11 12:51:42 +01:00 · 374abded07
commit 374abded07
parent 1da412e574
7 changed files with 174 additions and 251 deletions
--- a/Lib/test/test_tools/i18n_data/docstrings.py
+++ b/Lib/test/test_tools/i18n_data/docstrings.py
@@ -1,3 +1,5 @@
+"""Module docstring"""
+
 # Test docstring extraction
 from gettext import gettext as _

@@ -10,10 +12,10 @@ def test(x):
 # Leading empty line
 def test2(x):

-    """docstring"""  # XXX This should be extracted but isn't.
+    """docstring"""


-# XXX Multiline docstrings should be cleaned with `inspect.cleandoc`.
+# Multiline docstrings are cleaned with `inspect.cleandoc`.
 def test3(x):
    """multiline
    docstring
@@ -27,15 +29,15 @@ def test4(x):


 def test5(x):
-    """Hello, {}!""".format("world!")  # XXX This should not be extracted.
+    """Hello, {}!""".format("world!")  # This should not be extracted.


 # Nested docstrings
 def test6(x):
    def inner(y):
-        """nested docstring"""  # XXX This should be extracted but isn't.
+        """nested docstring"""


 class Outer:
    class Inner:
-        "nested class docstring"  # XXX This should be extracted but isn't.
+        "nested class docstring"