Mirror of https://github.com/python/cpython.git
Synced 2025-07-07 19:35:27 +00:00

gh-130587: Add hand-written docs for non-OP tokens (GH-130588)

Co-authored-by: Blaise Pabon <blaise@gmail.com>
Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com>
This commit is contained in: parent b8367e7cf3, commit 4bced29a74

4 changed files with 293 additions and 258 deletions
Doc/library/token-list.inc (generated): 330 changed lines
Doc/library/token-list.inc (generated)

@@ -1,230 +1,104 @@

 .. Auto-generated by Tools/build/generate_token.py

The old layout is removed: one ``.. data::`` directive per token, in numeric
order. Tokens without a fixed source string (ENDMARKER, NAME, NUMBER, STRING,
NEWLINE, INDENT, DEDENT, OP, TYPE_IGNORE, TYPE_COMMENT, SOFT_KEYWORD,
FSTRING_START, FSTRING_MIDDLE, FSTRING_END, COMMENT, NL, ERRORTOKEN,
N_TOKENS, NT_OFFSET) had bare directives with no description, while each
operator/delimiter token carried a one-line body, for example:

-.. data:: LPAR
-
-   Token value for ``"("``.

The new layout replaces all of that with a single two-column table of the
operator and delimiter tokens:

+.. list-table::
+   :align: left
+   :header-rows: 1
+
+   * - Token
+     - Value
+   * - .. data:: LPAR
+     - ``"("``
+   * - .. data:: RPAR
+     - ``")"``
+   * - .. data:: LSQB
+     - ``"["``
+   * - .. data:: RSQB
+     - ``"]"``
+   * - .. data:: COLON
+     - ``":"``
+   * - .. data:: COMMA
+     - ``","``
+   * - .. data:: SEMI
+     - ``";"``
+   * - .. data:: PLUS
+     - ``"+"``
+   * - .. data:: MINUS
+     - ``"-"``
+   * - .. data:: STAR
+     - ``"*"``
+   * - .. data:: SLASH
+     - ``"/"``
+   * - .. data:: VBAR
+     - ``"|"``
+   * - .. data:: AMPER
+     - ``"&"``
+   * - .. data:: LESS
+     - ``"<"``
+   * - .. data:: GREATER
+     - ``">"``
+   * - .. data:: EQUAL
+     - ``"="``
+   * - .. data:: DOT
+     - ``"."``
+   * - .. data:: PERCENT
+     - ``"%"``
+   * - .. data:: LBRACE
+     - ``"{"``
+   * - .. data:: RBRACE
+     - ``"}"``
+   * - .. data:: EQEQUAL
+     - ``"=="``
+   * - .. data:: NOTEQUAL
+     - ``"!="``
+   * - .. data:: LESSEQUAL
+     - ``"<="``
+   * - .. data:: GREATEREQUAL
+     - ``">="``
+   * - .. data:: TILDE
+     - ``"~"``
+   * - .. data:: CIRCUMFLEX
+     - ``"^"``
+   * - .. data:: LEFTSHIFT
+     - ``"<<"``
+   * - .. data:: RIGHTSHIFT
+     - ``">>"``
+   * - .. data:: DOUBLESTAR
+     - ``"**"``
+   * - .. data:: PLUSEQUAL
+     - ``"+="``
+   * - .. data:: MINEQUAL
+     - ``"-="``
+   * - .. data:: STAREQUAL
+     - ``"*="``
+   * - .. data:: SLASHEQUAL
+     - ``"/="``
+   * - .. data:: PERCENTEQUAL
+     - ``"%="``
+   * - .. data:: AMPEREQUAL
+     - ``"&="``
+   * - .. data:: VBAREQUAL
+     - ``"|="``
+   * - .. data:: CIRCUMFLEXEQUAL
+     - ``"^="``
+   * - .. data:: LEFTSHIFTEQUAL
+     - ``"<<="``
+   * - .. data:: RIGHTSHIFTEQUAL
+     - ``">>="``
+   * - .. data:: DOUBLESTAREQUAL
+     - ``"**="``
+   * - .. data:: DOUBLESLASH
+     - ``"//"``
+   * - .. data:: DOUBLESLASHEQUAL
+     - ``"//="``
+   * - .. data:: AT
+     - ``"@"``
+   * - .. data:: ATEQUAL
+     - ``"@="``
+   * - .. data:: RARROW
+     - ``"->"``
+   * - .. data:: ELLIPSIS
+     - ``"..."``
+   * - .. data:: COLONEQUAL
+     - ``":="``
+   * - .. data:: EXCLAMATION
+     - ``"!"``
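Each table row corresponds to a module-level constant in the token module, and
``tok_name`` maps numeric values back to names. A minimal sketch (the printed
numeric value is illustrative and can differ between Python versions):

    import token

    # Look up a constant and map its numeric value back to its name.
    print(token.LPAR, token.tok_name[token.LPAR])  # e.g. "7 LPAR"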
Doc/library/token.rst

@@ -19,6 +19,10 @@ change between Python versions.

 The module also provides a mapping from numeric codes to names and some
 functions. The functions mirror definitions in the Python C header files.
+
+Note that a token's value may depend on tokenizer options. For example, a
+``"+"`` token may be reported as either :data:`PLUS` or :data:`OP`, or
+a ``"match"`` token may be either :data:`NAME` or :data:`SOFT_KEYWORD`.

 .. data:: tok_name
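The note added above is easy to see in practice. A minimal sketch using the
tokenize module:

    import io
    import tokenize

    # "+" is reported as the generic OP token; the specific operator is
    # still available through the exact_type attribute.
    source = io.BytesIO(b"1 + 2\n")
    for tok in tokenize.tokenize(source.readline):
        if tok.string == "+":
            print(tok.type == tokenize.OP)          # True
            print(tok.exact_type == tokenize.PLUS)  # True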
@@ -44,25 +48,93 @@ functions. The functions mirror definitions in the Python C header files.

 The token constants are:

-.. include:: token-list.inc
-
-The following token type values aren't used by the C tokenizer but are needed for
-the :mod:`tokenize` module.
+.. data:: NAME
+
+   Token value that indicates an :ref:`identifier <identifiers>`.
+   Note that keywords are also initially tokenized as ``NAME`` tokens.
+
+.. data:: NUMBER
+
+   Token value that indicates a :ref:`numeric literal <numbers>`.
+
+.. data:: STRING
+
+   Token value that indicates a :ref:`string or byte literal <strings>`,
+   excluding :ref:`formatted string literals <f-strings>`.
+   The token string is not interpreted:
+   it includes the surrounding quotation marks and the prefix (if given);
+   backslashes are included literally, without processing escape sequences.
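A minimal sketch showing that the STRING token text is left uninterpreted:

    import io
    import tokenize

    # The prefix, quotes, and backslashes all survive verbatim in tok.string.
    source = io.BytesIO(rb"x = r'a\nb'" + b"\n")
    for tok in tokenize.tokenize(source.readline):
        if tok.type == tokenize.STRING:
            print(tok.string)  # r'a\nb'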
+.. data:: OP
+
+   A generic token value that indicates an
+   :ref:`operator <operators>` or :ref:`delimiter <delimiters>`.
+
+   .. impl-detail::
+
+      This value is only reported by the :mod:`tokenize` module.
+      Internally, the tokenizer uses
+      :ref:`exact token types <token_operators_delimiters>` instead.
+
+.. data:: COMMENT
+   :noindex:
+
+   Token value used to indicate a comment.
+   The parser ignores :data:`!COMMENT` tokens.
+
+.. data:: NEWLINE
+
+   Token value that indicates the end of a :ref:`logical line <logical-lines>`.
+
+.. data:: NL
+   :noindex:
+
-   Token value used to indicate a non-terminating newline. The
-   :data:`NEWLINE` token indicates the end of a logical line of Python code;
-   ``NL`` tokens are generated when a logical line of code is continued over
-   multiple physical lines.
+   Token value used to indicate a non-terminating newline.
+   :data:`!NL` tokens are generated when a logical line of code is continued
+   over multiple physical lines. The parser ignores :data:`!NL` tokens.
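A minimal sketch of the NEWLINE/NL distinction:

    import io
    import tokenize

    # The line break inside the parentheses is NL; the break that ends
    # the logical line is NEWLINE.
    source = io.BytesIO(b"total = (1 +\n         2)\n")
    for tok in tokenize.tokenize(source.readline):
        if tok.type in (tokenize.NL, tokenize.NEWLINE):
            print(tokenize.tok_name[tok.type])
    # Prints NL, then NEWLINE.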
+.. data:: INDENT
+
+   Token value used at the beginning of a :ref:`logical line <logical-lines>`
+   to indicate the start of an :ref:`indented block <indentation>`.
+
+.. data:: DEDENT
+
+   Token value used at the beginning of a :ref:`logical line <logical-lines>`
+   to indicate the end of an :ref:`indented block <indentation>`.
+
+.. data:: FSTRING_START
+
+   Token value used to indicate the beginning of an
+   :ref:`f-string literal <f-strings>`.
+
+   .. impl-detail::
+
+      The token string includes the prefix and the opening quote(s), but none
+      of the contents of the literal.
+
+.. data:: FSTRING_MIDDLE
+
+   Token value used for literal text inside an
+   :ref:`f-string literal <f-strings>`, including format specifications.
+
+   .. impl-detail::
+
+      Replacement fields (that is, the non-literal parts of f-strings) use
+      the same tokens as other expressions, and are delimited by
+      :data:`LBRACE`, :data:`RBRACE`, :data:`EXCLAMATION` and :data:`COLON`
+      tokens.
+
+.. data:: FSTRING_END
+
+   Token value used to indicate the end of an :ref:`f-string <f-strings>`.
+
+   .. impl-detail::
+
+      The token string contains the closing quote(s).
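A minimal sketch of how an f-string is split into these tokens (this assumes
Python 3.12 or later, where the tokenize module emits the f-string token
types):

    import io
    import tokenize

    # f"..." arrives as FSTRING_START, FSTRING_MIDDLE text (including the
    # format spec), ordinary tokens for the replacement field, FSTRING_END.
    source = io.BytesIO(b'f"pi is {pi:.3f}"\n')
    for tok in tokenize.tokenize(source.readline):
        print(tokenize.tok_name[tok.type], repr(tok.string))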
+.. data:: ENDMARKER
+
+   Token value that indicates the end of input.
+   Used in :ref:`top-level grammar rules <top-level>`.

 .. data:: ENCODING
@@ -70,14 +142,63 @@ the :mod:`tokenize` module.

    into text. The first token returned by :func:`tokenize.tokenize` will
    always be an ``ENCODING`` token.

+   .. impl-detail::
+
+      This token type isn't used by the C tokenizer but is needed for
+      the :mod:`tokenize` module.
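A minimal sketch of the ENCODING token's position in the stream:

    import io
    import tokenize

    # The first token always names the detected source encoding.
    source = io.BytesIO(b"pass\n")
    first = next(tokenize.tokenize(source.readline))
    print(tokenize.tok_name[first.type], first.string)  # ENCODING utf-8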
+
+The following token types are not produced by the :mod:`tokenize` module,
+and are defined for special uses in the tokenizer or parser:

 .. data:: TYPE_IGNORE

+   Token value indicating that a ``type: ignore`` comment was recognized.
+   Such tokens are produced instead of regular :data:`COMMENT` tokens only
+   with the :data:`~ast.PyCF_TYPE_COMMENTS` flag.

 .. data:: TYPE_COMMENT
+   :noindex:

-   Token value indicating that a type comment was recognized. Such
-   tokens are only produced when :func:`ast.parse` is invoked with
-   ``type_comments=True``.
+   Token value indicating that a type comment was recognized.
+   Such tokens are produced instead of regular :data:`COMMENT` tokens only
+   with the :data:`~ast.PyCF_TYPE_COMMENTS` flag.
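A minimal sketch of the higher-level effect: :func:`ast.parse` only records
type comments when asked, and ``type_comments=True`` sets the
:data:`~ast.PyCF_TYPE_COMMENTS` flag under the hood:

    import ast

    # Without type_comments=True the "# type:" comment is discarded.
    tree = ast.parse("x = []  # type: list[int]\n", type_comments=True)
    print(tree.body[0].type_comment)  # list[int]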
+.. data:: SOFT_KEYWORD
+
+   Token value indicating a :ref:`soft keyword <soft-keywords>`.
+
+   The tokenizer never produces this value.
+   To check for a soft keyword, pass a :data:`NAME` token's string to
+   :func:`keyword.issoftkeyword`.
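A minimal sketch of the recommended soft-keyword check:

    import keyword

    # Soft keywords arrive as ordinary NAME tokens; test the string itself.
    print(keyword.issoftkeyword("match"))  # True
    print(keyword.issoftkeyword("total"))  # False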
+.. data:: ERRORTOKEN
+
+   Token value used to indicate wrong input.
+
+   The :mod:`tokenize` module generally indicates errors by
+   raising exceptions instead of emitting this token.
+   It can also emit tokens such as :data:`OP` or :data:`NAME` with strings
+   that are later rejected by the parser.
+
+
+.. _token_operators_delimiters:
+
+The remaining tokens represent specific :ref:`operators <operators>` and
+:ref:`delimiters <delimiters>`.
+(The :mod:`tokenize` module reports these as :data:`OP`; see ``exact_type``
+in the :mod:`tokenize` documentation for details.)
+
+.. include:: token-list.inc
+
+
+The following non-token constants are provided:
+
+.. data:: N_TOKENS
+
+   The number of token types defined in this module.
+
+.. NT_OFFSET is deliberately undocumented; if you need it you should be
+   reading the source

 .. data:: EXACT_TOKEN_TYPES
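A minimal sketch using :data:`!EXACT_TOKEN_TYPES`, the mapping from
operator/delimiter strings to exact token types (the example operator is
illustrative):

    import token

    # Map an operator's source string to its exact token type.
    print(token.EXACT_TOKEN_TYPES["**="] == token.DOUBLESTAREQUAL)  # True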
@@ -102,6 +223,9 @@ the :mod:`tokenize` module.

    to support parsing older Python versions for :func:`ast.parse` with
    ``feature_version`` set to 6 or lower).

+.. versionchanged:: 3.12
+   Added :data:`EXCLAMATION`.
+
 .. versionchanged:: 3.13
    Removed :data:`!AWAIT` and :data:`!ASYNC` tokens again.
Doc/reference/toplevel_components.rst

@@ -69,7 +69,7 @@ All input read from non-interactive files has the same form:

 .. grammar-snippet::
    :group: python-grammar

-   file_input: (NEWLINE | `statement`)*
+   file_input: (NEWLINE | `statement`)* ENDMARKER

 This syntax is used in the following situations:

@@ -90,7 +90,7 @@ Input in interactive mode is parsed using the following grammar:

 .. grammar-snippet::
    :group: python-grammar

-   interactive_input: [`stmt_list`] NEWLINE | `compound_stmt` NEWLINE
+   interactive_input: [`stmt_list`] NEWLINE | `compound_stmt` NEWLINE | ENDMARKER

 Note that a (top-level) compound statement must be followed by a blank line in
 interactive mode; this is needed to help the parser detect the end of the input.

@@ -107,5 +107,7 @@ Expression input

 :func:`eval` is used for expression input. It ignores leading whitespace. The
 string argument to :func:`eval` must have the following form:

-.. productionlist:: python-grammar
-   eval_input: `expression_list` NEWLINE*
+.. grammar-snippet::
+   :group: python-grammar
+
+   eval_input: `expression_list` NEWLINE* ENDMARKER
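The practical effect of the ``eval_input`` rule is visible from Python itself.
A minimal sketch:

    # eval() compiles its argument against the eval_input rule: leading
    # whitespace is ignored and trailing NEWLINEs are allowed.
    print(eval("  1 + 1\n\n"))  # 2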
Tools/build/generate_token.py

@@ -1,10 +1,17 @@

 #! /usr/bin/env python3
 # This script generates token related files from Grammar/Tokens:
 #
-#   Doc/library/token-list.inc
-#   Include/token.h
-#   Parser/token.c
-#   Lib/token.py
+#   make_rst:
+#       Doc/library/token-list.inc
+#       Doc/library/token.rst (checked, not generated)
+#   make_h:
+#       Include/token.h
+#   make_c:
+#       Parser/token.c
+#   make_py:
+#       Lib/token.py
+
+import re

 SCRIPT_NAME = 'Tools/build/generate_token.py'

@@ -199,23 +206,51 @@ def make_c(infile, outfile='Parser/token.c'):

 token_inc_template = f"""\
 .. {AUTO_GENERATED_BY_SCRIPT}
-%s
-.. data:: N_TOKENS
-
-.. data:: NT_OFFSET
+.. list-table::
+   :align: left
+   :header-rows: 1
+
+   * - Token
+     - Value
+%s
 """

-def make_rst(infile, outfile='Doc/library/token-list.inc'):
+def make_rst(infile, outfile='Doc/library/token-list.inc',
+             rstfile='Doc/library/token.rst'):
     tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
     tok_to_string = {value: s for s, value in string_to_tok.items()}

+    needs_handwritten_doc = set()
+
     names = []
-    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
-        names.append('.. data:: %s' % (name,))
+    for value, name in enumerate(tok_names):
         if value in tok_to_string:
-            names.append('')
-            names.append('   Token value for ``"%s"``.' % tok_to_string[value])
-        names.append('')
+            assert name.isupper()
+            names.append(f'   * - .. data:: {name}')
+            names.append(f'     - ``"{tok_to_string[value]}"``')
+        else:
+            needs_handwritten_doc.add(name)
+
+    has_handwritten_doc = set()
+    with open(rstfile) as fileobj:
+        tokendef_re = re.compile(r'.. data:: ([0-9A-Z_]+)\s*')
+        for line in fileobj:
+            if match := tokendef_re.fullmatch(line):
+                has_handwritten_doc.add(match[1])
+
+    # Exclude non-token constants in token.py
+    has_handwritten_doc -= {'N_TOKENS', 'NT_OFFSET', 'EXACT_TOKEN_TYPES'}
+
+    if needs_handwritten_doc != has_handwritten_doc:
+        message_parts = [f'ERROR: {rstfile} does not document all tokens!']
+        undocumented = needs_handwritten_doc - has_handwritten_doc
+        extra = has_handwritten_doc - needs_handwritten_doc
+        if undocumented:
+            message_parts.append(f'Undocumented tokens: {undocumented}')
+        if extra:
+            message_parts.append(f'Documented nonexistent tokens: {extra}')
+        exit('\n'.join(message_parts))

     if update_file(outfile, token_inc_template % '\n'.join(names)):
         print("%s regenerated from %s" % (outfile, infile))
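The new consistency check hinges on ``tokendef_re.fullmatch`` picking up
exactly the top-level ``.. data::`` directives in token.rst. A small
standalone sketch of that behavior, outside the script:

    import re

    tokendef_re = re.compile(r'.. data:: ([0-9A-Z_]+)\s*')

    # fullmatch() must consume the whole line: the trailing \s* absorbs the
    # newline, and the two unescaped dots happen to match the literal "..".
    for line in ('.. data:: FSTRING_START\n', '   * - .. data:: LPAR\n'):
        match = tokendef_re.fullmatch(line)
        print(match[1] if match else None)
    # Prints FSTRING_START, then None (indented table rows don't match).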