cpython/Parser/Python.asdl
Ivan Levkivskyi 9932a22897
bpo-33416: Add end positions to Python AST (GH-11605)
The majority of this PR is tediously passing `end_lineno` and `end_col_offset` everywhere. Here are non-trivial points:
* It is not possible to reconstruct end positions in AST "on the fly", some information is lost after an AST node is constructed, so we need two more attributes for every AST node `end_lineno` and `end_col_offset`.
* I add end position information to both CST and AST.  Although it may be technically possible to avoid adding end positions to CST, the code becomes more cumbersome and less efficient.
* Since the end position is not known for non-leaf CST nodes while the next token is added, this requires a bit of extra care (see `_PyNode_FinalizeEndPos`). Unless I made some mistake, the algorithm should be linear.
* For statements, I "trim" the end position of suites to not include the terminal newlines and dedent (this seems to be what people would expect), for example in
  ```python
  class C:
      pass

  pass
  ```
  the end line and end column for the class definition is (2, 8).
* For `end_col_offset` I use the common Python convention for indexing, for example for `pass` the `end_col_offset` is 4 (not 3), so that `[0:4]` gives one the source code that corresponds to the node.
* I added a helper function `ast.get_source_segment()`, to get source text segment corresponding to a given AST node. It is also useful for testing.

An (inevitable) downside of this PR is that AST now takes almost 25% more memory. I think however it is probably justified by the benefits.
2019-01-22 11:18:22 +00:00

124 lines
4.9 KiB
Text

-- ASDL's 5 builtin types are:
-- identifier, int, string, object, constant
module Python
{
mod = Module(stmt* body)
| Interactive(stmt* body)
| Expression(expr body)
-- not really an actual node but useful in Jython's typesystem.
| Suite(stmt* body)
stmt = FunctionDef(identifier name, arguments args,
stmt* body, expr* decorator_list, expr? returns)
| AsyncFunctionDef(identifier name, arguments args,
stmt* body, expr* decorator_list, expr? returns)
| ClassDef(identifier name,
expr* bases,
keyword* keywords,
stmt* body,
expr* decorator_list)
| Return(expr? value)
| Delete(expr* targets)
| Assign(expr* targets, expr value)
| AugAssign(expr target, operator op, expr value)
-- 'simple' indicates that we annotate simple name without parens
| AnnAssign(expr target, expr annotation, expr? value, int simple)
-- use 'orelse' because else is a keyword in target languages
| For(expr target, expr iter, stmt* body, stmt* orelse)
| AsyncFor(expr target, expr iter, stmt* body, stmt* orelse)
| While(expr test, stmt* body, stmt* orelse)
| If(expr test, stmt* body, stmt* orelse)
| With(withitem* items, stmt* body)
| AsyncWith(withitem* items, stmt* body)
| Raise(expr? exc, expr? cause)
| Try(stmt* body, excepthandler* handlers, stmt* orelse, stmt* finalbody)
| Assert(expr test, expr? msg)
| Import(alias* names)
| ImportFrom(identifier? module, alias* names, int? level)
| Global(identifier* names)
| Nonlocal(identifier* names)
| Expr(expr value)
| Pass | Break | Continue
-- XXX Jython will be different
-- col_offset is the byte offset in the utf8 string the parser uses
attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset)
-- BoolOp() can use left & right?
expr = BoolOp(boolop op, expr* values)
| BinOp(expr left, operator op, expr right)
| UnaryOp(unaryop op, expr operand)
| Lambda(arguments args, expr body)
| IfExp(expr test, expr body, expr orelse)
| Dict(expr* keys, expr* values)
| Set(expr* elts)
| ListComp(expr elt, comprehension* generators)
| SetComp(expr elt, comprehension* generators)
| DictComp(expr key, expr value, comprehension* generators)
| GeneratorExp(expr elt, comprehension* generators)
-- the grammar constrains where yield expressions can occur
| Await(expr value)
| Yield(expr? value)
| YieldFrom(expr value)
-- need sequences for compare to distinguish between
-- x < 4 < 3 and (x < 4) < 3
| Compare(expr left, cmpop* ops, expr* comparators)
| Call(expr func, expr* args, keyword* keywords)
| FormattedValue(expr value, int? conversion, expr? format_spec)
| JoinedStr(expr* values)
| Constant(constant value)
-- the following expression can appear in assignment context
| Attribute(expr value, identifier attr, expr_context ctx)
| Subscript(expr value, slice slice, expr_context ctx)
| Starred(expr value, expr_context ctx)
| Name(identifier id, expr_context ctx)
| List(expr* elts, expr_context ctx)
| Tuple(expr* elts, expr_context ctx)
-- col_offset is the byte offset in the utf8 string the parser uses
attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset)
expr_context = Load | Store | Del | AugLoad | AugStore | Param
slice = Slice(expr? lower, expr? upper, expr? step)
| ExtSlice(slice* dims)
| Index(expr value)
boolop = And | Or
operator = Add | Sub | Mult | MatMult | Div | Mod | Pow | LShift
| RShift | BitOr | BitXor | BitAnd | FloorDiv
unaryop = Invert | Not | UAdd | USub
cmpop = Eq | NotEq | Lt | LtE | Gt | GtE | Is | IsNot | In | NotIn
comprehension = (expr target, expr iter, expr* ifs, int is_async)
excepthandler = ExceptHandler(expr? type, identifier? name, stmt* body)
attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset)
arguments = (arg* args, arg? vararg, arg* kwonlyargs, expr* kw_defaults,
arg? kwarg, expr* defaults)
arg = (identifier arg, expr? annotation)
attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset)
-- keyword arguments supplied to call (NULL identifier for **kwargs)
keyword = (identifier? arg, expr value)
-- import name with optional 'as' alias.
alias = (identifier name, identifier? asname)
withitem = (expr context_expr, expr? optional_vars)
}