Add support for help end IPython escape commands (#6358)

## Summary

This PR adds support for a stricter version of help end escape
commands[^1] in the parser. By stricter, I mean that the escape tokens
appear only at the end of the command and there are no tokens at the
start (for example, `foo?`, as opposed to `?foo` or `%timeit foo`, where
the escape token comes first). This makes it difficult to implement in
the lexer without doing a lot of lookahead or keeping track of previous
tokens.

Now that we're adding this in the parser, the lexer needs to recognize
and emit a new token for `?`. So, a `Question` token is added, which is
recognized only in `Jupyter` mode.
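
To make the lexer change concrete, here is a minimal sketch (not code from this PR) of how the new token surfaces, assuming the parser crate's public `lex` helper and the `Mode`/`Tok` names shown in the diff below; the exact crate and module paths are assumptions:

```rust
// Hedged sketch: exercises the new `Question` token in Jupyter mode.
// The `ruff_python_parser` paths below are assumptions; adjust as needed.
use ruff_python_parser::{lexer::lex, Mode, Tok};

fn main() {
    // In `Mode::Jupyter`, the trailing `?` is lexed as `Tok::Question`.
    let tokens: Vec<Tok> = lex("foo?", Mode::Jupyter)
        .filter_map(Result::ok)
        .map(|(tok, _range)| tok)
        .collect();
    assert!(tokens.contains(&Tok::Question));

    // In `Mode::Module`, `?` is not a recognized token, so the lexer
    // reports an error instead of emitting `Question`.
    assert!(lex("foo?", Mode::Module).any(|result| result.is_err()));
}
```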

The conditions applied are the same as the ones in the original
implementation in the IPython codebase (which is a regex); a sketch of
accepted and rejected forms follows the list:
* There can be either one or two question marks at the end.
* The node before the question mark(s) can be a `Name`, an `Attribute`,
a `Subscript` (only with integer constants in the slice position), or any
combination of the three.
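
As referenced above, here is a hedged, test-style sketch of which commands the new grammar rule accepts and rejects in `Mode::Jupyter`. The `parse(source, mode, path)` entry point and crate path are assumptions based on the existing parser API, not code from this PR:

```rust
// Hedged sketch of accepted/rejected help end escape commands.
// `ruff_python_parser::parse` and its signature are assumptions.
use ruff_python_parser::{parse, Mode};

fn main() {
    // Accepted: Name / Attribute / Subscript (integer index) chains,
    // followed by one or two `?`.
    for source in ["foo?", "foo.bar??", "foo.bar[0].baz[1]?"] {
        assert!(parse(source, Mode::Jupyter, "<embedded>").is_ok());
    }

    // Rejected: more than two `?`, a call before the suffix, or a
    // non-integer subscript.
    for source in ["foo???", "foo()?", "foo[1:2]?"] {
        assert!(parse(source, Mode::Jupyter, "<embedded>").is_err());
    }
}
```

This mirrors the restrictions enforced by `unparse_expr` in the grammar action below: anything other than a `Name`, `Attribute`, or integer-indexed `Subscript` chain is rejected with a `LexicalError`.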

## Test Plan

Added test cases for various combinations of the possible nodes in the
command value position and updated the snapshots.

fixes: #6359
fixes: #5030 (This is the final piece)

[^1]: https://github.com/astral-sh/ruff/pull/6272#issue-1833094281
Dhruv Manilawala 2023-08-09 10:28:52 +05:30 committed by GitHub
parent 887a47cad9
commit e257c5af32
6 changed files with 40274 additions and 34786 deletions


@@ -780,6 +780,9 @@ impl<'source> Lexer<'source> {
self.lex_magic_command(kind)
}
'?' if self.mode == Mode::Jupyter => Tok::Question,
'/' => {
if self.cursor.eat_char('=') {
Tok::SlashEqual


@@ -1180,6 +1180,15 @@ foo = %foo \
% foo
foo = %foo # comment
# Help end line magics
foo?
foo.bar??
foo.bar.baz?
foo[0]??
foo[0][1]?
foo.bar[0].baz[1]??
foo.bar[0].baz[2].egg??
"#
.trim(),
Mode::Jupyter,


@@ -14,6 +14,7 @@ use crate::{
string::parse_strings,
token::{self, StringKind},
};
use lalrpop_util::ParseError;
grammar(mode: Mode);
@@ -89,6 +90,7 @@ SmallStatement: ast::Stmt = {
AssertStatement,
TypeAliasStatement,
LineMagicStatement,
HelpEndLineMagic,
};
PassStatement: ast::Stmt = {
@@ -366,6 +368,78 @@ LineMagicExpr: ast::Expr = {
}
}
HelpEndLineMagic: ast::Stmt = {
// We are more permissive than the original implementation because we allow whitespace
// between the expression and the suffix, while the IPython implementation doesn't allow it.
// For example, `foo ?` would be valid in our case but invalid in IPython.
<location:@L> <e:Expression<"All">> <suffix:("?")+> <end_location:@R> =>? {
fn unparse_expr(expr: &ast::Expr, buffer: &mut String) -> Result<(), LexicalError> {
match expr {
ast::Expr::Name(ast::ExprName { id, .. }) => {
buffer.push_str(id.as_str());
},
ast::Expr::Subscript(ast::ExprSubscript { value, slice, range, .. }) => {
let ast::Expr::Constant(ast::ExprConstant { value: ast::Constant::Int(integer), .. }) = slice.as_ref() else {
return Err(LexicalError {
error: LexicalErrorType::OtherError("only integer constants are allowed in Subscript expressions in help end escape command".to_string()),
location: range.start(),
});
};
unparse_expr(value, buffer)?;
buffer.push('[');
buffer.push_str(&format!("{}", integer));
buffer.push(']');
},
ast::Expr::Attribute(ast::ExprAttribute { value, attr, .. }) => {
unparse_expr(value, buffer)?;
buffer.push('.');
buffer.push_str(attr.as_str());
},
_ => {
return Err(LexicalError {
error: LexicalErrorType::OtherError("only Name, Subscript and Attribute expressions are allowed in help end escape command".to_string()),
location: expr.range().start(),
});
}
}
Ok(())
}
if mode != Mode::Jupyter {
return Err(ParseError::User {
error: LexicalError {
error: LexicalErrorType::OtherError("IPython escape commands are only allowed in Jupyter mode".to_string()),
location,
},
});
}
let kind = match suffix.len() {
1 => MagicKind::Help,
2 => MagicKind::Help2,
_ => {
return Err(ParseError::User {
error: LexicalError {
error: LexicalErrorType::OtherError("maximum of 2 `?` tokens are allowed in help end escape command".to_string()),
location,
},
});
}
};
let mut value = String::new();
unparse_expr(&e, &mut value)?;
Ok(ast::Stmt::LineMagic(
ast::StmtLineMagic {
kind,
value,
range: (location..end_location).into()
}
))
}
}
CompoundStatement: ast::Stmt = {
MatchStatement,
IfStatement,
@@ -1732,6 +1806,7 @@ extern {
Dedent => token::Tok::Dedent,
StartModule => token::Tok::StartModule,
StartExpression => token::Tok::StartExpression,
"?" => token::Tok::Question,
"+" => token::Tok::Plus,
"-" => token::Tok::Minus,
"~" => token::Tok::Tilde,

File diff suppressed because it is too large


@@ -4,7 +4,7 @@ expression: parse_ast
---
Module(
ModModule {
range: 0..803,
range: 0..919,
body: [
Expr(
StmtExpr {
@@ -346,6 +346,55 @@ Module(
),
},
),
LineMagic(
StmtLineMagic {
range: 828..832,
kind: Help,
value: "foo",
},
),
LineMagic(
StmtLineMagic {
range: 833..842,
kind: Help2,
value: "foo.bar",
},
),
LineMagic(
StmtLineMagic {
range: 843..855,
kind: Help,
value: "foo.bar.baz",
},
),
LineMagic(
StmtLineMagic {
range: 856..864,
kind: Help2,
value: "foo[0]",
},
),
LineMagic(
StmtLineMagic {
range: 865..875,
kind: Help,
value: "foo[0][1]",
},
),
LineMagic(
StmtLineMagic {
range: 876..895,
kind: Help2,
value: "foo.bar[0].baz[1]",
},
),
LineMagic(
StmtLineMagic {
range: 896..919,
kind: Help2,
value: "foo.bar[0].baz[2].egg",
},
),
],
},
)


@@ -64,6 +64,8 @@ pub enum Tok {
/// Token value for a dedent.
Dedent,
EndOfFile,
/// Token value for a question mark `?`. This is only used in [`Mode::Jupyter`].
Question,
/// Token value for a left parenthesis `(`.
Lpar,
/// Token value for a right parenthesis `)`.
@@ -240,6 +242,7 @@ impl fmt::Display for Tok {
StartModule => f.write_str("StartProgram"),
StartExpression => f.write_str("StartExpression"),
EndOfFile => f.write_str("EOF"),
Question => f.write_str("'?'"),
Lpar => f.write_str("'('"),
Rpar => f.write_str("')'"),
Lsqb => f.write_str("'['"),
@@ -461,6 +464,8 @@ pub enum TokenKind {
/// Token value for a dedent.
Dedent,
EndOfFile,
/// Token value for a question mark `?`.
Question,
/// Token value for a left parenthesis `(`.
Lpar,
/// Token value for a right parenthesis `)`.
@@ -783,6 +788,7 @@ impl TokenKind {
Tok::Indent => TokenKind::Indent,
Tok::Dedent => TokenKind::Dedent,
Tok::EndOfFile => TokenKind::EndOfFile,
Tok::Question => TokenKind::Question,
Tok::Lpar => TokenKind::Lpar,
Tok::Rpar => TokenKind::Rpar,
Tok::Lsqb => TokenKind::Lsqb,