Remove source path from parser errors (#9322)

## Summary

I always found it odd that we had to pass this in, since it's really
higher-level context for the error. The awkwardness is further evidenced
by the fact that we pass in fake values everywhere (even outside of
tests). The source path isn't actually used to display the error; it's
only accessed elsewhere to _re-display_ the error in certain cases. This
PR modifies to instead pass the path directly in those cases.
This commit is contained in:
Charlie Marsh 2023-12-30 16:33:05 -04:00 committed by GitHub
parent eb9a1bc5f1
commit e80260a3c5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
45 changed files with 623 additions and 714 deletions

View file

@ -12,8 +12,6 @@
//! [Abstract Syntax Tree]: https://en.wikipedia.org/wiki/Abstract_syntax_tree
//! [`Mode`]: crate::mode
use std::{fmt, iter};
use itertools::Itertools;
pub(super) use lalrpop_util::ParseError as LalrpopError;
use ruff_text_size::{Ranged, TextRange, TextSize};
@ -51,19 +49,19 @@ use ruff_python_ast::{
///
/// print(foo())
/// "#;
/// let program = parser::parse_program(source, "<embedded>");
/// let program = parser::parse_program(source);
/// assert!(program.is_ok());
/// ```
pub fn parse_program(source: &str, source_path: &str) -> Result<ModModule, ParseError> {
pub fn parse_program(source: &str) -> Result<ModModule, ParseError> {
let lexer = lex(source, Mode::Module);
match parse_tokens(lexer, source, Mode::Module, source_path)? {
match parse_tokens(lexer, source, Mode::Module)? {
Mod::Module(m) => Ok(m),
Mod::Expression(_) => unreachable!("Mode::Module doesn't return other variant"),
}
}
pub fn parse_suite(source: &str, source_path: &str) -> Result<Suite, ParseError> {
parse_program(source, source_path).map(|m| m.body)
pub fn parse_suite(source: &str) -> Result<Suite, ParseError> {
parse_program(source).map(|m| m.body)
}
/// Parses a single Python expression.
@ -77,14 +75,14 @@ pub fn parse_suite(source: &str, source_path: &str) -> Result<Suite, ParseError>
///
/// ```
/// use ruff_python_parser as parser;
/// let expr = parser::parse_expression("1 + 2", "<embedded>");
/// let expr = parser::parse_expression("1 + 2");
///
/// assert!(expr.is_ok());
///
/// ```
pub fn parse_expression(source: &str, source_path: &str) -> Result<Expr, ParseError> {
pub fn parse_expression(source: &str) -> Result<Expr, ParseError> {
let lexer = lex(source, Mode::Expression);
match parse_tokens(lexer, source, Mode::Expression, source_path)? {
match parse_tokens(lexer, source, Mode::Expression)? {
Mod::Expression(expression) => Ok(*expression.body),
Mod::Module(_m) => unreachable!("Mode::Expression doesn't return other variant"),
}
@ -104,16 +102,12 @@ pub fn parse_expression(source: &str, source_path: &str) -> Result<Expr, ParseEr
/// use ruff_python_parser::{parse_expression_starts_at};
/// # use ruff_text_size::TextSize;
///
/// let expr = parse_expression_starts_at("1 + 2", "<embedded>", TextSize::from(400));
/// let expr = parse_expression_starts_at("1 + 2", TextSize::from(400));
/// assert!(expr.is_ok());
/// ```
pub fn parse_expression_starts_at(
source: &str,
source_path: &str,
offset: TextSize,
) -> Result<Expr, ParseError> {
pub fn parse_expression_starts_at(source: &str, offset: TextSize) -> Result<Expr, ParseError> {
let lexer = lex_starts_at(source, Mode::Module, offset);
match parse_tokens(lexer, source, Mode::Expression, source_path)? {
match parse_tokens(lexer, source, Mode::Expression)? {
Mod::Expression(expression) => Ok(*expression.body),
Mod::Module(_m) => unreachable!("Mode::Expression doesn't return other variant"),
}
@ -133,7 +127,7 @@ pub fn parse_expression_starts_at(
/// ```
/// use ruff_python_parser::{Mode, parse};
///
/// let expr = parse("1 + 2", Mode::Expression, "<embedded>");
/// let expr = parse("1 + 2", Mode::Expression);
/// assert!(expr.is_ok());
/// ```
///
@ -148,7 +142,7 @@ pub fn parse_expression_starts_at(
/// def greet(self):
/// print("Hello, world!")
/// "#;
/// let program = parse(source, Mode::Module, "<embedded>");
/// let program = parse(source, Mode::Module);
/// assert!(program.is_ok());
/// ```
///
@ -162,11 +156,11 @@ pub fn parse_expression_starts_at(
/// ?str.replace
/// !ls
/// "#;
/// let program = parse(source, Mode::Ipython, "<embedded>");
/// let program = parse(source, Mode::Ipython);
/// assert!(program.is_ok());
/// ```
pub fn parse(source: &str, mode: Mode, source_path: &str) -> Result<Mod, ParseError> {
parse_starts_at(source, mode, source_path, TextSize::default())
pub fn parse(source: &str, mode: Mode) -> Result<Mod, ParseError> {
parse_starts_at(source, mode, TextSize::default())
}
/// Parse the given Python source code using the specified [`Mode`] and [`TextSize`].
@ -189,17 +183,12 @@ pub fn parse(source: &str, mode: Mode, source_path: &str) -> Result<Mod, ParseEr
///
/// print(fib(42))
/// "#;
/// let program = parse_starts_at(source, Mode::Module, "<embedded>", TextSize::from(0));
/// let program = parse_starts_at(source, Mode::Module, TextSize::from(0));
/// assert!(program.is_ok());
/// ```
pub fn parse_starts_at(
source: &str,
mode: Mode,
source_path: &str,
offset: TextSize,
) -> Result<Mod, ParseError> {
pub fn parse_starts_at(source: &str, mode: Mode, offset: TextSize) -> Result<Mod, ParseError> {
let lxr = lexer::lex_starts_at(source, mode, offset);
parse_tokens(lxr, source, mode, source_path)
parse_tokens(lxr, source, mode)
}
/// Parse an iterator of [`LexResult`]s using the specified [`Mode`].
@ -215,14 +204,13 @@ pub fn parse_starts_at(
/// use ruff_python_parser::{lexer::lex, Mode, parse_tokens};
///
/// let source = "1 + 2";
/// let expr = parse_tokens(lex(source, Mode::Expression), source, Mode::Expression, "<embedded>");
/// let expr = parse_tokens(lex(source, Mode::Expression), source, Mode::Expression);
/// assert!(expr.is_ok());
/// ```
pub fn parse_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source: &str,
mode: Mode,
source_path: &str,
) -> Result<Mod, ParseError> {
let lxr = lxr.into_iter();
@ -230,7 +218,6 @@ pub fn parse_tokens(
lxr.filter_ok(|(tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline)),
source,
mode,
source_path,
)
}
@ -239,35 +226,33 @@ pub fn parse_ok_tokens(
lxr: impl IntoIterator<Item = Spanned>,
source: &str,
mode: Mode,
source_path: &str,
) -> Result<Mod, ParseError> {
let lxr = lxr
.into_iter()
.filter(|(tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
let marker_token = (Tok::start_marker(mode), TextRange::default());
let lexer = iter::once(marker_token)
let lexer = std::iter::once(marker_token)
.chain(lxr)
.map(|(t, range)| (range.start(), t, range.end()));
python::TopParser::new()
.parse(source, mode, lexer)
.map_err(|e| parse_error_from_lalrpop(e, source_path))
.map_err(parse_error_from_lalrpop)
}
fn parse_filtered_tokens(
lxr: impl IntoIterator<Item = LexResult>,
source: &str,
mode: Mode,
source_path: &str,
) -> Result<Mod, ParseError> {
let marker_token = (Tok::start_marker(mode), TextRange::default());
let lexer = iter::once(Ok(marker_token)).chain(lxr);
let lexer = std::iter::once(Ok(marker_token)).chain(lxr);
python::TopParser::new()
.parse(
source,
mode,
lexer.map_ok(|(t, range)| (range.start(), t, range.end())),
)
.map_err(|e| parse_error_from_lalrpop(e, source_path))
.map_err(parse_error_from_lalrpop)
}
/// Represents represent errors that occur during parsing and are
@ -277,7 +262,6 @@ fn parse_filtered_tokens(
pub struct ParseError {
pub error: ParseErrorType,
pub offset: TextSize,
pub source_path: String,
}
impl std::ops::Deref for ParseError {
@ -294,7 +278,7 @@ impl std::error::Error for ParseError {
}
}
impl fmt::Display for ParseError {
impl std::fmt::Display for ParseError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(
f,
@ -324,28 +308,20 @@ pub enum ParseErrorType {
impl std::error::Error for ParseErrorType {}
// Convert `lalrpop_util::ParseError` to our internal type
fn parse_error_from_lalrpop(
err: LalrpopError<TextSize, Tok, LexicalError>,
source_path: &str,
) -> ParseError {
let source_path = source_path.to_owned();
fn parse_error_from_lalrpop(err: LalrpopError<TextSize, Tok, LexicalError>) -> ParseError {
match err {
// TODO: Are there cases where this isn't an EOF?
LalrpopError::InvalidToken { location } => ParseError {
error: ParseErrorType::Eof,
offset: location,
source_path,
},
LalrpopError::ExtraToken { token } => ParseError {
error: ParseErrorType::ExtraToken(token.1),
offset: token.0,
source_path,
},
LalrpopError::User { error } => ParseError {
error: ParseErrorType::Lexical(error.error),
offset: error.location,
source_path,
},
LalrpopError::UnrecognizedToken { token, expected } => {
// Hacky, but it's how CPython does it. See PyParser_AddToken,
@ -354,7 +330,6 @@ fn parse_error_from_lalrpop(
ParseError {
error: ParseErrorType::UnrecognizedToken(token.1, expected),
offset: token.0,
source_path,
}
}
LalrpopError::UnrecognizedEof { location, expected } => {
@ -364,13 +339,11 @@ fn parse_error_from_lalrpop(
ParseError {
error: ParseErrorType::Lexical(LexicalErrorType::IndentationError),
offset: location,
source_path,
}
} else {
ParseError {
error: ParseErrorType::Eof,
offset: location,
source_path,
}
}
}
@ -629,63 +602,63 @@ mod tests {
#[test]
fn test_parse_empty() {
let parse_ast = parse_suite("", "<test>").unwrap();
let parse_ast = parse_suite("").unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_string() {
let source = "'Hello world'";
let parse_ast = parse_suite(source, "<test>").unwrap();
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_f_string() {
let source = "f'Hello world'";
let parse_ast = parse_suite(source, "<test>").unwrap();
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_print_hello() {
let source = "print('Hello world')";
let parse_ast = parse_suite(source, "<test>").unwrap();
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_print_2() {
let source = "print('Hello world', 2)";
let parse_ast = parse_suite(source, "<test>").unwrap();
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_kwargs() {
let source = "my_func('positional', keyword=2)";
let parse_ast = parse_suite(source, "<test>").unwrap();
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_if_elif_else() {
let source = "if 1: 10\nelif 2: 20\nelse: 30";
let parse_ast = parse_suite(source, "<test>").unwrap();
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_lambda() {
let source = "lambda x, y: x * y"; // lambda(x, y): x * y";
let parse_ast = parse_suite(source, "<test>").unwrap();
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_lambda_no_args() {
let source = "lambda: 1";
let parse_ast = parse_suite(source, "<test>").unwrap();
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
@ -693,7 +666,7 @@ mod tests {
fn test_parse_tuples() {
let source = "a, b = 4, 5";
insta::assert_debug_snapshot!(parse_suite(source, "<test>").unwrap());
insta::assert_debug_snapshot!(parse_suite(source).unwrap());
}
#[test]
@ -705,7 +678,7 @@ class Foo(A, B):
def method_with_default(self, arg='default'):
pass
";
insta::assert_debug_snapshot!(parse_suite(source, "<test>").unwrap());
insta::assert_debug_snapshot!(parse_suite(source).unwrap());
}
#[test]
@ -736,7 +709,7 @@ class Foo[**P](): ...
class Foo[X, Y: str, *U, **P]():
pass
";
insta::assert_debug_snapshot!(parse_suite(source, "<test>").unwrap());
insta::assert_debug_snapshot!(parse_suite(source).unwrap());
}
#[test]
fn test_parse_function_definition() {
@ -762,76 +735,76 @@ def func[**P](*args: P.args, **kwargs: P.kwargs):
def func[T, U: str, *Ts, **P]():
pass
";
insta::assert_debug_snapshot!(parse_suite(source, "<test>").unwrap());
insta::assert_debug_snapshot!(parse_suite(source).unwrap());
}
#[test]
fn test_parse_dict_comprehension() {
let source = "{x1: x2 for y in z}";
let parse_ast = parse_expression(source, "<test>").unwrap();
let parse_ast = parse_expression(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_list_comprehension() {
let source = "[x for y in z]";
let parse_ast = parse_expression(source, "<test>").unwrap();
let parse_ast = parse_expression(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_double_list_comprehension() {
let source = "[x for y, y2 in z for a in b if a < 5 if a > 10]";
let parse_ast = parse_expression(source, "<test>").unwrap();
let parse_ast = parse_expression(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_generator_comprehension() {
let source = "(x for y in z)";
let parse_ast = parse_expression(source, "<test>").unwrap();
let parse_ast = parse_expression(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_named_expression_generator_comprehension() {
let source = "(x := y + 1 for y in z)";
let parse_ast = parse_expression(source, "<test>").unwrap();
let parse_ast = parse_expression(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_if_else_generator_comprehension() {
let source = "(x if y else y for y in z)";
let parse_ast = parse_expression(source, "<test>").unwrap();
let parse_ast = parse_expression(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_bool_op_or() {
let source = "x or y";
let parse_ast = parse_expression(source, "<test>").unwrap();
let parse_ast = parse_expression(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_parse_bool_op_and() {
let source = "x and y";
let parse_ast = parse_expression(source, "<test>").unwrap();
let parse_ast = parse_expression(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_slice() {
let source = "x[1:2:3]";
let parse_ast = parse_expression(source, "<test>").unwrap();
let parse_ast = parse_expression(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
#[test]
fn test_named_expression() {
let source = "(x := ( y * z ))";
let parse_ast = parse_expression(source, "<test>").unwrap();
let parse_ast = parse_expression(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
@ -865,7 +838,7 @@ with (0 as a,): pass
with (0 as a, 1 as b): pass
with (0 as a, 1 as b,): pass
";
insta::assert_debug_snapshot!(parse_suite(source, "<test>").unwrap());
insta::assert_debug_snapshot!(parse_suite(source).unwrap());
}
#[test]
@ -889,7 +862,7 @@ with (yield from a): pass
with ((yield)): pass
with ((yield from a)): pass
";
insta::assert_debug_snapshot!(parse_suite(source, "<test>").unwrap());
insta::assert_debug_snapshot!(parse_suite(source).unwrap());
}
#[test]
@ -912,7 +885,7 @@ with ((yield from a)): pass
"with a := 0 as x: pass",
"with (a := 0 as x): pass",
] {
assert!(parse_suite(source, "<test>").is_err());
assert!(parse_suite(source).is_err());
}
}
@ -924,7 +897,7 @@ array[0, *indexes, -1] = array_slice
array[*indexes_to_select, *indexes_to_select]
array[3:5, *indexes_to_select]
";
let parse_ast = parse_suite(source, "<test>").unwrap();
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
@ -937,7 +910,7 @@ array[3:5, *indexes_to_select]
("OFFSET %d" % offset) if offset else None,
)
)"#;
let parse_ast = parse_expression(source, "<test>").unwrap();
let parse_ast = parse_expression(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
@ -950,7 +923,6 @@ except TypeError as e:
print(f'caught {type(e)}')
except OSError as e:
print(f'caught {type(e)}')",
"<test>",
)
.unwrap();
insta::assert_debug_snapshot!(parse_ast);
@ -966,7 +938,6 @@ except* TypeError as e:
print(f'caught {type(e)} with nested {e.exceptions}')
except* OSError as e:
print(f'caught {type(e)} with nested {e.exceptions}')"#,
"<test>",
)
.unwrap();
insta::assert_debug_snapshot!(parse_ast);
@ -974,7 +945,7 @@ except* OSError as e:
#[test]
fn test_dict_unpacking() {
let parse_ast = parse_expression(r#"{"a": "b", **c, "d": "e"}"#, "<test>").unwrap();
let parse_ast = parse_expression(r#"{"a": "b", **c, "d": "e"}"#).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
@ -982,8 +953,8 @@ except* OSError as e:
fn test_modes() {
let source = "a[0][1][2][3][4]";
assert!(parse(source, Mode::Expression, "<embedded>").is_ok());
assert!(parse(source, Mode::Module, "<embedded>").is_ok());
assert!(parse(source, Mode::Expression).is_ok());
assert!(parse(source, Mode::Module).is_ok());
}
#[test]
@ -1030,7 +1001,7 @@ type X[T] \
type X = int; type X = str; type X = type
class X: type X = int
"#;
insta::assert_debug_snapshot!(parse_suite(source, "<test>").unwrap());
insta::assert_debug_snapshot!(parse_suite(source).unwrap());
}
#[test]
@ -1068,13 +1039,13 @@ type = x = 1
x = type = 1
lambda x: type
";
insta::assert_debug_snapshot!(parse_suite(source, "<test>").unwrap());
insta::assert_debug_snapshot!(parse_suite(source).unwrap());
}
#[test]
fn test_invalid_type() {
assert!(parse_suite("a: type X = int", "<test>").is_err());
assert!(parse_suite("lambda: type X = int", "<test>").is_err());
assert!(parse_suite("a: type X = int").is_err());
assert!(parse_suite("lambda: type X = int").is_err());
}
#[test]
@ -1099,7 +1070,7 @@ x = 10000
x = 133333
";
insta::assert_debug_snapshot!(parse_suite(source, "<test>").unwrap());
insta::assert_debug_snapshot!(parse_suite(source).unwrap());
}
#[test]
@ -1125,7 +1096,7 @@ if 10 .real:
y = 100[no]
y = 100(no)
";
assert_debug_snapshot!(parse_suite(source, "<test>").unwrap());
assert_debug_snapshot!(parse_suite(source).unwrap());
}
#[test]
@ -1153,7 +1124,7 @@ match match:
match = lambda query: query == event
print(match(12))
";
insta::assert_debug_snapshot!(parse_suite(source, "<test>").unwrap());
insta::assert_debug_snapshot!(parse_suite(source).unwrap());
}
#[test]
@ -1323,7 +1294,7 @@ match w := x,:
case y as v,:
z = 0
"#;
let parse_ast = parse_suite(source, "<test>").unwrap();
let parse_ast = parse_suite(source).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
@ -1360,7 +1331,6 @@ match x, y,:
case z:
pass
"#,
"<test>",
)
.unwrap();
insta::assert_debug_snapshot!(parse_ast);
@ -1375,7 +1345,6 @@ match x:
case f"{y}":
pass
"#,
"<test>",
)
.err();
assert!(
@ -1390,7 +1359,6 @@ match x:
r"
def args_to_tuple(*args: *Ts) -> Tuple[*Ts]: ...
",
"<test>",
)
.unwrap();
insta::assert_debug_snapshot!(parse_ast);
@ -1409,7 +1377,6 @@ class Abcd:
pass
"
.trim(),
"<test>",
)
.unwrap();
insta::assert_debug_snapshot!(parse_ast);
@ -1486,7 +1453,6 @@ foo.bar[0].baz[2].egg??
"
.trim(),
Mode::Ipython,
"<test>",
)
.unwrap();
insta::assert_debug_snapshot!(parse_ast);
@ -1500,7 +1466,7 @@ a = 1
"
.trim();
let lxr = lexer::lex_starts_at(source, Mode::Ipython, TextSize::default());
let parse_err = parse_tokens(lxr, source, Mode::Module, "<test>").unwrap_err();
let parse_err = parse_tokens(lxr, source, Mode::Module).unwrap_err();
assert_eq!(
parse_err.to_string(),
"IPython escape commands are only allowed in `Mode::Ipython` at byte offset 6"
@ -1540,7 +1506,6 @@ f"""{
}"""
"#
.trim(),
"<test>",
)
.unwrap();
insta::assert_debug_snapshot!(parse_ast);
@ -1556,7 +1521,6 @@ u"foo" f"{bar}" "baz" " some"
u"foo" f"bar {baz} really" u"bar" "no"
"#
.trim(),
"<test>",
)
.unwrap();
insta::assert_debug_snapshot!(parse_ast);
@ -1565,7 +1529,7 @@ u"foo" f"bar {baz} really" u"bar" "no"
#[test]
fn test_unicode_aliases() {
// https://github.com/RustPython/RustPython/issues/4566
let parse_ast = parse_suite(r#"x = "\N{BACKSPACE}another cool trick""#, "<test>").unwrap();
let parse_ast = parse_suite(r#"x = "\N{BACKSPACE}another cool trick""#).unwrap();
insta::assert_debug_snapshot!(parse_ast);
}
}