Pass ParseOptions to the parser (#16220)

## Summary

This is part of the preparation for detecting syntax errors in the
parser from https://github.com/astral-sh/ruff/pull/16090/. As suggested
in [this
comment](https://github.com/astral-sh/ruff/pull/16090/#discussion_r1953084509),
I started working on a `ParseOptions` struct that could be stored in the
parser. For this initial refactor, I only made it hold the existing
`Mode` option, but for syntax errors, we will also need it to have a
`PythonVersion`. For that use case, I'm picturing something like a
`ParseOptions::with_python_version` method, so you can extend the
current calls to something like

```rust
ParseOptions::from(mode).with_python_version(settings.target_version)
```

But I thought it was worth adding `ParseOptions` alone without changing
any other behavior first.

Most of the diff is just updating call sites taking `Mode` to take
`ParseOptions::from(Mode)` or those taking `PySourceType`s to take
`ParseOptions::from(PySourceType)`. The interesting changes are in the
new `parser/options.rs` file and smaller parts of `parser/mod.rs` and
`ruff_python_parser/src/lib.rs`.

## Test Plan

Existing tests; this should not change any behavior.
This commit is contained in:
Brent Westbrook 2025-02-19 10:50:50 -05:00 committed by GitHub
parent cfc6941d5c
commit 97d0659ce3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
25 changed files with 148 additions and 93 deletions

View file

@ -68,6 +68,7 @@ use std::iter::FusedIterator;
use std::ops::Deref;
pub use crate::error::{FStringErrorType, LexicalErrorType, ParseError, ParseErrorType};
pub use crate::parser::ParseOptions;
pub use crate::token::{Token, TokenKind};
use crate::parser::Parser;
@ -110,7 +111,7 @@ pub mod typing;
/// assert!(module.is_ok());
/// ```
pub fn parse_module(source: &str) -> Result<Parsed<ModModule>, ParseError> {
Parser::new(source, Mode::Module)
Parser::new(source, ParseOptions::from(Mode::Module))
.parse()
.try_into_module()
.unwrap()
@ -133,7 +134,7 @@ pub fn parse_module(source: &str) -> Result<Parsed<ModModule>, ParseError> {
/// assert!(expr.is_ok());
/// ```
pub fn parse_expression(source: &str) -> Result<Parsed<ModExpression>, ParseError> {
Parser::new(source, Mode::Expression)
Parser::new(source, ParseOptions::from(Mode::Expression))
.parse()
.try_into_expression()
.unwrap()
@ -161,7 +162,7 @@ pub fn parse_expression_range(
range: TextRange,
) -> Result<Parsed<ModExpression>, ParseError> {
let source = &source[..range.end().to_usize()];
Parser::new_starts_at(source, Mode::Expression, range.start())
Parser::new_starts_at(source, range.start(), ParseOptions::from(Mode::Expression))
.parse()
.try_into_expression()
.unwrap()
@ -187,8 +188,12 @@ pub fn parse_parenthesized_expression_range(
range: TextRange,
) -> Result<Parsed<ModExpression>, ParseError> {
let source = &source[..range.end().to_usize()];
let parsed =
Parser::new_starts_at(source, Mode::ParenthesizedExpression, range.start()).parse();
let parsed = Parser::new_starts_at(
source,
range.start(),
ParseOptions::from(Mode::ParenthesizedExpression),
)
.parse();
parsed.try_into_expression().unwrap().into_result()
}
@ -227,11 +232,11 @@ pub fn parse_string_annotation(
}
}
/// Parse the given Python source code using the specified [`Mode`].
/// Parse the given Python source code using the specified [`ParseOptions`].
///
/// This function is the most general function to parse Python code. Based on the [`Mode`] supplied,
/// it can be used to parse a single expression, a full Python program, an interactive expression
/// or a Python program containing IPython escape commands.
/// This function is the most general function to parse Python code. Based on the [`Mode`] supplied
/// via the [`ParseOptions`], it can be used to parse a single expression, a full Python program,
/// an interactive expression or a Python program containing IPython escape commands.
///
/// # Example
///
@ -239,16 +244,16 @@ pub fn parse_string_annotation(
/// parsing:
///
/// ```
/// use ruff_python_parser::{Mode, parse};
/// use ruff_python_parser::{parse, Mode, ParseOptions};
///
/// let parsed = parse("1 + 2", Mode::Expression);
/// let parsed = parse("1 + 2", ParseOptions::from(Mode::Expression));
/// assert!(parsed.is_ok());
/// ```
///
/// Alternatively, we can parse a full Python program consisting of multiple lines:
///
/// ```
/// use ruff_python_parser::{Mode, parse};
/// use ruff_python_parser::{parse, Mode, ParseOptions};
///
/// let source = r#"
/// class Greeter:
@ -256,39 +261,39 @@ pub fn parse_string_annotation(
/// def greet(self):
/// print("Hello, world!")
/// "#;
/// let parsed = parse(source, Mode::Module);
/// let parsed = parse(source, ParseOptions::from(Mode::Module));
/// assert!(parsed.is_ok());
/// ```
///
/// Additionally, we can parse a Python program containing IPython escapes:
///
/// ```
/// use ruff_python_parser::{Mode, parse};
/// use ruff_python_parser::{parse, Mode, ParseOptions};
///
/// let source = r#"
/// %timeit 1 + 2
/// ?str.replace
/// !ls
/// "#;
/// let parsed = parse(source, Mode::Ipython);
/// let parsed = parse(source, ParseOptions::from(Mode::Ipython));
/// assert!(parsed.is_ok());
/// ```
pub fn parse(source: &str, mode: Mode) -> Result<Parsed<Mod>, ParseError> {
parse_unchecked(source, mode).into_result()
pub fn parse(source: &str, options: ParseOptions) -> Result<Parsed<Mod>, ParseError> {
parse_unchecked(source, options).into_result()
}
/// Parse the given Python source code using the specified [`Mode`].
/// Parse the given Python source code using the specified [`ParseOptions`].
///
/// This is the same as the [`parse`] function except that it doesn't check for any [`ParseError`]
/// and returns the [`Parsed`] as is.
pub fn parse_unchecked(source: &str, mode: Mode) -> Parsed<Mod> {
Parser::new(source, mode).parse()
pub fn parse_unchecked(source: &str, options: ParseOptions) -> Parsed<Mod> {
Parser::new(source, options).parse()
}
/// Parse the given Python source code using the specified [`PySourceType`].
pub fn parse_unchecked_source(source: &str, source_type: PySourceType) -> Parsed<ModModule> {
// SAFETY: Safe because `PySourceType` always parses to a `ModModule`
Parser::new(source, source_type.as_mode())
Parser::new(source, ParseOptions::from(source_type))
.parse()
.try_into_module()
.unwrap()

View file

@ -2265,7 +2265,7 @@ impl<'src> Parser<'src> {
value,
};
if self.mode != Mode::Ipython {
if self.options.mode != Mode::Ipython {
self.add_error(ParseErrorType::UnexpectedIpythonEscapeCommand, &command);
}

View file

@ -13,8 +13,11 @@ use crate::token_source::{TokenSource, TokenSourceCheckpoint};
use crate::{Mode, ParseError, ParseErrorType, TokenKind};
use crate::{Parsed, Tokens};
pub use crate::parser::options::ParseOptions;
mod expression;
mod helpers;
mod options;
mod pattern;
mod progress;
mod recovery;
@ -32,8 +35,8 @@ pub(crate) struct Parser<'src> {
/// Stores all the syntax errors found during the parsing.
errors: Vec<ParseError>,
/// Specify the mode in which the code will be parsed.
mode: Mode,
/// Options for how the code will be parsed.
options: ParseOptions,
/// The ID of the current token. This is used to track the progress of the parser
/// to avoid infinite loops when the parser is stuck.
@ -51,16 +54,20 @@ pub(crate) struct Parser<'src> {
impl<'src> Parser<'src> {
/// Create a new parser for the given source code.
pub(crate) fn new(source: &'src str, mode: Mode) -> Self {
Parser::new_starts_at(source, mode, TextSize::new(0))
pub(crate) fn new(source: &'src str, options: ParseOptions) -> Self {
Parser::new_starts_at(source, TextSize::new(0), options)
}
/// Create a new parser for the given source code which starts parsing at the given offset.
pub(crate) fn new_starts_at(source: &'src str, mode: Mode, start_offset: TextSize) -> Self {
let tokens = TokenSource::from_source(source, mode, start_offset);
pub(crate) fn new_starts_at(
source: &'src str,
start_offset: TextSize,
options: ParseOptions,
) -> Self {
let tokens = TokenSource::from_source(source, options.mode, start_offset);
Parser {
mode,
options,
source,
errors: Vec::new(),
tokens,
@ -73,7 +80,7 @@ impl<'src> Parser<'src> {
/// Consumes the [`Parser`] and returns the parsed [`Parsed`].
pub(crate) fn parse(mut self) -> Parsed<Mod> {
let syntax = match self.mode {
let syntax = match self.options.mode {
Mode::Expression | Mode::ParenthesizedExpression => {
Mod::Expression(self.parse_single_expression())
}

View file

@ -0,0 +1,41 @@
use ruff_python_ast::PySourceType;
use crate::{AsMode, Mode};
/// Options for controlling how a source file is parsed.
///
/// The type implements [`Clone`] so that a single set of options can be reused for several
/// parse calls instead of being rebuilt each time.
///
/// You can construct a [`ParseOptions`] directly from a [`Mode`]:
///
/// ```
/// use ruff_python_parser::{Mode, ParseOptions};
///
/// let options = ParseOptions::from(Mode::Module);
/// ```
///
/// or from a [`PySourceType`]:
///
/// ```
/// use ruff_python_ast::PySourceType;
/// use ruff_python_parser::ParseOptions;
///
/// let options = ParseOptions::from(PySourceType::Python);
/// ```
#[derive(Clone, Debug)]
pub struct ParseOptions {
    /// Specify the mode in which the code will be parsed.
    pub(crate) mode: Mode,
}
impl From<Mode> for ParseOptions {
fn from(mode: Mode) -> Self {
Self { mode }
}
}
impl From<PySourceType> for ParseOptions {
fn from(source_type: PySourceType) -> Self {
Self {
mode: source_type.as_mode(),
}
}
}

View file

@ -304,7 +304,7 @@ impl<'src> Parser<'src> {
op,
start,
))
} else if self.mode == Mode::Ipython && self.at(TokenKind::Question) {
} else if self.options.mode == Mode::Ipython && self.at(TokenKind::Question) {
Stmt::IpyEscapeCommand(
self.parse_ipython_help_end_escape_command_statement(&parsed_expr),
)
@ -932,7 +932,7 @@ impl<'src> Parser<'src> {
};
let range = self.node_range(start);
if self.mode != Mode::Ipython {
if self.options.mode != Mode::Ipython {
self.add_error(ParseErrorType::UnexpectedIpythonEscapeCommand, range);
}

View file

@ -1,11 +1,11 @@
use crate::{parse, parse_expression, parse_module, Mode};
use crate::{parse, parse_expression, parse_module, Mode, ParseOptions};
#[test]
fn test_modes() {
let source = "a[0][1][2][3][4]";
assert!(parse(source, Mode::Expression).is_ok());
assert!(parse(source, Mode::Module).is_ok());
assert!(parse(source, ParseOptions::from(Mode::Expression)).is_ok());
assert!(parse(source, ParseOptions::from(Mode::Module)).is_ok());
}
#[test]
@ -129,7 +129,7 @@ foo.bar[0].baz[1]??
foo.bar[0].baz[2].egg??
"
.trim(),
Mode::Ipython,
ParseOptions::from(Mode::Ipython),
)
.unwrap();
insta::assert_debug_snapshot!(parsed.syntax());