mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-02 01:42:25 +00:00
Pass ParseOptions
to the parser (#16220)
## Summary

This is part of the preparation for detecting syntax errors in the parser from https://github.com/astral-sh/ruff/pull/16090/. As suggested in [this comment](https://github.com/astral-sh/ruff/pull/16090/#discussion_r1953084509), I started working on a `ParseOptions` struct that could be stored in the parser. For this initial refactor, I only made it hold the existing `Mode` option, but for syntax errors, we will also need it to have a `PythonVersion`. For that use case, I'm picturing something like a `ParseOptions::with_python_version` method, so you can extend the current calls to something like

```rust
ParseOptions::from(mode).with_python_version(settings.target_version)
```

But I thought it was worth adding `ParseOptions` alone without changing any other behavior first.

Most of the diff is just updating call sites taking `Mode` to take `ParseOptions::from(Mode)` or those taking `PySourceType`s to take `ParseOptions::from(PySourceType)`. The interesting changes are in the new `parser/options.rs` file and smaller parts of `parser/mod.rs` and `ruff_python_parser/src/lib.rs`.

## Test Plan

Existing tests; this should not change any behavior.
This commit is contained in:
parent
cfc6941d5c
commit
97d0659ce3
25 changed files with 148 additions and 93 deletions
|
@ -68,6 +68,7 @@ use std::iter::FusedIterator;
|
|||
use std::ops::Deref;
|
||||
|
||||
pub use crate::error::{FStringErrorType, LexicalErrorType, ParseError, ParseErrorType};
|
||||
pub use crate::parser::ParseOptions;
|
||||
pub use crate::token::{Token, TokenKind};
|
||||
|
||||
use crate::parser::Parser;
|
||||
|
@ -110,7 +111,7 @@ pub mod typing;
|
|||
/// assert!(module.is_ok());
|
||||
/// ```
|
||||
pub fn parse_module(source: &str) -> Result<Parsed<ModModule>, ParseError> {
|
||||
Parser::new(source, Mode::Module)
|
||||
Parser::new(source, ParseOptions::from(Mode::Module))
|
||||
.parse()
|
||||
.try_into_module()
|
||||
.unwrap()
|
||||
|
@ -133,7 +134,7 @@ pub fn parse_module(source: &str) -> Result<Parsed<ModModule>, ParseError> {
|
|||
/// assert!(expr.is_ok());
|
||||
/// ```
|
||||
pub fn parse_expression(source: &str) -> Result<Parsed<ModExpression>, ParseError> {
|
||||
Parser::new(source, Mode::Expression)
|
||||
Parser::new(source, ParseOptions::from(Mode::Expression))
|
||||
.parse()
|
||||
.try_into_expression()
|
||||
.unwrap()
|
||||
|
@ -161,7 +162,7 @@ pub fn parse_expression_range(
|
|||
range: TextRange,
|
||||
) -> Result<Parsed<ModExpression>, ParseError> {
|
||||
let source = &source[..range.end().to_usize()];
|
||||
Parser::new_starts_at(source, Mode::Expression, range.start())
|
||||
Parser::new_starts_at(source, range.start(), ParseOptions::from(Mode::Expression))
|
||||
.parse()
|
||||
.try_into_expression()
|
||||
.unwrap()
|
||||
|
@ -187,8 +188,12 @@ pub fn parse_parenthesized_expression_range(
|
|||
range: TextRange,
|
||||
) -> Result<Parsed<ModExpression>, ParseError> {
|
||||
let source = &source[..range.end().to_usize()];
|
||||
let parsed =
|
||||
Parser::new_starts_at(source, Mode::ParenthesizedExpression, range.start()).parse();
|
||||
let parsed = Parser::new_starts_at(
|
||||
source,
|
||||
range.start(),
|
||||
ParseOptions::from(Mode::ParenthesizedExpression),
|
||||
)
|
||||
.parse();
|
||||
parsed.try_into_expression().unwrap().into_result()
|
||||
}
|
||||
|
||||
|
@ -227,11 +232,11 @@ pub fn parse_string_annotation(
|
|||
}
|
||||
}
|
||||
|
||||
/// Parse the given Python source code using the specified [`Mode`].
|
||||
/// Parse the given Python source code using the specified [`ParseOptions`].
|
||||
///
|
||||
/// This function is the most general function to parse Python code. Based on the [`Mode`] supplied,
|
||||
/// it can be used to parse a single expression, a full Python program, an interactive expression
|
||||
/// or a Python program containing IPython escape commands.
|
||||
/// This function is the most general function to parse Python code. Based on the [`Mode`] supplied
|
||||
/// via the [`ParseOptions`], it can be used to parse a single expression, a full Python program,
|
||||
/// an interactive expression or a Python program containing IPython escape commands.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
|
@ -239,16 +244,16 @@ pub fn parse_string_annotation(
|
|||
/// parsing:
|
||||
///
|
||||
/// ```
|
||||
/// use ruff_python_parser::{Mode, parse};
|
||||
/// use ruff_python_parser::{parse, Mode, ParseOptions};
|
||||
///
|
||||
/// let parsed = parse("1 + 2", Mode::Expression);
|
||||
/// let parsed = parse("1 + 2", ParseOptions::from(Mode::Expression));
|
||||
/// assert!(parsed.is_ok());
|
||||
/// ```
|
||||
///
|
||||
/// Alternatively, we can parse a full Python program consisting of multiple lines:
|
||||
///
|
||||
/// ```
|
||||
/// use ruff_python_parser::{Mode, parse};
|
||||
/// use ruff_python_parser::{parse, Mode, ParseOptions};
|
||||
///
|
||||
/// let source = r#"
|
||||
/// class Greeter:
|
||||
|
@ -256,39 +261,39 @@ pub fn parse_string_annotation(
|
|||
/// def greet(self):
|
||||
/// print("Hello, world!")
|
||||
/// "#;
|
||||
/// let parsed = parse(source, Mode::Module);
|
||||
/// let parsed = parse(source, ParseOptions::from(Mode::Module));
|
||||
/// assert!(parsed.is_ok());
|
||||
/// ```
|
||||
///
|
||||
/// Additionally, we can parse a Python program containing IPython escapes:
|
||||
///
|
||||
/// ```
|
||||
/// use ruff_python_parser::{Mode, parse};
|
||||
/// use ruff_python_parser::{parse, Mode, ParseOptions};
|
||||
///
|
||||
/// let source = r#"
|
||||
/// %timeit 1 + 2
|
||||
/// ?str.replace
|
||||
/// !ls
|
||||
/// "#;
|
||||
/// let parsed = parse(source, Mode::Ipython);
|
||||
/// let parsed = parse(source, ParseOptions::from(Mode::Ipython));
|
||||
/// assert!(parsed.is_ok());
|
||||
/// ```
|
||||
pub fn parse(source: &str, mode: Mode) -> Result<Parsed<Mod>, ParseError> {
|
||||
parse_unchecked(source, mode).into_result()
|
||||
pub fn parse(source: &str, options: ParseOptions) -> Result<Parsed<Mod>, ParseError> {
|
||||
parse_unchecked(source, options).into_result()
|
||||
}
|
||||
|
||||
/// Parse the given Python source code using the specified [`Mode`].
|
||||
/// Parse the given Python source code using the specified [`ParseOptions`].
|
||||
///
|
||||
/// This is the same as the [`parse`] function except that it doesn't check for any [`ParseError`]
|
||||
/// and returns the [`Parsed`] as is.
|
||||
pub fn parse_unchecked(source: &str, mode: Mode) -> Parsed<Mod> {
|
||||
Parser::new(source, mode).parse()
|
||||
pub fn parse_unchecked(source: &str, options: ParseOptions) -> Parsed<Mod> {
|
||||
Parser::new(source, options).parse()
|
||||
}
|
||||
|
||||
/// Parse the given Python source code using the specified [`PySourceType`].
|
||||
pub fn parse_unchecked_source(source: &str, source_type: PySourceType) -> Parsed<ModModule> {
|
||||
// SAFETY: Safe because `PySourceType` always parses to a `ModModule`
|
||||
Parser::new(source, source_type.as_mode())
|
||||
Parser::new(source, ParseOptions::from(source_type))
|
||||
.parse()
|
||||
.try_into_module()
|
||||
.unwrap()
|
||||
|
|
|
@ -2265,7 +2265,7 @@ impl<'src> Parser<'src> {
|
|||
value,
|
||||
};
|
||||
|
||||
if self.mode != Mode::Ipython {
|
||||
if self.options.mode != Mode::Ipython {
|
||||
self.add_error(ParseErrorType::UnexpectedIpythonEscapeCommand, &command);
|
||||
}
|
||||
|
||||
|
|
|
@ -13,8 +13,11 @@ use crate::token_source::{TokenSource, TokenSourceCheckpoint};
|
|||
use crate::{Mode, ParseError, ParseErrorType, TokenKind};
|
||||
use crate::{Parsed, Tokens};
|
||||
|
||||
pub use crate::parser::options::ParseOptions;
|
||||
|
||||
mod expression;
|
||||
mod helpers;
|
||||
mod options;
|
||||
mod pattern;
|
||||
mod progress;
|
||||
mod recovery;
|
||||
|
@ -32,8 +35,8 @@ pub(crate) struct Parser<'src> {
|
|||
/// Stores all the syntax errors found during the parsing.
|
||||
errors: Vec<ParseError>,
|
||||
|
||||
/// Specify the mode in which the code will be parsed.
|
||||
mode: Mode,
|
||||
/// Options for how the code will be parsed.
|
||||
options: ParseOptions,
|
||||
|
||||
/// The ID of the current token. This is used to track the progress of the parser
|
||||
/// to avoid infinite loops when the parser is stuck.
|
||||
|
@ -51,16 +54,20 @@ pub(crate) struct Parser<'src> {
|
|||
|
||||
impl<'src> Parser<'src> {
|
||||
/// Create a new parser for the given source code.
|
||||
pub(crate) fn new(source: &'src str, mode: Mode) -> Self {
|
||||
Parser::new_starts_at(source, mode, TextSize::new(0))
|
||||
pub(crate) fn new(source: &'src str, options: ParseOptions) -> Self {
|
||||
Parser::new_starts_at(source, TextSize::new(0), options)
|
||||
}
|
||||
|
||||
/// Create a new parser for the given source code which starts parsing at the given offset.
|
||||
pub(crate) fn new_starts_at(source: &'src str, mode: Mode, start_offset: TextSize) -> Self {
|
||||
let tokens = TokenSource::from_source(source, mode, start_offset);
|
||||
pub(crate) fn new_starts_at(
|
||||
source: &'src str,
|
||||
start_offset: TextSize,
|
||||
options: ParseOptions,
|
||||
) -> Self {
|
||||
let tokens = TokenSource::from_source(source, options.mode, start_offset);
|
||||
|
||||
Parser {
|
||||
mode,
|
||||
options,
|
||||
source,
|
||||
errors: Vec::new(),
|
||||
tokens,
|
||||
|
@ -73,7 +80,7 @@ impl<'src> Parser<'src> {
|
|||
|
||||
/// Consumes the [`Parser`] and returns the parsed [`Parsed`].
|
||||
pub(crate) fn parse(mut self) -> Parsed<Mod> {
|
||||
let syntax = match self.mode {
|
||||
let syntax = match self.options.mode {
|
||||
Mode::Expression | Mode::ParenthesizedExpression => {
|
||||
Mod::Expression(self.parse_single_expression())
|
||||
}
|
||||
|
|
41
crates/ruff_python_parser/src/parser/options.rs
Normal file
41
crates/ruff_python_parser/src/parser/options.rs
Normal file
|
@ -0,0 +1,41 @@
|
|||
use ruff_python_ast::PySourceType;
|
||||
|
||||
use crate::{AsMode, Mode};
|
||||
|
||||
/// Options for controlling how a source file is parsed.
|
||||
///
|
||||
/// You can construct a [`ParseOptions`] directly from a [`Mode`]:
|
||||
///
|
||||
/// ```
|
||||
/// use ruff_python_parser::{Mode, ParseOptions};
|
||||
///
|
||||
/// let options = ParseOptions::from(Mode::Module);
|
||||
/// ```
|
||||
///
|
||||
/// or from a [`PySourceType`]:
|
||||
///
|
||||
/// ```
|
||||
/// use ruff_python_ast::PySourceType;
|
||||
/// use ruff_python_parser::ParseOptions;
|
||||
///
|
||||
/// let options = ParseOptions::from(PySourceType::Python);
|
||||
/// ```
|
||||
#[derive(Debug)]
|
||||
pub struct ParseOptions {
|
||||
/// Specify the mode in which the code will be parsed.
|
||||
pub(crate) mode: Mode,
|
||||
}
|
||||
|
||||
impl From<Mode> for ParseOptions {
|
||||
fn from(mode: Mode) -> Self {
|
||||
Self { mode }
|
||||
}
|
||||
}
|
||||
|
||||
impl From<PySourceType> for ParseOptions {
|
||||
fn from(source_type: PySourceType) -> Self {
|
||||
Self {
|
||||
mode: source_type.as_mode(),
|
||||
}
|
||||
}
|
||||
}
|
|
@ -304,7 +304,7 @@ impl<'src> Parser<'src> {
|
|||
op,
|
||||
start,
|
||||
))
|
||||
} else if self.mode == Mode::Ipython && self.at(TokenKind::Question) {
|
||||
} else if self.options.mode == Mode::Ipython && self.at(TokenKind::Question) {
|
||||
Stmt::IpyEscapeCommand(
|
||||
self.parse_ipython_help_end_escape_command_statement(&parsed_expr),
|
||||
)
|
||||
|
@ -932,7 +932,7 @@ impl<'src> Parser<'src> {
|
|||
};
|
||||
|
||||
let range = self.node_range(start);
|
||||
if self.mode != Mode::Ipython {
|
||||
if self.options.mode != Mode::Ipython {
|
||||
self.add_error(ParseErrorType::UnexpectedIpythonEscapeCommand, range);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
use crate::{parse, parse_expression, parse_module, Mode};
|
||||
use crate::{parse, parse_expression, parse_module, Mode, ParseOptions};
|
||||
|
||||
#[test]
|
||||
fn test_modes() {
|
||||
let source = "a[0][1][2][3][4]";
|
||||
|
||||
assert!(parse(source, Mode::Expression).is_ok());
|
||||
assert!(parse(source, Mode::Module).is_ok());
|
||||
assert!(parse(source, ParseOptions::from(Mode::Expression)).is_ok());
|
||||
assert!(parse(source, ParseOptions::from(Mode::Module)).is_ok());
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -129,7 +129,7 @@ foo.bar[0].baz[1]??
|
|||
foo.bar[0].baz[2].egg??
|
||||
"
|
||||
.trim(),
|
||||
Mode::Ipython,
|
||||
ParseOptions::from(Mode::Ipython),
|
||||
)
|
||||
.unwrap();
|
||||
insta::assert_debug_snapshot!(parsed.syntax());
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue