mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-17 17:10:53 +00:00
Add basic docs for the parser crate (#11199)
## Summary This PR adds a basic README for the `ruff_python_parser` crate and updates the CONTRIBUTING docs with the fuzzer and benchmark section. Additionally, it also updates some inline documentation within the parser crate and splits the `parse_program` function into `parse_single_expression` and `parse_module` which will be called by matching against the `Mode`. This PR doesn't go into too much internal detail around the parser logic due to the following reasons: 1. Where should the docs go? Should it be as a module docs in `lib.rs` or in README? 2. The parser is still evolving and could include a lot of refactors with the future work (feedback loop and improved error recovery and resilience) --------- Co-authored-by: Alex Waygood <Alex.Waygood@Gmail.com>
This commit is contained in:
parent
0ed7af35ec
commit
04a922866a
7 changed files with 199 additions and 67 deletions
|
@ -26,6 +26,9 @@ mod statement;
|
|||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
/// Represents the parsed source code.
|
||||
///
|
||||
/// This includes the AST and all of the errors encountered during parsing.
|
||||
#[derive(Debug)]
|
||||
pub struct Program {
|
||||
ast: ast::Mod,
|
||||
|
@ -43,12 +46,12 @@ impl Program {
|
|||
&self.parse_errors
|
||||
}
|
||||
|
||||
/// Consumes the `Program` and returns the parsed AST.
|
||||
/// Consumes the [`Program`] and returns the parsed AST.
|
||||
pub fn into_ast(self) -> ast::Mod {
|
||||
self.ast
|
||||
}
|
||||
|
||||
/// Consumes the `Program` and returns a list of syntax errors found during parsing.
|
||||
/// Consumes the [`Program`] and returns a list of syntax errors found during parsing.
|
||||
pub fn into_errors(self) -> Vec<ParseError> {
|
||||
self.parse_errors
|
||||
}
|
||||
|
@ -58,11 +61,13 @@ impl Program {
|
|||
self.parse_errors.is_empty()
|
||||
}
|
||||
|
||||
/// Parse the given Python source code using the specified [`Mode`].
|
||||
pub fn parse_str(source: &str, mode: Mode) -> Program {
|
||||
let tokens = lex(source, mode);
|
||||
Self::parse_tokens(source, tokens.collect(), mode)
|
||||
}
|
||||
|
||||
/// Parse a vector of [`LexResult`]s using the specified [`Mode`].
|
||||
pub fn parse_tokens(source: &str, tokens: Vec<LexResult>, mode: Mode) -> Program {
|
||||
Parser::new(source, mode, TokenSource::new(tokens)).parse_program()
|
||||
}
|
||||
|
@ -124,49 +129,11 @@ impl<'src> Parser<'src> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Consumes the [`Parser`] and returns the parsed [`Program`].
|
||||
pub(crate) fn parse_program(mut self) -> Program {
|
||||
let ast = if self.mode == Mode::Expression {
|
||||
let start = self.node_start();
|
||||
let parsed_expr = self.parse_expression_list(ExpressionContext::default());
|
||||
|
||||
// All of the remaining newlines are actually going to be non-logical newlines.
|
||||
self.eat(TokenKind::Newline);
|
||||
|
||||
if !self.at(TokenKind::EndOfFile) {
|
||||
self.add_error(
|
||||
ParseErrorType::UnexpectedExpressionToken,
|
||||
self.current_token_range(),
|
||||
);
|
||||
|
||||
// TODO(dhruvmanila): How should error recovery work here? Just truncate after the expression?
|
||||
let mut progress = ParserProgress::default();
|
||||
loop {
|
||||
progress.assert_progressing(&self);
|
||||
if self.at(TokenKind::EndOfFile) {
|
||||
break;
|
||||
}
|
||||
self.next_token();
|
||||
}
|
||||
}
|
||||
|
||||
self.bump(TokenKind::EndOfFile);
|
||||
|
||||
Mod::Expression(ast::ModExpression {
|
||||
body: Box::new(parsed_expr.expr),
|
||||
range: self.node_range(start),
|
||||
})
|
||||
} else {
|
||||
let body = self.parse_list_into_vec(
|
||||
RecoveryContextKind::ModuleStatements,
|
||||
Parser::parse_statement,
|
||||
);
|
||||
|
||||
self.bump(TokenKind::EndOfFile);
|
||||
|
||||
Mod::Module(ast::ModModule {
|
||||
body,
|
||||
range: self.tokens_range,
|
||||
})
|
||||
let ast = match self.mode {
|
||||
Mode::Expression => Mod::Expression(self.parse_single_expression()),
|
||||
Mode::Module | Mode::Ipython => Mod::Module(self.parse_module()),
|
||||
};
|
||||
|
||||
Program {
|
||||
|
@ -175,6 +142,63 @@ impl<'src> Parser<'src> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Parses a single expression.
|
||||
///
|
||||
/// This is to be used for [`Mode::Expression`].
|
||||
///
|
||||
/// ## Recovery
|
||||
///
|
||||
/// After parsing a single expression, an error is reported and all remaining tokens are
|
||||
/// dropped by the parser.
|
||||
fn parse_single_expression(&mut self) -> ast::ModExpression {
|
||||
let start = self.node_start();
|
||||
let parsed_expr = self.parse_expression_list(ExpressionContext::default());
|
||||
|
||||
// All remaining newlines are actually going to be non-logical newlines.
|
||||
self.eat(TokenKind::Newline);
|
||||
|
||||
if !self.at(TokenKind::EndOfFile) {
|
||||
self.add_error(
|
||||
ParseErrorType::UnexpectedExpressionToken,
|
||||
self.current_token_range(),
|
||||
);
|
||||
|
||||
// TODO(dhruvmanila): How should error recovery work here? Just truncate after the expression?
|
||||
let mut progress = ParserProgress::default();
|
||||
loop {
|
||||
progress.assert_progressing(self);
|
||||
if self.at(TokenKind::EndOfFile) {
|
||||
break;
|
||||
}
|
||||
self.next_token();
|
||||
}
|
||||
}
|
||||
|
||||
self.bump(TokenKind::EndOfFile);
|
||||
|
||||
ast::ModExpression {
|
||||
body: Box::new(parsed_expr.expr),
|
||||
range: self.node_range(start),
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses a Python module.
|
||||
///
|
||||
/// This is to be used for [`Mode::Module`] and [`Mode::Ipython`].
|
||||
fn parse_module(&mut self) -> ast::ModModule {
|
||||
let body = self.parse_list_into_vec(
|
||||
RecoveryContextKind::ModuleStatements,
|
||||
Parser::parse_statement,
|
||||
);
|
||||
|
||||
self.bump(TokenKind::EndOfFile);
|
||||
|
||||
ast::ModModule {
|
||||
body,
|
||||
range: self.tokens_range,
|
||||
}
|
||||
}
|
||||
|
||||
fn finish(self) -> Vec<ParseError> {
|
||||
assert_eq!(
|
||||
self.current_token_kind(),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue