mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-08 20:58:05 +00:00
Add basic docs for the parser crate (#11199)
## Summary

This PR adds a basic README for the `ruff_python_parser` crate and updates the CONTRIBUTING docs with a section on the fuzzer and the benchmarks.

It also updates some inline documentation within the parser crate and splits the `parse_program` function into `parse_single_expression` and `parse_module`, which are called by matching against the `Mode`.

This PR doesn't go into much internal detail about the parser logic, for the following reasons:

1. Where should the docs go? Should they be module docs in `lib.rs` or live in the README?
2. The parser is still evolving and could see a lot of refactoring as part of future work (feedback loop, improved error recovery and resilience).

---------

Co-authored-by: Alex Waygood <Alex.Waygood@Gmail.com>
This commit is contained in:
parent
0ed7af35ec
commit
04a922866a
7 changed files with 199 additions and 67 deletions
|
@ -1,7 +1,7 @@
|
|||
//! This crate can be used to parse Python source code into an Abstract
|
||||
//! Syntax Tree.
|
||||
//!
|
||||
//! ## Overview:
|
||||
//! ## Overview
|
||||
//!
|
||||
//! The process by which source code is parsed into an AST can be broken down
|
||||
//! into two general stages: [lexical analysis] and [parsing].
|
||||
|
@ -15,7 +15,7 @@
|
|||
//! Name("print"), LeftParen, String("Hello world"), RightParen
|
||||
//! ```
|
||||
//!
|
||||
//! these tokens are then consumed by the `ruff_python_parser`, which matches them against a set of
|
||||
//! These tokens are then consumed by the `ruff_python_parser`, which matches them against a set of
|
||||
//! grammar rules to verify that the source code is syntactically valid and to construct
|
||||
//! an AST that represents the source code.
|
||||
//!
|
||||
|
@ -48,16 +48,16 @@
|
|||
//! },
|
||||
//!```
|
||||
//!
|
||||
//! Note: The Tokens/ASTs shown above are not the exact tokens/ASTs generated by the `ruff_python_parser`.
|
||||
//! **Note:** The Tokens/ASTs shown above are not the exact tokens/ASTs generated by the `ruff_python_parser`.
|
||||
//! Refer to the [playground](https://play.ruff.rs) for the correct representation.
|
||||
//!
|
||||
//! ## Source code layout:
|
||||
//! ## Source code layout
|
||||
//!
|
||||
//! The functionality of this crate is split into several modules:
|
||||
//!
|
||||
//! - token: This module contains the definition of the tokens that are generated by the lexer.
|
||||
//! - [lexer]: This module contains the lexer and is responsible for generating the tokens.
|
||||
//! - `ruff_python_parser`: This module contains an interface to the `ruff_python_parser` and is responsible for generating the AST.
|
||||
//! - Functions and strings have special parsing requirements that are handled in additional files.
|
||||
//! - parser: This module contains an interface to the [Program] and is responsible for generating the AST.
|
||||
//! - mode: This module contains the definition of the different modes that the `ruff_python_parser` can be in.
|
||||
//!
|
||||
//! # Examples
|
||||
|
@ -78,14 +78,15 @@
|
|||
//! These tokens can be directly fed into the `ruff_python_parser` to generate an AST:
|
||||
//!
|
||||
//! ```
|
||||
//! use ruff_python_parser::{Mode, parse_tokens, tokenize_all};
|
||||
//! use ruff_python_parser::lexer::lex;
|
||||
//! use ruff_python_parser::{Mode, parse_tokens};
|
||||
//!
|
||||
//! let python_source = r#"
|
||||
//! def is_odd(i):
|
||||
//! return bool(i & 1)
|
||||
//! "#;
|
||||
//! let tokens = tokenize_all(python_source, Mode::Module);
|
||||
//! let ast = parse_tokens(tokens, python_source, Mode::Module);
|
||||
//! let tokens = lex(python_source, Mode::Module);
|
||||
//! let ast = parse_tokens(tokens.collect(), python_source, Mode::Module);
|
||||
//!
|
||||
//! assert!(ast.is_ok());
|
||||
//! ```
|
||||
|
@ -138,14 +139,16 @@ pub mod typing;
|
|||
/// For example, parsing a simple function definition and a call to that function:
|
||||
///
|
||||
/// ```
|
||||
/// use ruff_python_parser as parser;
|
||||
/// use ruff_python_parser::parse_program;
|
||||
///
|
||||
/// let source = r#"
|
||||
/// def foo():
|
||||
/// return 42
|
||||
///
|
||||
/// print(foo())
|
||||
/// "#;
|
||||
/// let program = parser::parse_program(source);
|
||||
///
|
||||
/// let program = parse_program(source);
|
||||
/// assert!(program.is_ok());
|
||||
/// ```
|
||||
pub fn parse_program(source: &str) -> Result<ModModule, ParseError> {
|
||||
|
@ -156,6 +159,28 @@ pub fn parse_program(source: &str) -> Result<ModModule, ParseError> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Parse a full Python program into a [`Suite`].
|
||||
///
|
||||
/// This function is similar to [`parse_program`] except that it returns the module body
|
||||
/// instead of the module itself.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// For example, parsing a simple function definition and a call to that function:
|
||||
///
|
||||
/// ```
|
||||
/// use ruff_python_parser::parse_suite;
|
||||
///
|
||||
/// let source = r#"
|
||||
/// def foo():
|
||||
/// return 42
|
||||
///
|
||||
/// print(foo())
|
||||
/// "#;
|
||||
///
|
||||
/// let body = parse_suite(source);
|
||||
/// assert!(body.is_ok());
|
||||
/// ```
|
||||
///
/// # Errors
///
/// Propagates any [`ParseError`] returned by [`parse_program`].
pub fn parse_suite(source: &str) -> Result<Suite, ParseError> {
|
||||
// Delegate to `parse_program` and keep only the module's `body`.
parse_program(source).map(|m| m.body)
|
||||
}
|
||||
|
@ -169,12 +194,11 @@ pub fn parse_suite(source: &str) -> Result<Suite, ParseError> {
|
|||
///
|
||||
/// For example, parsing a single expression denoting the addition of two numbers:
|
||||
///
|
||||
/// ```
|
||||
/// use ruff_python_parser as parser;
|
||||
/// let expr = parser::parse_expression("1 + 2");
|
||||
/// ```
|
||||
/// use ruff_python_parser::parse_expression;
|
||||
///
|
||||
/// let expr = parse_expression("1 + 2");
|
||||
/// assert!(expr.is_ok());
|
||||
///
|
||||
/// ```
|
||||
pub fn parse_expression(source: &str) -> Result<Expr, ParseError> {
|
||||
let lexer = lex(source, Mode::Expression).collect();
|
||||
|
@ -195,7 +219,7 @@ pub fn parse_expression(source: &str) -> Result<Expr, ParseError> {
|
|||
/// somewhat silly, location:
|
||||
///
|
||||
/// ```
|
||||
/// use ruff_python_parser::{parse_expression_starts_at};
|
||||
/// use ruff_python_parser::parse_expression_starts_at;
|
||||
/// # use ruff_text_size::TextSize;
|
||||
///
|
||||
/// let expr = parse_expression_starts_at("1 + 2", TextSize::from(400));
|
||||
|
@ -262,7 +286,7 @@ pub fn parse(source: &str, mode: Mode) -> Result<Mod, ParseError> {
|
|||
|
||||
/// Parse the given Python source code using the specified [`Mode`] and [`TextSize`].
|
||||
///
|
||||
/// This function allows to specify the location of the the source code, other than
|
||||
/// This function allows specifying the location of the source code; other than
|
||||
/// that, it behaves exactly like [`parse`].
|
||||
///
|
||||
/// # Example
|
||||
|
@ -298,10 +322,12 @@ pub fn parse_starts_at(source: &str, mode: Mode, offset: TextSize) -> Result<Mod
|
|||
/// them using the [`lexer::lex`] function:
|
||||
///
|
||||
/// ```
|
||||
/// use ruff_python_parser::{lexer::lex, Mode, parse_tokens};
|
||||
/// use ruff_python_parser::lexer::lex;
|
||||
/// use ruff_python_parser::{Mode, parse_tokens};
|
||||
///
|
||||
/// let source = "1 + 2";
|
||||
/// let expr = parse_tokens(lex(source, Mode::Expression).collect(), source, Mode::Expression);
|
||||
/// let tokens = lex(source, Mode::Expression);
|
||||
/// let expr = parse_tokens(tokens.collect(), source, Mode::Expression);
|
||||
/// assert!(expr.is_ok());
|
||||
/// ```
|
||||
pub fn parse_tokens(tokens: Vec<LexResult>, source: &str, mode: Mode) -> Result<Mod, ParseError> {
|
||||
|
@ -370,13 +396,16 @@ pub fn parse_program_tokens(
|
|||
}
|
||||
|
||||
/// Controls the different modes by which a source file can be parsed.
|
||||
///
|
||||
/// The mode argument specifies in what way code must be parsed.
|
||||
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
|
||||
pub enum Mode {
|
||||
/// The code consists of a sequence of statements.
|
||||
Module,
|
||||
|
||||
/// The code consists of a single expression.
|
||||
Expression,
|
||||
|
||||
/// The code consists of a sequence of statements which can include the
|
||||
/// escape commands that are part of IPython syntax.
|
||||
///
|
||||
|
@ -408,6 +437,7 @@ impl std::str::FromStr for Mode {
|
|||
}
|
||||
}
|
||||
|
||||
/// A type that can be represented as a [`Mode`].
|
||||
pub trait AsMode {
|
||||
/// Returns the [`Mode`] that represents this value.
fn as_mode(&self) -> Mode;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue