Formatter and parser refactoring (#7569)

I got confused by the naming, so I refactored a bit; the naming should now be
more consistent. This is the basis for the range formatting work.

Changes:
* `format_module` -> `format_module_source` (formats a string; see the sketch after this list)
* `format_node` -> `format_module_ast` (formats a program parsed into an AST)
* Added `parse_ok_tokens`, which takes `Token` instead of `Result<Token>`
* Consistently call the source code `source`
* Added a `tokens_and_ranges` helper
* `python_ast` -> `module` (because that's the type)
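
A minimal usage sketch of the renamed entry point (mine, not from this commit; it assumes `ruff_python_formatter`'s existing `PyFormatOptions` and `Printed` API is otherwise unchanged):

```rust
use ruff_python_formatter::{format_module_source, PyFormatOptions};

fn main() {
    // format_module_source (formerly format_module) takes the source as a
    // string, then parses and formats it in one call.
    let printed = format_module_source("x   =   1", PyFormatOptions::default())
        .expect("example source should format");
    print!("{}", printed.as_code());
}
```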

konsti committed on 2023-09-26 15:29:43 +02:00 (via GitHub)
commit 4d16e2308d, parent 2cb5e43dd7
14 changed files with 126 additions and 138 deletions

@@ -110,8 +110,8 @@
 //! [lexer]: crate::lexer
 
 pub use parser::{
-    parse, parse_expression, parse_expression_starts_at, parse_program, parse_starts_at,
-    parse_suite, parse_tokens, ParseError, ParseErrorType,
+    parse, parse_expression, parse_expression_starts_at, parse_ok_tokens, parse_program,
+    parse_starts_at, parse_suite, parse_tokens, ParseError, ParseErrorType,
 };
 use ruff_python_ast::{CmpOp, Expr, Mod, PySourceType, Suite};
 use ruff_text_size::{Ranged, TextRange, TextSize};

@@ -18,7 +18,7 @@ use itertools::Itertools;
 pub(super) use lalrpop_util::ParseError as LalrpopError;
 use ruff_text_size::{TextRange, TextSize};
 
-use crate::lexer::{lex, lex_starts_at};
+use crate::lexer::{lex, lex_starts_at, Spanned};
 use crate::{
     lexer::{self, LexResult, LexicalError, LexicalErrorType},
     python,
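
For reference (not part of this diff): in `ruff_python_parser::lexer`, `Spanned` is the token-plus-range pair and `LexResult` wraps it in a `Result` to carry lexical errors, which is why the new `parse_ok_tokens` below can accept a token stream with no error handling:

```rust
// Type aliases as they appear in ruff_python_parser::lexer at this commit.
pub type Spanned = (Tok, TextRange);
pub type LexResult = Result<Spanned, LexicalError>;
```
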
@@ -159,7 +159,7 @@ pub fn parse_expression_starts_at(
 /// let program = parse(source, Mode::Ipython, "<embedded>");
 /// assert!(program.is_ok());
 /// ```
-pub fn parse(source: &str, mode: Mode, source_path: &str) -> Result<ast::Mod, ParseError> {
+pub fn parse(source: &str, mode: Mode, source_path: &str) -> Result<Mod, ParseError> {
     parse_starts_at(source, mode, source_path, TextSize::default())
 }
@@ -191,7 +191,7 @@ pub fn parse_starts_at(
     mode: Mode,
     source_path: &str,
     offset: TextSize,
-) -> Result<ast::Mod, ParseError> {
+) -> Result<Mod, ParseError> {
     let lxr = lexer::lex_starts_at(source, mode, offset);
     parse_tokens(lxr, mode, source_path)
 }
@@ -215,7 +215,7 @@ pub fn parse_tokens(
     lxr: impl IntoIterator<Item = LexResult>,
     mode: Mode,
     source_path: &str,
-) -> Result<ast::Mod, ParseError> {
+) -> Result<Mod, ParseError> {
     let lxr = lxr.into_iter();
     parse_filtered_tokens(
@@ -225,19 +225,35 @@
     )
 }
 
+/// Parse tokens into an AST like [`parse_tokens`], but we already know all tokens are valid.
+pub fn parse_ok_tokens(
+    lxr: impl IntoIterator<Item = Spanned>,
+    mode: Mode,
+    source_path: &str,
+) -> Result<Mod, ParseError> {
+    let lxr = lxr
+        .into_iter()
+        .filter(|(tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
+    let marker_token = (Tok::start_marker(mode), TextRange::default());
+    let lexer = iter::once(marker_token)
+        .chain(lxr)
+        .map(|(t, range)| (range.start(), t, range.end()));
+    python::TopParser::new()
+        .parse(mode, lexer)
+        .map_err(|e| parse_error_from_lalrpop(e, source_path))
+}
+
 fn parse_filtered_tokens(
     lxr: impl IntoIterator<Item = LexResult>,
     mode: Mode,
     source_path: &str,
-) -> Result<ast::Mod, ParseError> {
+) -> Result<Mod, ParseError> {
     let marker_token = (Tok::start_marker(mode), TextRange::default());
     let lexer = iter::once(Ok(marker_token)).chain(lxr);
     python::TopParser::new()
         .parse(
             mode,
-            lexer
-                .into_iter()
-                .map_ok(|(t, range)| (range.start(), t, range.end())),
+            lexer.map_ok(|(t, range)| (range.start(), t, range.end())),
         )
         .map_err(|e| parse_error_from_lalrpop(e, source_path))
 }
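
A usage sketch contrasting the two parsing entry points (mine, not from this commit; the `<example>` path and the `expect` calls are illustrative): `parse_tokens` accepts `LexResult` items and reports lexical errors as parse errors, while `parse_ok_tokens` expects the caller to have already ruled them out:

```rust
use ruff_python_parser::{lexer, parse_ok_tokens, parse_tokens, Mode};

fn demo(source: &str) {
    // Fallible path: lexical errors surface through the returned ParseError.
    let from_results = parse_tokens(lexer::lex(source, Mode::Module), Mode::Module, "<example>");

    // Infallible path: unwrap the lexer output first; parse_ok_tokens then
    // filters out comments and non-logical newlines itself (see above).
    let tokens: Vec<_> = lexer::lex(source, Mode::Module)
        .collect::<Result<_, _>>()
        .expect("caller guarantees the source lexes");
    let from_tokens = parse_ok_tokens(tokens, Mode::Module, "<example>");

    assert_eq!(from_results.is_ok(), from_tokens.is_ok());
}
```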