Mirror of https://github.com/astral-sh/ruff.git, synced 2025-07-19 02:55:20 +00:00
Approximate tokens len (#9546)

commit 47ad7b4500 (parent b3a6f0ce81)
5 changed files with 38 additions and 13 deletions
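The change in a nutshell: instead of growing each token vector from empty, the parser pre-allocates it from the source length, estimating a lower bound of roughly 15 tokens per 100 bytes. A minimal standalone sketch of that heuristic, with the arithmetic taken verbatim from the diff below (the toy `main` is illustrative, not from the PR):

```rust
/// Lower-bound estimate of the token count for `contents`: ~15 tokens per
/// 100 bytes of source. `saturating_mul` guards against overflow on huge inputs.
fn approximate_tokens_lower_bound(contents: &str) -> usize {
    contents.len().saturating_mul(15) / 100
}

fn main() {
    let source = "x = 1\n".repeat(171); // 1026 bytes of toy source
    let estimate = approximate_tokens_lower_bound(&source);
    assert_eq!(estimate, 153); // 1026 * 15 / 100 = 153
    // Pre-size the token vector so typical pushes never reallocate.
    let tokens: Vec<u8> = Vec::with_capacity(estimate);
    assert!(tokens.capacity() >= 153);
}
```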
```diff
@@ -7,7 +7,7 @@ use ruff_benchmark::{TestCase, TestFile, TestFileDownloadError};
 use ruff_python_formatter::{format_module_ast, PreviewMode, PyFormatOptions};
 use ruff_python_index::CommentRangesBuilder;
 use ruff_python_parser::lexer::lex;
-use ruff_python_parser::{parse_tokens, Mode};
+use ruff_python_parser::{allocate_tokens_vec, parse_tokens, Mode};
 
 #[cfg(target_os = "windows")]
 #[global_allocator]
@@ -52,7 +52,7 @@ fn benchmark_formatter(criterion: &mut Criterion) {
             BenchmarkId::from_parameter(case.name()),
             &case,
             |b, case| {
-                let mut tokens = Vec::new();
+                let mut tokens = allocate_tokens_vec(case.code());
                 let mut comment_ranges = CommentRangesBuilder::default();
 
                 for result in lex(case.code(), Mode::Module) {
```
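Why this matters for the formatter benchmark above: pushing into a `Vec::new()` reallocates every time capacity is exhausted, while a pre-sized vector typically never does. A small self-contained demonstration of that effect (not from the PR):

```rust
fn main() {
    let mut grown: Vec<u32> = Vec::new();
    let mut presized: Vec<u32> = Vec::with_capacity(1_000);
    let mut reallocations = 0;
    for i in 0..1_000u32 {
        // A push while len == capacity forces a reallocation (capacity 0 included).
        if grown.len() == grown.capacity() {
            reallocations += 1;
        }
        grown.push(i);
        presized.push(i); // never outgrows its initial capacity
    }
    println!("grown vector reallocated {reallocations} times; presized: 0");
    assert!(presized.capacity() >= 1_000);
}
```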
```diff
@@ -2,7 +2,7 @@ use std::fmt::Debug;
 
 use ruff_python_ast::PySourceType;
 use ruff_python_parser::lexer::{lex, LexResult, LexicalError};
-use ruff_python_parser::{AsMode, Tok};
+use ruff_python_parser::{allocate_tokens_vec, AsMode, Tok};
 use ruff_python_trivia::CommentRanges;
 use ruff_text_size::TextRange;
 
@@ -28,7 +28,7 @@ pub fn tokens_and_ranges(
     source: &str,
     source_type: PySourceType,
 ) -> Result<(Vec<LexResult>, CommentRanges), LexicalError> {
-    let mut tokens = Vec::new();
+    let mut tokens = allocate_tokens_vec(source);
     let mut comment_ranges = CommentRangesBuilder::default();
 
     for result in lex(source, source_type.as_mode()) {
```
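For orientation, a hedged usage sketch of `tokens_and_ranges` as its signature appears in the hunk above; the `ruff_python_index` import path is an assumption, not confirmed by the diff:

```rust
use ruff_python_ast::PySourceType;
// Assumed export path for the function shown above; adjust to the real module.
use ruff_python_index::tokens_and_ranges;

fn main() {
    let source = "x = 1  # a trailing comment\n";
    // One lexer pass produces both the token stream and the comment ranges;
    // the token vector is now pre-sized via allocate_tokens_vec(source).
    let (tokens, _comment_ranges) =
        tokens_and_ranges(source, PySourceType::Python).expect("source lexes cleanly");
    println!("{} tokens", tokens.len());
}
```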
```diff
@@ -78,14 +78,14 @@
 //! These tokens can be directly fed into the `ruff_python_parser` to generate an AST:
 //!
 //! ```
-//! use ruff_python_parser::{lexer::lex, Mode, parse_tokens};
+//! use ruff_python_parser::{Mode, parse_tokens, tokenize_all};
 //!
 //! let python_source = r#"
 //! def is_odd(i):
 //!    return bool(i & 1)
 //! "#;
-//! let tokens = lex(python_source, Mode::Module);
-//! let ast = parse_tokens(tokens.collect(), python_source, Mode::Module);
+//! let tokens = tokenize_all(python_source, Mode::Module);
+//! let ast = parse_tokens(tokens, python_source, Mode::Module);
 //!
 //! assert!(ast.is_ok());
 //! ```
@@ -133,7 +133,7 @@ pub mod typing;
 
 /// Collect tokens up to and including the first error.
 pub fn tokenize(contents: &str, mode: Mode) -> Vec<LexResult> {
-    let mut tokens: Vec<LexResult> = vec![];
+    let mut tokens: Vec<LexResult> = allocate_tokens_vec(contents);
     for tok in lexer::lex(contents, mode) {
         let is_err = tok.is_err();
         tokens.push(tok);
@@ -141,9 +141,35 @@ pub fn tokenize(contents: &str, mode: Mode) -> Vec<LexResult> {
             break;
         }
     }
     tokens
 }
 
+/// Tokenizes all tokens.
+///
+/// It differs from [`tokenize`] in that it tokenizes all tokens and doesn't stop
+/// after the first `Err`.
+pub fn tokenize_all(contents: &str, mode: Mode) -> Vec<LexResult> {
+    let mut tokens = allocate_tokens_vec(contents);
+    for token in lexer::lex(contents, mode) {
+        tokens.push(token);
+    }
+    tokens
+}
+
+/// Allocates a [`Vec`] with an approximated capacity to fit all tokens
+/// of `contents`.
+///
+/// See [#9546](https://github.com/astral-sh/ruff/pull/9546) for a more detailed explanation.
+pub fn allocate_tokens_vec(contents: &str) -> Vec<LexResult> {
+    Vec::with_capacity(approximate_tokens_lower_bound(contents))
+}
+
+/// Approximates the number of tokens when lexing `contents`.
+fn approximate_tokens_lower_bound(contents: &str) -> usize {
+    contents.len().saturating_mul(15) / 100
+}
+
 /// Parse a full Python program from its tokens.
 pub fn parse_program_tokens(
     tokens: Vec<LexResult>,
```
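Putting the new `ruff_python_parser` surface together, application code follows the updated doctest pattern. This sketch is adapted directly from the doc comment in the diff above:

```rust
use ruff_python_parser::{parse_tokens, tokenize_all, Mode};

fn main() {
    let python_source = r#"
def is_odd(i):
    return bool(i & 1)
"#;
    // Unlike `tokenize`, `tokenize_all` lexes to the end even past errors,
    // and both now start from the pre-sized vector of allocate_tokens_vec.
    let tokens = tokenize_all(python_source, Mode::Module);
    let ast = parse_tokens(tokens, python_source, Mode::Module);
    assert!(ast.is_ok());
}
```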
```diff
@@ -31,7 +31,7 @@ use crate::{
     lexer::{self, LexicalError, LexicalErrorType},
     python,
     token::Tok,
-    Mode,
+    tokenize_all, Mode,
 };
 
 /// Parse a full Python program usually consisting of multiple lines.
@@ -55,8 +55,7 @@ use crate::{
 /// assert!(program.is_ok());
 /// ```
 pub fn parse_program(source: &str) -> Result<ModModule, ParseError> {
-    let lexer = lex(source, Mode::Module);
-    match parse_tokens(lexer.collect(), source, Mode::Module)? {
+    match parse_tokens(tokenize_all(source, Mode::Module), source, Mode::Module)? {
         Mod::Module(m) => Ok(m),
         Mod::Expression(_) => unreachable!("Mode::Module doesn't return other variant"),
     }
```
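For callers, `parse_program` is unchanged on the outside; only its internals now route through `tokenize_all` instead of collecting the lexer. A usage sketch matching the doc comment visible in the hunk above:

```rust
use ruff_python_parser::parse_program;

fn main() {
    // parse_program returns Result<ModModule, ParseError>, as in the diff.
    let program = parse_program("def is_odd(i):\n    return bool(i & 1)\n");
    assert!(program.is_ok());
}
```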
```diff
@@ -17,7 +17,7 @@ use ruff_python_codegen::Stylist;
 use ruff_python_formatter::{format_module_ast, pretty_comments, PyFormatContext, QuoteStyle};
 use ruff_python_index::{CommentRangesBuilder, Indexer};
 use ruff_python_parser::lexer::LexResult;
-use ruff_python_parser::{parse_tokens, AsMode, Mode};
+use ruff_python_parser::{parse_tokens, tokenize_all, AsMode, Mode};
 use ruff_python_trivia::CommentRanges;
 use ruff_source_file::{Locator, SourceLocation};
 use ruff_text_size::Ranged;
@@ -272,7 +272,7 @@ struct ParsedModule<'a> {
 
 impl<'a> ParsedModule<'a> {
     fn from_source(source_code: &'a str) -> Result<Self, Error> {
-        let tokens: Vec<_> = ruff_python_parser::lexer::lex(source_code, Mode::Module).collect();
+        let tokens: Vec<_> = tokenize_all(source_code, Mode::Module);
         let mut comment_ranges = CommentRangesBuilder::default();
 
         for (token, range) in tokens.iter().flatten() {
```
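A note on the `tokens.iter().flatten()` loop in the last hunk: iterating a `&Result<T, E>` yields the `Ok` value (zero or one items), so flattening a slice of lex results visits only the successful tokens and silently skips errors. A standalone demonstration with hypothetical token data:

```rust
fn main() {
    // Stand-ins for LexResult items: Ok((token, range)) or a lex error.
    let results: Vec<Result<(&str, u32), &str>> =
        vec![Ok(("def", 0)), Err("bad token"), Ok(("name", 4))];

    // &Result implements IntoIterator over its Ok value, so flatten() keeps Oks.
    for (token, range) in results.iter().flatten() {
        println!("{token} @ {range}");
    }
    // Prints "def @ 0" and "name @ 4"; the Err is skipped.
}
```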